This skill provides guidance for video analysis and processing tasks using computer vision techniques. It should be used when analyzing video frames, detecting motion or events, tracking objects, extracting temporal data (e.g., identifying specific frames like takeoff/landing moments), or performing frame-by-frame processing with OpenCV or similar libraries.
This skill provides structured approaches for video analysis tasks involving frame extraction, motion detection, event identification, and temporal analysis. It emphasizes visualization-first debugging, systematic parameter tuning, and robust validation strategies to avoid common pitfalls in video processing workflows.
Before writing any detection algorithms:
import cv2

# Probe basic video properties up front -- fps/frame_count drive all
# later temporal math, so fail fast if the file cannot be decoded.
cap = cv2.VideoCapture('video.mp4')
if not cap.isOpened():
    raise IOError("Could not open video.mp4 -- check the path and codec support")
try:
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
finally:
    cap.release()  # always free the decoder handle, even on error
print(f"FPS: {fps}, Frames: {frame_count}, Resolution: {width}x{height}")
Create visualization capabilities before implementing detection logic:
def save_debug_frame(frame, frame_num, detections, output_dir):
    """Save an annotated copy of a frame for visual verification.

    Args:
        frame: BGR image (numpy array); a copy is annotated, the
            caller's frame is never modified.
        frame_num: Frame index, embedded in the output filename.
        detections: Iterable of dicts with 'bbox' (x1, y1, x2, y2)
            and 'lowest_y' keys.
        output_dir: Directory for the PNG; created if missing, since
            cv2.imwrite fails silently on a nonexistent directory.
    """
    import os
    os.makedirs(output_dir, exist_ok=True)
    annotated = frame.copy()
    for det in detections:
        x1, y1, x2, y2 = det['bbox']
        # OpenCV drawing APIs expect point tuples, not list slices.
        cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(annotated, f"y={det['lowest_y']}",
                    (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
    cv2.imwrite(f"{output_dir}/frame_{frame_num:04d}.png", annotated)
Avoid hardcoded magic numbers. Make all thresholds configurable:
from dataclasses import dataclass


@dataclass
class VideoAnalyzerConfig:
    """Tunable parameters for video analysis.

    Keeping every threshold here (instead of inline magic numbers)
    makes per-video tuning a config change, not a code change. As a
    dataclass, overrides can be passed at construction time, e.g.
    ``VideoAnalyzerConfig(binary_threshold=40)``.
    """

    blur_kernel_size: tuple = (21, 21)  # Gaussian kernel; document why this size
    binary_threshold: int = 25          # Threshold for foreground detection
    dilation_iterations: int = 2        # Morphological operations
    min_contour_area: int = 500         # Minimum detection size in pixels
    smoothing_window: int = 5           # Temporal smoothing for metrics
Common scenarios requiring explicit handling:
def handle_detection_gap(frame_data, gap_start, gap_end, key='lowest_y'):
    """Fill a run of missed detections by linear interpolation (Option 1).

    Frames inside the gap are filled in place between the last valid
    value before the gap and the first valid value after it, and each
    filled frame is flagged ``'interpolated': True`` so it can be
    surfaced for manual review (Option 3).

    Args:
        frame_data: List of per-frame dicts; frames outside the gap
            must carry *key*.
        gap_start: Index of the first frame missing a detection.
        gap_end: Index of the last frame missing a detection (inclusive).
        key: Metric to interpolate (default 'lowest_y').

    Returns:
        Dict mapping frame index -> interpolated value.

    Raises:
        ValueError: If the gap touches either end of the clip, so no
            anchor value exists on one side.
    """
    before, after = gap_start - 1, gap_end + 1
    if before < 0 or after >= len(frame_data):
        raise ValueError("Gap touches the clip boundary; cannot interpolate")
    lo = frame_data[before][key]
    hi = frame_data[after][key]
    span = after - before
    filled = {}
    for idx in range(gap_start, gap_end + 1):
        value = lo + (hi - lo) * (idx - before) / span
        frame_data[idx][key] = value
        frame_data[idx]['interpolated'] = True  # flag for manual review
        filled[idx] = value
    return filled
At each major algorithm step, output visual proof:
# Dump the intermediate image of every pipeline stage so each step
# can be visually verified in order.
debug_stages = [
    ("debug/01_background_diff.png", diff_frame),   # after background subtraction
    ("debug/02_thresholded.png", thresh_frame),     # after thresholding
    ("debug/03_morphed.png", morph_frame),          # after morphological operations
    ("debug/04_contours.png", contour_frame),       # after contour detection
]
for stage_path, stage_image in debug_stages:
    cv2.imwrite(stage_path, stage_image)
Add runtime validation for expected conditions:
def validate_detection(detection, frame_num, video_props, prev_x=None):
    """Verify a detection makes physical sense.

    Raises ``ValueError`` instead of using ``assert`` so the checks
    survive ``python -O`` (asserts are stripped under optimization).

    Args:
        detection: Dict with 'area', 'center_x', 'center_y'.
        frame_num: Index of the frame being validated.
        video_props: Dict with 'width', 'height', 'fps'.
        prev_x: center_x from the previous frame, if available; the
            movement check is skipped when it is None.

    Raises:
        ValueError: When any sanity check fails.
    """
    if detection['area'] <= 0:
        raise ValueError(f"Zero area detection at frame {frame_num}")
    if not 0 <= detection['center_x'] <= video_props['width']:
        raise ValueError(f"center_x out of frame bounds at frame {frame_num}")
    if not 0 <= detection['center_y'] <= video_props['height']:
        raise ValueError(f"center_y out of frame bounds at frame {frame_num}")
    # Domain-specific check: reject implausibly large jumps.
    # NOTE(review): fps * 50 scales with frame rate although the original
    # comment called it "pixels per frame" -- confirm the intended units.
    if frame_num > 0 and prev_x is not None:
        max_reasonable_movement = video_props['fps'] * 50
        if abs(detection['center_x'] - prev_x) >= max_reasonable_movement:
            raise ValueError(f"Implausible horizontal jump at frame {frame_num}")
Plot metrics over time to identify anomalies:
import matplotlib.pyplot as plt


def plot_metrics(frame_data, output_path):
    """Plot per-frame metrics over time so anomalies stand out.

    Args:
        frame_data: List of dicts with a 'frame' key and optional
            'lowest_y' / 'motion_magnitude' keys (missing values plot
            as gaps).
        output_path: Destination image path for the saved figure.
    """
    frames = [d['frame'] for d in frame_data]
    y_positions = [d.get('lowest_y') for d in frame_data]
    motion = [d.get('motion_magnitude') for d in frame_data]
    fig, axes = plt.subplots(2, 1, figsize=(12, 8))
    axes[0].plot(frames, y_positions, 'b-', label='Y Position')
    axes[0].set_ylabel('Y Position (pixels)')
    axes[0].legend()  # labels are invisible without an explicit legend()
    axes[1].plot(frames, motion, 'r-', label='Motion')
    axes[1].set_ylabel('Motion Magnitude')
    axes[1].set_xlabel('Frame')
    axes[1].legend()
    fig.savefig(output_path)
    plt.close(fig)  # prevent figure accumulation when called in a loop
Image coordinates have origin at top-left, with Y increasing downward:
# CORRECT: In image coordinates Y grows DOWNWARD from the top-left
# origin, so the physically highest point has the SMALLEST Y value.
peak_frame = min(detections, key=lambda d: d['lowest_y'])
# WRONG: Assuming higher Y = higher position (true in math plots,
# false in image coordinates).
# peak_frame = max(detections, key=lambda d: d['lowest_y'])
Convert numpy types before JSON/TOML serialization:
# WRONG: Will fail with "Object of type int64 is not JSON serializable"
# -- values read from numpy arrays are numpy scalars, not built-in ints.
result = {'frame': detection['frame'], 'y': detection['y']}
# CORRECT: Explicit conversion to built-in types before serialization.
result = {'frame': int(detection['frame']), 'y': int(detection['y'])}
Bounding box coordinates may not reflect actual body position: the box tracks the detected contour's extremes, which can include shadows, motion blur, or extended limbs rather than the body's center. Thresholds tuned on one video may fail on others: lighting, resolution, camera distance, and background texture all shift the optimal values, so re-validate parameters on each new source.
When writing analysis scripts via heredoc or Write tool:
Verify syntax before executing (e.g. `python -m py_compile script.py`), since heredoc quoting can silently corrupt the script.

Frame differencing — best for: static camera, moving subject against a stationary background.
def frame_difference(frame1, frame2, threshold=25):
    """Return a binary mask of pixels that changed between two frames.

    Both frames are converted to grayscale, their absolute difference
    is taken, and pixels exceeding *threshold* are set to 255.
    """
    gray_pair = [cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in (frame1, frame2)]
    delta = cv2.absdiff(gray_pair[0], gray_pair[1])
    mask = cv2.threshold(delta, threshold, 255, cv2.THRESH_BINARY)[1]
    return mask
Best for: Longer videos, gradual lighting changes
# MOG2 handles lighting changes better
bg_subtractor = cv2.createBackgroundSubtractorMOG2(
    history=500, varThreshold=16, detectShadows=True
)
# KNN for more stable backgrounds
# NOTE: this rebinds bg_subtractor over the MOG2 instance above --
# these are two alternatives; in real code pick ONE.
bg_subtractor = cv2.createBackgroundSubtractorKNN(
    history=500, dist2Threshold=400.0, detectShadows=True
)
Best for: Tracking motion direction and magnitude
def compute_optical_flow(prev_gray, curr_gray):
    """Dense Farneback optical flow between two grayscale frames.

    Returns:
        Tuple of per-pixel (magnitude, angle) arrays describing the
        motion field from prev_gray to curr_gray.
    """
    flow_field = cv2.calcOpticalFlowFarneback(
        prev_gray, curr_gray, None,
        pyr_scale=0.5, levels=3, winsize=15,
        iterations=3, poly_n=5, poly_sigma=1.2, flags=0,
    )
    dx = flow_field[..., 0]
    dy = flow_field[..., 1]
    return cv2.cartToPolar(dx, dy)
For detecting specific events (e.g., takeoff, landing, collisions):
def detect_event(frame_data, event_type='takeoff',
                 motion_threshold=None, y_threshold=None,
                 min_confidence=0.6):
    """Detect an event (e.g. takeoff, landing) using multiple signals.

    Args:
        frame_data: List of per-frame dicts with 'frame', 'motion',
            and 'y' keys.
        event_type: Label for the event being detected (informational).
        motion_threshold: Motion magnitude above which a frame counts
            as a motion spike. Required (was previously an undefined
            free variable).
        y_threshold: Y position below which the position check fires.
            Required.
        min_confidence: Fraction of confirming checks needed for a
            frame to become a candidate.

    Returns:
        Dict with 'frame', 'confidence', 'signals' for the best
        candidate, or None when no frame clears *min_confidence*
        (the original crashed with max() on an empty sequence).

    Raises:
        ValueError: If either threshold is not supplied.
    """
    if motion_threshold is None or y_threshold is None:
        raise ValueError("motion_threshold and y_threshold must be provided")
    candidates = []
    for i, data in enumerate(frame_data):
        signals = {
            'y_derivative': compute_y_velocity(frame_data, i),
            'motion_spike': data['motion'] > motion_threshold,
            'position_threshold': data['y'] < y_threshold,
            'acceleration': compute_acceleration(frame_data, i)
        }
        # Confidence is the fraction of confirming BOOLEAN checks; the
        # raw derivative/acceleration values stay in `signals` as
        # evidence only (the original summed numbers with booleans).
        checks = [
            signals['motion_spike'],
            signals['position_threshold'],
            signals['y_derivative'] != 0,  # movement present -- TODO confirm a real cutoff
            signals['acceleration'] != 0,
        ]
        confidence = sum(checks) / len(checks)
        if confidence > min_confidence:
            candidates.append({
                'frame': data['frame'],
                'confidence': confidence,
                'signals': signals
            })
    if not candidates:
        return None  # nothing met the bar -- let the caller decide
    # Return highest confidence candidate
    return max(candidates, key=lambda c: c['confidence'])
When producing analysis results:
# Report each event as a best-guess frame plus confidence and an
# uncertainty range, and list the assumptions the analysis relies on.
result = {
    'takeoff_frame': int(takeoff),    # int() guards against numpy int64 in JSON
    'takeoff_confidence': 0.85,
    'takeoff_range': [93, 97],        # plausible frame window, not a point estimate
    'landing_frame': int(landing),
    'landing_confidence': 0.92,
    'landing_range': [112, 116],
    'assumptions': [
        'First frame contains no subject',
        'Single subject in frame',
        'Camera is stationary'
    ],
    'debug_frames_exported': True     # annotated frames saved for verification
}