Search code examples
Tags: python, opencv, ffmpeg, windows-10, h.264

OpenCV output file empty


I want to display 12 videos arranged in a 4x3 grid (4 columns, 3 rows) in one video. The code below runs without errors, but the output file is always empty. I have tried the following encoder formats:

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
fourcc = cv2.VideoWriter_fourcc(*'avc1')

Note: the duration of the videos is not exactly identical for all.

I am on Windows 10 with PyCharm, Python 3.9, opencv-python==4.6.0.66; ffmpeg is installed with the PATH variable set, and the H.264 library seems to load fine.

import cv2
import numpy as np
import math
from tqdm import tqdm

# Question script: tries to merge 12 videos into one 1920x1080 grid video.
# NOTE(review): as written, every frame handed to out.write() has a shape
# other than the (video_height, video_width) the writer was opened with,
# which is why the output file ends up empty (see the notes further down).

# Set the dimensions of the merged video
video_width = 1920
video_height = 1080

# Set the dimensions of each video
sub_video_width = 480
sub_video_height = 360

# Set the margin between each video
margin = 20

# Create a black background frame
# NOTE(review): bg_frame is never used anywhere below.
bg_frame = np.zeros((video_height, video_width, 3), dtype=np.uint8)

file_name_list = ['CAM010000000_compressed.mp4', 'CAM020000000_compressed.mp4', 'CAM030000000_compressed.mp4', 'CAM040000000_compressed.mp4', 'CAM050000000_compressed.mp4', 'CAM060000000_compressed.mp4', 'CAM070000000_compressed.mp4', 'CAM080000000_compressed.mp4', 'CAM090000000_compressed.mp4', 'CAM100000000_compressed.mp4', 'CAM110000000_compressed.mp4', 'CAM120000000_compressed.mp4']

# Calculate the number of rows and columns
num_videos = len(file_name_list)
num_rows = 3
num_cols = 4
num_total = num_rows * num_cols  # NOTE(review): never used below

# Determine the size of each sub-clip (tile size plus the margin)
sub_video_x = sub_video_width + margin
sub_video_y = sub_video_height + margin

# Calculate the total video duration (sum of frame_count / fps over all files)
# NOTE(review): total_duration is accumulated but never used below.
total_duration = 0
for filename in file_name_list:
    cap = cv2.VideoCapture(filename)
    total_duration += cap.get(cv2.CAP_PROP_FRAME_COUNT) / cap.get(cv2.CAP_PROP_FPS)
    cap.release()

# Initialize the final video
#fourcc = cv2.VideoWriter_fourcc(*'mp4v')
#out = cv2.VideoWriter('merged_video.mp4', fourcc, 30, (video_width, video_height))

# The writer is opened for 30 fps frames of size (video_width, video_height).
fourcc = cv2.VideoWriter_fourcc(*'avc1')
out = cv2.VideoWriter('merged_video.mp4', fourcc, 30, (video_width, video_height))

# Iterate over each row and column (one grid cell == one input video)
for i in tqdm(range(num_rows), desc='Processing rows'):
    for j in tqdm(range(num_cols), desc='Processing columns'):
        # Calculate the index of the video to be inserted
        video_index = i * num_cols + j

        # If there is no video at this index, skip to the next one
        if video_index >= num_videos:
            continue

        # Load the sub-clip and resize it
        # Every frame of the file is read into memory before anything is written.
        cap = cv2.VideoCapture(file_name_list[video_index])
        frames = []
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(frame)
        cap.release()
        # NOTE(review): frames[0] raises IndexError if no frame could be read.
        clip = cv2.resize(frames[0], (sub_video_width, sub_video_height))
        for frame in frames[1:]:
            frame = cv2.resize(frame, (sub_video_width, sub_video_height))
            # axis=1 joins the frames side by side, so clip grows by
            # sub_video_width pixels in width for every frame of the video.
            clip = np.concatenate((clip, frame), axis=1)

        # Calculate the position of the sub-clip
        x = sub_video_x * j + margin
        y = sub_video_y * i + margin

        # Add the filename to the sub-clip
        # NOTE(review): text_x/text_y include the grid offsets x and y, but
        # the text is drawn onto clip (the concatenated strip), not onto a
        # full-size canvas.
        font = cv2.FONT_HERSHEY_SIMPLEX
        text = file_name_list[video_index]
        textsize = cv2.getTextSize(text, font, 1, 2)[0]
        text_x = x + sub_video_width - textsize[0] - margin
        text_y = y + sub_video_height - textsize[1] - margin
        cv2.putText(clip, text, (text_x, text_y), font, 1, (255, 255, 255), 2, cv2.LINE_AA)

        # Add the sub-clip to the final video
        # NOTE(review): clip is sub_video_height rows by
        # (sub_video_width * number_of_frames) columns here, not 1080x1920 as
        # declared for the writer; this mismatch is the cause of the empty
        # output. Only one write per input video is issued.
        out.write(clip) # write the sub-clip to the output video

# Release the final video
out.release()

Solution

  • The output video file is empty because clip.shape doesn't match the resolution of out.

    The resolution of out is defined in out = cv2.VideoWriter('merged_video.mp4', fourcc, 30, (video_width, video_height)) to be 1920x1080.
    The shape of clip when performing out.write(clip) must be (1080, 1920, 3).

    clip = np.concatenate((clip, frame), axis=1) concatenates the sub-frames along the horizontal axis, so the result frame is a lot wider than 1920 pixels.


    Instead of reading all the frames of each video into a list and then concatenating them, it is better to open 12 cv2.VideoCapture objects and, for every output frame:

    • Read one frame from each video file - one from CAM010000000_compressed.mp4, one from CAM020000000_compressed.mp4... and one from CAM120000000_compressed.mp4
    • Arrange the sub-frames side by side to form a 4x3 mosaic frame.
    • Write the mosaic clip frame to the output video file.

    Because the 12 files may have different durations, the solution is a bit more complicated - when one file ends while others are still playing, we have to keep showing its last frame.

    Note that the main loop can't iterate rows and columns - we have to iterate the frames of the output video file (we can't go back and update video frames that were already written).


    Updated code sample:

    import cv2
    import numpy as np
    #import math
    #from tqdm import tqdm
    
    # Build a 4x3 mosaic video from up to 12 input videos.
    #
    # For every output frame, one frame is read from each input, resized to a
    # tile and pasted into a reusable full-size canvas. An input that has
    # ended keeps showing its last frame; an input that never produced a frame
    # (e.g. the file could not be opened) leaves its tile black.

    # Set the dimensions of the merged video
    video_width = 1920
    video_height = 1080

    # Set the dimensions of each video (4 columns x 480 = 1920, 3 rows x 360 = 1080)
    sub_video_width = 480
    sub_video_height = 360

    # Offset used to place the filename label near the top-left corner of a tile
    margin = 20

    # Black canvas, reused for every output frame. Its shape must match the
    # frame size given to cv2.VideoWriter, otherwise nothing gets written.
    clip = np.zeros((video_height, video_width, 3), dtype=np.uint8)

    file_name_list = ['CAM010000000_compressed.mp4', 'CAM020000000_compressed.mp4', 'CAM030000000_compressed.mp4', 'CAM040000000_compressed.mp4', 'CAM050000000_compressed.mp4', 'CAM060000000_compressed.mp4', 'CAM070000000_compressed.mp4', 'CAM080000000_compressed.mp4', 'CAM090000000_compressed.mp4', 'CAM100000000_compressed.mp4', 'CAM110000000_compressed.mp4', 'CAM120000000_compressed.mp4']

    # Grid layout
    num_videos = len(file_name_list)
    num_rows = 3
    num_cols = 4
    num_total = num_rows * num_cols

    # Kept from the question's script; the mosaic packs tiles without gaps
    # and does not use these values.
    sub_video_x = sub_video_width + margin
    sub_video_y = sub_video_height + margin

    # Open one cv2.VideoCapture per input file, at most one per grid cell.
    cap_list = [cv2.VideoCapture(filename) for filename in file_name_list[:num_total]]

    out = None
    try:
        # Use the frame rate of the first input that reports one, so the
        # merged video plays at the source speed; fall back to 1 fps.
        fps = next(
            (rate for rate in (cap.get(cv2.CAP_PROP_FPS) for cap in cap_list) if rate > 0),
            1,
        )

        # Initialize the final video
        fourcc = cv2.VideoWriter_fourcc(*'avc1')  # In Windows, requires openh264-1.8.0-win64.dll
        out = cv2.VideoWriter('merged_video.mp4', fourcc, fps, (video_width, video_height))
        if not out.isOpened():
            # Without this check a missing encoder silently yields an empty file.
            raise RuntimeError("cv2.VideoWriter could not open 'merged_video.mp4' with the 'avc1' codec")

        font = cv2.FONT_HERSHEY_SIMPLEX
        sub_frames = [None] * num_total  # Latest tile per grid cell; None until a frame arrives

        # Main loop: one iteration per output frame
        while True:
            any_ret = False
            for video_index, cap in enumerate(cap_list):
                ret, frame = cap.read()
                if ret:
                    # Resize to the tile size and remember it; if a later read
                    # fails (video ended) the previous tile is kept.
                    sub_frames[video_index] = cv2.resize(frame, (sub_video_width, sub_video_height))
                    any_ret = True

            if not any_ret:
                break  # Every input has ended (or none could be read)

            for video_index, sub_frame in enumerate(sub_frames):
                if sub_frame is None:
                    continue  # No frame for this cell yet: leave the tile black

                # Row i and column j of the cell, then its top-left pixel
                i, j = divmod(video_index, num_cols)
                top = i * sub_video_height
                left = j * sub_video_width

                # Paste the tile into the mosaic canvas
                clip[top:top + sub_video_height, left:left + sub_video_width, :] = sub_frame

                # Draw the filename near the top-left corner of the tile;
                # text_y is the text baseline, hence the added text height.
                text = file_name_list[video_index]
                textsize = cv2.getTextSize(text, font, 1, 2)[0]
                text_x = left + margin // 4
                text_y = top + margin + textsize[1]
                cv2.putText(clip, text, (text_x, text_y), font, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # Write the 4x3 mosaic frame to the output video
            out.write(clip)
    finally:
        # Always finalize the output file and free the inputs, even on error.
        if out is not None:
            out.release()
        for cap in cap_list:
            cap.release()
    

    For testing we may use FFmpeg CLI for building synthetic video files.
    In case the code sample is not working, use the synthetic video files - they make the solution reproducible (not dependent on video files that only you can access).

    Creating 12 sample video files:

    ffmpeg -y -f lavfi -i testsrc=size=480x360:rate=1 -t 12 -vcodec libx264 CAM010000000_compressed.mp4
    ffmpeg -y -f lavfi -i mandelbrot=size=480x360:rate=1 -t 10 -vcodec libx264 CAM020000000_compressed.mp4
    ffmpeg -y -f lavfi -i color=blue:size=480x360:rate=1 -t 10 -vcodec libx264 CAM030000000_compressed.mp4
    ffmpeg -y -f lavfi -i color=black:size=480x360:rate=1 -t 11 -vcodec libx264 CAM040000000_compressed.mp4
    ffmpeg -y -f lavfi -i mandelbrot=size=480x360:rate=1 -t 9 -vcodec libx264 CAM050000000_compressed.mp4
    ffmpeg -y -f lavfi -i mandelbrot=size=480x360:rate=1 -t 8 -vcodec libx264 CAM060000000_compressed.mp4
    ffmpeg -y -f lavfi -i mandelbrot=size=480x360:rate=1 -t 10 -vcodec libx264 CAM070000000_compressed.mp4
    ffmpeg -y -f lavfi -i mandelbrot=size=480x360:rate=1 -t 12 -vcodec libx264 CAM080000000_compressed.mp4
    ffmpeg -y -f lavfi -i testsrc=size=480x360:rate=1 -t 10 -vcodec libx264 CAM090000000_compressed.mp4
    ffmpeg -y -f lavfi -i testsrc=size=480x360:rate=1 -t 10 -vcodec libx264 CAM100000000_compressed.mp4
    ffmpeg -y -f lavfi -i testsrc=size=480x360:rate=1 -t 10 -vcodec libx264 CAM110000000_compressed.mp4
    ffmpeg -y -f lavfi -i testsrc=size=480x360:rate=1 -t 10 -vcodec libx264 CAM120000000_compressed.mp4


    Sample output frame:
    enter image description here