Search code examples
Tags: opencv, compression, video-compression, lossy-compression

The overall size of three images stitched together is greater than the sum of the individual images, why?


The following code takes input from three USB cameras and combines them into one wide image which is stored as one .AVI file.

# Configuration and one-time setup for recording three USB cameras side by
# side into a single MJPG-encoded .avi file.

# Label used in the output folder/file names and in the on-frame overlay text.
szPlacename = 'Station A'
# Length of the recording session in seconds (compared against time.time() deltas).
iCaptureDuration = 600
# Frame rate handed to the VideoWriter and printed in the overlay text.
iFramesPerSec = 3
szFramesPerSec = str(iFramesPerSec)

# Create VideoCapture objects for all three cameras
cap0 = cv2.VideoCapture(0)
cap1 = cv2.VideoCapture(1)
cap2 = cv2.VideoCapture(2)

# Set the pause duration during application execution (in seconds)
# NOTE(review): iSleepDuration is not referenced anywhere else in this listing.
iSleepDuration = 0

# Set the frame width and height
# iFrameWidth is the per-camera width used when resizing frames for storage;
# the writer below is declared with three of these side by side.
iFrameWidth = 1024
iFrameHeight = 768
szFrameWidth = str(iFrameWidth)
szFrameHeight = str(iFrameHeight)

# Define a smaller size for displaying on the screen
# NOTE(review): iDisplayHeight is not referenced in this listing; the display
# frames are resized by width only.
iDisplayWidth = 400
iDisplayHeight = 300

# Get the computer name
szComputerName = socket.gethostname()

# Calculate the percentage of hard drive space in USE (iUsed / iTotal), rounded
# to one decimal place; it is shown as "HD used=" in the overlay text.
iTotal, iUsed, iFree = shutil.disk_usage("/")
iPercent = 100 * iUsed / iTotal
iPercent = round(iPercent, 1)
szPercent = str(iPercent) + "%"
# Set up font for text display on frames
font = cv2.FONT_HERSHEY_COMPLEX_SMALL
# Check if cameras opened successfully
# NOTE(review): a failure is only reported; execution continues regardless.
if not all([cap0.isOpened(), cap1.isOpened(), cap2.isOpened()]):
     print("Unable to read camera feed")
# Create directory structure for saving videos:
# C:\LZ\<place name>-<YYYYMMDD>, including any missing parent folders.
output_path = pathlib.Path('C:\\LZ\\') / f"{szPlacename}-{datetime.datetime.now().strftime('%Y%m%d')}"
output_path.mkdir(parents=True, exist_ok=True)
# Set up VideoWriter object for recording:
#   file name  <YYYYMMDDHHMMSS>_<place name>_StarLink.avi
#   codec      MJPG fourcc
#   frame rate iFramesPerSec
#   frame size three storage-width frames side by side (3 * iFrameWidth, iFrameHeight)
out = cv2.VideoWriter(
    str(output_path / f"{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}_{szPlacename}_StarLink.avi"),
    cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
    iFramesPerSec, (iFrameWidth * 3, iFrameHeight)
)
# Capture video for the specified duration
# The loop ends when the elapsed whole seconds reach iCaptureDuration, when any
# camera fails to deliver a frame, or when 'q' is pressed in the preview window.
# NOTE(review): nothing in this loop paces it to iFramesPerSec. One combined
# frame is written on every pass, so the number of stored frames is set by how
# fast the three reads return, not by the frame rate declared to the writer.
# Presumably this is why the file grows well beyond the expected size - confirm
# by counting the frames in a recorded file.
iStartTime = time.time()
while int(time.time() - iStartTime) < iCaptureDuration:
    # Grab one frame from each camera; ret* is the success flag of each read.
    ret0, frame0 = cap0.read()
    ret1, frame1 = cap1.read()
    ret2, frame2 = cap2.read()

    # Stop recording as soon as any of the three reads fails.
    if not all([ret0, ret1, ret2]):
        break

    # Timestamp for the overlay, cut to the first 22 characters
    # ("YYYY-MM-DD HH:MM:SS.ff" when a fractional part is present).
    szDateTime = str(datetime.datetime.now())
    szDateTime = szDateTime[0:22]

    # Resize frames for display
    # (made from the untouched camera frames, before any text is drawn on them)
    display_frame0 = imutils.resize(frame0, width=iDisplayWidth)
    display_frame1 = imutils.resize(frame1, width=iDisplayWidth)
    display_frame2 = imutils.resize(frame2, width=iDisplayWidth)

    # Resize frames for storage
    # Descriptive text only in frame 0
    # NOTE(review): only a width is passed, so iFrameHeight is not enforced
    # here; presumably imutils scales the height proportionally - confirm that
    # the result matches the height declared to the VideoWriter.
    frame0 = imutils.resize(frame0, width=iFrameWidth)
    # The same two text lines are drawn twice, in black (y=30/60) and in yellow
    # (y=90/120), presumably so one copy stays readable on any background.
    frame0 = cv2.putText(frame0, f"Laptop: " + szComputerName + ", HD used= " + szPercent + ", FPS= " + szFramesPerSec + ", Size(px)= " + szFrameWidth + "x" + szFrameHeight, (10, 30), font, 0.5, (0, 0, 0), 1, cv2.LINE_8)# black
    frame0 = cv2.putText(frame0, f"Location: " + szPlacename + ", Time: " + szDateTime,(10, 60), font, 0.5, (0, 0, 0), 1, cv2.LINE_8)# black
    frame0 = cv2.putText(frame0, f"Laptop: " + szComputerName + ", HD used= " + szPercent + ", FPS= " + szFramesPerSec + ", Size(px)= " + szFrameWidth + "x" + szFrameHeight, (10, 90), font, 0.5, (0, 255, 255), 1, cv2.LINE_8)# yellow
    frame0 = cv2.putText(frame0, f"Location: " + szPlacename + ", Time: " + szDateTime,(10, 120), font, 0.5, (0, 255, 255), 1, cv2.LINE_8)# yellow

    # Frames 1 and 2 are stored without any overlay text.
    frame1 = imutils.resize(frame1, width=iFrameWidth)
    frame2 = imutils.resize(frame2, width=iFrameWidth)

    # Add text to display frames
    # (display frame 0 gets the full overlay; frames 1 and 2 only the laptop name)
    display_frame0 = cv2.putText(display_frame0, f"Laptop: " + szComputerName + ", HD used= " + szPercent + ", FPS= " + szFramesPerSec + ", Size(px)= " + szFrameWidth + "x" + szFrameHeight, (10, 30), font, 0.5, (0, 0, 0), 1, cv2.LINE_8)# black
    display_frame0 = cv2.putText(display_frame0, f"Location: " + szPlacename + ", Time: " + szDateTime,(10, 60), font, 0.5, (0, 0, 0), 1, cv2.LINE_8)# black
    display_frame0 = cv2.putText(display_frame0, f"Laptop: " + szComputerName + ", HD used= " + szPercent + ", FPS= " + szFramesPerSec + ", Size(px)= " + szFrameWidth + "x" + szFrameHeight, (10, 90), font, 0.5, (0, 255, 255), 1, cv2.LINE_8)# yellow
    display_frame0 = cv2.putText(display_frame0, f"Location: " + szPlacename + ", Time: " + szDateTime,(10, 120), font, 0.5, (0, 255, 255), 1, cv2.LINE_8)# yellow
    display_frame1 = cv2.putText(display_frame1, f"Laptop: {szComputerName}", (5, 15), font, 0.5, (0, 0, 0), 1, cv2.LINE_8)
    display_frame2 = cv2.putText(display_frame2, f"Laptop: {szComputerName}", (5, 15), font, 0.5, (0, 0, 0), 1, cv2.LINE_8)

    # Combine frames horizontally
    combined_frame = np.hstack([display_frame0, display_frame1, display_frame2])

    # Display frames
    cv2.imshow('Three Cameras Side by Side (Display)', combined_frame)

    # Write original resolution frames to the video
    # (the three storage-size frames stacked side by side, one write per loop pass)
    out.write(np.hstack([frame0, frame1, frame2]))

    # Poll the keyboard; 'q' ends the recording early.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
# (the three cameras, the video writer, and every preview window)
cap0.release()
cap1.release()
cap2.release()
out.release()
cv2.destroyAllWindows()

Curiously, when an AVI file is made using a single USB camera, the file size is approximately 180 MB. However, when the three USB cameras are combined, the resulting file is much larger than anticipated, often over 1.5 GB. Why is this? Thanks in advance.


Solution

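  • This resolved the issue: the size of the combined file now roughly equals the sum of the individual files. The key change is that frames are read and written only once every 1/iFramesPerSec seconds, rather than on every pass through the loop.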

    import cv2
    import numpy as np
    import time
    import datetime
    import pathlib
    import imutils
    import socket
    
    # Record three USB cameras side by side into one MJPG .avi file, storing a
    # combined frame at most once every 1/iFramesPerSec seconds of wall-clock time.
    szPlacename = 'Station A'
    iCaptureDuration = 30  # length of the recording session, in seconds
    iFramesPerSec = 3  # rate declared to the writer AND the rate frames are stored at
    iFrameWidth, iFrameHeight = 1024, 768  # per-camera size inside the stored frame
    iDisplayWidth, iDisplayHeight = 1200, 300  # size of the on-screen preview

    cap0, cap1, cap2 = cv2.VideoCapture(0), cv2.VideoCapture(1), cv2.VideoCapture(2)

    # Build C:\LZ\<place name>-<YYYYMMDD> and create it up front: the writer
    # cannot open a file inside a folder that does not exist yet.
    # One timestamp is shared by the folder and file names so they cannot
    # disagree when the script starts right at midnight.
    dtNow = datetime.datetime.now()
    output_path = pathlib.Path('C:\\LZ\\') / f'{szPlacename}-{dtNow:%Y%m%d}'
    output_path.mkdir(parents=True, exist_ok=True)

    out_combined = cv2.VideoWriter(
        str(output_path / f'{dtNow:%Y%m%d%H%M%S} {szPlacename}_Combined.avi'),
        cv2.VideoWriter_fourcc('m', 'j', 'p', 'g'),
        iFramesPerSec, (iFrameWidth * 3, iFrameHeight))

    fFrameInterval = 1. / iFramesPerSec  # minimum spacing between stored frames
    iPrev = 0  # time the last frame was stored; 0 makes the first pass store at once
    iStartTime = time.time()
    try:
        while int(time.time() - iStartTime) < iCaptureDuration:
            if time.time() - iPrev > fFrameInterval:
                ret0, frame0 = cap0.read()
                ret1, frame1 = cap1.read()
                ret2, frame2 = cap2.read()
                # Stop recording as soon as any camera fails to deliver a frame.
                if not all([ret0, ret1, ret2]):
                    break
                iPrev = time.time()

                # NOTE(review): only a width is passed, so this relies on the
                # cameras delivering frames that come out iFrameHeight tall -
                # confirm that the stacked frame is (iFrameWidth * 3) x iFrameHeight.
                frames = [imutils.resize(frame, width=iFrameWidth)
                          for frame in (frame0, frame1, frame2)]
                combined_frame = np.hstack(frames)

                # The preview is a scaled-down copy; the file gets the full-size frame.
                cv2.imshow('Three Cameras Side by Side (Display)',
                           cv2.resize(combined_frame, (iDisplayWidth, iDisplayHeight)))
                out_combined.write(combined_frame)

            # Poll the keyboard on every pass so 'q' really ends the recording;
            # the short wait also keeps idle passes from spinning the CPU.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release resources even if the loop raised, so the .avi is closed properly.
        cap0.release()
        cap1.release()
        cap2.release()
        out_combined.release()
        cv2.destroyAllWindows()