Search code examples
pythonrgbdepth

Saving RGBD as single image


I used this code https://www.programmersought.com/article/8773686326/ to create an RGBD tensor by combining an RGB image and a depth image. Now I wonder whether that RGBD data can be saved as a single image (JPEG, PNG, ...). I tried imageio.imwrite(), plt.imsave(), cv2.imwrite(), ... without success, likely due to the dimensions [4, 64, 1216]. Is there a way to make it happen?

# Target size handed to both Resize transforms below.
# NOTE(review): presumably interpreted as (height, width) — confirm against torchvision.
scale = (64, 1216)
 
# Two resize transforms: bilinear for the color image, nearest-neighbour for
# the depth map (nearest-neighbour picks existing values instead of blending them).
resize_img = transforms.Resize(scale, Image.BILINEAR)
resize_depth = transforms.Resize(scale, Image.NEAREST)
to_tensor = transforms.ToTensor()
 
# NOTE(review): img_id is assigned here but never read in this snippet.
img_id = 0
 
# Load the color image, resize it and convert it to a NumPy array.
img = Image.open('RGB_image.jpg')
img = resize_img(img)
img = np.array(img)
 
# Load the depth image, resize it and convert it to a NumPy array.
depth = Image.open('depth_image.png')
depth = resize_depth(depth)
depth = np.array(depth)
# Append a trailing axis (assumes the depth image is single-channel — TODO confirm).
depth = depth[:, :, np.newaxis]
 
# Convert both inputs to float tensors.
# The array is turned back into a PIL image to force 3-channel RGB first.
img = Image.fromarray(img).convert('RGB')
img = to_tensor(img).float()
 
# Divide by 65535; assumes the depth image holds 16-bit values — TODO confirm.
depth = depth / 65535
depth = to_tensor(depth).float()

# Concatenate color and depth along dimension 0
# (the question reports the resulting dimensions as [4, 64, 1216]).
rgbd = torch.cat((img, depth), 0)
print("\n\nRGBD shape")
print(rgbd.shape)

Solution

  • We may save the depth as an alpha channel of an image in RGBA pixel format.

    The alpha channel normally carries transparency, but we may use it as a 4th channel so that the RGB and depth data are stored together.

    Since the depth may require high precision - may require float32 precision, I suggest using OpenEXR image format.
    For compatibility with OpenEXR format we may convert all channels to float32 in range [0, 1].

    Note:

    • I realized that Open3D supports RGBD images, but it looks like it doesn't support reading and writing the RGB and depth to a single file.

    The following code sample uses OpenCV instead of Pillow.
    I thought OpenCV supports EXR file format, but my OpenCV Python version is not built with EXR support. I used ImageIO package instead.


    Stages for converting and writing RGB and depth to an EXR file:

    • Load RGB image, resize it and convert to float:

       img = cv2.imread('RGB_image.jpg')  # The channels order is BGR due to OpenCV conventions.
       img = cv2.resize(img, scale, interpolation=cv2.INTER_LINEAR)
       img = img.astype(np.float32) / 255  # Convert to float in range [0, 1]
      
    • Load depth image, resize and convert to float:

       depth = cv2.imread('depth_image.png', cv2.IMREAD_UNCHANGED)  # Assume depth_image.png is 16 bits grayscale.
       depth = cv2.resize(depth, scale, interpolation=cv2.INTER_NEAREST)
       depth = depth.astype(np.float32) / 65535  # Convert to float in range [0, 1]
      
    • Merge img (3 channels) and depth (1 channel) to 4 channels:
      The shape is going to be (1216, 64, 4) (applies OpenCV BGRA color convention).

       bgrd = np.dstack((img, depth))
      
    • Writing bgrd to EXR file:
      If OpenCV is built with OpenEXR support, we may use: cv2.imwrite('rgbd.exr', bgrd).
      If we use ImageIO, it is better to convert from BGRA to RGBA before saving:

       rgbd = cv2.cvtColor(bgrd, cv2.COLOR_BGRA2RGBA)
       imageio.imwrite('rgbd.exr', rgbd)
      

    Code sample (convert RGB and depth to an RGBA EXR file, then read it and convert back):

    import numpy as np
    import cv2
    import imageio
    
    # Save an RGB image plus a depth map as one 4-channel float32 EXR file,
    # then read the file back and display both parts for verification.

    # cv2.resize takes dsize as (width, height), so the arrays come out as (1216, 64, ...).
    scale = (64, 1216)

    # Load the color image, resize it and convert to float32 in range [0, 1].
    img = cv2.imread('RGB_image.jpg')  # The channels order is BGR due to OpenCV conventions.
    img = cv2.resize(img, scale, interpolation=cv2.INTER_LINEAR)
    img = img.astype(np.float32) / 255

    # Load the depth map and resize it (nearest-neighbour keeps the original depth values).
    depth = cv2.imread('depth_image.png', cv2.IMREAD_UNCHANGED)  # Assume depth_image.png is 16 bits grayscale.
    depth = cv2.resize(depth, scale, interpolation=cv2.INTER_NEAREST)

    if depth.ndim == 3:
        # Keep a single channel if the depth file was decoded with several channels
        # (the question used depth = depth[:, :, np.newaxis] instead).
        depth = depth[:, :, 0]

    depth = depth.astype(np.float32) / 65535  # Convert to float in range [0, 1]

    # Use the depth channel as alpha channel (the channel order is BGRA - applies OpenCV conventions).
    bgrd = np.dstack((img, depth))

    print("\n\nRGBD shape")
    print(bgrd.shape)

    # Save the data to exr file (the color format of the exr file is RGBA).
    # cv2.imwrite('rgbd.exr', bgrd) only works when OpenCV is built with OpenEXR; otherwise it fails with
    # "cv::initOpenEXR imgcodecs: OpenEXR codec is disabled", so ImageIO is used here instead.
    # https://stackoverflow.com/questions/45482307/save-float-array-to-image-with-exr-format
    rgbd = cv2.cvtColor(bgrd, cv2.COLOR_BGRA2RGBA)
    imageio.imwrite('rgbd.exr', rgbd)

    ################################################################################
    # Reading the data:

    # cv2.imread('rgbd.exr') has the same OpenEXR build requirement as cv2.imwrite, so ImageIO is used.
    rgbd = imageio.imread('rgbd.exr')

    # Slice the array that was just read from disk (RGBA order), not the in-memory bgrd.
    img = rgbd[:, :, 0:3]  # First 3 channels are the image (RGB order).
    depth = rgbd[:, :, 3]  # Last channel is the depth

    # Convert back to uint8; round first so float round-off cannot drop a value by one level.
    img = np.round(img * 255).astype(np.uint8)
    # If uint16 depth is required: depth = np.round(depth * 65535).astype(np.uint16)

    # Show images for testing (cv2.imshow expects BGR, the file holds RGB):
    cv2.imshow('img', cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
    cv2.imshow('depth', depth)
    cv2.waitKey()
    cv2.destroyAllWindows()
    

    Note:

    • You may have to make a few modifications - I was not sure about the dimensions (64x1216 or 1216x64), and I was not sure about the line depth = depth[:, :, np.newaxis].
      I may be wrong about the format of depth_image.png.

    Update:

    Saving 16 bits RGBA to PNG file:

    Instead of using EXR file and float32 pixel format...
    We may use PNG file and uint16 pixel format.

    The pixel format of the PNG file is going to be RGBA (RGB and Alpha - transparency channel).
    Each color channel is going to be 16 bits (2 bytes).
    The alpha channel stores the depth map (in uint16 format).

    • Convert img to uint16 (we may choose not to scale by 256):

       img = img.astype(np.uint16)*256
      
    • Merge img (3 channels) and depth (1 channel) to 4 channels:

       bgrd = np.dstack((img, depth))
      
    • Save the merged image to PNG file:

       cv2.imwrite('rgbd.png', bgrd)
      

    Code sample (the second part reads the file back and displays it for testing):

    import numpy as np
    import cv2
    
    # Pack a color image and a 16-bit depth map into one 16-bit RGBA PNG
    # (depth goes into the alpha slot), then read it back and display it.
    scale = (64, 1216)

    # Color image: read (BGR order, OpenCV convention) and resize in one step.
    img = cv2.resize(cv2.imread('RGB_image.jpg'), scale, interpolation=cv2.INTER_LINEAR)

    # Widen from 8 to 16 bits per channel.
    # The factor 256 is optional; it only helps viewers that expect the [0, 65535] range.
    # Most viewers honour the alpha channel, so the saved file will look odd when opened directly.
    img = img.astype(np.uint16) * 256

    # Depth map: read as stored (assumed 16 bits grayscale) and resize without interpolation.
    depth = cv2.resize(
        cv2.imread('depth_image.png', cv2.IMREAD_UNCHANGED),
        scale,
        interpolation=cv2.INTER_NEAREST,
    )

    # Reduce to one channel if the file decoded to several.
    if depth.ndim == 3:
        depth = depth[..., 0]

    # Depth is expected to be uint16 already; cast only as a fallback.
    if depth.dtype != np.uint16:
        depth = depth.astype(np.uint16)

    # Stack B, G, R and depth: depth takes the alpha position of OpenCV's BGRA layout.
    bgrd = np.dstack((img, depth))

    print("\n\nRGBD shape")
    print(bgrd.shape)  # (1216, 64, 4)

    # Write a 16 bits per channel RGBA PNG.
    cv2.imwrite('rgbd.png', bgrd)

    ################################################################################
    # Testing: read the file back and split it again.
    bgrd = cv2.imread('rgbd.png', cv2.IMREAD_UNCHANGED)

    img = bgrd[..., :3]   # color channels
    depth = bgrd[..., 3]  # depth stored as alpha

    # To get 8-bit color back: img = (img // 256).astype(np.uint8)

    # Display both parts.
    for window_title, pixels in (('img', img), ('depth', depth)):
        cv2.imshow(window_title, pixels)
    cv2.waitKey()
    cv2.destroyAllWindows()