Search code examples
ccamerayuvv4l2

Converting uyvy data to rgb


I've been trying to capture images with an elgato facecam using v4l and c. Due to the lack of examples-tutorials, i've used code produced by AI chatbots as well as modified code from the few kind-of-relevant examples i could locate, and then tried to familiarize myself with the code and make it functional. There are a few problems now but the main one, which i'm currently trying to tackle, is that the captured photos are in green and magenda.

I've tried using different yuv to rgb formulas, value ranges and pixel formats (i believe UYVY is the correct one). I managed to get different colour pairs, and in one occasion 3 colours but never all 4. (by "colours" i refer to the quadrants of the yuv plane) I also tried solutions proposed in other SO questions but with no luck. Here's my code:

#include <sys/mman.h>
#include <linux/videodev2.h>
#include <jpeglib.h>

#define VIDEO_DEVICE "/dev/video2"
#define CAPTURE_WIDTH 1920
#define CAPTURE_HEIGHT 1080
#define BUFFER_SIZE (CAPTURE_WIDTH * CAPTURE_HEIGHT * 2)

// Function to convert YUV to RGB
void yuv2rgb(unsigned char *yuv, unsigned char *rgb, int width, int height) {
    int i, j;
    int y, u, v;
    int r, g, b;
    int index_yuv, index_rgb;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j += 2) {
            index_yuv = (i * width + j) * 2;
            index_rgb = (i * width + j) * 3;

            y = yuv[index_yuv];
            u = yuv[index_yuv + 1];
            v = yuv[index_yuv + 3];

            // Adjust U and V range to -128 to 127
            u -= 128;
            v -= 128;

            // YUV to RGB conversion
            r = y + 1.402 * v;
            g = y - 0.344136 * u - 0.714136 * v;
            b = y + 1.772 * u;

            r = (r < 0) ? 0 : ((r > 255) ? 255 : r);
            g = (g < 0) ? 0 : ((g > 255) ? 255 : g);
            b = (b < 0) ? 0 : ((b > 255) ? 255 : b);

            rgb[index_rgb] = r;
            rgb[index_rgb + 1] = g;
            rgb[index_rgb + 2] = b;

            y = yuv[index_yuv + 2];

            // YUV to RGB conversion
            r = y + 1.402 * v;
            g = y - 0.344136 * u - 0.714136 * v;
            b = y + 1.772 * u;


            r = (r < 0) ? 0 : ((r > 255) ? 255 : r);
            g = (g < 0) ? 0 : ((g > 255) ? 255 : g);
            b = (b < 0) ? 0 : ((b > 255) ? 255 : b);

            rgb[index_rgb + 3] = r;
            rgb[index_rgb + 4] = g;
            rgb[index_rgb + 5] = b;
        }
    }
}



int main() {
    int videoFd;
    struct v4l2_capability cap;
    struct v4l2_format format;
    struct v4l2_requestbuffers reqbuf;
    struct v4l2_buffer buf;

    // Open the video device
    videoFd = open(VIDEO_DEVICE, O_RDWR);
    if (videoFd < 0) {
        perror("Failed to open video device");
        return 1;
    }

    // Check device capabilities
    if (ioctl(videoFd, VIDIOC_QUERYCAP, &cap) < 0) {
        perror("Failed to query device capabilities");
        close(videoFd);
        return 1;
    }

    if (!(cap.capabilities & V4L2_CAP_VIDEO_CAPTURE)) {
        fprintf(stderr, "Video capture not supported\n");
        close(videoFd);
        return 1;
    }

    // Set the desired capture format
    memset(&format, 0, sizeof(format));
    format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    format.fmt.pix.width = CAPTURE_WIDTH;
    format.fmt.pix.height = CAPTURE_HEIGHT;
    format.fmt.pix.pixelformat = V4L2_PIX_FMT_UYVY; // YUV 4:2:2 format
    if (ioctl(videoFd, VIDIOC_S_FMT, &format) < 0) {
        perror("Failed to set video format");
        close(videoFd);
        return 1;
    }

    // Request a single buffer for capture
    memset(&reqbuf, 0, sizeof(reqbuf));
    reqbuf.count = 1;
    reqbuf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    reqbuf.memory = V4L2_MEMORY_MMAP;
    if (ioctl(videoFd, VIDIOC_REQBUFS, &reqbuf) < 0) {
        perror("Failed to request buffer for capture");
        close(videoFd);
        return 1;
    }

    // Map the buffer for user-space access
    memset(&buf, 0, sizeof(buf));
    buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    buf.memory = V4L2_MEMORY_MMAP;
    buf.index = 0;
    if (ioctl(videoFd, VIDIOC_QUERYBUF, &buf) < 0) {
        perror("Failed to query buffer");
        close(videoFd);
        return 1;
    }

    void *buffer = mmap(NULL, buf.length, PROT_READ | PROT_WRITE, MAP_SHARED, videoFd, buf.m.offset);
    if (buffer == MAP_FAILED) {
        perror("Failed to map buffer");
        close(videoFd);
        return 1;
    }

    // Queue the buffer for capture
    if (ioctl(videoFd, VIDIOC_QBUF, &buf) < 0) {
        perror("Failed to queue buffer");
        munmap(buffer, buf.length);
        close(videoFd);
        return 1;
    }

    // Start capturing frames
    enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    if (ioctl(videoFd, VIDIOC_STREAMON, &type) < 0) {
        perror("Failed to start capturing");
        munmap(buffer, buf.length);
        close(videoFd);
        return 1;
    }

    // Wait for the frame to be captured
    if (ioctl(videoFd, VIDIOC_DQBUF, &buf) < 0) {
        perror("Failed to dequeue buffer");
        munmap(buffer, buf.length);
        close(videoFd);
        return 1;
    }

    // Convert the captured frame from YUV to RGB
    unsigned char *rgbBuffer = (unsigned char *)malloc(CAPTURE_WIDTH * CAPTURE_HEIGHT * 3);
    yuv2rgb((unsigned char *)buffer, rgbBuffer, CAPTURE_WIDTH, CAPTURE_HEIGHT);

    // Save the RGB frame as a JPEG image
    FILE *file = fopen("captured_frame.jpg", "wb");
    if (file == NULL) {
        perror("Failed to open output file");
        munmap(buffer, buf.length);
        free(rgbBuffer);
        close(videoFd);
        return 1;
    }

    // Use libjpeg to write the RGB data as a JPEG image
    struct jpeg_compress_struct cinfo;
    struct jpeg_error_mgr jerr;

    cinfo.err = jpeg_std_error(&jerr);
    jpeg_create_compress(&cinfo);
    jpeg_stdio_dest(&cinfo, file);

    cinfo.image_width = CAPTURE_WIDTH;
    cinfo.image_height = CAPTURE_HEIGHT;
    cinfo.input_components = 3;
    cinfo.in_color_space = JCS_RGB;

    jpeg_set_defaults(&cinfo);
    jpeg_set_quality(&cinfo, 80, TRUE);
    jpeg_start_compress(&cinfo, TRUE);

    JSAMPROW row_pointer[1];
    while (cinfo.next_scanline < cinfo.image_height) {
        row_pointer[0] = &rgbBuffer[cinfo.next_scanline * cinfo.image_width * cinfo.input_components];
        jpeg_write_scanlines(&cinfo, row_pointer, 1);
    }

    jpeg_finish_compress(&cinfo);
    fclose(file);
    jpeg_destroy_compress(&cinfo);

    // Stop capturing frames
    if (ioctl(videoFd, VIDIOC_STREAMOFF, &type) < 0) {
        perror("Failed to stop capturing");
        munmap(buffer, buf.length);
        free(rgbBuffer);
        close(videoFd);
        return 1;
    }

    // Clean up resources
    munmap(buffer, buf.length);
    free(rgbBuffer);
    close(videoFd);

    printf("Frame captured and saved successfully.\n");

    return 0;
}

Alternative yuv to rgb function (which produces similar, maybe slightly worse results):

void yuv2rgb(unsigned char *uyvy, unsigned char *rgb, int width, int height) {
    int i, j;
    int y0, u, y1, v;
    int r, g, b;
    int index_uyvy, index_rgb;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j += 2) {
            index_uyvy = (i * width + j) * 2;
            index_rgb = (i * width + j) * 3;

            y0 = uyvy[index_uyvy];
            u = uyvy[index_uyvy + 1];
            y1 = uyvy[index_uyvy + 2];
            v = uyvy[index_uyvy + 3];

            // Adjust U and V range to -128 to 127
            u -= 128;
            v -= 128;

            // UYVY to RGB conversion
            r = y0 + 1.402 * v;
            g = y0 - 0.344136 * u - 0.714136 * v;
            b = y0 + 1.772 * u;

            r = (r < 0) ? 0 : ((r > 255) ? 255 : r);
            g = (g < 0) ? 0 : ((g > 255) ? 255 : g);
            b = (b < 0) ? 0 : ((b > 255) ? 255 : b);

            rgb[index_rgb] = r;
            rgb[index_rgb + 1] = g;
            rgb[index_rgb + 2] = b;

            // UYVY to RGB conversion
            r = y1 + 1.402 * v;
            g = y1 - 0.344136 * u - 0.714136 * v;
            b = y1 + 1.772 * u;

            r = (r < 0) ? 0 : ((r > 255) ? 255 : r);
            g = (g < 0) ? 0 : ((g > 255) ? 255 : g);
            b = (b < 0) ? 0 : ((b > 255) ? 255 : b);

            rgb[index_rgb + 3] = r;
            rgb[index_rgb + 4] = g;
            rgb[index_rgb + 5] = b;
        }
    }
}

Solution

  • According to V4L - Packed YUV formats:

    Similarly to the packed RGB formats, the packed YUV formats store the Y, Cb and Cr components consecutively in memory. They may apply subsampling to the chroma components and thus differ in how they interlave the three components.

    Note

    • In all the tables that follow, bit 7 is the most significant bit in a byte.
    • 'Y', 'Cb' and 'Cr' denote bits of the luma, blue chroma (also known as 'U') and red chroma (also known as 'V') components respectively. 'A' denotes bits of the alpha component (if supported by the format), and 'X' denotes padding bits.
    Identifier Code Byte0 Byte1 Byte2 Byte3 Byte4 Byte5 Byte6 Byte7
    V4L2_PIX_FMT_UYVY 'UYVY' Cb0 Y'0 Cr0 Y'1 Cb2 Y'2 Cr2 Y'3

    These formats, commonly referred to as YUYV or YUY2, subsample the chroma components horizontally by 2, storing 2 pixels in a container. The container is 32-bits for 8-bit formats, and 64-bits for 10+-bit formats.

    The packed YUYV formats with more than 8 bits per component are stored as four 16-bit little-endian words. Each word's most significant bits contain one component, and the least significant bits are zero padding.

    Check your conversion code, i think you're not reading the correct component sequence.