Search code examples
c++ c ffmpeg yuv subsampling

How to convert ffmpeg video frame to YUV444?


I have been following a tutorial on how to use ffmpeg and SDL to make a simple video player with no audio (yet). While working through the tutorial I realized it was out of date and that many of the functions it used, for both ffmpeg and SDL, were deprecated. So I searched for an up-to-date solution and found a Stack Overflow answer that supplied what the tutorial was missing.

However, it uses YUV420, which is of low quality. I want to implement YUV444, but after studying chroma subsampling for a bit and looking at the different YUV formats, I am confused as to how to implement it. From what I understand, YUV420 is a quarter of the quality of YUV444. YUV444 means every pixel has its own chroma sample and as such is more detailed, while YUV420 means pixels are grouped together and share the same chroma sample and is therefore less detailed.

And from what I understand the different formats of YUV(420, 422, 444) are different in the way they order y, u, and v. All of this is a bit overwhelming because I haven't done much with codecs, conversions, etc. Any help would be much appreciated and if additional info is needed please let me know before downvoting.

Here is the code from the answer I mentioned concerning the conversion to YUV420:

// Playback loop: decode each video packet with FFmpeg, convert the decoded
// frame to planar YUV 4:2:0 with libswscale, upload the three planes to a
// streaming SDL YV12 texture and present it.
//
// This fragment relies on variables declared outside it (renderer, screen,
// texture, sws_ctx, pCodecCtx, pCodecCtxOrig, pFormatCtx, pFrame, packet,
// event, videoStream, frameFinished and the plane pointers/sizes).
texture = SDL_CreateTexture(
        renderer,
        SDL_PIXELFORMAT_YV12,
        SDL_TEXTUREACCESS_STREAMING,
        pCodecCtx->width,
        pCodecCtx->height
        );
    if (!texture) {
        fprintf(stderr, "SDL: could not create texture - exiting\n");
        exit(1);
    }

    // initialize SWS context for software scaling
    sws_ctx = sws_getContext(pCodecCtx->width, pCodecCtx->height,
        pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height,
        AV_PIX_FMT_YUV420P,
        SWS_BILINEAR,
        NULL,
        NULL,
        NULL);
    if (!sws_ctx) {
        // sws_getContext returns NULL on failure; sws_scale must not be
        // called with a NULL context.
        fprintf(stderr, "Could not initialize the conversion context - exiting\n");
        exit(1);
    }

    // set up YV12 pixel array (12 bits per pixel)
    // The chroma planes are subsampled by two in both directions. Round the
    // chroma width and height UP so that frames with an odd width or height
    // still get a complete last column/row instead of overrunning the
    // buffers. For even dimensions this is identical to width / 2 and
    // width * height / 4.
    uvPitch = (pCodecCtx->width + 1) / 2;
    yPlaneSz = pCodecCtx->width * pCodecCtx->height;
    uvPlaneSz = uvPitch * ((pCodecCtx->height + 1) / 2);
    yPlane = (Uint8*)malloc(yPlaneSz);
    uPlane = (Uint8*)malloc(uvPlaneSz);
    vPlane = (Uint8*)malloc(uvPlaneSz);
    if (!yPlane || !uPlane || !vPlane) {
        fprintf(stderr, "Could not allocate pixel buffers - exiting\n");
        exit(1);
    }

    while (av_read_frame(pFormatCtx, &packet) >= 0) {
        // Is this a packet from the video stream?
        if (packet.stream_index == videoStream) {
            // Decode video frame. Only trust frameFinished when the decoder
            // did not report an error for this packet.
            if (avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished,
                    &packet) >= 0 && frameFinished) {
                // Zero-initialize so the plane pointers and line sizes that
                // are not set below are well-defined when the arrays are
                // handed to sws_scale.
                AVPicture pict = {0};
                pict.data[0] = yPlane;
                pict.data[1] = uPlane;
                pict.data[2] = vPlane;
                pict.linesize[0] = pCodecCtx->width;
                pict.linesize[1] = uvPitch;
                pict.linesize[2] = uvPitch;

                // Convert the image into YUV format that SDL uses
                sws_scale(sws_ctx, (uint8_t const * const *)pFrame->data,
                    pFrame->linesize, 0, pCodecCtx->height, pict.data,
                    pict.linesize);

                // The pitches given to SDL must match the line sizes the
                // planes were written with above.
                SDL_UpdateYUVTexture(
                    texture,
                    NULL,
                    yPlane,
                    pCodecCtx->width,
                    uPlane,
                    uvPitch,
                    vPlane,
                    uvPitch
                    );

                SDL_RenderClear(renderer);
                SDL_RenderCopy(renderer, texture, NULL, NULL);
                SDL_RenderPresent(renderer);

            }
        }

        // Free the packet that was allocated by av_read_frame
        av_free_packet(&packet);

        // Drain every pending event. SDL_PollEvent returns 0 when the queue
        // is empty and leaves `event` untouched in that case, so event.type
        // is only inspected after a successful poll.
        while (SDL_PollEvent(&event)) {
            switch (event.type) {
                case SDL_QUIT:
                    SDL_DestroyTexture(texture);
                    SDL_DestroyRenderer(renderer);
                    SDL_DestroyWindow(screen);
                    SDL_Quit();
                    exit(0);
                    break;
                default:
                    break;
            }
        }

    }

    // Free the YUV frame
    av_frame_free(&pFrame);
    free(yPlane);
    free(uPlane);
    free(vPlane);

    // Close the codec
    avcodec_close(pCodecCtx);
    avcodec_close(pCodecCtxOrig);

    // Close the video file
    avformat_close_input(&pFormatCtx);

EDIT:

After more research I learned that YUV420 is stored with all the Y's first, then a combination of U and V bytes one after another, as illustrated by this image:
(source: wikimedia.org)

However I also learned that YUV444 is stored in the order U, Y, V and repeats like this picture shows:

I tried changing some things around in code:

    // I changed SDL_PIXELFORMAT_YV12 to SDL_PIXELFORMAT_UYVY
    // as to reflect the order of YUV444
    // NOTE(review): SDL's documentation appears to describe UYVY as a packed
    // 4:2:2 format rather than 4:4:4, while SDL_UpdateYUVTexture below is fed
    // three separate planes - confirm that this texture format matches.
    texture = SDL_CreateTexture(
        renderer,
        SDL_PIXELFORMAT_UYVY,
        SDL_TEXTUREACCESS_STREAMING,
        pCodecCtx->width,
        pCodecCtx->height
        );
    if (!texture) {
        fprintf(stderr, "SDL: could not create texture - exiting\n");
        exit(1);
    }

    // Changed AV_PIX_FMT_YUV420P to AV_PIX_FMT_YUV444P
    // for rather obvious reasons
    // Source and destination dimensions are the same, so this context only
    // converts the pixel format.
    sws_ctx = sws_getContext(pCodecCtx->width, pCodecCtx->height,
        pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height,
        AV_PIX_FMT_YUV444P,
        SWS_BILINEAR,
        NULL,
        NULL,
        NULL);

    // There are as many Y, U and V bytes as pixels I just
    // made yPlaneSz and uvPlaneSz equal to the number of pixels
    yPlaneSz = pCodecCtx->width * pCodecCtx->height;
    uvPlaneSz = pCodecCtx->width * pCodecCtx->height;
    yPlane = (Uint8*)malloc(yPlaneSz);
    uPlane = (Uint8*)malloc(uvPlaneSz);
    vPlane = (Uint8*)malloc(uvPlaneSz);
    if (!yPlane || !uPlane || !vPlane) {
        fprintf(stderr, "Could not allocate pixel buffers - exiting\n");
        exit(1);
    }

    // NOTE(review): uvPitch is set to twice the width, but the U and V rows
    // are written below with a line size of width and each plane holds only
    // width * height bytes. A consumer that walks the planes with this pitch
    // would step past the end of the buffers.
    uvPitch = pCodecCtx->width * 2;
    while (av_read_frame(pFormatCtx, &packet) >= 0) {
        // Is this a packet from the video stream?
        if (packet.stream_index == videoStream) {
            // Decode video frame
            avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished, &packet);

            // Rearranged the order of the planes to reflect UYV order
            // then set linesize to the number of Y, U and V bytes
            // per row
            // NOTE(review): the conversion target above is
            // AV_PIX_FMT_YUV444P; presumably sws_scale treats data[0] as the
            // Y plane, in which case this ordering writes luma into uPlane
            // and the first chroma plane into yPlane - verify against the
            // FFmpeg pixel format documentation.
            if (frameFinished) {
                AVPicture pict;
                pict.data[0] = uPlane;
                pict.data[1] = yPlane;
                pict.data[2] = vPlane;
                pict.linesize[0] = pCodecCtx->width;
                pict.linesize[1] = pCodecCtx->width;
                pict.linesize[2] = pCodecCtx->width;

                // Convert the image into YUV format that SDL uses
                sws_scale(sws_ctx, (uint8_t const * const *)pFrame->data,
                    pFrame->linesize, 0, pCodecCtx->height, pict.data,
                    pict.linesize);

                // NOTE(review): the Y pitch is passed as 1 although the
                // planes above were filled with a line size of width, and the
                // U/V pitch (uvPitch) is width * 2. Neither matches the
                // layout written by sws_scale.
                SDL_UpdateYUVTexture(
                    texture,
                    NULL,
                    yPlane,
                    1,
                    uPlane,
                    uvPitch,
                    vPlane,
                    uvPitch
                    );
//.................................................

But now I get an access violation at the call to SDL_UpdateYUVTexture... I'm honestly not sure what's wrong. I think it may have to do with setting the data and linesize members of the AVPicture (pict) improperly, but I'm not positive.


Solution

  • After many hours of scouring the web for possible answers I stumbled upon this post in which someone was asking about YUV444 support for packed or planar mode. The only current format I've found is AYUV which is packed.

    The answer they got was a list of all the currently supported formats which did not include AYUV. Therefore SDL does not support YUV444.

    The only solution is to use a different library that supports AYUV / YUV444.