Search code examples
c++cffmpegdecodeh.264

ffmpeg decoding through C-API leads to artefacts when input resolution is 1200x1600. Am I doing something wrong?


Using the C-API and FFmpeg 5.1 I have been able to encode h264 videos with libx264 on Android. Now I wanted to replay them on Linux inside my C++ application. These videos can be played correctly on a browser, or on other players that I tried like mplayer or ffplay from ffmpeg. Also, I can unroll the frames with ffmpeg -i recording.mp4 -start_number 0 -qscale:v 5 %06d.jpg and the images look alright.

However in my C++ application every now and then, but in a very repeatable way, I get artifacts (like the bright pixels showing up above the monitor). They do not accumulate, even though they are not related to keyframes. So whatever error is going on, it doesn't seem to have an impact on subsequent frames. I use OpenCV to visualize the output, and I am pretty sure the problem is not the conversion to BGR because the artifact is already there if I simply show the y channel (luminance, grayscale).

These artifacts show up in videos that I have recorded with a 1200x1600 resolution. It is to be noted that 1200 is not divisible by 32 so ffmpeg does add some padding, but I am dealing with it and it's not an issue. Videos recorded at 1920x1440 are replayed with no artifacts. Two sample videos can be found here for download.

Below is the code I am using; at the bottom you can see a picture of my decoded image with the artifact, and the same frame as unrolled by the ffmpeg command line. Note that I am working with a custom-built version of ffmpeg, built from conan packages, while the unrolling is done with the command-line ffmpeg that ships with Ubuntu.

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
}

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <iostream>
/// Opens FILE_NAME, decodes its best video stream and passes every decoded frame to display().
///
/// FILE_NAME and display() are not defined in this block and must be provided elsewhere.
/// Every setup failure prints a message and exits with status 1; a normal run returns 0.
int main(int argc, char** argv) {

    int ret;

    // Packet object that is reused for every av_read_frame() call below.
    auto pkt = av_packet_alloc();
    if (!pkt) {
        std::cerr << "Failed av_packet_alloc()" << std::endl;
        exit(1);
    }

    // Open the container and print its stream layout.
    AVFormatContext* av_format = avformat_alloc_context();
    ret = avformat_open_input(&av_format, FILE_NAME, nullptr, nullptr);
    if (ret < 0) {
        std::cerr << "Failed avformat_open_input, Error: " << ret << std::endl;
        ///Error codes https://stackoverflow.com/questions/12780931/ffmpeg-exit-status-1094995529
        exit(1);
    }
    av_dump_format(av_format, 0, FILE_NAME, 0);
    // Pick the video stream; a negative index means none was found.
    auto video_st_number = av_find_best_stream(av_format, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0);
    if (video_st_number < 0) {
        std::cerr << "av_find_best_stream couldn't find video stream" << std::endl;
        exit(1);
    }
    auto video_st = av_format->streams[video_st_number];
    auto codec_id = video_st->codecpar->codec_id;
    std::cout << "Duration " << video_st->duration << std::endl;
    std::cout << "n_frames " << video_st->nb_frames << std::endl;

    // Frame object that is reused for every avcodec_receive_frame() call below.
    auto frame = av_frame_alloc();
    if (!frame) {
        fprintf(stderr, "Could not allocate video frame\n");
        exit(1);
    }

    // Look up a decoder for the stream's codec and create a context for it.
    auto codec = avcodec_find_decoder(codec_id);
    if (!codec) {
        fprintf(stderr, "Codec not found\n");
        exit(1);
    }

    auto c = avcodec_alloc_context3(codec);
    if (!c) {
        fprintf(stderr, "Could not allocate video codec context\n");
        exit(1);
    }


    // Copy the stream's codec parameters into the decoder context; any non-zero result is treated as failure.
    if ((ret = avcodec_parameters_to_context(c, video_st->codecpar))) {
        fprintf(stderr, "Failed avcodec_parameters_to_context\n");
        exit(1);
    }

    c->pix_fmt = AV_PIX_FMT_YUV420P;///Not really necessary
    c->thread_count = 1;///No impact

    /* open it */
    if (avcodec_open2(c, codec, NULL) < 0) {
        fprintf(stderr, "Could not open codec\n ");
        exit(1);
    }

    std::size_t counter = 0;      // number of frames decoded so far
    std::size_t n_keyframes = 0;  // running sum of frame->key_frame

    // Read loop. The condition tests the result of the PREVIOUS av_read_frame(); the packet of the
    // current read is inspected before that result is checked, so a failed read is only noticed
    // through the zero-size branch (presumably a failed read leaves pkt empty -- confirm).
    // NOTE(review): pkt->stream_index is never compared with video_st_number, so packets of every
    // stream in the file are sent to the video decoder.
    while (ret >= 0) {
        ret = av_read_frame(av_format, pkt);
        if (pkt->size == 0) {
            std::cout << "Skipping packet of size zero" << std::endl;
            av_packet_unref(pkt);
            continue;
        }
        // The body runs only while avcodec_send_packet() returns non-zero, i.e. the packet was not
        // accepted: one frame is pulled from the decoder and the same packet is sent again.
        // When the send succeeds right away, no frame is received in this iteration.
        while (avcodec_send_packet(c, pkt) != 0) {
            if (avcodec_receive_frame(c, frame) != 0) {
                std::cerr << "Error receiving frame" << std::endl;
                exit(1);
            } else {
                n_keyframes += frame->key_frame;
                // NOTE(review): divides by pkt->duration; a packet with duration 0 would fault here.
                std::cout << "Decoded " << ++counter << " frames. Frame No. " << frame->pts / pkt->duration << " "
                          << frame->decode_error_flags << " " << frame->key_frame << " " << n_keyframes << " "
                          << frame->pkt_dts << std::endl;
            }
            display(frame);
        }
        av_packet_unref(pkt);
    }

    // A null packet tells the decoder that no more input follows; the loop below collects what is left.
    avcodec_send_packet(c, nullptr);
    std::cout << "Flushing decoder" << std::endl;

    // Unlike the loop above, this prints the raw frame->pts (not divided by a packet duration).
    while (avcodec_receive_frame(c, frame) == 0) {
        n_keyframes += frame->key_frame;
        std::cout << "Decoded " << ++counter << " frames. Frame No. " << frame->pts << " " << frame->decode_error_flags
                  << " " << frame->key_frame << " " << n_keyframes << " " << frame->pkt_dts << std::endl;

        display(frame);
    }

    // Release decoder, demuxer, frame and packet.
    // NOTE(review): the context was opened with avformat_open_input(); FFmpeg documents
    // avformat_close_input() as its counterpart -- confirm avformat_free_context() is enough here.
    avcodec_free_context(&c);
    avformat_free_context(av_format);
    av_frame_free(&frame);
    av_packet_free(&pkt);

    return 0;
}

Picture as from my encoder | Picture from ffmpeg command line unrolling

For completeness, this is the display function, using openCV

/// Packs the three planes of a decoded frame into one single-channel image, converts it to BGR
/// with cv::COLOR_YUV2BGR_I420 and shows the result in the "Video" window.
///
/// The packed image has frame->height * 3 / 2 rows: the luma rows first, followed by the rows
/// copied from data[1] and then data[2], two source rows per image row.
/// Blocks in cv::waitKey(0) after showing the frame.
///
/// @param frame Decoded frame; presumably planar YUV 4:2:0 (the caller requests AV_PIX_FMT_YUV420P).
void display(const AVFrame* frame) {
    // Static so that the allocation is reused from one call to the next.
    static std::vector<uint8_t> yuv_buffer;
    // NOTE(review): the Mat below has frame->height * 3 / 2 rows of linesize[0] bytes, but the buffer
    // is sized with frame->width as the row count -- too small whenever height > width.
    yuv_buffer.resize(frame->linesize[0] * 3 / 2 * frame->width);
    // Wrap the buffer (no copy); the row step equals the frame's luma line size.
    cv::Mat mYUV(frame->height * 3 / 2, frame->width, CV_8UC1, yuv_buffer.data(), frame->linesize[0]);
    // Same step on both sides, so the whole luma plane is copied with a single memcpy.
    memcpy(mYUV.ptr(), frame->data[0], frame->linesize[0] * frame->height);
    //cv::imshow("grayscale", mYUV.rowRange(0, frame->height));
    //cv::imshow("u", cv::Mat(frame->height / 2, frame->width / 2, CV_8UC1, frame->data[1], frame->linesize[1]));
    //cv::imshow("v", cv::Mat(frame->height / 2, frame->width / 2, CV_8UC1, frame->data[2], frame->linesize[2]));

    // Chroma rows are appended below the luma rows. Each pass of the loops consumes two source rows
    // (note the extra j++) and writes them into the left and right half of one destination row.
    // NOTE(review): each memcpy copies frame->width bytes although the second copy starts only
    // frame->width / 2 bytes after the first, so each copy is twice as long as its slot.
    int dest_row = frame->height;
    for (int j = 0; j < frame->height / 2; j++) {
        memcpy(mYUV.ptr(dest_row), frame->data[1] + frame->linesize[1] * j, frame->width);
        j++;
        memcpy(mYUV.ptr(dest_row) + frame->width / 2, frame->data[1] + frame->linesize[1] * j, frame->width);
        dest_row++;
    }
    for (int j = 0; j < frame->height / 2; j++) {
        memcpy(mYUV.ptr(dest_row), frame->data[2] + frame->linesize[2] * j, frame->width);
        j++;
        memcpy(mYUV.ptr(dest_row) + frame->width / 2, frame->data[2] + frame->linesize[2] * j, frame->width);
        dest_row++;
    }
    // Convert the packed image to 3-channel BGR and display it.
    cv::Mat mRGB(frame->height, frame->width, CV_8UC3);
    cvtColor(mYUV, mRGB, cv::COLOR_YUV2BGR_I420, 3);
    cv::imshow("Video", mRGB);
    // Wait for a key press before returning, so frames are stepped through one at a time.
    cv::waitKey(0);
}

Note: The AVFrame -> cv::Mat converter is now available in corrected version as answer here.


Solution

  • When executing the code in Visual Studio, I get the exception "Access violation writing location..." in the display function.

    The reason is that the allocated size is frame->linesize[0] * 3 / 2 * frame->width instead of frame->linesize[0] * 3 / 2 * frame->height (should be height instead of width).

    Replace: yuv_buffer.resize(frame->linesize[0] * 3 / 2 * frame->width); with:

    yuv_buffer.resize(frame->linesize[0] * 3 / 2 * frame->height);
    

    Writing out of the bounds of the buffer may cause strange artefacts.


    Another small issue is in the loops that copy U and V channels:
    Number of bytes to be copied should be frame->width/2 instead of frame->width (U, V row size is frame->width/2).

    // Append the chroma planes below the luma rows of mYUV. Every loop pass takes two source rows
    // (note the extra j++) and stores them in the left and right half of one destination row, each
    // frame->width / 2 bytes wide.
    // NOTE(review): because rows are consumed in pairs, this assumes frame->height / 2 is even
    // (height divisible by 4); otherwise the last pass reads source row index height / 2.
    int dest_row = frame->height;
    // Plane 1 (data[1]).
    for (int j = 0; j < frame->height / 2; j++) {
        //memcpy(mYUV.ptr(dest_row), frame->data[1] + frame->linesize[1] * j, frame->width);  //<--- should be frame->width/2 instead of frame->width
        memcpy(mYUV.ptr(dest_row), frame->data[1] + frame->linesize[1] * j, frame->width/2);
        j++;
        memcpy(mYUV.ptr(dest_row) + frame->width / 2, frame->data[1] + frame->linesize[1] * j, frame->width/2);
        dest_row++;
    }
    // Plane 2 (data[2]) continues in the rows directly after plane 1.
    for (int j = 0; j < frame->height / 2; j++) {
        memcpy(mYUV.ptr(dest_row), frame->data[2] + frame->linesize[2] * j, frame->width/2);
        j++;
        memcpy(mYUV.ptr(dest_row) + frame->width / 2, frame->data[2] + frame->linesize[2] * j, frame->width/2);
        dest_row++;
    }
    

    For completeness, here is the complete code sample:

    extern "C" {
    #include <libavcodec/avcodec.h>
    #include <libavformat/avformat.h>
    }
    
    #include <opencv2/opencv.hpp>
    
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    #include <vector>
    
    #include <iostream>
    
    #define FILE_NAME "portrait1200x1400.mp4"
    
    /// Converts a decoded planar YUV 4:2:0 frame to BGR with OpenCV and shows it in the "Video" window.
    ///
    /// The three planes are packed into one single-channel image of height * 3 / 2 rows, the layout
    /// that cv::COLOR_YUV2BGR_I420 reads: all luma rows first, then the U rows, then the V rows, where
    /// every image row below the luma part holds two chroma rows (width / 2 bytes each) side by side.
    /// The chroma rows are placed with a running half-row index, so the layout is also correct when
    /// height / 2 is odd (height % 4 == 2); in that case the V block starts in the right half of a row.
    ///
    /// @param frame Decoded frame. Assumed to be 8-bit planar 4:2:0 (e.g. AV_PIX_FMT_YUV420P) with even
    ///              width and height and non-negative line sizes.
    ///
    /// Blocks in cv::waitKey(0) until a key is pressed, so frames are stepped through one by one.
    static void display(const AVFrame* frame) {
        // Static so that the allocation is reused from one frame to the next.
        static std::vector<uint8_t> yuv_buffer;

        const int luma_rows = frame->height;
        const int packed_rows = frame->height * 3 / 2;
        const int chroma_rows = frame->height / 2;
        const int chroma_width = frame->width / 2;
        const std::size_t stride = static_cast<std::size_t>(frame->linesize[0]);

        // One stride-wide line per packed image row: the size depends on the HEIGHT, not the width.
        // Computed in size_t so large frames cannot overflow int arithmetic.
        yuv_buffer.resize(stride * static_cast<std::size_t>(packed_rows));

        // Wrap the buffer without copying; using the frame's luma stride as the row step lets the
        // whole Y plane be transferred with a single memcpy.
        cv::Mat mYUV(packed_rows, frame->width, CV_8UC1, yuv_buffer.data(), stride);
        memcpy(mYUV.data, frame->data[0], stride * static_cast<std::size_t>(luma_rows));
        //cv::imshow("grayscale", mYUV.rowRange(0, frame->height));
        //cv::imshow("u", cv::Mat(frame->height / 2, frame->width / 2, CV_8UC1, frame->data[1], frame->linesize[1]));
        //cv::imshow("v", cv::Mat(frame->height / 2, frame->width / 2, CV_8UC1, frame->data[2], frame->linesize[2]));

        // `slot` counts half rows written below the luma part: slot / 2 selects the image row and
        // slot % 2 selects its left or right half. It keeps running from the U plane into the V plane.
        int slot = 0;
        const auto pack_plane = [&](int plane) {
            for (int row = 0; row < chroma_rows; ++row, ++slot) {
                uint8_t* dst = mYUV.ptr(luma_rows + slot / 2) + (slot % 2) * chroma_width;
                const uint8_t* src = frame->data[plane] + frame->linesize[plane] * row;
                memcpy(dst, src, static_cast<std::size_t>(chroma_width));
            }
        };
        pack_plane(1);  // U
        pack_plane(2);  // V

        cv::Mat mRGB(frame->height, frame->width, CV_8UC3);
        cvtColor(mYUV, mRGB, cv::COLOR_YUV2BGR_I420, 3);
        cv::imshow("Video", mRGB);
        cv::waitKey(0);
    }
    
    
    int main(int argc, char** argv) {
    
        int ret;
    
        auto pkt = av_packet_alloc();
        if (!pkt) {
            std::cerr << "Failed av_packet_alloc()" << std::endl;
            exit(1);
        }
    
        AVFormatContext* av_format = avformat_alloc_context();
        ret = avformat_open_input(&av_format, FILE_NAME, nullptr, nullptr);
        if (ret < 0) {
            std::cerr << "Failed avformat_open_input, Error: " << ret << std::endl;
            ///Error codes https://stackoverflow.com/questions/12780931/ffmpeg-exit-status-1094995529
            exit(1);
        }
        av_dump_format(av_format, 0, FILE_NAME, 0);
        auto video_st_number = av_find_best_stream(av_format, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0);
        if (video_st_number < 0) {
            std::cerr << "av_find_best_stream couldn't find video stream" << std::endl;
            exit(1);
        }
        auto video_st = av_format->streams[video_st_number];
        auto codec_id = video_st->codecpar->codec_id;
        std::cout << "Duration " << video_st->duration << std::endl;
        std::cout << "n_frames " << video_st->nb_frames << std::endl;
    
        auto frame = av_frame_alloc();
        if (!frame) {
            fprintf(stderr, "Could not allocate video frame\n");
            exit(1);
        }
    
        auto codec = avcodec_find_decoder(codec_id);
        if (!codec) {
            fprintf(stderr, "Codec not found\n");
            exit(1);
        }
    
        auto c = avcodec_alloc_context3(codec);
        if (!c) {
            fprintf(stderr, "Could not allocate video codec context\n");
            exit(1);
        }
    
    
        if ((ret = avcodec_parameters_to_context(c, video_st->codecpar))) {
            fprintf(stderr, "Failed avcodec_parameters_to_context\n");
            exit(1);
        }
    
        c->pix_fmt = AV_PIX_FMT_YUV420P;///Not really necessary
        c->thread_count = 1;///No impact
    
        /* open it */
        if (avcodec_open2(c, codec, NULL) < 0) {
            fprintf(stderr, "Could not open codec\n ");
            exit(1);
        }
    
        std::size_t counter = 0;
        std::size_t n_keyframes = 0;
    
        while (ret >= 0) {
            ret = av_read_frame(av_format, pkt);
            if (pkt->size == 0) {
                std::cout << "Skipping packet of size zero" << std::endl;
                av_packet_unref(pkt);
                continue;
            }
            while (avcodec_send_packet(c, pkt) != 0) {
                if (avcodec_receive_frame(c, frame) != 0) {
                    std::cerr << "Error receiving frame" << std::endl;
                    exit(1);
                } else {
                    n_keyframes += frame->key_frame;
                    std::cout << "Decoded " << ++counter << " frames. Frame No. " << frame->pts / pkt->duration << " "
                              << frame->decode_error_flags << " " << frame->key_frame << " " << n_keyframes << " "
                              << frame->pkt_dts << std::endl;
                }
                display(frame);
            }
            av_packet_unref(pkt);
        }
    
        avcodec_send_packet(c, nullptr);
        std::cout << "Flushing decoder" << std::endl;
    
        while (avcodec_receive_frame(c, frame) == 0) {
            n_keyframes += frame->key_frame;
            std::cout << "Decoded " << ++counter << " frames. Frame No. " << frame->pts << " " << frame->decode_error_flags
                      << " " << frame->key_frame << " " << n_keyframes << " " << frame->pkt_dts << std::endl;
    
            display(frame);
        }
    
        avcodec_free_context(&c);
        avformat_free_context(av_format);
        av_frame_free(&frame);
        av_packet_free(&pkt);
    
        return 0;
    }