Search code examples
c++ffmpegrgbyuvlibavcodec

Bad src image ptrs converting YUV to RGB after H264 decoding with libav and c++


I am getting "bad src image ptrs" errors when trying to convert my frames to RGB with sws_scale after decoding frames from an H264 file, and I cannot figure out what is going wrong.

I checked what is causing the error and found the check_image_pointers function in swscale.c. It validates that the planes and line sizes required by the pixel format (looked up with av_pix_fmt_desc_get) are present in the given data, which does not seem to be the case with my data.

The written pgm files look ok to me, also replaying the file works.

I printed the corresponding data of my frame. The problem seems to be that planes 1 and 2 have line sizes of 0, although all three planes seem to have data. The line size of plane 0 is three times the image width, which is also confusing to me.

Here is my output:

Have videoStreamIndex 0 codec id: 27
saving frame 1 C:\\tmp\\output-frame-1.pgm colorspace 2 pix_fmt 0 w: 3840 h: 2160
Required:
plane 0 : 0
plane 1 : 1
plane 2 : 2
plane 3 : 0
Present:
Frame plane 0: 1 , 11520
Frame plane 1: 1 , 0
Frame plane 2: 1 , 0
Frame plane 3: 0 , 0
Frame plane 4: 0 , 0
Frame plane 5: 0 , 0
Frame plane 6: 0 , 0
Frame plane 7: 0 , 0

Here is the whole code of my application; the issue occurs in the decode method:

#include <iostream>
#include <cstring>
#include <cstdio>
#include <cstdint>
#include <string>
#include <iostream>
#include <chrono>

// #include <opencv2/highgui.hpp>
// #include <opencv2/opencv.hpp>

extern "C"
{

#include <libswscale/swscale.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/opt.h>
#include <libavutil/pixdesc.h>
#include <libavutil/display.h>
#include "libavutil/imgutils.h"
}

#define INBUF_SIZE 4096
// Decodes the H.264 stream of a file and, for every decoded frame, prints
// diagnostic information, runs a YUV -> RGB24 conversion through sws_scale and
// writes plane 0 of the decoded frame to a PGM file.
//
// The container is opened with libavformat only to locate the H.264 stream;
// the actual bytes are read with fread() and split into packets by an
// AVCodecParserContext (see readAndDecode()).
class H264Decoder
{
public:
    // Opens inputFilename, finds the first H.264 stream and allocates the
    // parser, codec context, frame and packet used by decode().
    //
    // inputFilename:        path of the file to decode
    // outputFilenamePrefix: prefix of the PGM files written by decode()
    //
    // Throws std::runtime_error when any of the FFmpeg objects cannot be created.
    H264Decoder(const std::string &inputFilename, const std::string &outputFilenamePrefix)
    {

        // Open input file
        if (avformat_open_input(&formatContext, inputFilename.c_str(), nullptr, nullptr) != 0)
        {
            throw std::runtime_error("Could not open input file");
        }

        if (avformat_find_stream_info(formatContext, nullptr) < 0)
        {
            throw std::runtime_error("Could not find stream information");
        }

        // Find H.264 video stream (the first match wins)
        for (unsigned i = 0; i < formatContext->nb_streams; i++)
        {
            if (formatContext->streams[i]->codecpar->codec_id == AV_CODEC_ID_H264)
            {
                videoStreamIndex = i;
                std::cout << "Have videoStreamIndex " << videoStreamIndex << " codec id: " << formatContext->streams[i]->codecpar->codec_id << std::endl;
                break;
            }
        }

        // videoStreamIndex keeps its initial value of -1 when no stream matched
        if (videoStreamIndex == -1)
        {
            throw std::runtime_error("H.264 video stream not found");
        }

        // Initialize codec and codec context
        const AVCodec *codec = avcodec_find_decoder(formatContext->streams[videoStreamIndex]->codecpar->codec_id);
        if (!codec)
        {
            throw std::runtime_error("Codec not found");
        }

        parser = av_parser_init(codec->id);
        if (!parser)
        {
            throw std::runtime_error("parser not found");
        }

        codecContext = avcodec_alloc_context3(codec);
        if (!codecContext)
        {
            throw std::runtime_error("Could not allocate codec context");
        }

        if (avcodec_open2(codecContext, codec, nullptr) < 0)
        {
            throw std::runtime_error("Could not open codec");
        }

        // Initialize frame
        // NOTE(review): frame->format is written before the null check below,
        // so a failed av_frame_alloc() is dereferenced before it is detected.
        frame = av_frame_alloc();
        frame->format = AV_PIX_FMT_YUV420P;
        if (!frame)
        {
            throw std::runtime_error("Could not allocate frame");
        }

        inputPacket = av_packet_alloc();
        if (!inputPacket)
        {
            throw std::runtime_error("Could not allocate packet");
        }

        inputFilename_ = inputFilename;
        outputFilenamePrefix_ = outputFilenamePrefix;
    }

    // Sends the packet currently stored in inputPacket to the decoder and
    // handles every frame the decoder hands back: prints diagnostics, converts
    // the frame with sws_scale and saves plane 0 as "<prefix>-<frame_num>.pgm".
    //
    // Terminates the process with exit(1) when sending or receiving fails.
    void decode()
    {
        char buf[1024];
        int ret;

        ret = avcodec_send_packet(codecContext, inputPacket);
        if (ret < 0)
        {
            fprintf(stderr, "Error sending a packet for decoding\n");
            exit(1);
        }

        while (ret >= 0)
        {
            ret = avcodec_receive_frame(codecContext, frame);
            // EAGAIN / EOF: no further frame is available for this packet
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
                return;
            else if (ret < 0)
            {
                fprintf(stderr, "Error during decoding\n");
                exit(1);
            }

            /* the picture is allocated by the decoder. no need to
               free it */
            snprintf(buf, sizeof(buf), "%s-%" PRId64 ".pgm", outputFilenamePrefix_.c_str(), codecContext->frame_num);

            std::cout << "saving frame " << codecContext->frame_num << " " << buf << " colorspace " << frame->colorspace << " pix_fmt " << codecContext->pix_fmt << " w: " << frame->width << " h: " << frame->height << std::endl;

            // NOTE(review): a new scaler context is created for every frame and
            // no sws_freeContext() call is visible in this class.
            SwsContext *sws_ctx = NULL;

            sws_ctx = sws_getContext(codecContext->width,
                                     codecContext->height,
                                     codecContext->pix_fmt,
                                     codecContext->width,
                                     codecContext->height,
                                     AV_PIX_FMT_RGB24,
                                     SWS_BICUBIC,
                                     NULL,
                                     NULL,
                                     NULL);

            // Destination frame plus a separately allocated RGB24 buffer.
            // NOTE(review): neither frame2 nor frame2_buffer is released in this
            // function, so both are allocated anew for every decoded frame.
            AVFrame *frame2 = av_frame_alloc();
            int num_bytes = av_image_get_buffer_size(AV_PIX_FMT_RGB24, codecContext->width, codecContext->height, 32);
            uint8_t *frame2_buffer = (uint8_t *)av_malloc(num_bytes * sizeof(uint8_t));
            // NOTE(review): the data pointers passed here belong to frame2, but the
            // line-size array is frame->linesize, i.e. the one of the DECODED frame,
            // not frame2->linesize. This matches the printed "Present" values
            // (plane 0 line size 11520 = 3 * 3840, planes 1 and 2 at 0).
            av_image_fill_arrays(frame2->data, frame->linesize, frame2_buffer, AV_PIX_FMT_RGB24, codecContext->width, codecContext->height, 32);

            // Diagnostics: plane index of each of the first four components of the
            // decoder pixel format ...
            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(codecContext->pix_fmt);
            std::cout << "Required:" << std::endl;
            for (int i = 0; i < 4; i++)
            {
                int plane = desc->comp[i].plane;
                std::cout << "plane " << i << " : " << plane << std::endl;
            }
            // ... and, per plane of the decoded frame, whether a data pointer is set
            // followed by its line size.
            std::cout << "Present:" << std::endl;
            for (int i = 0; i < AV_NUM_DATA_POINTERS; ++i)
            {
                std::cout << "Frame plane " << i << ": " << static_cast<bool>(frame->data[i]) << " , " << frame->linesize[i] << std::endl;
            }

            // Convert the whole picture (rows 0 .. height) from frame into frame2.
            // The return value of sws_scale is not inspected.
            sws_scale(sws_ctx, frame->data,
                      frame->linesize, 0, codecContext->height,
                      frame2->data, frame2->linesize);

            // cv::Mat img(frame2->height, frame2->width, CV_8UC3, frame2->data[0]);
            // cv::imshow("Image", img);

            // Only plane 0 of the decoded frame is written to the PGM file.
            pgm_save(frame->data[0], frame->linesize[0],
                     frame->width, frame->height, buf);
        }
    }

    // Releases the format context, codec context, frame and packet.
    // NOTE(review): parser is not released here.
    ~H264Decoder()
    {
        avformat_close_input(&formatContext);
        avformat_free_context(formatContext);
        avcodec_free_context(&codecContext);
        av_frame_free(&frame);
        av_packet_free(&inputPacket);
    }

    // Reads the input file in chunks of INBUF_SIZE bytes, lets the parser cut the
    // byte stream into packets and calls decode() for every complete packet.
    // Prints the number of decoded frames and the elapsed time at the end.
    //
    // Terminates the process with exit(1) when the parser reports an error.
    void readAndDecode()
    {
        FILE *f;
        // Read buffer with AV_INPUT_BUFFER_PADDING_SIZE extra bytes at the end;
        // the padding bytes are not initialised in this function.
        uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
        uint8_t *data;
        size_t data_size;
        int ret;
        int eof;
        // NOTE(review): the result of fopen() is not checked and f is never closed.
        f = fopen(inputFilename_.c_str(), "rb");
        auto start = std::chrono::high_resolution_clock::now();
        do
        {
            /* read raw data from the input file */
            data_size = fread(inbuf, 1, INBUF_SIZE, f);
            if (ferror(f))
                break;
            // a read of zero bytes is treated as end of file
            eof = !data_size;

            /* use the parser to split the data into frames */
            data = inbuf;
            // At EOF the parser is still called (with zero bytes) until it stops
            // producing packets.
            while (data_size > 0 || eof)
            {
                ret = av_parser_parse2(parser, codecContext, &inputPacket->data, &inputPacket->size,
                                       data, data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
                if (ret < 0)
                {
                    fprintf(stderr, "Error while parsing\n");
                    exit(1);
                }
                // ret is the number of input bytes the parser consumed
                data += ret;
                data_size -= ret;

                if (inputPacket->size)
                {
                    decode();
                }
                else if (eof)
                {
                    break;
                }
            }
        } while (!eof);
        auto diff = std::chrono::high_resolution_clock::now() - start;
        std::cout << "Decoded " << codecContext->frame_num << " frames in " << std::chrono::duration_cast<std::chrono::milliseconds>(diff).count() << " ms" << std::endl;
    }

private:
    AVFormatContext *formatContext = nullptr; // opened in the constructor, used to locate the stream
    AVCodecContext *codecContext = nullptr;   // decoder context
    AVCodecParserContext *parser;             // splits the raw byte stream into packets (no default initialiser)
    AVFrame *frame = nullptr;                 // receives the decoded picture
    AVFrame *frameRgb = nullptr;              // not used anywhere in this class
    AVPacket *inputPacket = nullptr;          // packet filled by the parser and sent to the decoder
    int videoStreamIndex = -1;                // index of the H.264 stream, -1 while not found
    std::string inputFilename_;
    std::string outputFilenamePrefix_;

    // Writes a binary PGM ("P5") image with maximum value 255.
    //
    // buf:      first byte of the plane to write
    // wrap:     distance in bytes between the starts of two consecutive rows
    // xsize:    number of bytes written per row (image width)
    // ysize:    number of rows (image height)
    // filename: output path; the process exits with status 1 if it cannot be opened
    static void pgm_save(unsigned char *buf, int wrap, int xsize, int ysize, const char *filename)
    {
        FILE *f = fopen(filename, "wb");
        if (!f)
        {
            std::cout << "Error opening file for saving PGM" << std::endl;
            exit(1);
        }

        fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
        // each row is written separately so that only xsize of the wrap bytes are kept
        for (int i = 0; i < ysize; i++)
            fwrite(buf + i * wrap, 1, xsize, f);

        fclose(f);
    }
};

// Program entry point.
//
// Expects the path of the input file as first command line argument, decodes
// it with H264Decoder and writes the resulting frames below C:\tmp.
//
// Returns 0 on success and 1 when the argument is missing or decoding setup
// throws.
int main(int argc, char *argv[])
{
    if (argc < 2)
    {
        std::cout << "Please provide input file name as parameter" << std::endl;
        // Stop here: argv[1] is a null pointer when no argument was given, and
        // building a std::string from it is undefined behaviour.
        return 1;
    }

    std::string inputFilename = argv[1];
    std::string outputFilenamePrefix = "C:\\tmp\\output-frame";

    try
    {

        H264Decoder decoder(inputFilename, outputFilenamePrefix);
        decoder.readAndDecode();
    }
    catch (const std::exception &e)
    {
        std::cout << "Error: " << e.what() << std::endl;
        return 1;
    }

    return 0;
}

Solution

  • The issue was the way I tried to initialize frame2. I overwrote the linesize of the YUV frame instead of that of the RGB frame: av_image_fill_arrays(frame2->data, frame->linesize, frame2_buffer, AV_PIX_FMT_RGB24, codecContext->width, codecContext->height, 32);

    I removed the complete initialization of frame2 and used av_image_alloc to allocate the buffers for the rgb frame.

    Here is my current working code in case anyone wants to use it as a reference. Conversion changed from RGB to BGR to show it with OpenCV.

    #include <chrono>
    #include <cinttypes>
    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>
    #include <cstring>
    #include <iostream>
    #include <stdexcept>
    #include <string>
    
    #include <opencv2/highgui.hpp>
    #include <opencv2/opencv.hpp>
    
    extern "C" {
    #include <libavcodec/avcodec.h>
    #include <libavformat/avformat.h>
    #include <libavutil/imgutils.h>
    #include <libswscale/swscale.h>
    }
    
    #define INBUF_SIZE 4096
    class H264Decoder {
    public:
        H264Decoder(const std::string& inputFilename, const std::string& outputFilenamePrefix, uint16_t outputWidth,
                    uint16_t outputHeight, bool show, bool save)
                : doShow(show), doSave(save), inputFilename_(inputFilename), outputFilenamePrefix_(outputFilenamePrefix),
                  outputHeight(outputHeight), outputWidth(outputWidth) {
            // Open input file
            if (avformat_open_input(&formatContext, inputFilename.c_str(), nullptr, nullptr) != 0) {
                throw std::runtime_error("Could not open input file");
            }
    
            if (avformat_find_stream_info(formatContext, nullptr) < 0) {
                throw std::runtime_error("Could not find stream information");
            }
    
            // Find H.264 video stream
            for (unsigned i = 0; i < formatContext->nb_streams; i++) {
                if (formatContext->streams[i]->codecpar->codec_id == AV_CODEC_ID_H264) {
                    videoStreamIndex = i;
                    break;
                }
            }
    
            if (videoStreamIndex == -1) {
                throw std::runtime_error("H.264 video stream not found");
            }
    
            // Initialize codec and codec context
            codec = avcodec_find_decoder(formatContext->streams[videoStreamIndex]->codecpar->codec_id);
            if (!codec) {
                throw std::runtime_error("Codec not found");
            }
    
            parser = av_parser_init(codec->id);
            if (!parser) {
                throw std::runtime_error("parser not found");
            }
    
            codecContext = avcodec_alloc_context3(codec);
            if (!codecContext) {
                throw std::runtime_error("Could not allocate codec context");
            }
    
            if (avcodec_open2(codecContext, codec, nullptr) < 0) {
                throw std::runtime_error("Could not open codec");
            }
    
            // Initialize frame
            frame = av_frame_alloc();
            if (!frame) {
                throw std::runtime_error("Could not allocate frame");
            }
    
            frameRgb = av_frame_alloc();
            if (!frameRgb) {
                throw std::runtime_error("Could not allocate frame");
            }
            av_image_alloc(frameRgb->data, frameRgb->linesize, outputWidth, outputHeight, AV_PIX_FMT_BGR24, 32);
    
            inputPacket = av_packet_alloc();
            if (!inputPacket) {
                throw std::runtime_error("Could not allocate packet");
            }
    
        }
    
        void decode() {
            char buf[1024];
            int ret;
    
            ret = avcodec_send_packet(codecContext, inputPacket);
            if (ret < 0) {
                fprintf(stderr, "Error sending a packet for decoding\n");
                exit(1);
            }
    
            while (ret >= 0) {
                ret = avcodec_receive_frame(codecContext, frame);
                if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
                    return;
                else if (ret < 0) {
                    fprintf(stderr, "Error during decoding\n");
                    exit(1);
                }
    
                snprintf(buf, sizeof(buf), "%s-%" PRId64 ".pgm", outputFilenamePrefix_.c_str(), codecContext->frame_num);
    
                SwsContext* sws_ctx = NULL;
    
                sws_ctx = sws_getContext(codecContext->width, codecContext->height, codecContext->pix_fmt, outputWidth,
                                         outputHeight, AV_PIX_FMT_BGR24, SWS_BICUBIC, NULL, NULL, NULL);
                if (doSave) {
                    pgm_save(frame->data[0], frame->linesize[0], frame->width, frame->height, buf);
                }
    
                sws_scale(sws_ctx, frame->data, frame->linesize, 0, codecContext->height, frameRgb->data,
                          frameRgb->linesize);
    
                if (doShow) {
                    cv::Mat img(outputHeight, outputWidth, CV_8UC3, frameRgb->data[0]);
                    cv::imshow("Image", img);
                    cv::waitKey(1);
                }
            }
        }
    
        ~H264Decoder() {
            avformat_close_input(&formatContext);
            avformat_free_context(formatContext);
            avcodec_free_context(&codecContext);
            av_frame_free(&frame);
            av_frame_free(&frameRgb);
            av_packet_free(&inputPacket);
            av_freep(&frameRgb->data[0]);
        }
    
        void readAndDecode() {
            FILE* f;
            uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
            uint8_t* data;
            size_t data_size;
            int ret;
            int eof;
            f = fopen(inputFilename_.c_str(), "rb");
            if (!f) {
                std::cout << "Error opening file" << std::endl;;
                exit(1);
            }
            memset(inbuf + INBUF_SIZE, 0, AV_INPUT_BUFFER_PADDING_SIZE);
            auto start = std::chrono::high_resolution_clock::now();
            do {
                /* read raw data from the input file */
                data_size = fread(inbuf, 1, INBUF_SIZE, f);
                if (ferror(f))
                    break;
                eof = !data_size;
    
                /* use the parser to split the data into frames */
                data = inbuf;
                while (data_size > 0 || eof) {
                    ret = av_parser_parse2(parser, codecContext, &inputPacket->data, &inputPacket->size, data, data_size,
                                           AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
                    if (ret < 0) {
                        fprintf(stderr, "Error while parsing\n");
                        exit(1);
                    }
                    data += ret;
                    data_size -= ret;
    
                    if (inputPacket->size) {
                        decode();
                    } else if (eof) {
                        break;
                    }
                }
            } while (!eof);
            auto diff = std::chrono::high_resolution_clock::now() - start;
            std::cout << "Decoded " << codecContext->frame_num << " frames in "
                      << std::chrono::duration_cast<std::chrono::milliseconds>(diff).count() << " ms "
                      << std::chrono::duration_cast<std::chrono::milliseconds>(diff).count() / codecContext->frame_num
                      << " ms/frame " << std::endl;
        }
    
    private:
        bool doShow{false};
        bool doSave{true};
        const AVCodec* codec;
        AVFormatContext* formatContext = nullptr;
        AVCodecContext* codecContext = nullptr;
        AVCodecParserContext* parser;
        AVFrame* frame = nullptr;
        AVFrame* frameRgb = nullptr;
        AVPacket* inputPacket = nullptr;
        int videoStreamIndex = -1;
        std::string inputFilename_;
        std::string outputFilenamePrefix_;
        uint16_t outputHeight = 1280;
        uint16_t outputWidth = 1632;
    
        static void pgm_save(unsigned char* buf, int wrap, int xsize, int ysize, const char* filename) {
            FILE* f = fopen(filename, "wb");
            if (!f) {
                std::cout << "Error opening file for saving PGM" << std::endl;
                exit(1);
            }
    
            fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
            for (int i = 0; i < ysize; i++)
                fwrite(buf + i * wrap, 1, xsize, f);
    
            fclose(f);
        }
    };
    
    // Program entry point: decodes the file named by the first command line
    // argument and shows the frames scaled to 1632x1280 without saving them.
    // Exit status is 0 on success, 1 on a missing argument or a thrown error.
    int main(int argc, char* argv[]) {
        const bool haveInputArgument = argc >= 2;
        if (!haveInputArgument) {
            std::cout << "Please provide input file name as parameter" << std::endl;
            return 1;
        }

        const std::string sourcePath{argv[1]};
        const std::string framePrefix{"C:\\tmp\\pics\\output-frame"};
        const uint16_t displayWidth = 1632;
        const uint16_t displayHeight = 1280;

        int exitCode = 0;
        try {
            H264Decoder decoder(sourcePath, framePrefix, displayWidth, displayHeight, /*show=*/true, /*save=*/false);
            decoder.readAndDecode();
        } catch (const std::exception& failure) {
            std::cout << "Error: " << failure.what() << std::endl;
            exitCode = 1;
        }

        return exitCode;
    }