Search code examples
c++ffmpegh.264libav

FFmpeg C++ api decode h264 error


I'm trying to use the C++ API of FFMpeg (version 20150526) under Windows using the prebuilt binaries to decode an h264 video file (*.ts).

I've written a very simple code that automatically detects the required codec from the file itself (and it is AV_CODEC_ID_H264, as expected).

Then I re-open the video file in read-binary mode and I read a fixed-size buffer of bytes from it and provide the read bytes to the decoder within a while-loop until the end of file. However when I call the function avcodec_decode_video2 a large amount of errors happen like the following ones:

[h264 @ 008df020] top block unavailable for requested intro mode at 34 0

[h264 @ 008df020] error while decoding MB 34 0, bytestream 3152

[h264 @ 008df020] decode_slice_header error

Sometimes the function avcodec_decode_video2 sets the value of got_picture_ptr to 1 and hence I expect to find a good frame. Instead, though all the computations are successful, when I view the decoded frame (using OpenCV only for visualization purposes) I see a gray one with some artifacts.

If I employ the same code to decode an *.avi file it works fine.

Reading the examples of FFMpeg I did not find a solution to my problem. I've also implemented the solution proposed in the simlar question FFmpeg c++ H264 decoding error but it did not work.

Does anyone know where the error is?

Thank you in advance for any reply!

The code is the following [EDIT: code updated including the parser management]:

#include <iostream>
#include <iomanip>
#include <string>
#include <sstream>

#include <opencv2/opencv.hpp>

#ifdef __cplusplus
extern "C"
{
#endif // __cplusplus
#include <libavcodec/avcodec.h>
#include <libavdevice/avdevice.h>
#include <libavfilter/avfilter.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavutil/avutil.h>
#include <libpostproc/postprocess.h>
#include <libswresample/swresample.h>
#include <libswscale/swscale.h>
#ifdef __cplusplus
} // end extern "C".
#endif // __cplusplus

#define INBUF_SIZE  4096

void main()
{
    AVCodec*            l_pCodec;
    AVCodecContext*     l_pAVCodecContext;
    SwsContext*         l_pSWSContext;
    AVFormatContext*    l_pAVFormatContext;
    AVFrame*            l_pAVFrame;
    AVFrame*            l_pAVFrameBGR;
    AVPacket            l_AVPacket;
    AVPacket            l_AVPacket_out;
    AVStream*           l_pStream;
    AVCodecParserContext*   l_pParser;
    FILE*               l_pFile_in;
    FILE*               l_pFile_out;
    std::string         l_sFile;
    int                 l_iResult;
    int                 l_iFrameCount;
    int                 l_iGotFrame;
    int                 l_iBufLength;
    int                 l_iParsedBytes;
    int                 l_iPts;
    int                 l_iDts;
    int                 l_iPos;
    int                 l_iSize;
    int                 l_iDecodedBytes;
    uint8_t             l_auiInBuf[INBUF_SIZE + FF_INPUT_BUFFER_PADDING_SIZE];
    uint8_t*            l_pData;
    cv::Mat             l_cvmImage;

    l_pCodec = NULL;
    l_pAVCodecContext = NULL;
    l_pSWSContext = NULL;
    l_pAVFormatContext = NULL;
    l_pAVFrame = NULL;
    l_pAVFrameBGR = NULL;
    l_pParser = NULL;
    l_pStream = NULL;
    l_pFile_in = NULL;
    l_pFile_out = NULL;
    l_iPts = 0;
    l_iDts = 0;
    l_iPos = 0;
    l_pData = NULL;

    l_sFile = "myvideo.ts";

    avdevice_register_all();
    avfilter_register_all();
    avcodec_register_all();
    av_register_all();
    avformat_network_init();

    l_pAVFormatContext = avformat_alloc_context();

    l_iResult = avformat_open_input(&l_pAVFormatContext,
                                    l_sFile.c_str(),
                                    NULL,
                                    NULL);

    if (l_iResult >= 0)
    {
        l_iResult = avformat_find_stream_info(l_pAVFormatContext, NULL);

        if (l_iResult >= 0)
        {
            for (int i=0; i<l_pAVFormatContext->nb_streams; i++)
            {
                if (l_pAVFormatContext->streams[i]->codec->codec_type ==
                        AVMEDIA_TYPE_VIDEO)
                {
                    l_pCodec = avcodec_find_decoder(
                                l_pAVFormatContext->streams[i]->codec->codec_id);

                    l_pStream = l_pAVFormatContext->streams[i];
                }
            }
        }
    }

    av_init_packet(&l_AVPacket);
    av_init_packet(&l_AVPacket_out);

    memset(l_auiInBuf + INBUF_SIZE, 0, FF_INPUT_BUFFER_PADDING_SIZE);

    if (l_pCodec)
    {
        l_pAVCodecContext = avcodec_alloc_context3(l_pCodec);

        l_pParser = av_parser_init(l_pAVCodecContext->codec_id);

        if (l_pParser)
        {
            av_register_codec_parser(l_pParser->parser);
        }

        if (l_pAVCodecContext)
        {
            if (l_pCodec->capabilities & CODEC_CAP_TRUNCATED)
            {
                l_pAVCodecContext->flags |= CODEC_FLAG_TRUNCATED;
            }

            l_iResult = avcodec_open2(l_pAVCodecContext, l_pCodec, NULL);

            if (l_iResult >= 0)
            {
                l_pFile_in = fopen(l_sFile.c_str(), "rb");

                if (l_pFile_in)
                {
                    l_pAVFrame = av_frame_alloc();
                    l_pAVFrameBGR = av_frame_alloc();

                    if (l_pAVFrame)
                    {
                        l_iFrameCount = 0;

                        avcodec_get_frame_defaults(l_pAVFrame);

                        while (1)
                        {
                            l_iBufLength = fread(l_auiInBuf,
                                                 1,
                                                 INBUF_SIZE,
                                                 l_pFile_in);

                            if (l_iBufLength == 0)
                            {
                                break;
                            }
                            else
                            {
                                l_pData = l_auiInBuf;
                                l_iSize = l_iBufLength;

                                while (l_iSize > 0)
                                {
                                    if (l_pParser)
                                    {
                                        l_iParsedBytes = av_parser_parse2(
                                                    l_pParser,
                                                    l_pAVCodecContext,
                                                    &l_AVPacket_out.data,
                                                    &l_AVPacket_out.size,
                                                    l_pData,
                                                    l_iSize,
                                                    l_AVPacket.pts,
                                                    l_AVPacket.dts,
                                                    AV_NOPTS_VALUE);

                                        if (l_iParsedBytes <= 0)
                                        {
                                            break;
                                        }

                                        l_AVPacket.pts = l_AVPacket.dts = AV_NOPTS_VALUE;
                                        l_AVPacket.pos = -1;
                                    }
                                    else
                                    {
                                        l_AVPacket_out.data = l_pData;
                                        l_AVPacket_out.size = l_iSize;
                                    }

                                    l_iDecodedBytes =
                                            avcodec_decode_video2(
                                                l_pAVCodecContext,
                                                l_pAVFrame,
                                                &l_iGotFrame,
                                                &l_AVPacket_out);

                                    if (l_iDecodedBytes >= 0)
                                    {
                                        if (l_iGotFrame)
                                        {
                                            l_pSWSContext = sws_getContext(
                                                        l_pAVCodecContext->width,
                                                        l_pAVCodecContext->height,
                                                        l_pAVCodecContext->pix_fmt,
                                                        l_pAVCodecContext->width,
                                                        l_pAVCodecContext->height,
                                                        AV_PIX_FMT_BGR24,
                                                        SWS_BICUBIC,
                                                        NULL,
                                                        NULL,
                                                        NULL);

                                            if (l_pSWSContext)
                                            {
                                                l_iResult = avpicture_alloc(
                                                            reinterpret_cast<AVPicture*>(l_pAVFrameBGR),
                                                            AV_PIX_FMT_BGR24,
                                                            l_pAVFrame->width,
                                                            l_pAVFrame->height);

                                                l_iResult = sws_scale(
                                                            l_pSWSContext,
                                                            l_pAVFrame->data,
                                                            l_pAVFrame->linesize,
                                                            0,
                                                            l_pAVCodecContext->height,
                                                            l_pAVFrameBGR->data,
                                                            l_pAVFrameBGR->linesize);

                                                if (l_iResult > 0)
                                                {
                                                    l_cvmImage = cv::Mat(
                                                                l_pAVFrame->height,
                                                                l_pAVFrame->width,
                                                                CV_8UC3,
                                                                l_pAVFrameBGR->data[0],
                                                            l_pAVFrameBGR->linesize[0]);

                                                    if (l_cvmImage.empty() == false)
                                                    {
                                                        cv::imshow("image", l_cvmImage);
                                                        cv::waitKey(10);
                                                    }
                                                }
                                            }

                                            l_iFrameCount++;
                                        }
                                    }
                                    else
                                    {
                                        break;
                                    }

                                    l_pData += l_iParsedBytes;
                                    l_iSize -= l_iParsedBytes;
                                }
                            }

                        } // end while(1).
                    }

                    fclose(l_pFile_in);
                }
            }
        }
    }
}

EDIT: The following is the final code that solves my problem, thanks to the suggestions of Ronald.

#include <iostream>
#include <iomanip>
#include <string>
#include <sstream>

#include <opencv2/opencv.hpp>

#ifdef __cplusplus
extern "C"
{
#endif // __cplusplus
#include <libavcodec/avcodec.h>
#include <libavdevice/avdevice.h>
#include <libavfilter/avfilter.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavutil/avutil.h>
#include <libpostproc/postprocess.h>
#include <libswresample/swresample.h>
#include <libswscale/swscale.h>
#ifdef __cplusplus
} // end extern "C".
#endif // __cplusplus

void main()
{
    AVCodec*            l_pCodec;
    AVCodecContext*     l_pAVCodecContext;
    SwsContext*         l_pSWSContext;
    AVFormatContext*    l_pAVFormatContext;
    AVFrame*            l_pAVFrame;
    AVFrame*            l_pAVFrameBGR;
    AVPacket            l_AVPacket;
    std::string         l_sFile;
    uint8_t*            l_puiBuffer;
    int                 l_iResult;
    int                 l_iFrameCount;
    int                 l_iGotFrame;
    int                 l_iDecodedBytes;
    int                 l_iVideoStreamIdx;
    int                 l_iNumBytes;
    cv::Mat             l_cvmImage;

    l_pCodec = NULL;
    l_pAVCodecContext = NULL;
    l_pSWSContext = NULL;
    l_pAVFormatContext = NULL;
    l_pAVFrame = NULL;
    l_pAVFrameBGR = NULL;
    l_puiBuffer = NULL;

    l_sFile = "myvideo.ts";

    av_register_all();

    l_iResult = avformat_open_input(&l_pAVFormatContext,
                                    l_sFile.c_str(),
                                    NULL,
                                    NULL);

    if (l_iResult >= 0)
    {
        l_iResult = avformat_find_stream_info(l_pAVFormatContext, NULL);

        if (l_iResult >= 0)
        {
            for (int i=0; i<l_pAVFormatContext->nb_streams; i++)
            {
                if (l_pAVFormatContext->streams[i]->codec->codec_type ==
                        AVMEDIA_TYPE_VIDEO)
                {
                    l_iVideoStreamIdx = i;

                    l_pAVCodecContext =
                            l_pAVFormatContext->streams[l_iVideoStreamIdx]->codec;

                    if (l_pAVCodecContext)
                    {
                        l_pCodec = avcodec_find_decoder(l_pAVCodecContext->codec_id);
                    }

                    break;
                }
            }
        }
    }

    if (l_pCodec && l_pAVCodecContext)
    {
        l_iResult = avcodec_open2(l_pAVCodecContext, l_pCodec, NULL);

        if (l_iResult >= 0)
        {
            l_pAVFrame = av_frame_alloc();
            l_pAVFrameBGR = av_frame_alloc();

            l_iNumBytes = avpicture_get_size(PIX_FMT_BGR24,
                                             l_pAVCodecContext->width,
                                             l_pAVCodecContext->height);

            l_puiBuffer = (uint8_t *)av_malloc(l_iNumBytes*sizeof(uint8_t));

            avpicture_fill((AVPicture *)l_pAVFrameBGR,
                           l_puiBuffer,
                           PIX_FMT_RGB24,
                           l_pAVCodecContext->width,
                           l_pAVCodecContext->height);

            l_pSWSContext = sws_getContext(
                        l_pAVCodecContext->width,
                        l_pAVCodecContext->height,
                        l_pAVCodecContext->pix_fmt,
                        l_pAVCodecContext->width,
                        l_pAVCodecContext->height,
                        AV_PIX_FMT_BGR24,
                        SWS_BICUBIC,
                        NULL,
                        NULL,
                        NULL);

            while (av_read_frame(l_pAVFormatContext, &l_AVPacket) >= 0)
            {
                if (l_AVPacket.stream_index == l_iVideoStreamIdx)
                {
                    l_iDecodedBytes = avcodec_decode_video2(
                                l_pAVCodecContext,
                                l_pAVFrame,
                                &l_iGotFrame,
                                &l_AVPacket);

                    if (l_iGotFrame)
                    {
                        if (l_pSWSContext)
                        {
                            l_iResult = sws_scale(
                                        l_pSWSContext,
                                        l_pAVFrame->data,
                                        l_pAVFrame->linesize,
                                        0,
                                        l_pAVCodecContext->height,
                                        l_pAVFrameBGR->data,
                                        l_pAVFrameBGR->linesize);

                            if (l_iResult > 0)
                            {
                                l_cvmImage = cv::Mat(
                                            l_pAVFrame->height,
                                            l_pAVFrame->width,
                                            CV_8UC3,
                                            l_pAVFrameBGR->data[0],
                                        l_pAVFrameBGR->linesize[0]);

                                if (l_cvmImage.empty() == false)
                                {
                                    cv::imshow("image", l_cvmImage);
                                    cv::waitKey(1);
                                }
                            }
                        }

                        l_iFrameCount++;
                    }
                }
            }
        }
    }
}

Solution

  • You're never using the l_pParser object, or in other words, you're not using a H264 parser, you're just sending raw file data into the decoder without proper NAL packetization. Please read the frame parsing API docs to figure out how to use the parser.