Search code examples
ffmpeg · h.264 · libavcodec · libavformat

FFMPEG transcoder producing broken video


I have the following code to transcode video, which closely follows the FFmpeg transcoding example.

However, it produces broken video, as shown:

Big Buck Bunny broken 1 Big Buck Bunny broken 2

It seems like the I-frames are decoded correctly, but the P- and B-frames are out of order. I thought av_interleaved_write_frame() would rectify that for me. It also seems like libx264 is not creating any extradata, which I thought it would.

Could someone please help me work out why?

Many thanks

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/avutil.h>
#include <libavutil/mathematics.h>
#include <libavutil/opt.h>

/*
 * Print a printf-style message, followed by a newline, to stderr.
 * `level` is accepted for readability at the call site but is not used by the
 * expansion. Relies on the GNU `##__VA_ARGS__` extension so that a format
 * string without arguments is accepted.
 */
#define DEBUG(level, format, ...) fprintf(stderr, format "\n", ##__VA_ARGS__)

/* Two-step stringification so that macro arguments such as __LINE__ are expanded first. */
#define stringify2(var) #var
#define stringify(var) stringify2(var)

/*
 * Abort the process with a "file:line Assertion failed! <cond>." diagnostic when
 * `cond` is false. The variadic arguments are a printf-style message that is
 * appended to the diagnostic. `cond` is evaluated exactly once.
 *
 * NOTE: the text of `cond` becomes part of the format string, so a condition
 * containing a '%' character would be misinterpreted by fprintf().
 */
#define RASSERT(cond, ...) do { \
    if (!(cond)) { \
        DEBUG(LOG_FATAL, __FILE__ ":" stringify(__LINE__) " " "Assertion failed! " #cond ". " __VA_ARGS__); \
        abort(); \
    } \
} while (0)

/*
 * Check the return code of an FFmpeg call and abort with a readable error on failure.
 * FFmpeg reports errors as negative AVERROR values; zero and positive values are
 * success (several calls are documented as returning ">= 0" on success), so only
 * negative values are treated as fatal.
 */
#define FFCHECK(ret, func) RASSERT((ret) >= 0, #func " failed: %s (%d)", av_err2str(ret), (ret))

/* Input-side state: the opened container plus the selected streams and decoder. */
typedef struct decode_context {
    AVFormatContext *format;     /* input container, opened by open_input() via avformat_open_input() */
    AVCodecContext *videoCodec;  /* decoder context allocated and opened by open_input() for videoStream */
    AVCodecContext *audioCodec;  /* not assigned anywhere in the visible code */
    AVStream *videoStream;       /* video stream selected by open_input(); open_input() asserts it is found */
    AVStream *audioStream;       /* audio stream selected by open_input(); left untouched when the input has none */
} decode_context_t;

/* Output-side state: the output container plus the created stream and encoder. */
typedef struct encode_context {
    AVFormatContext *format;     /* output container, allocated by open_output() via avformat_alloc_output_context2() */
    AVCodecContext *videoCodec;  /* encoder context allocated and opened by open_output() (looked up as "libx264") */
    AVCodecContext *audioCodec;  /* not assigned anywhere in the visible code */
    AVStream *videoStream;       /* output video stream created by open_output() via avformat_new_stream() */
    AVStream *audioStream;       /* not assigned anywhere in the visible code */
} encode_context_t;

/*
 * Open `file` for demuxing and set up a decoder for its video stream.
 *
 * On return dec->format, dec->videoStream and dec->videoCodec are populated.
 * dec->audioStream is set when the input contains an audio stream. For both
 * media types the first matching stream is kept and later ones are ignored.
 *
 * `dec` must be zero-initialised by the caller: the stream selection tests the
 * stream pointers for NULL, and dec->format is passed to avformat_open_input().
 *
 * Any failure aborts the process through RASSERT/FFCHECK.
 */
void open_input(decode_context_t *dec, const char *file) {
    int ret;

    ret = avformat_open_input(&dec->format, file, NULL, NULL);
    FFCHECK(ret, "avformat_open_input()");

    ret = avformat_find_stream_info(dec->format, NULL);
    FFCHECK(ret, "avformat_find_stream_info()");

    for (unsigned int i = 0; i < dec->format->nb_streams; ++i) {
        AVStream *stream = dec->format->streams[i];

        switch (stream->codecpar->codec_type) {
        case AVMEDIA_TYPE_VIDEO:
            /* Keep the first video stream only. */
            if (!dec->videoStream)
                dec->videoStream = stream;
            break;

        case AVMEDIA_TYPE_AUDIO:
            /* Keep the first audio stream only, mirroring the video selection. */
            if (!dec->audioStream)
                dec->audioStream = stream;
            break;

        default:
            break;
        }
    }

    RASSERT(dec->videoStream != NULL, "Didn't find video stream");

    const AVCodec *codec = avcodec_find_decoder(dec->videoStream->codecpar->codec_id);
    RASSERT(codec, "Failed to find decoder");

    dec->videoCodec = avcodec_alloc_context3(codec);
    RASSERT(dec->videoCodec, "avcodec_alloc_context3() failed");

    ret = avcodec_parameters_to_context(dec->videoCodec, dec->videoStream->codecpar);
    FFCHECK(ret, "avcodec_parameters_to_context()");

    /* Tell the decoder which time base the incoming packet timestamps use. */
    dec->videoCodec->pkt_timebase = dec->videoStream->time_base;
    dec->videoCodec->framerate = av_guess_frame_rate(dec->format, dec->videoStream, NULL);

    ret = avcodec_open2(dec->videoCodec, codec, NULL);
    FFCHECK(ret, "avcodec_open2()");
}

/*
 * Create the output container for `file`, add one H.264 video stream encoded
 * with libx264, open the encoder and write the container header.
 *
 * Picture size, sample aspect ratio, pixel format and frame rate are copied
 * from the already-opened decoder in `dec`. On return enc->format,
 * enc->videoStream and enc->videoCodec are populated.
 *
 * Any failure aborts the process through RASSERT/FFCHECK.
 */
void open_output(encode_context_t *enc, const char *file, decode_context_t *dec) {
    int ret;

    ret = avformat_alloc_output_context2(&enc->format, NULL, NULL, file);
    FFCHECK(ret, "avformat_alloc_output_context2()");

    enc->videoStream = avformat_new_stream(enc->format, NULL);
    RASSERT(enc->videoStream, "avformat_new_stream() failed");

    enc->videoStream->id = enc->format->nb_streams - 1;

    const AVCodec *codec = avcodec_find_encoder_by_name("libx264");
    RASSERT(codec, "Failed to find encoder");

    enc->videoCodec = avcodec_alloc_context3(codec);
    RASSERT(enc->videoCodec, "avcodec_alloc_context3() failed");

    enc->videoCodec->bit_rate = 400000;
    enc->videoCodec->width = dec->videoCodec->width;
    enc->videoCodec->height = dec->videoCodec->height;
    enc->videoCodec->sample_aspect_ratio = dec->videoCodec->sample_aspect_ratio;
    enc->videoCodec->pix_fmt = dec->videoCodec->pix_fmt;
    enc->videoCodec->framerate = dec->videoCodec->framerate;

    /*
     * One tick per frame when the frame rate is known. An unknown rate would
     * invert to an invalid time base, so fall back to the input stream's.
     */
    if (dec->videoCodec->framerate.num > 0 && dec->videoCodec->framerate.den > 0)
        enc->videoCodec->time_base = av_inv_q(dec->videoCodec->framerate);
    else
        enc->videoCodec->time_base = dec->videoStream->time_base;

    if (codec->id == AV_CODEC_ID_H264) {
        av_opt_set(enc->videoCodec->priv_data, "preset", "slow", 0);
        av_opt_set(enc->videoCodec->priv_data, "profile", "high", 0);
        av_opt_set(enc->videoCodec->priv_data, "level", "4.1", 0);
    }

    /*
     * AVFMT_GLOBALHEADER is a property of the muxer (oformat->flags), not of the
     * format context's own flags. Without this the encoder emits no extradata
     * and containers such as MP4 cannot describe the stream.
     */
    if (enc->format->oformat->flags & AVFMT_GLOBALHEADER)
        enc->videoCodec->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

    ret = avcodec_open2(enc->videoCodec, codec, NULL);
    FFCHECK(ret, "avcodec_open2()");

    /* Must follow avcodec_open2() so the extradata produced by the encoder is copied. */
    ret = avcodec_parameters_from_context(enc->videoStream->codecpar, enc->videoCodec);
    FFCHECK(ret, "avcodec_parameters_from_context()");

    /* Only a hint: the muxer may replace it in avformat_write_header(). */
    enc->videoStream->time_base = enc->videoCodec->time_base;

    av_dump_format(enc->format, 0, file, 1);

    /* Muxers flagged AVFMT_NOFILE manage their own I/O and must not be given a pb. */
    if (!(enc->format->oformat->flags & AVFMT_NOFILE)) {
        ret = avio_open(&enc->format->pb, file, AVIO_FLAG_WRITE);
        FFCHECK(ret, "avio_open()");
    }

    ret = avformat_write_header(enc->format, NULL);
    FFCHECK(ret, "avformat_write_header()");
}

int main(int argc, const char * argv[]) {
    int ret;

    if (argc < 3) {
        fprintf(stderr, "%s input output\n", argv[0]);
        return 1;
    }

    decode_context_t dec_ctx = {};
    decode_context_t *dec = &dec_ctx;
    encode_context_t enc_ctx = {};
    encode_context_t *enc = &enc_ctx;
    
    open_input(dec, argv[1]);

    open_output(enc, argv[2], dec);

    while (true) {
        AVPacket *packet = av_packet_alloc();
        ret = av_read_frame(dec->format, packet);
        if (ret < 0)
            break;

        if (packet->stream_index == dec->videoStream->index) {
            ret = avcodec_send_packet(dec->videoCodec, packet);
            if (ret == AVERROR(EAGAIN)) {
                AVFrame * frame = av_frame_alloc();
                while (true) {
                    ret = avcodec_receive_frame(dec->videoCodec, frame);
                    if (ret == AVERROR(EAGAIN))
                        break;
                    FFCHECK(ret, "avcodec_receive_frame()");

                    ret = avcodec_send_frame(enc->videoCodec, frame);
                    if (ret == AVERROR(EAGAIN)) {
                        AVPacket *pkt = av_packet_alloc();
                        while (true) {
                            ret = avcodec_receive_packet(enc->videoCodec, pkt);
                            if (ret == AVERROR(EAGAIN))
                                break;
                            FFCHECK(ret, "avcodec_receive_packet()");

                            pkt->stream_index = enc->videoStream->id;
                            av_packet_rescale_ts(pkt, dec->videoStream->time_base, enc->videoStream->time_base);

                            ret = av_interleaved_write_frame(enc->format, pkt);
                            FFCHECK(ret, "av_interleaved_write_frame()");
                        }
                        av_packet_free(&pkt);
                    }
                }
                av_frame_free(&frame);
            } else {
                FFCHECK(ret, "avcodec_send_packet()");
            }
        } else if (packet->stream_index == dec->audioStream->index) {
            // Deal with audio
        }

        av_packet_free(&packet);
    }

    ret = av_write_trailer(enc->format);
    FFCHECK(ret, "av_write_trailer()");

    ret = avio_close(enc->format->pb);
    FFCHECK(ret, "avio_close()");

    // Close some more stuff

    return 0;
}

Solution

  • After thinking about this some more, I realised that I wasn't even sure if my decoder was correct. And another thorough inspection of the transcoding example revealed a small difference in behaviour.

    Effectively, I was doing this:

    /* Problematic pattern: a frame is only requested when the send call reports EAGAIN. */
    while (true) {
        ret = avcodec_send_packet(dec->videoCodec, packet);
        if (ret != AVERROR(EAGAIN)) {
            /* Every other result, success included, skips the receive call below. */
            continue;
        }
    
        ret = avcodec_receive_frame(dec->videoCodec, frame);
    }
    

    Removing the continue and immediately trying to receive frames from the decoder works much better, and fixed my issue.

    /* Corrected pattern: every accepted packet is immediately followed by a receive attempt. */
    while (true) {
        ret = avcodec_send_packet(dec->videoCodec, packet);
        /* Any non-zero result from the send call is treated as fatal here. */
        if (ret != 0)
            abort();
    
        ret = avcodec_receive_frame(dec->videoCodec, frame);
    }
    

    I also had the same error in the encoding side so I've fixed it there too.