Search code examples
visual-c++ ffmpeg rtsp wowza

Output RTSP stream with ffmpeg


I'm attempting to use the ffmpeg libraries to send a video stream from my application to a media server (in this case wowza). I have been able to do the reverse and consume an RTSP stream but I'm having a few issues writing an RTSP stream.

I have found a few examples and attempted to use the relevant bits. The code is below; I have simplified it as much as I can. I only want to send a single H.264 bitstream to the Wowza server, which it can handle.

I get an "Integer division by zero" exception in the av_interleaved_write_frame function whenever I try to send a packet. The exception looks like it's related to the packet timestamps not being set correctly. I've tried different values and can get past the exception by setting some contrived values, but then the write call fails.

#include <iostream>
#include <fstream>
#include <sstream>
#include <cstring>

#include "stdafx.h"
#include "windows.h"

extern "C"
{
    #include <libavcodec\avcodec.h>
    #include <libavformat\avformat.h>
    #include <libavformat\avio.h>
    #include <libswscale\swscale.h>
}

using namespace std;

/* Set to 1 by write_video_frame() when a flush call yields no packet;
 * the main loop in _tmain() stops once it is non-zero. */
static int video_is_eof;

/* Compared against the stream time computed in _tmain() (presumably seconds). */
#define STREAM_DURATION   50.0
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT    AV_PIX_FMT_YUV420P /* default pix_fmt */
/* AV_-prefixed enumerator, consistent with the AV_CODEC_ID_NONE / enum AVCodecID
 * usage elsewhere in this file (CODEC_ID_H264 is the legacy spelling). */
#define VIDEO_CODEC_ID AV_CODEC_ID_H264

/* Not referenced by any other code visible in this file. */
static int sws_flags = SWS_BICUBIC;

/* video output */
/* Re-usable frame handed to the encoder; allocated in open_video(). */
static AVFrame *frame;
/* dst_picture backs `frame`; src_picture is never allocated in the visible
 * code and is only passed to av_free() in close_video(). */
static AVPicture src_picture, dst_picture;
/* Number of write_video_frame() calls so far; also used as the frame pts. */
static int frame_count;

/*
 * Convert the packet's pts, dts and duration from `*time_base` to the
 * stream's time base, tag the packet with the stream index and pass it to
 * av_interleaved_write_frame(). Returns that call's result.
 */
static int write_frame(AVFormatContext *fmt_ctx, const AVRational *time_base, AVStream *st, AVPacket *pkt)
{
    const AVRational src_tb = *time_base;
    const AVRational dst_tb = st->time_base;
    const AVRounding rounding = AVRounding(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);

    pkt->pts = av_rescale_q_rnd(pkt->pts, src_tb, dst_tb, rounding);
    pkt->dts = av_rescale_q_rnd(pkt->dts, src_tb, dst_tb, rounding);
    pkt->duration = av_rescale_q(pkt->duration, src_tb, dst_tb);
    pkt->stream_index = st->index;

    // Exception occurs here.
    return av_interleaved_write_frame(fmt_ctx, pkt);
}

/*
 * Look up the encoder for `codec_id`, create a new stream for it on `oc`
 * and fill in fixed encoding parameters (352x288, 400 kbit/s,
 * 1/STREAM_FRAME_RATE time base, GOP of 12, STREAM_PIX_FMT).
 * The encoder is also stored in `*codec`. Terminates the process with
 * exit(1) if the encoder or the stream cannot be obtained.
 */
static AVStream *add_stream(AVFormatContext *oc, AVCodec **codec, enum AVCodecID codec_id)
{
    AVCodec *encoder = avcodec_find_encoder(codec_id);
    *codec = encoder;
    if (encoder == NULL) {
        fprintf(stderr, "Could not find encoder for '%s'\n", avcodec_get_name(codec_id));
        exit(1);
    }

    AVStream *stream = avformat_new_stream(oc, encoder);
    if (stream == NULL) {
        fprintf(stderr, "Could not allocate stream\n");
        exit(1);
    }
    stream->id = oc->nb_streams - 1;

    AVCodecContext *enc = stream->codec;
    enc->codec_id = codec_id;
    enc->pix_fmt = STREAM_PIX_FMT;
    enc->width = 352;
    enc->height = 288;
    enc->bit_rate = 400000;
    enc->time_base.num = 1;
    enc->time_base.den = STREAM_FRAME_RATE;
    enc->gop_size = 12; /* at most twelve frames between intra frames */

    return stream;
}

/*
 * Open the encoder of `st` with `codec`, allocate the shared `frame` and
 * the `dst_picture` buffer sized from the codec context, and point the
 * frame at that buffer. Terminates the process with exit(1) on any failure.
 * `oc` is not used.
 */
static void open_video(AVFormatContext *oc, AVCodec *codec, AVStream *st)
{
    AVCodecContext *enc = st->codec;

    if (avcodec_open2(enc, codec, NULL) < 0) {
        fprintf(stderr, "Could not open video codec: ");
        exit(1);
    }

    /* The frame is allocated once and re-used for every picture. */
    frame = av_frame_alloc();
    if (frame == NULL) {
        fprintf(stderr, "Could not allocate video frame\n");
        exit(1);
    }
    frame->width = enc->width;
    frame->height = enc->height;
    frame->format = enc->pix_fmt;

    if (avpicture_alloc(&dst_picture, enc->pix_fmt, enc->width, enc->height) < 0) {
        fprintf(stderr, "Could not allocate picture: ");
        exit(1);
    }

    /* The frame takes over dst_picture's data pointers and line sizes. */
    *((AVPicture *)frame) = dst_picture;
}

/*
 * Paint a synthetic pattern into `pict` that changes with `frame_index`:
 * plane 0 (Y) over width x height, planes 1 and 2 (Cb, Cr) over
 * width/2 x height/2.
 */
static void fill_yuv_image(AVPicture *pict, int frame_index, int width, int height)
{
    const int chroma_w = width / 2;
    const int chroma_h = height / 2;

    /* Y */
    for (int row = 0; row < height; ++row) {
        for (int col = 0; col < width; ++col) {
            pict->data[0][row * pict->linesize[0] + col] = col + row + frame_index * 3;
        }
    }

    /* Cb and Cr */
    for (int row = 0; row < chroma_h; ++row) {
        for (int col = 0; col < chroma_w; ++col) {
            pict->data[1][row * pict->linesize[1] + col] = 128 + row + frame_index * 2;
            pict->data[2][row * pict->linesize[2] + col] = 64 + col + frame_index * 5;
        }
    }
}

/*
 * Encode one picture (or, when `flush` is non-zero, drain the encoder by
 * passing NULL) and hand any resulting packet to write_frame().
 * Sets video_is_eof when a flush call produces no packet, increments
 * frame_count, and terminates the process with exit(1) on encode or
 * write errors.
 */
static void write_video_frame(AVFormatContext *oc, AVStream *st, int flush)
{
    AVCodecContext *enc = st->codec;
    AVFrame *input = NULL;

    if (!flush) {
        fill_yuv_image(&dst_picture, frame_count, enc->width, enc->height);
        input = frame;
    }

    AVPacket pkt = { 0 };
    int got_packet;
    av_init_packet(&pkt);

    /* encode the image */
    frame->pts = frame_count;
    if (avcodec_encode_video2(enc, &pkt, input, &got_packet) < 0) {
        fprintf(stderr, "Error encoding video frame:");
        exit(1);
    }

    if (got_packet) {
        if (write_frame(oc, &enc->time_base, st, &pkt) < 0) {
            fprintf(stderr, "Error while writing video frame: ");
            exit(1);
        }
    }
    else if (flush) {
        /* Nothing came out of a flush call: the encoder is drained. */
        video_is_eof = 1;
    }

    frame_count++;
}

/*
 * Close the encoder of `st`, release the first data pointer of both global
 * pictures and free the shared frame. `oc` is not used.
 */
static void close_video(AVFormatContext *oc, AVStream *st)
{
    AVPicture *const pictures[] = { &src_picture, &dst_picture };

    avcodec_close(st->codec);
    for (size_t i = 0; i < sizeof(pictures) / sizeof(pictures[0]); ++i) {
        av_free(pictures[i]->data[0]);
    }
    av_frame_free(&frame);
}

/*
 * Program entry point: selects an output format, creates an output context
 * for the RTSP URL, adds and opens one video stream, then repeatedly calls
 * write_video_frame() until video_is_eof is set, and finally releases the
 * video resources and the context.
 *
 * NOTE(review): no call to avformat_write_header() or av_write_trailer()
 * appears anywhere in this function — confirm whether that is related to
 * the failure seen in av_interleaved_write_frame().
 */
int _tmain(int argc, _TCHAR* argv[])
{
    printf("starting...\n");

    // NOTE(review): the user-info part of this URL looks like it was mangled by
    // e-mail obfuscation when the page was captured — presumably it was of the
    // form rtsp://user:password@host:port/app/stream; confirm before use.
    const char *filename = "rtsp://test:[email protected]:1935/ffmpeg/0";

    AVOutputFormat *fmt;
    AVFormatContext *oc;
    AVStream *video_st;
    AVCodec *video_codec;
    double video_time;
    int flush, ret;

    /* Initialize libavcodec, and register all codecs and formats. */
    av_register_all();
    avformat_network_init();

    /* Walk the registered output formats and stop at the first one whose
     * video_codec field equals VIDEO_CODEC_ID. */
    AVOutputFormat* oFmt = av_oformat_next(NULL);
    while (oFmt) {
        if (oFmt->video_codec == VIDEO_CODEC_ID) {
            break;
        }
        oFmt = av_oformat_next(oFmt);
    }

    if (!oFmt) {
        printf("Could not find the required output format.\n");
        exit(1);
    }

    /* allocate the output media context */
    // NOTE(review): both an explicit format (oFmt) and the name "rtsp" are
    // passed; presumably the explicit format takes precedence, in which case
    // the muxer used may not be RTSP — verify against the FFmpeg documentation.
    avformat_alloc_output_context2(&oc, oFmt, "rtsp", filename);

    if (!oc) {
        printf("Could not set the output media context.\n");
        exit(1);
    }

    fmt = oc->oformat;
    if (!fmt) {
        printf("Could not create the output format.\n");
        exit(1);
    }

    video_st = NULL;

    cout << "Codec = " << avcodec_get_name(fmt->video_codec) << endl;
    if (fmt->video_codec != AV_CODEC_ID_NONE)
    {
        video_st = add_stream(oc, &video_codec, fmt->video_codec);
    }

    /* Now that all the parameters are set, we can open the video codec and allocate the necessary encode buffers. */
    if (video_st) {
        open_video(oc, video_codec, video_st);
    }

    av_dump_format(oc, 0, filename, 1);
    char errorBuff[80];

    /* Only formats without the AVFMT_NOFILE flag get an I/O context opened here. */
    if (!(fmt->flags & AVFMT_NOFILE)) {
        ret = avio_open(&oc->pb, filename, AVIO_FLAG_WRITE);
        if (ret < 0) {
            fprintf(stderr, "Could not open outfile '%s': %s", filename, av_make_error_string(errorBuff, 80, ret));
            return 1;
        }
    }

    /* Encode and send frames; once the stream time reaches STREAM_DURATION the
     * encoder is flushed, and the loop ends when write_video_frame() sets
     * video_is_eof. */
    flush = 0;
    while (video_st && !video_is_eof) {
        /* Compute current video time. */
        video_time = (video_st && !video_is_eof) ? video_st->pts.val * av_q2d(video_st->time_base) : INFINITY;

        if (!flush && (!video_st || video_time >= STREAM_DURATION)) {
            flush = 1;
        }

        if (video_st && !video_is_eof) {
            write_video_frame(oc, video_st, flush);
        }
    }

    if (video_st) {
        close_video(oc, video_st);
    }

    // NOTE(review): this condition is the opposite of the one guarding
    // avio_open() above, so avio_close() runs exactly when avio_open() was
    // skipped — looks inverted; confirm.
    if ((fmt->flags & AVFMT_NOFILE)) {
        avio_close(oc->pb);
    }

    avformat_free_context(oc);

    printf("finished.\n");

    /* Wait for a key press before exiting. */
    getchar();

    return 0;
}

Does anyone have any insights about how the packet timestamps can be successfully set?


Solution

  • I solved the integer division by zero by building ffmpeg on my Windows instance and debugging the av_interleaved_write_frame call. It turned out that the exception was caused by the pts not being set on the video stream object.

    Adding the line below to the while loop in the main function fixed the problem:

    video_st->pts.val += av_rescale_q(1, video_st->codec->time_base, video_st->time_base);
    

    Here's a working sample that sends an H.264-encoded dummy stream to a Wowza server via ffmpeg's RTSP pipeline.

    // Roughly based on: https://ffmpeg.org/doxygen/trunk/muxing_8c-source.html
    
    #include <chrono>
    #include <thread>
    #include <tchar.h>
    
    extern "C"
    {
        #include <libavcodec\avcodec.h>
        #include <libavformat\avformat.h>
        #include <libavformat\avio.h>
        #include <libswscale\swscale.h>
        #include <libavutil\time.h>
    }
    
    #pragma comment(lib,"libavformat/libavformat.a")
    #pragma comment(lib,"libavcodec/libavcodec.a")
    #pragma comment(lib,"libavutil/libavutil.a")
    #pragma comment(lib,"libswscale/libswscale.a")
    #pragma comment(lib,"x264.lib")
    #pragma comment(lib,"libswresample/libswresample.a")
    
    using namespace std;
    
    /* Not referenced by any other code visible in this sample. */
    static int video_is_eof;
    
    /* Compared in _tmain() against the elapsed wall-clock time in seconds. */
    #define STREAM_DURATION   20
    #define STREAM_FRAME_RATE 25 /* 25 images/s */
    #define STREAM_PIX_FMT   AV_PIX_FMT_YUV420P /* default pix_fmt */ //AV_PIX_FMT_NV12;
    /* AV_-prefixed enumerator, matching the enum AVCodecID type used by
     * add_stream() (CODEC_ID_H264 is the legacy spelling). */
    #define VIDEO_CODEC_ID AV_CODEC_ID_H264
    
    /* video output */
    /* Re-usable frame handed to the encoder; allocated in open_video(). */
    static AVFrame *frame;
    /* dst_picture backs `frame`; src_picture is never allocated in the visible
     * code and is only passed to av_free() during cleanup. */
    static AVPicture src_picture, dst_picture;
    
    /*
     * Add an output video stream for `codec_id` to `oc`.
     *
     * The encoder that was found is stored in `*codec` (NULL if none exists).
     * The stream and its pts fraction get a 1/90000 time base, and the codec
     * context is filled with fixed parameters (352x288, 400 kbit/s,
     * 1/STREAM_FRAME_RATE time base, GOP of 12, STREAM_PIX_FMT).
     *
     * Returns the new stream, or NULL if the encoder was not found or the
     * stream could not be allocated (an error is logged in both cases).
     */
    static AVStream *add_stream(AVFormatContext *oc, AVCodec **codec, enum AVCodecID codec_id)
    {
        AVCodecContext *c;
        /* Must start as NULL: callers test the return value, and the early
         * returns below would otherwise hand back an indeterminate pointer. */
        AVStream *st = NULL;
    
        /* find the encoder */
        *codec = avcodec_find_encoder(codec_id);
        if (!(*codec)) {
            av_log(NULL, AV_LOG_ERROR, "Could not find encoder for '%s'.\n", avcodec_get_name(codec_id));
            return NULL;
        }
    
        st = avformat_new_stream(oc, *codec);
        if (!st) {
            av_log(NULL, AV_LOG_ERROR, "Could not allocate stream.\n");
            return NULL;
        }
    
        st->id = oc->nb_streams - 1;
        st->time_base.den = st->pts.den = 90000;
        st->time_base.num = st->pts.num = 1;
    
        c = st->codec;
        c->codec_id = codec_id;
        c->bit_rate = 400000;
        c->width = 352;
        c->height = 288;
        c->time_base.den = STREAM_FRAME_RATE;
        c->time_base.num = 1;
        c->gop_size = 12; /* emit one intra frame every twelve frames at most */
        c->pix_fmt = STREAM_PIX_FMT;
    
        return st;
    }
    
    /*
     * Open the encoder of `st` with `codec` and prepare the shared `frame`
     * backed by `dst_picture`, both sized from the codec context.
     *
     * Returns a value >= 0 on success and a negative value on failure (an
     * error is logged). If the picture buffer cannot be allocated, the frame
     * allocated here is released again. `oc` is not used.
     */
    static int open_video(AVFormatContext *oc, AVCodec *codec, AVStream *st)
    {
        AVCodecContext *c = st->codec;
    
        /* open the codec */
        int ret = avcodec_open2(c, codec, NULL);
        if (ret < 0) {
            /* The codec name was passed before but had no matching conversion. */
            av_log(NULL, AV_LOG_ERROR, "Could not open video codec '%s'.\n", avcodec_get_name(c->codec_id));
            return ret;
        }
    
        /* allocate and init a re-usable frame */
        frame = av_frame_alloc();
        if (!frame) {
            av_log(NULL, AV_LOG_ERROR, "Could not allocate video frame.\n");
            return -1;
        }
        frame->format = c->pix_fmt;
        frame->width = c->width;
        frame->height = c->height;
    
        /* Allocate the encoded raw picture. */
        ret = avpicture_alloc(&dst_picture, c->pix_fmt, c->width, c->height);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Could not allocate picture.\n");
            /* Do not leave a half-initialised frame behind (also resets `frame` to NULL). */
            av_frame_free(&frame);
            return ret;
        }
    
        /* copy data and linesize picture pointers to frame */
        *((AVPicture *)frame) = dst_picture;
    
        return ret;
    }
    
    /*
     * Fill `pict` with a generated test pattern that shifts with
     * `frame_index`: plane 0 (Y) over width x height, planes 1 and 2
     * (Cb, Cr) over width/2 x height/2.
     */
    static void fill_yuv_image(AVPicture *pict, int frame_index, int width, int height)
    {
        const int shift = frame_index;
        const int half_w = width / 2;
        const int half_h = height / 2;
        int row, col;
    
        /* Y */
        for (row = 0; row < height; ++row) {
            for (col = 0; col < width; ++col) {
                pict->data[0][row * pict->linesize[0] + col] = col + row + shift * 3;
            }
        }
    
        /* Cb and Cr */
        for (row = 0; row < half_h; ++row) {
            for (col = 0; col < half_w; ++col) {
                pict->data[1][row * pict->linesize[1] + col] = 128 + row + shift * 2;
                pict->data[2][row * pict->linesize[2] + col] = 64 + col + shift * 5;
            }
        }
    }
    
    /*
     * Generate picture number `frameCount`, encode it and, if the encoder
     * produced a packet, send it on stream `st` of `oc`.
     *
     * All packet timing fields (pts, dts, duration) are converted from the
     * encoder time base to the stream time base before writing, and the
     * packet is released afterwards because av_write_frame() leaves
     * ownership with the caller.
     *
     * Returns a negative value if encoding or writing failed (an error is
     * logged), otherwise the non-negative result of the last call made.
     */
    static int write_video_frame(AVFormatContext *oc, AVStream *st, int frameCount)
    {
        AVCodecContext *c = st->codec;
    
        fill_yuv_image(&dst_picture, frameCount, c->width, c->height);
    
        AVPacket pkt = { 0 };
        int got_packet = 0;
        av_init_packet(&pkt);
    
        /* encode the image */
        frame->pts = frameCount;
        int ret = avcodec_encode_video2(c, &pkt, frame, &got_packet);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Error encoding video frame.\n");
            return ret;
        }
    
        /* No packet means the encoder buffered the picture; nothing to send yet. */
        if (!got_packet) {
            return ret;
        }
    
        const AVRounding rounding = AVRounding(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);
    
        pkt.stream_index = st->index;
        /* Keep pts, dts and duration in the same (stream) time base; rescaling
         * only pts would leave dts and duration in encoder units. */
        pkt.pts = av_rescale_q_rnd(pkt.pts, c->time_base, st->time_base, rounding);
        pkt.dts = av_rescale_q_rnd(pkt.dts, c->time_base, st->time_base, rounding);
        pkt.duration = av_rescale_q(pkt.duration, c->time_base, st->time_base);
    
        ret = av_write_frame(oc, &pkt);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Error while writing video frame.\n");
        }
    
        /* Release the encoder-allocated payload; otherwise every frame leaks. */
        av_free_packet(&pkt);
    
        return ret;
    }
    
    int _tmain(int argc, _TCHAR* argv[])
    {
        printf("starting...\n");
    
        const char *url = "rtsp://test:[email protected]:1935/ffmpeg/0";
        //const char *url = "rtsp://192.168.33.19:1935/ffmpeg/0";
    
        AVFormatContext *outContext;
        AVStream *video_st;
        AVCodec *video_codec;
        int ret = 0, frameCount = 0;
    
        av_log_set_level(AV_LOG_DEBUG);
        //av_log_set_level(AV_LOG_TRACE);
    
        av_register_all();
        avformat_network_init();
    
        avformat_alloc_output_context2(&outContext, NULL, "rtsp", url);
    
        if (!outContext) {
            av_log(NULL, AV_LOG_FATAL, "Could not allocate an output context for '%s'.\n", url);
            goto end;
        }
    
        if (!outContext->oformat) {
            av_log(NULL, AV_LOG_FATAL, "Could not create the output format for '%s'.\n", url);
            goto end;
        }
    
        video_st = add_stream(outContext, &video_codec, VIDEO_CODEC_ID);
    
        /* Now that all the parameters are set, we can open the video codec and allocate the necessary encode buffers. */
        if (video_st) {
            av_log(NULL, AV_LOG_DEBUG, "Video stream codec %s.\n ", avcodec_get_name(video_st->codec->codec_id));
    
            ret = open_video(outContext, video_codec, video_st);
            if (ret < 0) {
                av_log(NULL, AV_LOG_FATAL, "Open video stream failed.\n");
                goto end;
            }
        }
        else {
            av_log(NULL, AV_LOG_FATAL, "Add video stream for the codec '%s' failed.\n", avcodec_get_name(VIDEO_CODEC_ID));
            goto end;
        }
    
        av_dump_format(outContext, 0, url, 1);
    
        ret = avformat_write_header(outContext, NULL);
        if (ret != 0) {
            av_log(NULL, AV_LOG_ERROR, "Failed to connect to RTSP server for '%s'.\n", url);
            goto end;
        }
    
        printf("Press any key to start streaming...\n");
        getchar();
    
        auto startSend = std::chrono::system_clock::now();
    
        while (video_st) {
            frameCount++;
            auto startFrame = std::chrono::system_clock::now();
    
            ret = write_video_frame(outContext, video_st, frameCount);
    
            if (ret < 0) {
                av_log(NULL, AV_LOG_ERROR, "Write video frame failed.\n", url);
                goto end;
            }
    
            auto streamDuration = std::chrono::duration_cast<chrono::milliseconds>(std::chrono::system_clock::now() - startSend).count();
    
            printf("Elapsed time %ldms, video stream pts %ld.\n", streamDuration, video_st->pts.val);
    
            if (streamDuration / 1000.0 > STREAM_DURATION) {
                break;
            }
            else {
                auto frameDuration = std::chrono::duration_cast<chrono::milliseconds>(std::chrono::system_clock::now() - startFrame).count();
                std::this_thread::sleep_for(std::chrono::milliseconds((long)(1000.0 / STREAM_FRAME_RATE - frameDuration)));
            }
        }
    
        if (video_st) {
            avcodec_close(video_st->codec);
            av_free(src_picture.data[0]);
            av_free(dst_picture.data[0]);
            av_frame_free(&frame);
        }
    
        avformat_free_context(outContext);
    
    end:
        printf("finished.\n");
    
        getchar();
    
        return 0;
    }