Search code examples
ffmpegrtmplive-streamingflv

streaming FLV to RTMP with FFMpeg using H264 codec and C++ API to flv.js


I would like to stream live video from a webcam: capture it with OpenCV, encode it with the H264 codec, mux it into FLV, push it to an RTMP server, and play the stream in the browser with flv.js. Basically I have everything working, except that flv.js cannot read the stream. I can open the stream with ffplay, so I think at least most things are set up correctly.

My current implementation:

#include <iostream>
#include <vector>

#include <opencv2/highgui.hpp>
#include <opencv2/video.hpp>

extern "C" {
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include <libavutil/avutil.h>
#include <libavutil/pixdesc.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
}

// Captures frames from an OpenCV camera, converts them from BGR24 to YUV420P,
// encodes them with the H.264 encoder and writes the packets to an FLV muxer
// whose output is the hard-coded RTMP URL below.
//
// width, height: capture size requested from the camera; also used for the
//                encoder, the scaler and the frame buffers.
// fps:           used to derive the encoder framerate and time base.
// camID:         index passed to cv::VideoCapture.
//
// Every failure path prints a message and terminates the process with exit(1).
void stream_video(double width, double height, int fps, int camID)
{
  av_register_all();
  avformat_network_init();

  const char *output = "rtmp://localhost/live/stream";
  const AVRational dst_fps = {fps, 1};
  int ret;

  // initialize video capture device
  cv::VideoCapture cam(camID);
  if (!cam.isOpened())
  {
    std::cout << "Failed to open video capture device!" << std::endl;
    exit(1);
  }

  cam.set(cv::CAP_PROP_FRAME_WIDTH, width);
  cam.set(cv::CAP_PROP_FRAME_HEIGHT, height);

  // allocate cv::Mat with extra bytes (required by AVFrame::data)
  std::vector<uint8_t> imgbuf(height * width * 3 + 16);
  cv::Mat image(height, width, CV_8UC3, imgbuf.data(), width * 3);

  // open output format context
  AVFormatContext *outctx = nullptr;
  ret = avformat_alloc_output_context2(&outctx, nullptr, "flv", output);
  if (ret < 0)
  {
    std::cout << "Could not allocate output format context!" << std::endl;
    exit(1);
  }

  // open output IO context
  // (skipped for muxers flagged AVFMT_NOFILE)
  if (!(outctx->oformat->flags & AVFMT_NOFILE))
  {
    ret = avio_open2(&outctx->pb, output, AVIO_FLAG_WRITE, nullptr, nullptr);
    if (ret < 0)
    {
      std::cout << "Could not open output IO context!" << std::endl;
      exit(1);
    }
  }

  // create new video stream
  // NOTE(review): none of the three results below is checked for nullptr.
  AVCodec *codec = avcodec_find_encoder(AV_CODEC_ID_H264);
  AVStream *strm = avformat_new_stream(outctx, codec);
  AVCodecContext *avctx = avcodec_alloc_context3(codec);

  avctx->codec_id = AV_CODEC_ID_H264;
  avctx->width = width;
  avctx->height = height;
  avctx->pix_fmt = AV_PIX_FMT_YUV420P;
  avctx->framerate = dst_fps;
  avctx->time_base = av_inv_q(dst_fps);

  // NOTE(review): the stream parameters are copied here, before
  // avcodec_open2() runs, so anything the encoder only fills in while opening
  // (presumably the H.264 extradata, i.e. SPS/PPS) is not present in
  // strm->codecpar when the header is written.
  ret = avcodec_parameters_from_context(strm->codecpar, avctx);
  if (ret < 0)
  {
    std::cout << "Could not initialize stream codec parameters!" << std::endl;
    exit(1);
  }

  // encoder options passed to avcodec_open2()
  AVDictionary *opts = nullptr;
  av_dict_set(&opts, "preset", "superfast", 0);
  av_dict_set(&opts, "tune", "zerolatency", 0);

  // open video encoder
  ret = avcodec_open2(avctx, codec, &opts);
  if (ret < 0)
  {
    std::cout << "Could not open video encoder!" << std::endl;
    exit(1);
  }

  // initialize sample scaler
  // (same size in and out: only the pixel format changes, BGR24 -> avctx->pix_fmt)
  SwsContext *swsctx = sws_getContext(width, height, AV_PIX_FMT_BGR24, width, height, avctx->pix_fmt, SWS_BICUBIC, nullptr, nullptr, nullptr);
  if (!swsctx)
  {
    std::cout << "Could not initialize sample scaler!" << std::endl;
    exit(1);
  }

  // allocate frame buffer for encoding
  AVFrame *frame = av_frame_alloc();

  // framebuf owns the pixel storage; frame->data / frame->linesize are pointed
  // into it, so it has to stay alive for as long as the frame is used.
  std::vector<uint8_t> framebuf(av_image_get_buffer_size(avctx->pix_fmt, width, height, 1));
  av_image_fill_arrays(frame->data, frame->linesize, framebuf.data(), avctx->pix_fmt, width, height, 1);
  frame->width = width;
  frame->height = height;
  frame->format = static_cast<int>(avctx->pix_fmt);

  // write header
  ret = avformat_write_header(outctx, nullptr);
  if (ret < 0)
  {
    std::cout << "Could not write header!" << std::endl;
    exit(1);
  }

  // encoding loop
  int64_t frame_pts = 0;
  unsigned nb_frames = 0;
  // NOTE(review): nothing in this function sets end_of_stream to true, so the
  // loop below only ends through one of the exit(1) error paths.
  bool end_of_stream = false;

  do
  {
    nb_frames++;

    if (!end_of_stream)
    {
      // NOTE(review): the captured image is not checked for being empty.
      cam >> image;
      // convert cv::Mat to AVFrame.
      const int stride[] = {static_cast<int>(image.step[0])};
      sws_scale(swsctx, &image.data, stride, 0, image.rows, frame->data, frame->linesize);
      // pts counts frames, i.e. it is expressed in avctx->time_base units
      frame->pts = frame_pts++;
    }
    // encode video frame.
    AVPacket pkt = {0};
    av_init_packet(&pkt);

    ret = avcodec_send_frame(avctx, frame);
    if (ret < 0)
    {
      std::cout << "Error sending frame to codec context!" << std::endl;
      exit(1);
    }

    // any negative result is treated as fatal here
    ret = avcodec_receive_packet(avctx, &pkt);
    if (ret < 0)
    {
      std::cout << "Error receiving packet from codec context!" << std::endl;
      exit(1);
    }

    // rescale packet timestamp.
    av_packet_rescale_ts(&pkt, avctx->time_base, strm->time_base);
    // write packet.
    // NOTE(review): the two assignments below discard the timestamps that were
    // rescaled just above; the return value of the write is not checked.
    pkt.pts = AV_NOPTS_VALUE;
    pkt.dts = AV_NOPTS_VALUE;
    av_interleaved_write_frame(outctx, &pkt);

    // '\r' keeps the counter on a single console line
    std::cout << " Frames: " << nb_frames << '\r' << std::flush;

    av_packet_unref(&pkt);
  } while (!end_of_stream);

  av_write_trailer(outctx);
  std::cout << nb_frames << " frames encoded" << std::endl;

  // release the frame, encoder, IO context and muxer (swsctx is not released here)
  av_frame_free(&frame);
  avcodec_close(avctx);
  avio_close(outctx->pb);
  avformat_free_context(outctx);
}

// Entry point: streams camera 1 at 1280x720 with a 30 fps target.
int main()
{
  const double width = 1280;
  const double height = 720;
  const double fps = 30; // converted to int at the call below
  const int camID = 1;

  stream_video(width, height, fps, camID);

  return 0;
}

As I said before, I can successfully open the stream with ffplay rtmp://localhost/live/stream or ffplay http://localhost:8000/live/stream.flv, but I cannot open the stream with the flv.js player inside the browser. I get these errors:

flv: Invalid AVCDecoderConfigurationRecord, lack of data!
[FLVDemuxer] > Malformed Nalus near timestamp 0, NaluSize > DataSize!
[FLVDemuxer] > Malformed Nalus near timestamp 1, NaluSize > DataSize!
[FLVDemuxer] > Malformed Nalus near timestamp 2, NaluSize > DataSize!
....

I would really appreciate any help with fixing the stream so that it works properly with flv.js. If I stream a video with ffmpeg -re -i input.mp4 -c copy -f flv rtmp://localhost/live/stream, I can open the stream in flv.js without any issues, so roughly what this command does is what I would like to achieve in code. I also put my code in a GitHub repository here, in case someone would like to compile the code and check it.


Solution

  • I solved this issue myself. The main reason why this wasn't working as expected is that the AVStream extradata (the SPS and PPS headers) was empty. I needed to manually copy extradata and extradata_size from the AVCodecContext (not sure why this isn't done automatically). After I did this, I saw a picture in flv.js for the first time. Then I just needed to calculate frame->pts correctly to get the video stream working correctly. I am attaching the whole working code below in case anyone else encounters the same issue.

    #include <iostream>
    #include <vector>
    
    #include <opencv2/highgui.hpp>
    #include <opencv2/video.hpp>
    
    extern "C" {
    #include <libavformat/avformat.h>
    #include <libavcodec/avcodec.h>
    #include <libavutil/imgutils.h>
    #include <libswscale/swscale.h>
    }
    
    // Opens capture device `camID` and asks it for a width x height frame size.
    // Prints an error and exits with status 1 when the device cannot be opened.
    cv::VideoCapture get_device(int camID, double width, double height)
    {
      cv::VideoCapture capture(camID);
      if (capture.isOpened())
      {
        capture.set(cv::CAP_PROP_FRAME_WIDTH, width);
        capture.set(cv::CAP_PROP_FRAME_HEIGHT, height);
        return capture;
      }

      std::cout << "Failed to open video capture device!" << std::endl;
      exit(1);
    }
    
    // Allocates an output AVFormatContext for the muxer named `format_name` and
    // stores it in `fctx`. Prints an error and exits with status 1 on failure.
    void initialize_avformat_context(AVFormatContext *&fctx, const char *format_name)
    {
      const int status = avformat_alloc_output_context2(&fctx, nullptr, format_name, nullptr);
      if (status >= 0)
      {
        return;
      }

      std::cout << "Could not allocate output format context!" << std::endl;
      exit(1);
    }
    
    // Opens `output` for writing and attaches the resulting IO context to
    // `fctx->pb`. Prints an error and exits with status 1 when opening fails.
    void initialize_io_context(AVFormatContext *&fctx, const char *output)
    {
      // Muxers flagged AVFMT_NOFILE are left untouched: nothing is opened for them.
      if (fctx->oformat->flags & AVFMT_NOFILE)
      {
        return;
      }

      const int status = avio_open2(&fctx->pb, output, AVIO_FLAG_WRITE, nullptr, nullptr);
      if (status >= 0)
      {
        return;
      }

      std::cout << "Could not open output IO context!" << std::endl;
      exit(1);
    }
    
    // Fills `codec_ctx` with the H.264 video settings used by this program:
    // YUV420P pictures of width x height, a GOP size of 12, and a frame rate /
    // time base derived from `fps`. The global-header flag is set on the codec
    // context when the muxer in `fctx` advertises AVFMT_GLOBALHEADER.
    void set_codec_params(AVFormatContext *&fctx, AVCodecContext *&codec_ctx, double width, double height, int fps)
    {
      const AVRational frame_rate = {fps, 1};
      const bool muxer_wants_global_header = (fctx->oformat->flags & AVFMT_GLOBALHEADER) != 0;

      // codec identity
      codec_ctx->codec_type = AVMEDIA_TYPE_VIDEO;
      codec_ctx->codec_id = AV_CODEC_ID_H264;
      codec_ctx->codec_tag = 0;

      // picture layout
      codec_ctx->pix_fmt = AV_PIX_FMT_YUV420P;
      codec_ctx->width = static_cast<int>(width);
      codec_ctx->height = static_cast<int>(height);
      codec_ctx->gop_size = 12;

      // timing
      codec_ctx->time_base = av_inv_q(frame_rate);
      codec_ctx->framerate = frame_rate;

      if (muxer_wants_global_header)
      {
        codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
      }
    }
    
    // Opens the encoder described by `codec_ctx` / `codec` (profile "high",
    // preset "superfast", tune "zerolatency") and then copies the encoder
    // parameters into `stream->codecpar`.
    //
    // The copy is done after avcodec_open2() on purpose: the encoder generates
    // its extradata (the H.264 SPS/PPS headers) while it is being opened, and
    // avcodec_parameters_from_context() deep-copies that extradata into the
    // stream so the muxer can write it into the container header.
    //
    // Prints an error and exits with status 1 when either step fails.
    void initialize_codec_stream(AVStream *&stream, AVCodecContext *&codec_ctx, AVCodec *&codec)
    {
      AVDictionary *codec_options = nullptr;
      av_dict_set(&codec_options, "profile", "high", 0);
      av_dict_set(&codec_options, "preset", "superfast", 0);
      av_dict_set(&codec_options, "tune", "zerolatency", 0);

      // open video encoder
      int ret = avcodec_open2(codec_ctx, codec, &codec_options);
      // avcodec_open2() leaves the entries it did not consume in the dictionary;
      // the dictionary is ours to free on every path.
      av_dict_free(&codec_options);
      if (ret < 0)
      {
        std::cout << "Could not open video encoder!" << std::endl;
        exit(1);
      }

      // publish the (now complete) encoder parameters on the output stream
      ret = avcodec_parameters_from_context(stream->codecpar, codec_ctx);
      if (ret < 0)
      {
        std::cout << "Could not initialize stream codec parameters!" << std::endl;
        exit(1);
      }
    }
    
    // Creates a swscale context that converts width x height BGR24 images into
    // the encoder's pixel format at the same size, using bicubic filtering.
    // Prints an error and exits with status 1 when the context cannot be created.
    SwsContext *initialize_sample_scaler(AVCodecContext *codec_ctx, double width, double height)
    {
      const int w = static_cast<int>(width);
      const int h = static_cast<int>(height);

      SwsContext *scaler = sws_getContext(w, h, AV_PIX_FMT_BGR24,
                                          w, h, codec_ctx->pix_fmt,
                                          SWS_BICUBIC, nullptr, nullptr, nullptr);
      if (scaler)
      {
        return scaler;
      }

      std::cout << "Could not initialize sample scaler!" << std::endl;
      exit(1);
    }
    
    // Allocates an AVFrame of width x height in the encoder's pixel format,
    // together with the plane storage it points to.
    //
    // The plane storage is allocated with av_frame_get_buffer(), so it is owned
    // by the returned frame and released by av_frame_free(). (Pointing the frame
    // at a function-local buffer would leave frame->data dangling as soon as
    // this function returns.) Row sizes may be padded, so callers must use
    // frame->linesize rather than assuming tightly packed rows.
    //
    // Returns the new frame with pts set to 0; the caller frees it with
    // av_frame_free(). Prints an error and exits with status 1 on allocation
    // failure.
    AVFrame *allocate_frame_buffer(AVCodecContext *codec_ctx, double width, double height)
    {
      AVFrame *frame = av_frame_alloc();
      if (!frame)
      {
        std::cout << "Could not allocate video frame!" << std::endl;
        exit(1);
      }

      // av_frame_get_buffer() reads these three fields to size the planes
      frame->width = static_cast<int>(width);
      frame->height = static_cast<int>(height);
      frame->format = static_cast<int>(codec_ctx->pix_fmt);

      // av_frame_alloc() leaves pts at AV_NOPTS_VALUE; start from 0 so callers
      // that advance pts incrementally do not build on that sentinel
      frame->pts = 0;

      // 32-byte alignment is accepted by every FFmpeg version providing this call
      if (av_frame_get_buffer(frame, 32) < 0)
      {
        std::cout << "Could not allocate video frame buffer!" << std::endl;
        exit(1);
      }

      return frame;
    }
    
    // Sends `frame` to the encoder and writes every packet the encoder has
    // ready to the muxer `fmt_ctx`.
    //
    // An encoder may return zero, one or several packets per submitted frame,
    // so packets are drained until avcodec_receive_packet() reports
    // AVERROR(EAGAIN) (more input needed) or AVERROR_EOF; neither is an error.
    // Packet timestamps are handed to the muxer exactly as the encoder produced
    // them, i.e. in the units of frame->pts.
    //
    // Prints an error and exits with status 1 when encoding or writing fails.
    void write_frame(AVCodecContext *codec_ctx, AVFormatContext *fmt_ctx, AVFrame *frame)
    {
      int ret = avcodec_send_frame(codec_ctx, frame);
      if (ret < 0)
      {
        std::cout << "Error sending frame to codec context!" << std::endl;
        exit(1);
      }

      AVPacket *pkt = av_packet_alloc();
      if (!pkt)
      {
        std::cout << "Could not allocate packet!" << std::endl;
        exit(1);
      }

      for (;;)
      {
        ret = avcodec_receive_packet(codec_ctx, pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
        {
          // nothing (more) to write for this frame
          break;
        }
        if (ret < 0)
        {
          std::cout << "Error receiving packet from codec context!" << std::endl;
          exit(1);
        }

        ret = av_interleaved_write_frame(fmt_ctx, pkt);
        // harmless if the muxer already took the reference; required if it did not
        av_packet_unref(pkt);
        if (ret < 0)
        {
          std::cout << "Error writing packet to output!" << std::endl;
          exit(1);
        }
      }

      av_packet_free(&pkt);
    }
    
    void stream_video(double width, double height, int fps, int camID)
    {
      av_register_all();
      avformat_network_init();
    
      const char *output = "rtmp://localhost/live/stream";
      int ret;
      auto cam = get_device(camID, width, height);
      std::vector<uint8_t> imgbuf(height * width * 3 + 16);
      cv::Mat image(height, width, CV_8UC3, imgbuf.data(), width * 3);
      AVFormatContext *ofmt_ctx = nullptr;
      AVCodec *out_codec = nullptr;
      AVStream *out_stream = nullptr;
      AVCodecContext *out_codec_ctx = nullptr;
    
      initialize_avformat_context(ofmt_ctx, "flv");
      initialize_io_context(ofmt_ctx, output);
    
      out_codec = avcodec_find_encoder(AV_CODEC_ID_H264);
      out_stream = avformat_new_stream(ofmt_ctx, out_codec);
      out_codec_ctx = avcodec_alloc_context3(out_codec);
    
      set_codec_params(ofmt_ctx, out_codec_ctx, width, height, fps);
      initialize_codec_stream(out_stream, out_codec_ctx, out_codec);
    
      out_stream->codecpar->extradata = out_codec_ctx->extradata;
      out_stream->codecpar->extradata_size = out_codec_ctx->extradata_size;
    
      av_dump_format(ofmt_ctx, 0, output, 1);
    
      auto *swsctx = initialize_sample_scaler(out_codec_ctx, width, height);
      auto *frame = allocate_frame_buffer(out_codec_ctx, width, height);
    
      int cur_size;
      uint8_t *cur_ptr;
    
      ret = avformat_write_header(ofmt_ctx, nullptr);
      if (ret < 0)
      {
        std::cout << "Could not write header!" << std::endl;
        exit(1);
      }
    
      bool end_of_stream = false;
      do
      {
        cam >> image;
        const int stride[] = {static_cast<int>(image.step[0])};
        sws_scale(swsctx, &image.data, stride, 0, image.rows, frame->data, frame->linesize);
        frame->pts += av_rescale_q(1, out_codec_ctx->time_base, out_stream->time_base);
        write_frame(out_codec_ctx, ofmt_ctx, frame);
      } while (!end_of_stream);
    
      av_write_trailer(ofmt_ctx);
    
      av_frame_free(&frame);
      avcodec_close(out_codec_ctx);
      avio_close(ofmt_ctx->pb);
      avformat_free_context(ofmt_ctx);
    }
    
    // Entry point: streams camera 1 at 1280x720 with a 25 fps target.
    int main()
    {
      // av_log_set_level(AV_LOG_DEBUG);
      const double width = 1280;
      const double height = 720;
      const int camID = 1;
      const int fps = 25;

      stream_video(width, height, fps, camID);

      return 0;
    }
    

    That's it!