I am working with FFMPEG 5.2 using it with C++ in Visual Studio. What I require to do is to add a SEI Unregistered message (5) to every frame of a stream, for that, I am demuxing a MP4 container, then taking the video stream, decoding every packet to get a frame, then add SEI message to every frame, encoding and remuxing a new video stream (video only) and saving the new stream to a separate container.
To add the SEI data I use this specific code:
const char* sideDataMsg = "139FB1A9446A4DEC8CBF65B1E12D2CFDHola";;
size_t sideDataSize = sizeof(sideDataMsg);
AVBufferRef* sideDataBuffer = av_buffer_alloc(20);
sideDataBuffer->data = (uint8_t*)sideDataMsg;
AVFrameSideData* sideData = av_frame_new_side_data_from_buf(frame, AV_FRAME_DATA_SEI_UNREGISTERED, sideDataBuffer);
regarding the format of the sideDataMsg I have tried several apporaches including setting it like: "139FB1A9-446A-4DEC-8CBF65B1E12D2CFD+Hola!" which is indicated to be the required format in H.264 specs, however, even when in memory I see the SEI data is added to every frame as we observe as follows:
the resulting stream/container does not shows the expected data, below my entire code, this is mostly code taken/adapted from doc/examples folder of FFMPEG library.
BTW: I also tried setting AVCodecContext->export_side_data to different bit values (0 to FF) understanding that this can indicate the encoder to export the SEI data in every frame to be encoded but no luck.
I appreciate in advance any help from you!
// FfmpegTests.cpp : This file contains the 'main' function. Program execution begins and ends there.
//
#pragma warning(disable : 4996)
extern "C"
{
#include "libavformat/avformat.h"
#include "libavcodec/avcodec.h"
#include "libavfilter/avfilter.h"
#include "libavutil/opt.h"
#include "libavutil/avutil.h"
#include "libavutil/error.h"
#include "libavfilter/buffersrc.h"
#include "libavfilter/buffersink.h"
#include "libswscale/swscale.h"
}
#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "swscale.lib")
#include <cstdio>
#include <iostream>
#include <chrono>
#include <thread>
static AVFormatContext* fmt_ctx;
static AVCodecContext* dec_ctx;
AVFilterGraph* filter_graph;
AVFilterContext* buffersrc_ctx;
AVFilterContext* buffersink_ctx;
static int video_stream_index = -1;
const char* filter_descr = "scale=78:24,transpose=cclock";
static int64_t last_pts = AV_NOPTS_VALUE;
// FOR SEI NAL INSERTION
const AVOutputFormat* ofmt = NULL;
AVFormatContext* ofmt_ctx = NULL;
int stream_index = 0;
int* stream_mapping = NULL;
int stream_mapping_size = 0;
int FRAMES_COUNT = 0;
const AVCodec* codec_enc;
AVCodecContext* c = NULL;
static int open_input_file(const char* filename)
{
const AVCodec* dec;
int ret;
if ((ret = avformat_open_input(&fmt_ctx, filename, NULL, NULL)) < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot open input file\n");
return ret;
}
if ((ret = avformat_find_stream_info(fmt_ctx, NULL)) < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot find stream information\n");
return ret;
}
/* select the video stream */
ret = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &dec, 0);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot find a video stream in the input file\n");
return ret;
}
video_stream_index = ret;
/* create decoding context */
dec_ctx = avcodec_alloc_context3(dec);
if (!dec_ctx)
return AVERROR(ENOMEM);
avcodec_parameters_to_context(dec_ctx, fmt_ctx->streams[video_stream_index]->codecpar);
FRAMES_COUNT = fmt_ctx->streams[video_stream_index]->nb_frames;
/* init the video decoder */
if ((ret = avcodec_open2(dec_ctx, dec, NULL)) < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot open video decoder\n");
return ret;
}
return 0;
}
static int init_filters(const char* filters_descr)
{
char args[512];
int ret = 0;
const AVFilter* buffersrc = avfilter_get_by_name("buffer");
const AVFilter* buffersink = avfilter_get_by_name("buffersink");
AVFilterInOut* outputs = avfilter_inout_alloc();
AVFilterInOut* inputs = avfilter_inout_alloc();
AVRational time_base = fmt_ctx->streams[video_stream_index]->time_base;
enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE };
filter_graph = avfilter_graph_alloc();
if (!outputs || !inputs || !filter_graph) {
ret = AVERROR(ENOMEM);
goto end;
}
/* buffer video source: the decoded frames from the decoder will be inserted here. */
snprintf(args, sizeof(args),
"video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
dec_ctx->width, dec_ctx->height, dec_ctx->pix_fmt,
time_base.num, time_base.den,
dec_ctx->sample_aspect_ratio.num, dec_ctx->sample_aspect_ratio.den);
ret = avfilter_graph_create_filter(&buffersrc_ctx, buffersrc, "in",
args, NULL, filter_graph);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot create buffer source\n");
goto end;
}
/* buffer video sink: to terminate the filter chain. */
ret = avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out",
NULL, NULL, filter_graph);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot create buffer sink\n");
goto end;
}
ret = av_opt_set_int_list(buffersink_ctx, "pix_fmts", pix_fmts, AV_PIX_FMT_NONE, AV_OPT_SEARCH_CHILDREN);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot set output pixel format\n");
goto end;
}
outputs->name = av_strdup("in");
outputs->filter_ctx = buffersrc_ctx;
outputs->pad_idx = 0;
outputs->next = NULL;
inputs->name = av_strdup("out");
inputs->filter_ctx = buffersink_ctx;
inputs->pad_idx = 0;
inputs->next = NULL;
if ((ret = avfilter_graph_parse_ptr(filter_graph, filters_descr,
&inputs, &outputs, NULL)) < 0)
goto end;
if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0)
goto end;
end:
avfilter_inout_free(&inputs);
avfilter_inout_free(&outputs);
return ret;
}
static void display_frame(const AVFrame* frame, AVRational time_base)
{
int x, y;
uint8_t* p0, * p;
int64_t delay;
if (frame->pts != AV_NOPTS_VALUE) {
if (last_pts != AV_NOPTS_VALUE) {
/* sleep roughly the right amount of time;
* usleep is in microseconds, just like AV_TIME_BASE. */
AVRational timeBaseQ;
timeBaseQ.num = 1;
timeBaseQ.den = AV_TIME_BASE;
delay = av_rescale_q(frame->pts - last_pts, time_base, timeBaseQ);
if (delay > 0 && delay < 1000000)
std::this_thread::sleep_for(std::chrono::microseconds(delay));
}
last_pts = frame->pts;
}
/* Trivial ASCII grayscale display. */
p0 = frame->data[0];
puts("\033c");
for (y = 0; y < frame->height; y++) {
p = p0;
for (x = 0; x < frame->width; x++)
putchar(" .-+#"[*(p++) / 52]);
putchar('\n');
p0 += frame->linesize[0];
}
fflush(stdout);
}
int save_frame_as_jpeg(AVCodecContext* pCodecCtx, AVFrame* pFrame, int FrameNo) {
int ret = 0;
const AVCodec* jpegCodec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);
if (!jpegCodec) {
return -1;
}
AVCodecContext* jpegContext = avcodec_alloc_context3(jpegCodec);
if (!jpegContext) {
return -1;
}
jpegContext->pix_fmt = pCodecCtx->pix_fmt;
jpegContext->height = pFrame->height;
jpegContext->width = pFrame->width;
jpegContext->time_base = AVRational{ 1,10 };
jpegContext->strict_std_compliance = FF_COMPLIANCE_UNOFFICIAL;
ret = avcodec_open2(jpegContext, jpegCodec, NULL);
if (ret < 0) {
return ret;
}
FILE* JPEGFile;
char JPEGFName[256];
AVPacket packet;
packet.data = NULL;
packet.size = 0;
av_init_packet(&packet);
int gotFrame;
ret = avcodec_send_frame(jpegContext, pFrame);
if (ret < 0) {
return ret;
}
ret = avcodec_receive_packet(jpegContext, &packet);
if (ret < 0) {
return ret;
}
sprintf(JPEGFName, "c:\\folder\\dvr-%06d.jpg", FrameNo);
JPEGFile = fopen(JPEGFName, "wb");
fwrite(packet.data, 1, packet.size, JPEGFile);
fclose(JPEGFile);
av_packet_unref(&packet);
avcodec_close(jpegContext);
return 0;
}
int initialize_output_stream(AVFormatContext* input_fctx, const char* out_filename) {
int ret = 0;
avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, out_filename);
if (!ofmt_ctx) {
fprintf(stderr, "Could not create output context\n");
return -1;
}
stream_mapping_size = input_fctx->nb_streams;
stream_mapping = (int*)av_calloc(stream_mapping_size, sizeof(*stream_mapping));
if (!stream_mapping) {
ret = AVERROR(ENOMEM);
return -1;
}
for (int i = 0; i < input_fctx->nb_streams; i++) {
AVStream* out_stream;
AVStream* in_stream = input_fctx->streams[i];
AVCodecParameters* in_codecpar = in_stream->codecpar;
if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&
in_codecpar->codec_type != AVMEDIA_TYPE_VIDEO &&
in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {
stream_mapping[i] = -1;
continue;
}
stream_mapping[i] = stream_index++;
out_stream = avformat_new_stream(ofmt_ctx, NULL);
if (!out_stream) {
fprintf(stderr, "Failed allocating output stream\n");
ret = AVERROR_UNKNOWN;
return ret;
}
ret = avcodec_parameters_copy(out_stream->codecpar, in_codecpar);
if (ret < 0) {
fprintf(stderr, "Failed to copy codec parameters\n");
return -1;
}
out_stream->codecpar->codec_tag = 0;
}
ret = avio_open(&ofmt_ctx->pb, out_filename, AVIO_FLAG_WRITE);
if (ret < 0) {
fprintf(stderr, "Could not open output file '%s'", out_filename);
return -1;
}
ret = avformat_write_header(ofmt_ctx, NULL);
if (ret < 0) {
fprintf(stderr, "Error occurred when opening output file\n");
return -1;
}
// ENCODER
codec_enc = avcodec_find_encoder_by_name("libx264");
if (!codec_enc) {
fprintf(stderr, "Codec '%s' not found\n", "libx264");
return -1;
}
c = avcodec_alloc_context3(codec_enc);
if (!c) {
fprintf(stderr, "Could not allocate video codec context\n");
exit(1);
}
c->bit_rate = dec_ctx->bit_rate;
c->width = dec_ctx->width;
c->height = dec_ctx->height;
c->time_base = dec_ctx->time_base;
c->framerate = dec_ctx->framerate;
c->gop_size = dec_ctx->gop_size;
c->max_b_frames = dec_ctx->max_b_frames;
c->pix_fmt = dec_ctx->pix_fmt;
c->time_base = AVRational{ 1,1 };
c->export_side_data = 255;
if (codec_enc->id == AV_CODEC_ID_H264)
av_opt_set(c->priv_data, "preset", "slow", 0);
ret = avcodec_open2(c, codec_enc, NULL);
if (ret < 0) {
fprintf(stderr, "Could not open codec\n");
return ret;
}
}
int add_frame_output_stream(AVFrame* frame) {
int ret;
AVPacket* pkt;
pkt = av_packet_alloc();
ret = avcodec_send_frame(c, frame);
if (ret < 0) {
fprintf(stderr, "Error sending a frame for decoding\n");
return ret;
}
while (ret >= 0) {
ret = avcodec_receive_packet(c, pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
return 0;
else if (ret < 0) {
fprintf(stderr, "Error during decoding\n");
return -1;
}
pkt->stream_index = stream_mapping[pkt->stream_index];
ret = av_interleaved_write_frame(ofmt_ctx, pkt);
av_packet_unref(pkt);
}
return 0;
}
int main(int argc, char** argv)
{
AVFrame* frame;
AVFrame* filt_frame;
AVPacket* packet;
int ret, count = 0;
// FOR SEI NAL INSERTION
const char* out_filename;
if (argc < 2) {
fprintf(stderr, "Usage: %s file\n", argv[0]);
exit(1);
}
frame = av_frame_alloc();
filt_frame = av_frame_alloc();
packet = av_packet_alloc();
if (!frame || !filt_frame || !packet) {
fprintf(stderr, "Could not allocate frame or packet\n");
exit(1);
}
if ((ret = open_input_file(argv[1])) < 0)
goto end;
if ((ret = init_filters(filter_descr)) < 0)
goto end;
out_filename = argv[2];
initialize_output_stream(fmt_ctx, out_filename);
while (count < FRAMES_COUNT)
{
if ((ret = av_read_frame(fmt_ctx, packet)) < 0)
break;
if (packet->stream_index == video_stream_index) {
ret = avcodec_send_packet(dec_ctx, packet);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while sending a packet to the decoder\n");
break;
}
while (ret >= 0)
{
ret = avcodec_receive_frame(dec_ctx, frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
break;
}
else if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while receiving a frame from the decoder\n");
goto end;
}
frame->pts = frame->best_effort_timestamp;
/* push the decoded frame into the filtergraph */
if (av_buffersrc_add_frame_flags(buffersrc_ctx, frame, AV_BUFFERSRC_FLAG_KEEP_REF) < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while feeding the filtergraph\n");
break;
}
/* pull filtered frames from the filtergraph */
while (1) {
ret = av_buffersink_get_frame(buffersink_ctx, filt_frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
break;
if (ret < 0)
goto end;
// display_frame(filt_frame, buffersink_ctx->inputs[0]->time_base);
av_frame_unref(filt_frame);
/* ret = save_frame_as_jpeg(dec_ctx, frame, dec_ctx->frame_number);
if (ret < 0)
goto end; */
//2. Add metadata to frames SEI
ret = av_frame_make_writable(frame);
if (ret < 0)
exit(1);
char sideDataSei[43] = "139FB1A9446A4DEC8CBF65B1E12D2CFDHola";
const char* sideDataMsg = "139FB1A9446A4DEC8CBF65B1E12D2CFDHola";
size_t sideDataSize = sizeof(sideDataMsg);
AVBufferRef* sideDataBuffer = av_buffer_alloc(20);
sideDataBuffer->data = (uint8_t*)sideDataMsg;
AVFrameSideData* sideData = av_frame_new_side_data_from_buf(frame, AV_FRAME_DATA_SEI_UNREGISTERED, sideDataBuffer);
ret = add_frame_output_stream(frame);
if (ret < 0)
goto end;
}
av_frame_unref(frame);
count++;
}
}
av_packet_unref(packet);
}
av_write_trailer(ofmt_ctx);
end:
avfilter_graph_free(&filter_graph);
avcodec_free_context(&dec_ctx);
avformat_close_input(&fmt_ctx);
av_frame_free(&frame);
av_frame_free(&filt_frame);
av_packet_free(&packet);
if (ret < 0 && ret != AVERROR_EOF) {
char errBuf[AV_ERROR_MAX_STRING_SIZE]{ 0 };
int res = av_strerror(ret, errBuf, AV_ERROR_MAX_STRING_SIZE);
fprintf(stderr, "Error: %s\n", errBuf);
exit(1);
}
exit(0);
}
Well, I came up with this solution from a friend, just had to add:
*av_opt_set_int(c->priv_data, "udu_sei", 1, 0);*
In the function initialize_output_stream after all parameters are set for AVCodecContext (c) that is being used for the output stream encoding.
Hope this helps someone!