In C++ code, I can correctly save a serial of images(opencv's cv::Mat
) to a mp4 file by using ffmpeg
library, see the question and answer here: avformat_write_header() function call crashed when I try to save several RGB data to a output.mp4 file
Now here comes another question:
Rotem' answer in that question can have the output.mp4
saved correctly. When play the mp4 file, I see the frames(OpenCV's cv::Mat
image) shows in a const rate.
What I can do if I have got the frames not in a const frequency, for example, the I got the first frame at the 0ms
, and the second frame at the 50ms
the third frame at 75ms
, so that the each frame as associated time stamp, for example, those time stamp array are something like below:
int timestamp[100] = {0, 50, 75, ...};
What is the method to modify the Rotem's answer to reflect this? It looks like I have to change the pts
field of each frame
. Because I just test the code, if I change this:
yuvFrame->pts = av_rescale_q(frame_count*frame_count, outCodecCtx->time_base, outStream->time_base); //Set PTS timestamp
// note I change from frame_count to frame_count*frame_count
Then the output.mp4
plays slower and slower, because the later frame has large pts
values.
Thanks.
EDIT
This is the code I'm currently used:
#include <iostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <opencv2/opencv.hpp>
extern "C" {
#include <libavutil/imgutils.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
}
#include<cstdlib> // to generate time stamps
using namespace std;
using namespace cv;
int main()
{
// Set up input frames as BGR byte arrays
vector<Mat> frames;
int width = 640;
int height = 480;
int num_frames = 100;
Scalar black(0, 0, 0);
Scalar white(255, 255, 255);
int font = FONT_HERSHEY_SIMPLEX;
double font_scale = 1.0;
int thickness = 2;
for (int i = 0; i < num_frames; i++) {
Mat frame = Mat::zeros(height, width, CV_8UC3);
putText(frame, std::to_string(i), Point(width / 2 - 50, height / 2), font, font_scale, white, thickness);
frames.push_back(frame);
}
// generate a serial of time stamps which is used to set the PTS value
// suppose they are in ms unit, the time interval is between 30ms to 59ms
vector<int> timestamps;
for (int i = 0; i < num_frames; i++) {
int timestamp;
if (i == 0)
timestamp = 0;
else
{
int random = 30 + (rand() % 30);
timestamp = timestamps[i-0] + random;
}
timestamps.push_back(timestamp);
}
// Populate frames with BGR byte arrays
// Initialize FFmpeg
//av_register_all();
// Set up output file
AVFormatContext* outFormatCtx = nullptr;
//AVCodec* outCodec = nullptr;
AVCodecContext* outCodecCtx = nullptr;
//AVStream* outStream = nullptr;
//AVPacket outPacket;
const char* outFile = "output.mp4";
int outWidth = frames[0].cols;
int outHeight = frames[0].rows;
int fps = 25;
// Open the output file context
avformat_alloc_output_context2(&outFormatCtx, nullptr, nullptr, outFile);
if (!outFormatCtx) {
cerr << "Error: Could not allocate output format context" << endl;
return -1;
}
// Open the output file
if (avio_open(&outFormatCtx->pb, outFile, AVIO_FLAG_WRITE) < 0) {
cerr << "Error opening output file" << std::endl;
return -1;
}
// Set up output codec
const AVCodec* outCodec = avcodec_find_encoder(AV_CODEC_ID_H264);
if (!outCodec) {
cerr << "Error: Could not find H.264 codec" << endl;
return -1;
}
outCodecCtx = avcodec_alloc_context3(outCodec);
if (!outCodecCtx) {
cerr << "Error: Could not allocate output codec context" << endl;
return -1;
}
outCodecCtx->codec_id = AV_CODEC_ID_H264;
outCodecCtx->codec_type = AVMEDIA_TYPE_VIDEO;
outCodecCtx->pix_fmt = AV_PIX_FMT_YUV420P;
outCodecCtx->width = outWidth;
outCodecCtx->height = outHeight;
outCodecCtx->time_base = { 1, fps*1000 }; // 25000
outCodecCtx->framerate = {fps, 1}; // 25
outCodecCtx->bit_rate = 4000000;
//https://github.com/leandromoreira/ffmpeg-libav-tutorial
//We set the flag AV_CODEC_FLAG_GLOBAL_HEADER which tells the encoder that it can use the global headers.
if (outFormatCtx->oformat->flags & AVFMT_GLOBALHEADER)
{
outCodecCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; //
}
// Open output codec
if (avcodec_open2(outCodecCtx, outCodec, nullptr) < 0) {
cerr << "Error: Could not open output codec" << endl;
return -1;
}
// Create output stream
AVStream* outStream = avformat_new_stream(outFormatCtx, outCodec);
if (!outStream) {
cerr << "Error: Could not allocate output stream" << endl;
return -1;
}
// Configure output stream parameters (e.g., time base, codec parameters, etc.)
// ...
// Connect output stream to format context
outStream->codecpar->codec_id = outCodecCtx->codec_id;
outStream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
outStream->codecpar->width = outCodecCtx->width;
outStream->codecpar->height = outCodecCtx->height;
outStream->codecpar->format = outCodecCtx->pix_fmt;
outStream->time_base = outCodecCtx->time_base;
int ret = avcodec_parameters_from_context(outStream->codecpar, outCodecCtx);
if (ret < 0) {
cerr << "Error: Could not copy codec parameters to output stream" << endl;
return -1;
}
outStream->avg_frame_rate = outCodecCtx->framerate;
//outStream->id = outFormatCtx->nb_streams++; <--- We shouldn't modify outStream->id
ret = avformat_write_header(outFormatCtx, nullptr);
if (ret < 0) {
cerr << "Error: Could not write output header" << endl;
return -1;
}
// Convert frames to YUV format and write to output file
int frame_count = -1;
for (const auto& frame : frames) {
frame_count++;
AVFrame* yuvFrame = av_frame_alloc();
if (!yuvFrame) {
cerr << "Error: Could not allocate YUV frame" << endl;
return -1;
}
av_image_alloc(yuvFrame->data, yuvFrame->linesize, outWidth, outHeight, AV_PIX_FMT_YUV420P, 32);
yuvFrame->width = outWidth;
yuvFrame->height = outHeight;
yuvFrame->format = AV_PIX_FMT_YUV420P;
// Convert BGR frame to YUV format
Mat yuvMat;
cvtColor(frame, yuvMat, COLOR_BGR2YUV_I420);
memcpy(yuvFrame->data[0], yuvMat.data, outWidth * outHeight);
memcpy(yuvFrame->data[1], yuvMat.data + outWidth * outHeight, outWidth * outHeight / 4);
memcpy(yuvFrame->data[2], yuvMat.data + outWidth * outHeight * 5 / 4, outWidth * outHeight / 4);
// Set up output packet
//av_init_packet(&outPacket); //error C4996: 'av_init_packet': was declared deprecated
AVPacket* outPacket = av_packet_alloc();
memset(outPacket, 0, sizeof(outPacket)); //Use memset instead of av_init_packet (probably unnecessary).
//outPacket->data = nullptr;
//outPacket->size = 0;
yuvFrame->pts = av_rescale_q(timestamps[frame_count], outCodecCtx->time_base, outStream->time_base); //Set PTS timestamp
// Encode frame and write to output file
int ret = avcodec_send_frame(outCodecCtx, yuvFrame);
if (ret < 0) {
cerr << "Error: Could not send frame to output codec" << endl;
return -1;
}
while (ret >= 0) {
ret = avcodec_receive_packet(outCodecCtx, outPacket);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
break;
} else if (ret < 0) {
cerr << "Error: Could not receive packet from output codec" << endl;
return -1;
}
//av_packet_rescale_ts(&outPacket, outCodecCtx->time_base, outStream->time_base);
outPacket->stream_index = outStream->index;
outPacket->duration = av_rescale_q(1, outCodecCtx->time_base, outStream->time_base); // Set packet duration
ret = av_interleaved_write_frame(outFormatCtx, outPacket);
av_packet_unref(outPacket);
if (ret < 0) {
cerr << "Error: Could not write packet to output file" << endl;
return -1;
}
}
av_frame_free(&yuvFrame);
}
// Flush the encoder
ret = avcodec_send_frame(outCodecCtx, nullptr);
if (ret < 0) {
std::cerr << "Error flushing encoder: " << std::endl;
return -1;
}
while (ret >= 0) {
AVPacket* pkt = av_packet_alloc();
if (!pkt) {
std::cerr << "Error allocating packet" << std::endl;
return -1;
}
ret = avcodec_receive_packet(outCodecCtx, pkt);
// Write the packet to the output file
if (ret == 0)
{
pkt->stream_index = outStream->index;
pkt->duration = av_rescale_q(1, outCodecCtx->time_base, outStream->time_base); // <---- Set packet duration
ret = av_interleaved_write_frame(outFormatCtx, pkt);
av_packet_unref(pkt);
if (ret < 0) {
std::cerr << "Error writing packet to output file: " << std::endl;
return -1;
}
}
}
// Write output trailer
av_write_trailer(outFormatCtx);
// Clean up
avcodec_close(outCodecCtx);
avcodec_free_context(&outCodecCtx);
avformat_free_context(outFormatCtx);
return 0;
}
Especially, I have those changes to the original Rotem's answer:
Fist, I have some code to generate a time stamp array:
// generate a serial of time stamps which is used to set the PTS value
// suppose they are in ms unit, the time interval is between 30ms to 59ms
vector<int> timestamps;
for (int i = 0; i < num_frames; i++) {
int timestamp;
if (i == 0)
timestamp = 0;
else
{
int random = 30 + (rand() % 30);
timestamp = timestamps[i-0] + random;
}
timestamps.push_back(timestamp);
}
Second, I just set the PTS by those values:
yuvFrame->pts = av_rescale_q(timestamps[frame_count], outCodecCtx->time_base, outStream->time_base); //Set PTS timestamp
Note that I have set the fps like below:
outCodecCtx->time_base = { 1, fps*1000 }; // 25000
outCodecCtx->framerate = {fps, 1}; // 25
Now, when I run the program, I got a lot of warnings in the console:
[libx264 @ 0000022e7fa621c0] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2
[libx264 @ 0000022e7fa621c0] profile High, level 3.0, 4:2:0, 8-bit
[libx264 @ 0000022e7fa621c0] 264 - core 164 r3094M bfc87b7 - H.264/MPEG-4 AVC codec - Copyleft 2003-2022 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=-2 threads=6 lookahead_threads=1 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min=25 scenecut=40 intra_refresh=0 rc_lookahead=40 rc=abr mbtree=1 bitrate=4000 ratetol=1.0 qcomp=0.60 qpmin=0 qpmax=69 qpstep=4 ip_ratio=1.40 aq=1:1.00
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] non-strictly-monotonic PTS
[libx264 @ 0000022e7fa621c0] invalid DTS: PTS is less than DTS
[mp4 @ 0000022e090b2300] pts (592) < dts (1129348497) in stream 0
Error: Could not write packet to output file
Any ideas? Thanks.
I think I have found the answer, there is a bug in my code:
// generate a serial of time stamps which is used to set the PTS value
// suppose they are in ms unit, the time interval is between 30ms to 59ms
vector<int> timestamps;
for (int i = 0; i < num_frames; i++) {
int timestamp;
if (i == 0)
timestamp = 0;
else
{
int random = 30 + (rand() % 30);
timestamp = timestamps[i-0] + random;
}
timestamps.push_back(timestamp);
}
Here, the bug is the timestamps[i-0]
, it should be timestamps[i-1]
, with such changes, I can get a correctly monotonically increasing array of timestamps, and I don't have the non-strictly-monotonic PTS
like issue now when running the fixed code.
This discussion gives me many help: Discussion between ollydbg23 and Rotem | chat.stackoverflow.com
Especially the sentence:
When encoding, we set the PTS of the frame. The PTS and DTS of the packets are filled automatically (from the frame PTS). All you have to do is pts = pts_list[frame_count] and scale the pts according to the timebase of the stream.
So, many thanks to @Rotem.
I have post the final working code here in github gist: