
Increase/Decrease audio volume using FFmpeg


I am currently using C# P/Invoke calls to the FFmpeg APIs to handle video and audio. I have the following code in place to extract the audio from a video and write it to a file.

while (ffmpeg.av_read_frame(formatContext, &packet) >= 0)
{
    if (packet.stream_index == streamIndex)
    {
        while (packet.size > 0)
        {
            int frameDecoded;
            int frameDecodedResult = ffmpeg.avcodec_decode_audio4(codecContext, frame, &frameDecoded, &packet);

            if (frameDecodedResult < 0)
            {
                break; // decode error: discard the rest of this packet
            }

            if (frameDecoded > 0)
            {
                //writeAudio.WriteFrame(frame);
            }

            // avcodec_decode_audio4() returns the number of bytes it consumed
            packet.data += frameDecodedResult;
            packet.size -= frameDecodedResult;
        }

        frameIndex++;
    }

    ffmpeg.av_free_packet(&packet);
}

This is all working correctly. I'm currently using the FFmpeg.AutoGen project for the API access.

I want to be able to increase/decrease the volume of the audio before it's written to the file, but I cannot seem to find a command or any help with this. Does it have to be done manually?
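
For reference, the ffmpeg command-line tool does this with its volume audio filter (e.g. ffmpeg -i input.mp4 -af volume=1.5 output.mp4), so I assume the same filter is exposed through the API.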

Update 1:

After receiving some help, this is the class layout I have:

public unsafe class FilterVolume : IDisposable
{
    #region Private Member Variables

    private AVFilterGraph* m_filterGraph = null;
    private AVFilterContext* m_aBufferSourceFilterContext = null;
    private AVFilterContext* m_aBufferSinkFilterContext = null;

    #endregion

    #region Private Constant Member Variables

    private const int EAGAIN = 11; // errno EAGAIN; av_buffersink_get_frame() returns -EAGAIN when no frame is available yet

    #endregion

    public FilterVolume(AVCodecContext* codecContext, AVStream* stream, float volume)
    {
        CodecContext = codecContext;
        Stream = stream;
        Volume = volume;

        Initialise();
    }

    public AVFrame* Adjust(AVFrame* frame)
    {
        AVFrame* returnFilteredFrame = ffmpeg.av_frame_alloc();

        if (m_aBufferSourceFilterContext != null && m_aBufferSinkFilterContext != null)
        {
            int bufferSourceAddFrameResult = ffmpeg.av_buffersrc_add_frame(m_aBufferSourceFilterContext, frame);
            if (bufferSourceAddFrameResult < 0)
            {
                // TODO: handle the error (negative AVERROR code)
            }

            int bufferSinkGetFrameResult = ffmpeg.av_buffersink_get_frame(m_aBufferSinkFilterContext, returnFilteredFrame);
            if (bufferSinkGetFrameResult < 0 && bufferSinkGetFrameResult != -EAGAIN)
            {
                // TODO: handle the error (-EAGAIN just means no frame is ready yet)
            }
        }

        return returnFilteredFrame;
    }

    public void Dispose()
    {
        Cleanup(m_filterGraph);
    }

    #region Private Properties

    private AVCodecContext* CodecContext { get; set; }
    private AVStream* Stream { get; set; }
    private float Volume { get; set; }

    #endregion

    #region Private Setup Helper Functions

    private void Initialise()
    {
        m_filterGraph = GetAllocatedFilterGraph();

        string aBufferFilterArguments = string.Format("sample_fmt={0}:channel_layout={1}:sample_rate={2}:time_base={3}/{4}",
            (int)CodecContext->sample_fmt,
            CodecContext->channel_layout,
            CodecContext->sample_rate,
            Stream->time_base.num,
            Stream->time_base.den);

        AVFilterContext* aBufferSourceFilterContext = CreateFilter("abuffer", m_filterGraph, aBufferFilterArguments);
        AVFilterContext* volumeFilterContext = CreateFilter("volume", m_filterGraph, string.Format("volume={0}", Volume));
        AVFilterContext* aBufferSinkFilterContext = CreateFilter("abuffersink", m_filterGraph);

        LinkFilter(aBufferSourceFilterContext, volumeFilterContext);
        LinkFilter(volumeFilterContext, aBufferSinkFilterContext);

        SetFilterGraphConfiguration(m_filterGraph, null);

        m_aBufferSourceFilterContext = aBufferSourceFilterContext;
        m_aBufferSinkFilterContext = aBufferSinkFilterContext;
    }

    #endregion

    #region Private Cleanup Helper Functions

    private static void Cleanup(AVFilterGraph* filterGraph)
    {
        if (filterGraph != null)
        {
            ffmpeg.avfilter_graph_free(&filterGraph);
        }
    }

    #endregion

    #region Private Helpers

    private AVFilterGraph* GetAllocatedFilterGraph()
    {
        AVFilterGraph* filterGraph = ffmpeg.avfilter_graph_alloc();
        if (filterGraph == null)
        {
            // TODO: handle the allocation failure
        }

        return filterGraph;
    }

    private AVFilter* GetFilterByName(string name)
    {
        AVFilter* filter = ffmpeg.avfilter_get_by_name(name);
        if (filter == null)
        {
            // TODO: handle the missing filter
        }

        return filter;
    }

    private void SetFilterGraphConfiguration(AVFilterGraph* filterGraph, void* logContext)
    {
        int filterGraphConfigResult = ffmpeg.avfilter_graph_config(filterGraph, logContext);
        if (filterGraphConfigResult < 0)
        {
            // TODO: handle the error
        }
    }

    private AVFilterContext* CreateFilter(string filterName, AVFilterGraph* filterGraph, string filterArguments = null)
    {
        AVFilter* filter = GetFilterByName(filterName);
        AVFilterContext* filterContext;

        int aBufferFilterCreateResult = ffmpeg.avfilter_graph_create_filter(&filterContext, filter, filterName, filterArguments, null, filterGraph);
        if (aBufferFilterCreateResult < 0)
        {
            // TODO: handle the error
        }

        return filterContext;
    }

    private void LinkFilter(AVFilterContext* source, AVFilterContext* destination)
    {
        int filterLinkResult = ffmpeg.avfilter_link(source, 0, destination, 0);
        if (filterLinkResult < 0)
        {
            // TODO: handle the error
        }
    }

    #endregion
}

The Adjust() function is called after a frame is decoded. I'm currently getting a -22 error (AVERROR(EINVAL), "Invalid argument") when av_buffersrc_add_frame() is called, but after debugging, I cannot see anything that would be causing it.
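
In FFmpeg versions of this era, av_buffersrc_add_frame() typically returns AVERROR(EINVAL) when the incoming frame's parameters don't match the arguments the abuffer source was configured with. A minimal debugging sketch that could sit next to Adjust() in the FilterVolume class (the helper name is illustrative):

private static void DumpFrameParameters(AVFrame* frame)
{
    // Compare these against the abuffer argument string; a mismatch
    // (or a channel_layout of 0) makes av_buffersrc_add_frame() fail with -22.
    Console.WriteLine("format={0} channel_layout={1} sample_rate={2} nb_samples={3}",
        frame->format,
        frame->channel_layout,
        frame->sample_rate,
        frame->nb_samples);
}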

This is how the code is called:

while (ffmpeg.av_read_frame(formatContext, &packet) >= 0)
{
    if (packet.stream_index == streamIndex)
    {
        while (packet.size > 0)
        {
            int frameDecoded;
            int frameDecodedResult = ffmpeg.avcodec_decode_audio4(codecContext, frame, &frameDecoded, &packet);

            if (frameDecodedResult < 0)
            {
                break; // decode error: discard the rest of this packet
            }

            if (frameDecoded > 0)
            {
                AVFrame* filteredFrame = m_filterVolume.Adjust(frame);

                //writeAudio.WriteFrame(filteredFrame);
            }

            // avcodec_decode_audio4() returns the number of bytes it consumed
            packet.data += frameDecodedResult;
            packet.size -= frameDecodedResult;
        }

        frameIndex++;
    }

    ffmpeg.av_free_packet(&packet);
}

Update 2:

Cracked it. The "channel_layout" option in the filter argument string is supposed to be a hexadecimal value, so it has to be formatted with the "X" format specifier. This is what the string formatting should look like:

string aBufferFilterArguments = string.Format("sample_fmt={0}:channel_layout=0x{1:X}:sample_rate={2}:time_base={3}/{4}",
    (int)CodecContext->sample_fmt,
    CodecContext->channel_layout,
    CodecContext->sample_rate,
    Stream->time_base.num,
    Stream->time_base.den);
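
For example, with the "X" specifier a 5.1(back) layout mask (decimal 63) renders as channel_layout=0x3F; emitting the mask as a plain decimal number is what abuffer had been rejecting with -22.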

Solution

  • What you need to do is build a filter graph and process the audio stream through that graph. In your case, the graph is just INPUT ("abuffer") -> VOLUME -> OUTPUT ("abuffersink"). Here is a sample console app that demonstrates that. It's loosely based on ffmpeg samples filtering_audio, filter_audio and remuxing.

    You can use it like this:

    ChangeVolume.exe http://www.quirksmode.org/html5/videos/big_buck_bunny.mp4 bunny_half.mp4 0.5
    

    And here is the code:

    class Program
    {
        static unsafe void Main(string[] args)
        {
            Console.WriteLine(@"Current directory: " + Environment.CurrentDirectory);
            Console.WriteLine(@"Running in {0}-bit mode.", Environment.Is64BitProcess ? @"64" : @"32");
    
            // adapt this to your context
            var ffmpegPath = string.Format(@"../../../FFmpeg/bin/{0}", Environment.Is64BitProcess ? @"x64" : @"x86");
            InteropHelper.SetDllDirectory(ffmpegPath);
    
            int ret, i;
            if (args.Length < 3)
            {
                Console.WriteLine("usage: ChangeVolume input output <volume ratio>");
                return;
            }
    
            string in_filename = args[0];
            string out_filename = args[1];
            double ratio = double.Parse(args[2]); 
            ffmpeg.av_register_all();
            ffmpeg.avfilter_register_all();
    
            // open input file
            AVFormatContext* ifmt_ctx = null;
            InteropHelper.Check(ffmpeg.avformat_open_input(&ifmt_ctx, in_filename, null, null));
    
            // dump input
            ffmpeg.av_dump_format(ifmt_ctx, 0, in_filename, 0);
    
            // get streams info to determine audio stream index
            InteropHelper.Check(ffmpeg.avformat_find_stream_info(ifmt_ctx, null));
    
            // determine input decoder
            AVCodec* dec;
            int audio_stream_index = ffmpeg.av_find_best_stream(ifmt_ctx, AVMediaType.AVMEDIA_TYPE_AUDIO, -1, -1, &dec, 0);
            AVCodecContext* dec_ctx = ifmt_ctx->streams[audio_stream_index]->codec;
    
            // open input decoder
            InteropHelper.Check(ffmpeg.avcodec_open2(dec_ctx, dec, null));
    
            // build a filter graph
            AVFilterContext* buffersrc_ctx;
            AVFilterContext* buffersink_ctx;
            AVFilterGraph* filter_graph = init_filter_graph(ifmt_ctx, dec_ctx, audio_stream_index, &buffersrc_ctx, &buffersink_ctx, ratio);
    
            // prepare output
            AVFormatContext* ofmt_ctx = null;
            InteropHelper.Check(ffmpeg.avformat_alloc_output_context2(&ofmt_ctx, null, null, out_filename));
            InteropHelper.Check(ofmt_ctx);
    
            // create output streams
            AVCodecContext* enc_ctx = null;
            ofmt_ctx->oformat->flags |= InteropHelper.AVFMT_NOTIMESTAMPS;
            for (i = 0; i < ifmt_ctx->nb_streams; i++)
            {
                AVStream* in_stream = ifmt_ctx->streams[i];
                if (in_stream->codec->codec_type == AVMediaType.AVMEDIA_TYPE_DATA) // skip these
                    continue;
    
                AVStream* out_stream = ffmpeg.avformat_new_stream(ofmt_ctx, in_stream->codec->codec);
                InteropHelper.Check(out_stream);
    
                InteropHelper.Check(ffmpeg.avcodec_copy_context(out_stream->codec, in_stream->codec));
    
                out_stream->codec->codec_tag = 0;
                if ((ofmt_ctx->oformat->flags & InteropHelper.AVFMT_GLOBALHEADER) != 0)
                {
                    out_stream->codec->flags |= InteropHelper.AV_CODEC_FLAG_GLOBAL_HEADER;
                }
    
                if (i == audio_stream_index)
                {
                    // create audio encoder from audio decoder
                    AVCodec* enc = ffmpeg.avcodec_find_encoder(dec_ctx->codec_id);
                    InteropHelper.Check(enc);
    
                    enc_ctx = ffmpeg.avcodec_alloc_context3(enc);
                    InteropHelper.Check(enc_ctx);
    
                    enc_ctx->sample_rate = dec_ctx->sample_rate;
                    enc_ctx->channel_layout = dec_ctx->channel_layout;
                    enc_ctx->channels = ffmpeg.av_get_channel_layout_nb_channels(enc_ctx->channel_layout);
                    enc_ctx->sample_fmt = enc->sample_fmts[0];
                    enc_ctx->time_base.num = 1;
                    enc_ctx->time_base.den = enc_ctx->sample_rate;
                    InteropHelper.Check(ffmpeg.avcodec_open2(enc_ctx, enc, null));
                }
            }
    
            // dump output
            ffmpeg.av_dump_format(ofmt_ctx, 0, out_filename, 1);
    
            if ((ofmt_ctx->oformat->flags & InteropHelper.AVFMT_NOFILE) == 0)
            {
                // open output file
                InteropHelper.Check(ffmpeg.avio_open(&ofmt_ctx->pb, out_filename, InteropHelper.AVIO_FLAG_WRITE));
            }
    
            // write output file header
            InteropHelper.Check(ffmpeg.avformat_write_header(ofmt_ctx, null));
    
            // read all packets and process
            AVFrame* frame = ffmpeg.av_frame_alloc();
            AVFrame* filt_frame = ffmpeg.av_frame_alloc();
            while (true)
            {
                AVStream* in_stream;
                AVStream* out_stream;
                AVPacket pkt;
                ret = ffmpeg.av_read_frame(ifmt_ctx, &pkt);
                if (ret < 0)
                    break;
    
                in_stream = ifmt_ctx->streams[pkt.stream_index];
                if (in_stream->codec->codec_type == AVMediaType.AVMEDIA_TYPE_DATA)
                    continue;
    
                // audio stream? we need to pass it through our filter graph
                if (pkt.stream_index == audio_stream_index)
                {
                    // decode audio (packet -> frame)
                    int got_frame = 0;
                    InteropHelper.Check(ffmpeg.avcodec_decode_audio4(dec_ctx, frame, &got_frame, &pkt));
    
                    if (got_frame > 0)
                    {
                        // add the frame into the filter graph
                        InteropHelper.Check(ffmpeg.av_buffersrc_add_frame(buffersrc_ctx, frame));
                        while (true)
                        {
                            // get the frame out from the filter graph
                            ret = ffmpeg.av_buffersink_get_frame(buffersink_ctx, filt_frame);
                            // -EAGAIN: the sink needs more input before it can output a frame
                            const int EAGAIN = 11;
                            if (ret == -EAGAIN)
                                break;
    
                            InteropHelper.Check(ret);
    
                            // encode audio (frame -> packet)
                            AVPacket enc_pkt = new AVPacket();
                            int got_packet = 0;
                            InteropHelper.Check(ffmpeg.avcodec_encode_audio2(enc_ctx, &enc_pkt, filt_frame, &got_packet));
                            enc_pkt.stream_index = pkt.stream_index;
                            InteropHelper.Check(ffmpeg.av_interleaved_write_frame(ofmt_ctx, &enc_pkt));
                            ffmpeg.av_frame_unref(filt_frame);
                        }
                    }
                }
                else
                {
                    // write other (video) streams
                    out_stream = ofmt_ctx->streams[pkt.stream_index];
                    pkt.pts = ffmpeg.av_rescale_q_rnd(pkt.pts, in_stream->time_base, out_stream->time_base, AVRounding.AV_ROUND_NEAR_INF | AVRounding.AV_ROUND_PASS_MINMAX);
                    pkt.dts = ffmpeg.av_rescale_q_rnd(pkt.dts, in_stream->time_base, out_stream->time_base, AVRounding.AV_ROUND_NEAR_INF | AVRounding.AV_ROUND_PASS_MINMAX);
                    pkt.duration = ffmpeg.av_rescale_q(pkt.duration, in_stream->time_base, out_stream->time_base);
                    pkt.pos = -1;
                    InteropHelper.Check(ffmpeg.av_interleaved_write_frame(ofmt_ctx, &pkt));
                }
                ffmpeg.av_packet_unref(&pkt);
            }
    
            // write trailer, close file
            ffmpeg.av_write_trailer(ofmt_ctx);
            ffmpeg.avformat_close_input(&ifmt_ctx);
            if ((ofmt_ctx->oformat->flags & InteropHelper.AVFMT_NOFILE) == 0)
            {
                ffmpeg.avio_closep(&ofmt_ctx->pb);
            }
    
            ffmpeg.avformat_free_context(ofmt_ctx);
    
            ffmpeg.av_frame_free(&filt_frame);
            ffmpeg.av_frame_free(&frame);
    
            ffmpeg.avfilter_graph_free(&filter_graph);
            return;
        }
    
        static unsafe AVFilterGraph* init_filter_graph(AVFormatContext* format, AVCodecContext* codec, int audio_stream_index, AVFilterContext** buffersrc_ctx, AVFilterContext** buffersink_ctx, double volumeRatio)
        {
            // create graph
            var filter_graph = ffmpeg.avfilter_graph_alloc();
            InteropHelper.Check(filter_graph);
    
            // add input filter
            var abuffersrc = ffmpeg.avfilter_get_by_name("abuffer");
            if (abuffersrc == null) InteropHelper.CheckTag("\x00F8FIL");
            string args = string.Format("sample_fmt={0}:channel_layout={1}:sample_rate={2}:time_base={3}/{4}",
                (int)codec->sample_fmt,
                codec->channel_layout,
                codec->sample_rate,
                format->streams[audio_stream_index]->time_base.num,
                format->streams[audio_stream_index]->time_base.den);
            InteropHelper.Check(ffmpeg.avfilter_graph_create_filter(buffersrc_ctx, abuffersrc, "IN", args, null, filter_graph));
    
            // add volume filter
            var volume = ffmpeg.avfilter_get_by_name("volume");
            if (volume == null) InteropHelper.CheckTag("\x00F8FIL");
            AVFilterContext* volume_ctx;
            InteropHelper.Check(ffmpeg.avfilter_graph_create_filter(&volume_ctx, volume, "VOL", "volume=" + volumeRatio.ToString(CultureInfo.InvariantCulture), null, filter_graph));
    
            // add output filter
            var abuffersink = ffmpeg.avfilter_get_by_name("abuffersink");
            if (abuffersink == null) InteropHelper.CheckTag("\x00F8FIL");
            InteropHelper.Check(ffmpeg.avfilter_graph_create_filter(buffersink_ctx, abuffersink, "OUT", "", null, filter_graph));
    
            // connect input -> volume -> output
            InteropHelper.Check(ffmpeg.avfilter_link(*buffersrc_ctx, 0, volume_ctx, 0));
            InteropHelper.Check(ffmpeg.avfilter_link(volume_ctx, 0, *buffersink_ctx, 0));
            InteropHelper.Check(ffmpeg.avfilter_graph_config(filter_graph, null));
            return filter_graph;
        }
    }
    

    It uses a utility InteropHelper class derived from AutoGen's:

    public class InteropHelper
    {
        [DllImport("kernel32", SetLastError = true)]
        public static extern bool SetDllDirectory(string lpPathName);
    
        public static readonly int AVERROR_EOF = -GetTag("EOF ");
        public static readonly int AVERROR_UNKNOWN = -GetTag("UNKN");
        public static readonly int AVFMT_GLOBALHEADER = 0x0040;
        public static readonly int AVFMT_NOFILE = 0x0001;
        public static readonly int AVIO_FLAG_WRITE = 2;
        public static readonly int AV_CODEC_FLAG_GLOBAL_HEADER = (1 << 22);
        public static readonly int AV_ROUND_ZERO = 0;
        public static readonly int AV_ROUND_INF = 1;
        public static readonly int AV_ROUND_DOWN = 2;
        public static readonly int AV_ROUND_UP = 3;
        public static readonly int AV_ROUND_PASS_MINMAX = 8192;
        public static readonly int AV_ROUND_NEAR_INF = 5;
        public static readonly int AVFMT_NOTIMESTAMPS = 0x0080;
    
        public static unsafe void Check(void* ptr)
        {
            if (ptr != null)
                return;
    
            const int ENOMEM = 12;
            Check(-ENOMEM);
        }
    
        public static unsafe void Check(IntPtr ptr)
        {
            if (ptr != IntPtr.Zero)
                return;
    
            Check((void*)null);
        }
    
        // example: "\x00F8FIL" is "Filter not found" (check libavutil/error.h)
        public static void CheckTag(string tag)
        {
            Check(-GetTag(tag));
        }
    
        public static int GetTag(string tag)
        {
            var bytes = new byte[4];
            for (int i = 0; i < 4; i++)
            {
                bytes[i] = (byte)tag[i];
            }
            return BitConverter.ToInt32(bytes, 0);
        }
    
        public static void Check(int res)
        {
            if (res >= 0)
                return;
    
            string err = "ffmpeg error " + res;
            string text = GetErrorText(res);
            if (!string.IsNullOrWhiteSpace(text))
            {
                err += ": " + text;
            }
            throw new Exception(err);
        }
    
        public static string GetErrorText(int res)
        {
            IntPtr err = Marshal.AllocHGlobal(256);
            try
            {
                ffmpeg.av_strerror(res, err, 256);
                return Marshal.PtrToStringAnsi(err);
            }
            finally
            {
                Marshal.FreeHGlobal(err);
            }
        }
    }
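
    The GetTag() helper mirrors FFmpeg's MKTAG/FFERRTAG macros: the four characters are packed into a little-endian 32-bit integer, so -GetTag("EOF ") matches AVERROR_EOF as defined in libavutil/error.h (note that BitConverter.ToInt32 assumes a little-endian platform).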