
Increase/Decrease audio volume using FFmpeg


I am currently using C# P/Invoke calls to the FFmpeg APIs to handle video and audio. I have the following code in place to extract the audio from a video and write it to a file.

while (ffmpeg.av_read_frame(formatContext, &packet) >= 0)
{
    if (packet.stream_index == streamIndex)
    {
        while (packet.size > 0)
        {
            int frameDecoded;
            int frameDecodedResult = ffmpeg.avcodec_decode_audio4(codecContext, frame, &frameDecoded, &packet);

            if (frameDecodedResult < 0)
            {
                break; // decode error: discard the rest of this packet
            }

            if (frameDecoded > 0)
            {
                //writeAudio.WriteFrame(frame);
            }

            // avcodec_decode_audio4() returns the number of bytes it consumed
            packet.data += frameDecodedResult;
            packet.size -= frameDecodedResult;
        }

        frameIndex++;
    }

    ffmpeg.av_free_packet(&packet);
}

This is all working correctly. I'm currently using the FFmpeg.AutoGen project for the API access.

I want to be able to increase/decrease the volume of the audio before it's written to the file, but I cannot seem to find a command or any help with this. Does it have to be done manually?
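
For reference, the ffmpeg command-line tool does this with its volume audio filter (e.g. ffmpeg -i input.mp4 -af volume=1.5 output.mp4), so I assume the same filter is exposed through the API.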

Update 1:

After receiving some help, this is the class layout I have:

public unsafe class FilterVolume : IDisposable
{
    #region Private Member Variables

    private AVFilterGraph* m_filterGraph = null;
    private AVFilterContext* m_aBufferSourceFilterContext = null;
    private AVFilterContext* m_aBufferSinkFilterContext = null;

    #endregion

    #region Private Constant Member Variables

    private const int EAGAIN = 11; // errno EAGAIN; av_buffersink_get_frame() returns -EAGAIN when no frame is available yet

    #endregion

    public FilterVolume(AVCodecContext* codecContext, AVStream* stream, float volume)
    {
        CodecContext = codecContext;
        Stream = stream;
        Volume = volume;

        Initialise();
    }

    public AVFrame* Adjust(AVFrame* frame)
    {
        AVFrame* returnFilteredFrame = ffmpeg.av_frame_alloc();

        if (m_aBufferSourceFilterContext != null && m_aBufferSinkFilterContext != null)
        {
            int bufferSourceAddFrameResult = ffmpeg.av_buffersrc_add_frame(m_aBufferSourceFilterContext, frame);
            if (bufferSourceAddFrameResult < 0)
            {
                // TODO: handle the error (negative AVERROR code)
            }

            int bufferSinkGetFrameResult = ffmpeg.av_buffersink_get_frame(m_aBufferSinkFilterContext, returnFilteredFrame);
            if (bufferSinkGetFrameResult < 0 && bufferSinkGetFrameResult != -EAGAIN)
            {
                // TODO: handle the error (-EAGAIN just means no frame is ready yet)
            }
        }

        return returnFilteredFrame;
    }

    public void Dispose()
    {
        Cleanup(m_filterGraph);
    }

    #region Private Properties

    private AVCodecContext* CodecContext { get; set; }
    private AVStream* Stream { get; set; }
    private float Volume { get; set; }

    #endregion

    #region Private Setup Helper Functions

    private void Initialise()
    {
        m_filterGraph = GetAllocatedFilterGraph();

        string aBufferFilterArguments = string.Format("sample_fmt={0}:channel_layout={1}:sample_rate={2}:time_base={3}/{4}",
            (int)CodecContext->sample_fmt,
            CodecContext->channel_layout,
            CodecContext->sample_rate,
            Stream->time_base.num,
            Stream->time_base.den);

        AVFilterContext* aBufferSourceFilterContext = CreateFilter("abuffer", m_filterGraph, aBufferFilterArguments);
        AVFilterContext* volumeFilterContext = CreateFilter("volume", m_filterGraph, string.Format("volume={0}", Volume));
        AVFilterContext* aBufferSinkFilterContext = CreateFilter("abuffersink", m_filterGraph);

        LinkFilter(aBufferSourceFilterContext, volumeFilterContext);
        LinkFilter(volumeFilterContext, aBufferSinkFilterContext);

        SetFilterGraphConfiguration(m_filterGraph, null);

        m_aBufferSourceFilterContext = aBufferSourceFilterContext;
        m_aBufferSinkFilterContext = aBufferSinkFilterContext;
    }

    #endregion

    #region Private Cleanup Helper Functions

    private static void Cleanup(AVFilterGraph* filterGraph)
    {
        if (filterGraph != null)
        {
            ffmpeg.avfilter_graph_free(&filterGraph);
        }
    }

    #endregion

    #region Private Helpers

    private AVFilterGraph* GetAllocatedFilterGraph()
    {
        AVFilterGraph* filterGraph = ffmpeg.avfilter_graph_alloc();
        if (filterGraph == null)
        {
            // TODO: handle the allocation failure
        }

        return filterGraph;
    }

    private AVFilter* GetFilterByName(string name)
    {
        AVFilter* filter = ffmpeg.avfilter_get_by_name(name);
        if (filter == null)
        {
            // TODO: handle the missing filter
        }

        return filter;
    }

    private void SetFilterGraphConfiguration(AVFilterGraph* filterGraph, void* logContext)
    {
        int filterGraphConfigResult = ffmpeg.avfilter_graph_config(filterGraph, logContext);
        if (filterGraphConfigResult < 0)
        {
            // TODO: handle the error
        }
    }

    private AVFilterContext* CreateFilter(string filterName, AVFilterGraph* filterGraph, string filterArguments = null)
    {
        AVFilter* filter = GetFilterByName(filterName);
        AVFilterContext* filterContext;

        int aBufferFilterCreateResult = ffmpeg.avfilter_graph_create_filter(&filterContext, filter, filterName, filterArguments, null, filterGraph);
        if (aBufferFilterCreateResult < 0)
        {
            // TODO: handle the error
        }

        return filterContext;
    }

    private void LinkFilter(AVFilterContext* source, AVFilterContext* destination)
    {
        int filterLinkResult = ffmpeg.avfilter_link(source, 0, destination, 0);
        if (filterLinkResult < 0)
        {
            // TODO: handle the error
        }
    }

    #endregion
}

The Adjust() function is called after a frame is decoded. I'm currently getting a -22 error (AVERROR(EINVAL), "Invalid argument") when av_buffersrc_add_frame() is called, but after debugging, I cannot see anything that would be causing it.
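
In FFmpeg versions of this era, av_buffersrc_add_frame() typically returns AVERROR(EINVAL) when the incoming frame's parameters don't match the arguments the abuffer source was configured with. A minimal debugging sketch that could sit next to Adjust() in the FilterVolume class (the helper name is illustrative):

private static void DumpFrameParameters(AVFrame* frame)
{
    // Compare these against the abuffer argument string; a mismatch
    // (or a channel_layout of 0) makes av_buffersrc_add_frame() fail with -22.
    Console.WriteLine("format={0} channel_layout={1} sample_rate={2} nb_samples={3}",
        frame->format,
        frame->channel_layout,
        frame->sample_rate,
        frame->nb_samples);
}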

This is how the code is called:

while (ffmpeg.av_read_frame(formatContext, &packet) >= 0)
{
    if (packet.stream_index == streamIndex)
    {
        while (packet.size > 0)
        {
            int frameDecoded;
            int frameDecodedResult = ffmpeg.avcodec_decode_audio4(codecContext, frame, &frameDecoded, &packet);

            if (frameDecodedResult < 0)
            {
                break; // decode error: discard the rest of this packet
            }

            if (frameDecoded > 0)
            {
                AVFrame* filteredFrame = m_filterVolume.Adjust(frame);

                //writeAudio.WriteFrame(filteredFrame);
            }

            // avcodec_decode_audio4() returns the number of bytes it consumed
            packet.data += frameDecodedResult;
            packet.size -= frameDecodedResult;
        }

        frameIndex++;
    }

    ffmpeg.av_free_packet(&packet);
}

Update 2:

Cracked it. The "channel_layout" option in the filter argument string is supposed to be a hexadecimal value, so it has to be formatted with the "X" format specifier. This is what the string formatting should look like:

string aBufferFilterArguments = string.Format("sample_fmt={0}:channel_layout=0x{1:X}:sample_rate={2}:time_base={3}/{4}",
    (int)CodecContext->sample_fmt,
    CodecContext->channel_layout,
    CodecContext->sample_rate,
    Stream->time_base.num,
    Stream->time_base.den);
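
For example, with the "X" specifier a 5.1(back) layout mask (decimal 63) renders as channel_layout=0x3F; emitting the mask as a plain decimal number is what abuffer had been rejecting with -22.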

Solution

  • What you need to do is build a filter graph and process the audio stream through that graph. In your case, the graph is just INPUT ("abuffer") -> VOLUME -> OUTPUT ("abuffersink"). Here is a sample console app that demonstrates that. It's loosely based on ffmpeg samples filtering_audio, filter_audio and remuxing.

    You can use it like this:

    ChangeVolume.exe http://www.quirksmode.org/html5/videos/big_buck_bunny.mp4 bunny_half.mp4 0.5
    

    And here is the code:

    class Program
    {
        static unsafe void Main(string[] args)
        {
            Console.WriteLine(@"Current directory: " + Environment.CurrentDirectory);
            Console.WriteLine(@"Running in {0}-bit mode.", Environment.Is64BitProcess ? @"64" : @"32");
    
            // adapt this to your context
            var ffmpegPath = string.Format(@"../../../FFmpeg/bin/{0}", Environment.Is64BitProcess ? @"x64" : @"x86");
            InteropHelper.SetDllDirectory(ffmpegPath);
    
            int ret, i;
            if (args.Length < 3)
            {
                Console.WriteLine("usage: ChangeVolume input output <volume ratio>");
                return;
            }
    
            string in_filename = args[0];
            string out_filename = args[1];
            double ratio = double.Parse(args[2]); 
            ffmpeg.av_register_all();
            ffmpeg.avfilter_register_all();
    
            // open input file
            AVFormatContext* ifmt_ctx = null;
            InteropHelper.Check(ffmpeg.avformat_open_input(&ifmt_ctx, in_filename, null, null));
    
            // dump input
            ffmpeg.av_dump_format(ifmt_ctx, 0, in_filename, 0);
    
            // get streams info to determine audio stream index
            InteropHelper.Check(ffmpeg.avformat_find_stream_info(ifmt_ctx, null));
    
            // determine input decoder
            AVCodec* dec;
            int audio_stream_index = ffmpeg.av_find_best_stream(ifmt_ctx, AVMediaType.AVMEDIA_TYPE_AUDIO, -1, -1, &dec, 0);
            AVCodecContext* dec_ctx = ifmt_ctx->streams[audio_stream_index]->codec;
    
            // open input decoder
            InteropHelper.Check(ffmpeg.avcodec_open2(dec_ctx, dec, null));
    
            // build a filter graph
            AVFilterContext* buffersrc_ctx;
            AVFilterContext* buffersink_ctx;
            AVFilterGraph* filter_graph = init_filter_graph(ifmt_ctx, dec_ctx, audio_stream_index, &buffersrc_ctx, &buffersink_ctx, ratio);
    
            // prepare output
            AVFormatContext* ofmt_ctx = null;
            InteropHelper.Check(ffmpeg.avformat_alloc_output_context2(&ofmt_ctx, null, null, out_filename));
            InteropHelper.Check(ofmt_ctx);
    
            // create output streams
            AVCodecContext* enc_ctx = null;
            ofmt_ctx->oformat->flags |= InteropHelper.AVFMT_NOTIMESTAMPS;
            for (i = 0; i < ifmt_ctx->nb_streams; i++)
            {
                AVStream* in_stream = ifmt_ctx->streams[i];
                if (in_stream->codec->codec_type == AVMediaType.AVMEDIA_TYPE_DATA) // skip these
                    continue;
    
                AVStream* out_stream = ffmpeg.avformat_new_stream(ofmt_ctx, in_stream->codec->codec);
                InteropHelper.Check(out_stream);
    
                InteropHelper.Check(ffmpeg.avcodec_copy_context(out_stream->codec, in_stream->codec));
    
                out_stream->codec->codec_tag = 0;
                if ((ofmt_ctx->oformat->flags & InteropHelper.AVFMT_GLOBALHEADER) != 0)
                {
                    out_stream->codec->flags |= InteropHelper.AV_CODEC_FLAG_GLOBAL_HEADER;
                }
    
                if (i == audio_stream_index)
                {
                    // create audio encoder from audio decoder
                    AVCodec* enc = ffmpeg.avcodec_find_encoder(dec_ctx->codec_id);
                    InteropHelper.Check(enc);
    
                    enc_ctx = ffmpeg.avcodec_alloc_context3(enc);
                    InteropHelper.Check(enc_ctx);
    
                    enc_ctx->sample_rate = dec_ctx->sample_rate;
                    enc_ctx->channel_layout = dec_ctx->channel_layout;
                    enc_ctx->channels = ffmpeg.av_get_channel_layout_nb_channels(enc_ctx->channel_layout);
                    enc_ctx->sample_fmt = enc->sample_fmts[0];
                    enc_ctx->time_base.num = 1;
                    enc_ctx->time_base.den = enc_ctx->sample_rate;
                    InteropHelper.Check(ffmpeg.avcodec_open2(enc_ctx, enc, null));
                }
            }
    
            // dump output
            ffmpeg.av_dump_format(ofmt_ctx, 0, out_filename, 1);
    
            if ((ofmt_ctx->oformat->flags & InteropHelper.AVFMT_NOFILE) == 0)
            {
                // open output file
                InteropHelper.Check(ffmpeg.avio_open(&ofmt_ctx->pb, out_filename, InteropHelper.AVIO_FLAG_WRITE));
            }
    
            // write output file header
            InteropHelper.Check(ffmpeg.avformat_write_header(ofmt_ctx, null));
    
            // read all packets and process
            AVFrame* frame = ffmpeg.av_frame_alloc();
            AVFrame* filt_frame = ffmpeg.av_frame_alloc();
            while (true)
            {
                AVStream* in_stream;
                AVStream* out_stream;
                AVPacket pkt;
                ret = ffmpeg.av_read_frame(ifmt_ctx, &pkt);
                if (ret < 0)
                    break;
    
                in_stream = ifmt_ctx->streams[pkt.stream_index];
                if (in_stream->codec->codec_type == AVMediaType.AVMEDIA_TYPE_DATA)
                    continue;
    
                // audio stream? we need to pass it through our filter graph
                if (pkt.stream_index == audio_stream_index)
                {
                    // decode audio (packet -> frame)
                    int got_frame = 0;
                    InteropHelper.Check(ffmpeg.avcodec_decode_audio4(dec_ctx, frame, &got_frame, &pkt));
    
                    if (got_frame > 0)
                    {
                        // add the frame into the filter graph
                        InteropHelper.Check(ffmpeg.av_buffersrc_add_frame(buffersrc_ctx, frame));
                        while (true)
                        {
                            // get the frame out from the filter graph
                            ret = ffmpeg.av_buffersink_get_frame(buffersink_ctx, filt_frame);
                            // -EAGAIN: the sink needs more input before it can output a frame
                            const int EAGAIN = 11;
                            if (ret == -EAGAIN)
                                break;
    
                            InteropHelper.Check(ret);
    
                            // encode audio (frame -> packet)
                            AVPacket enc_pkt = new AVPacket();
                            int got_packet = 0;
                            InteropHelper.Check(ffmpeg.avcodec_encode_audio2(enc_ctx, &enc_pkt, filt_frame, &got_packet));
                            enc_pkt.stream_index = pkt.stream_index;
                            InteropHelper.Check(ffmpeg.av_interleaved_write_frame(ofmt_ctx, &enc_pkt));
                            ffmpeg.av_frame_unref(filt_frame);
                        }
                    }
                }
                else
                {
                    // write other (video) streams
                    out_stream = ofmt_ctx->streams[pkt.stream_index];
                    pkt.pts = ffmpeg.av_rescale_q_rnd(pkt.pts, in_stream->time_base, out_stream->time_base, AVRounding.AV_ROUND_NEAR_INF | AVRounding.AV_ROUND_PASS_MINMAX);
                    pkt.dts = ffmpeg.av_rescale_q_rnd(pkt.dts, in_stream->time_base, out_stream->time_base, AVRounding.AV_ROUND_NEAR_INF | AVRounding.AV_ROUND_PASS_MINMAX);
                    pkt.duration = ffmpeg.av_rescale_q(pkt.duration, in_stream->time_base, out_stream->time_base);
                    pkt.pos = -1;
                    InteropHelper.Check(ffmpeg.av_interleaved_write_frame(ofmt_ctx, &pkt));
                }
                ffmpeg.av_packet_unref(&pkt);
            }
    
            // write trailer, close file
            ffmpeg.av_write_trailer(ofmt_ctx);
            ffmpeg.avformat_close_input(&ifmt_ctx);
            if ((ofmt_ctx->oformat->flags & InteropHelper.AVFMT_NOFILE) == 0)
            {
                ffmpeg.avio_closep(&ofmt_ctx->pb);
            }
    
            ffmpeg.avformat_free_context(ofmt_ctx);
    
            ffmpeg.av_frame_free(&filt_frame);
            ffmpeg.av_frame_free(&frame);
    
            ffmpeg.avfilter_graph_free(&filter_graph);
            return;
        }
    
        static unsafe AVFilterGraph* init_filter_graph(AVFormatContext* format, AVCodecContext* codec, int audio_stream_index, AVFilterContext** buffersrc_ctx, AVFilterContext** buffersink_ctx, double volumeRatio)
        {
            // create graph
            var filter_graph = ffmpeg.avfilter_graph_alloc();
            InteropHelper.Check(filter_graph);
    
            // add input filter
            var abuffersrc = ffmpeg.avfilter_get_by_name("abuffer");
            if (abuffersrc == null) InteropHelper.CheckTag("\x00F8FIL");
            string args = string.Format("sample_fmt={0}:channel_layout={1}:sample_rate={2}:time_base={3}/{4}",
                (int)codec->sample_fmt,
                codec->channel_layout,
                codec->sample_rate,
                format->streams[audio_stream_index]->time_base.num,
                format->streams[audio_stream_index]->time_base.den);
            InteropHelper.Check(ffmpeg.avfilter_graph_create_filter(buffersrc_ctx, abuffersrc, "IN", args, null, filter_graph));
    
            // add volume filter
            var volume = ffmpeg.avfilter_get_by_name("volume");
            if (volume == null) InteropHelper.CheckTag("\x00F8FIL");
            AVFilterContext* volume_ctx;
            InteropHelper.Check(ffmpeg.avfilter_graph_create_filter(&volume_ctx, volume, "VOL", "volume=" + volumeRatio.ToString(CultureInfo.InvariantCulture), null, filter_graph));
    
            // add output filter
            var abuffersink = ffmpeg.avfilter_get_by_name("abuffersink");
            if (abuffersink == null) InteropHelper.CheckTag("\x00F8FIL");
            InteropHelper.Check(ffmpeg.avfilter_graph_create_filter(buffersink_ctx, abuffersink, "OUT", "", null, filter_graph));
    
            // connect input -> volume -> output
            InteropHelper.Check(ffmpeg.avfilter_link(*buffersrc_ctx, 0, volume_ctx, 0));
            InteropHelper.Check(ffmpeg.avfilter_link(volume_ctx, 0, *buffersink_ctx, 0));
            InteropHelper.Check(ffmpeg.avfilter_graph_config(filter_graph, null));
            return filter_graph;
        }
    }
    

    It uses a utility InteropHelper class derived from AutoGen's:

    public class InteropHelper
    {
        [DllImport("kernel32", SetLastError = true)]
        public static extern bool SetDllDirectory(string lpPathName);
    
        public static readonly int AVERROR_EOF = -GetTag("EOF ");
        public static readonly int AVERROR_UNKNOWN = -GetTag("UNKN");
        public static readonly int AVFMT_GLOBALHEADER = 0x0040;
        public static readonly int AVFMT_NOFILE = 0x0001;
        public static readonly int AVIO_FLAG_WRITE = 2;
        public static readonly int AV_CODEC_FLAG_GLOBAL_HEADER = (1 << 22);
        public static readonly int AV_ROUND_ZERO = 0;
        public static readonly int AV_ROUND_INF = 1;
        public static readonly int AV_ROUND_DOWN = 2;
        public static readonly int AV_ROUND_UP = 3;
        public static readonly int AV_ROUND_PASS_MINMAX = 8192;
        public static readonly int AV_ROUND_NEAR_INF = 5;
        public static readonly int AVFMT_NOTIMESTAMPS = 0x0080;
    
        public static unsafe void Check(void* ptr)
        {
            if (ptr != null)
                return;
    
            const int ENOMEM = 12;
            Check(-ENOMEM);
        }
    
        public static unsafe void Check(IntPtr ptr)
        {
            if (ptr != IntPtr.Zero)
                return;
    
            Check((void*)null);
        }
    
        // example: "\x00F8FIL" is "Filter not found" (check libavutil/error.h)
        public static void CheckTag(string tag)
        {
            Check(-GetTag(tag));
        }
    
        public static int GetTag(string tag)
        {
            var bytes = new byte[4];
            for (int i = 0; i < 4; i++)
            {
                bytes[i] = (byte)tag[i];
            }
            return BitConverter.ToInt32(bytes, 0);
        }
    
        public static void Check(int res)
        {
            if (res >= 0)
                return;
    
            string err = "ffmpeg error " + res;
            string text = GetErrorText(res);
            if (!string.IsNullOrWhiteSpace(text))
            {
                err += ": " + text;
            }
            throw new Exception(err);
        }
    
        public static string GetErrorText(int res)
        {
            IntPtr err = Marshal.AllocHGlobal(256);
            try
            {
                ffmpeg.av_strerror(res, err, 256);
                return Marshal.PtrToStringAnsi(err);
            }
            finally
            {
                Marshal.FreeHGlobal(err);
            }
        }
    }
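
    The GetTag() helper mirrors FFmpeg's MKTAG/FFERRTAG macros: the four characters are packed into a little-endian 32-bit integer, so -GetTag("EOF ") matches AVERROR_EOF as defined in libavutil/error.h (note that BitConverter.ToInt32 assumes a little-endian platform).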