Search code examples
c sox libsox

Change audio file pitch without changing tempo using libsox


I developed a simple app that changes the pitch of an audio file with libsox (based on this example). Here is my code. It takes two arguments: the input file path and the output file path:

#include <sox.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>

/*
 * Reads the audio file named by argv[1], passes it through an effects chain
 * of "input" -> "pitch" -> "output", and writes the result to argv[2].
 *
 * This is the version that shows the reported problem: the output file's
 * duration differs from the input's. Note that no effect is placed between
 * "pitch" and "output".
 *
 * Returns 0 when the end of the function is reached; any failed check
 * goes through assert().
 */
int main(int argc, char * argv[])
{
    static sox_format_t * in, * out; /* input and output files */
    sox_effects_chain_t * chain;
    sox_effect_t * e;
    char * args[10];
    sox_signalinfo_t interm_signal; /* @ intermediate points in the chain. */
    /* Encoding requested for the output file; passed to sox_open_write(). */
    sox_encodinginfo_t out_encoding = {
        SOX_ENCODING_SIGN2,
        16,
        0,
        sox_option_default,
        sox_option_default,
        sox_option_default,
        sox_false
    };
    /* Signal description requested for the output file.
     * Presumably 16000 is the sample rate and 1 the channel count
     * (they match the reported file details) -- confirm against sox.h. */
    sox_signalinfo_t out_signal = {
        16000,
        1,
        0,
        0,
        NULL
    };

/* NOTE(review): the calls below are made inside assert(). If the program is
 * compiled with NDEBUG defined, assert() expands to nothing and these calls
 * would not be executed at all. */
assert(argc == 3);
assert(sox_init() == SOX_SUCCESS);
assert(in = sox_open_read(argv[1], NULL, NULL, NULL));
assert(out = sox_open_write(argv[2], &out_signal, &out_encoding, NULL, NULL, NULL));

chain = sox_create_effects_chain(&in->encoding, &out->encoding);

interm_signal = in->signal; /* NB: deep copy */

/* First effect: "input". Its single option is the open input handle,
 * passed through the char * argument array. */
e = sox_create_effect(sox_find_effect("input"));
args[0] = (char *)in; 
assert(sox_effect_options(e, 1, args) == SOX_SUCCESS);
assert(sox_add_effect(chain, e, &interm_signal, &in->signal) == SOX_SUCCESS);
/* Presumably sox_add_effect() keeps its own copy of the effect, so the
 * local one can be released here -- confirm against the libsox docs. */
free(e);

/* Second effect: "pitch" with the single option "1000" (the same value
 * used with the sox command-line tool). */
e = sox_create_effect(sox_find_effect("pitch"));
args[0] = "1000";
assert(sox_effect_options(e, 1, args) == SOX_SUCCESS);
assert(sox_add_effect(chain, e, &interm_signal, &out->signal) == SOX_SUCCESS);
free(e);

/* Last effect: "output". Its single option is the open output handle. */
e = sox_create_effect(sox_find_effect("output"));
args[0] = (char *)out;
assert(sox_effect_options(e, 1, args) == SOX_SUCCESS);
assert(sox_add_effect(chain, e, &interm_signal, &out->signal) == SOX_SUCCESS);
free(e);

/* Run the chain; the return value is not checked here. */
sox_flow_effects(chain, NULL, NULL);

/* Tear down in reverse order of setup. */
sox_delete_effects_chain(chain);
sox_close(out);
sox_close(in);
sox_quit();

return 0;
}

However, the file produced by the code above has its tempo changed as well: its duration no longer matches the input. Here are the input and output details:

Input File     : 'input.wav'
Channels       : 1
Sample Rate    : 16000
Precision      : 16-bit
Duration       : 00:00:11.87 = 189921 samples ~ 890.255 CDDA sectors

Input File     : 'output.wav'
Channels       : 1
Sample Rate    : 16000
Precision      : 16-bit
Duration       : 00:00:21.15 = 338401 samples ~ 1586.25 CDDA sectors

By contrast, the sox command-line tool works as expected:

sox input.wav output_app.wav pitch 1000

It generates a file with the same duration as input:

Input File     : 'output_app.wav'
Channels       : 1
Sample Rate    : 16000
Precision      : 16-bit
Duration       : 00:00:11.87 = 189921 samples ~ 890.255 CDDA sectors

Has anyone else run into the same issue? Or is there another option that I have to pass to the effect to make it work properly?


Solution

  • The "pitch" effect of libsox changes the audio sample rate. If you inspect the sample rate of the intermediate signal after "pitch" has been added, you will find that it has changed. To save the audio file with the original sample rate, you need to add a "rate" effect after the "pitch" effect, like this:

    #include <sox.h>
    #include <stdlib.h>
    #include <stdio.h>
    #include <assert.h>
    #include <string.h>
    
    /*
     * Look up the effect called `name`, configure it with `argc` options from
     * `argv`, and append it to `chain`.
     *
     * `interm` is the running signal description at this point in the chain
     * and is handed to sox_add_effect() as the effect's input; `target` is the
     * signal the effect should aim for.
     *
     * Returns SOX_SUCCESS on success; otherwise prints a diagnostic to stderr
     * and returns a non-SOX_SUCCESS code.
     */
    static int add_effect(sox_effects_chain_t *chain, const char *name,
                          int argc, char *argv[],
                          sox_signalinfo_t *interm,
                          const sox_signalinfo_t *target)
    {
        const sox_effect_handler_t *handler = sox_find_effect(name);
        sox_effect_t *effect;
        int rc;
    
        if (handler == NULL) {
            fprintf(stderr, "unknown effect: %s\n", name);
            return SOX_EOF;
        }
    
        effect = sox_create_effect(handler);
        if (effect == NULL) {
            fprintf(stderr, "cannot create effect: %s\n", name);
            return SOX_EOF;
        }
    
        rc = sox_effect_options(effect, argc, argv);
        if (rc == SOX_SUCCESS) {
            rc = sox_add_effect(chain, effect, interm, target);
        }
        if (rc != SOX_SUCCESS) {
            fprintf(stderr, "cannot add effect: %s\n", name);
        }
    
        /* Same release pattern as the original code: the local effect object
         * is freed once it has been handed to the chain. */
        free(effect);
        return rc;
    }
    
    /*
     * Shift the pitch of an audio file without changing its duration.
     *
     * Usage: <program> <input file> <output file>
     *
     * The chain is input -> pitch -> rate -> output. The "rate" stage is
     * required because "pitch" alters the sample rate of the intermediate
     * signal; "rate" converts it back to the output file's rate.
     *
     * Every libsox call is checked explicitly instead of being wrapped in
     * assert(), so the program behaves the same when built with NDEBUG.
     *
     * Returns EXIT_SUCCESS (0) on success, EXIT_FAILURE otherwise.
     */
    int main(int argc, char * argv[])
    {
        sox_format_t *in = NULL, *out = NULL; /* input and output files */
        sox_effects_chain_t *chain = NULL;
        sox_signalinfo_t interm_signal; /* @ intermediate points in the chain. */
        char *args[1];
        char pitch_shift[] = "1000"; /* option passed to the "pitch" effect */
        char rate_quality[] = "-m";  /* quality option for "rate" (see SoX manual) */
        int status = EXIT_FAILURE;
        sox_encodinginfo_t out_encoding = {
            SOX_ENCODING_SIGN2,
            16,
            0,
            sox_option_default,
            sox_option_default,
            sox_option_default,
            sox_false
        };
        sox_signalinfo_t out_signal = {
            16000,
            1,
            0,
            0,
            NULL
        };
    
        if (argc != 3) {
            fprintf(stderr, "usage: %s <input file> <output file>\n",
                    argc > 0 ? argv[0] : "pitch");
            return EXIT_FAILURE;
        }
    
        if (sox_init() != SOX_SUCCESS) {
            fprintf(stderr, "sox_init failed\n");
            return EXIT_FAILURE;
        }
    
        in = sox_open_read(argv[1], NULL, NULL, NULL);
        if (in == NULL) {
            fprintf(stderr, "cannot open input file: %s\n", argv[1]);
            goto done;
        }
    
        out = sox_open_write(argv[2], &out_signal, &out_encoding, NULL, NULL, NULL);
        if (out == NULL) {
            fprintf(stderr, "cannot open output file: %s\n", argv[2]);
            goto done;
        }
    
        chain = sox_create_effects_chain(&in->encoding, &out->encoding);
        if (chain == NULL) {
            fprintf(stderr, "cannot create effects chain\n");
            goto done;
        }
    
        interm_signal = in->signal; /* NB: deep copy */
    
        /* The "input" effect takes the open input handle as its only option. */
        args[0] = (char *)in;
        if (add_effect(chain, "input", 1, args, &interm_signal, &in->signal) != SOX_SUCCESS) {
            goto done;
        }
    
        args[0] = pitch_shift;
        if (add_effect(chain, "pitch", 1, args, &interm_signal, &out->signal) != SOX_SUCCESS) {
            goto done;
        }
    
        /* Note: interm_signal.rate has changed now; "rate" converts it back. */
        args[0] = rate_quality;
        if (add_effect(chain, "rate", 1, args, &interm_signal, &out->signal) != SOX_SUCCESS) {
            goto done;
        }
    
        /* The "output" effect takes the open output handle as its only option. */
        args[0] = (char *)out;
        if (add_effect(chain, "output", 1, args, &interm_signal, &out->signal) != SOX_SUCCESS) {
            goto done;
        }
    
        if (sox_flow_effects(chain, NULL, NULL) != SOX_SUCCESS) {
            fprintf(stderr, "processing the effects chain failed\n");
            goto done;
        }
    
        status = EXIT_SUCCESS;
    
    done:
        /* Release whatever was acquired, in reverse order of setup. */
        if (chain != NULL) {
            sox_delete_effects_chain(chain);
        }
        if (out != NULL && sox_close(out) != SOX_SUCCESS) {
            fprintf(stderr, "error while closing output file\n");
            status = EXIT_FAILURE;
        }
        if (in != NULL) {
            sox_close(in);
        }
        sox_quit();
    
        return status;
    }
    

    Optionally, you may add a "dither" effect after "rate" to obtain a better result.