Search code examples
c audio portaudio flite

Getting flite to output audio with PortAudio


I am trying to get the flite speech synthesis library to work on my Mac, but my sound architecture isn't supported within the flite library. To fix that problem, I am using PortAudio to playback the synthesized audio; so I had to do a little bit of hacking within the audio.c file to get flite to use that library. I managed to get everything compiling just fine after mucking around in the GNU AutoTools for a while, but then I run the program and get this output:

$ ./flite -t "test"
frameIndex: 0
maxFrameIndex: 0
numChannels: 1
numSamples: 7225
sampleRate: 8000
=== Now playing back. ===
Waiting for playback to finish.
frameIndex in callback: -2008986336
maxFrameIndex in callback: 32655
numChannels in callback: 152579008
numSamples in callback: 0
sampleRate in callback: 0
Segmentation fault: 11  

$ ./flite -t "test"
frameIndex: 0
maxFrameIndex: 0
numChannels: 1
numSamples: 7225
sampleRate: 8000
=== Now playing back. ===
Waiting for playback to finish.
frameIndex in callback: -71217888
maxFrameIndex in callback: 32712
numChannels in callback: 232979392
numSamples in callback: 0
sampleRate in callback: 0
Segmentation fault: 11

Here is the relevant code from the audio.c file, which is called when I supply the command line argument -t. I marked the area of interest where the segmentation fault occurs in the playCallback() function after a bit of debugging.

/*
 * PortAudio stream callback: copies 16-bit samples from the cst_wave passed
 * as userData into outputBuffer, framesPerBuffer frames at a time.
 *
 * Returns paContinue after a full buffer has been copied, or paComplete after
 * the final buffer, which is padded with zeros.
 *
 * NOTE(review): userData is cast directly to cst_wave*, so the value handed to
 * Pa_OpenStream() must be the cst_wave pointer itself -- confirm at the call
 * site.
 */
static int playCallback( const void *inputBuffer, void *outputBuffer,
                        unsigned long framesPerBuffer,
                        const PaStreamCallbackTimeInfo* timeInfo,
                        PaStreamCallbackFlags statusFlags,
                        void *userData )
{
    cst_wave *data = (cst_wave*)userData;
    /* Read position: frameIndex frames into the sample array. */
    short *rptr = &data->samples[data->frameIndex * data->num_channels];
    short *wptr = (short*)outputBuffer;
    unsigned int i;
    int finished;
    /* Frames still to be played; stored unsigned although both operands are int. */
    unsigned int framesLeft = cst_wave_maxFrameIndex(data) - cst_wave_frameIndex(data);

    (void) inputBuffer; /* Prevent unused variable warnings. */
    (void) timeInfo;
    (void) statusFlags;
    (void) userData;

    /* Debug dump of the wave fields as seen from inside the callback. */
    printf("frameIndex in callback: %d\n", cst_wave_frameIndex(data));
    printf("maxFrameIndex in callback: %d\n", cst_wave_maxFrameIndex(data));
    printf("numChannels in callback: %d\n", cst_wave_num_channels(data));
    printf("numSamples in callback: %d\n", cst_wave_num_samples(data));
    printf("sampleRate in callback: %d\n\n", cst_wave_sample_rate(data));

    if( framesLeft < framesPerBuffer )
    {
        /* final buffer... */
        for( i=0; i<framesLeft; i++ )
        {
            *wptr++ = *rptr++;  /* left */
            if( cst_wave_num_channels(data) == 2 ) *wptr++ = *rptr++;  /* right */
        }
        /* Fill the rest of the buffer with silence. */
        for( ; i<framesPerBuffer; i++ )
        {
            *wptr++ = 0;  /* left */
            if( cst_wave_num_channels(data) == 2) *wptr++ = 0;  /* right */
        }
        data->frameIndex += framesLeft;
        finished = paComplete;
    }
    else
    {
        for( i=0; i<framesPerBuffer; i++ )
        {
            *wptr++ = *rptr++;  /* left */
            if( cst_wave_num_channels(data) == 2 ) *wptr++ = *rptr++;  /* right */
        }
        /* NOTE(review): this assigns framesPerBuffer to frameIndex rather than
         * adding it, unlike the "+=" used in the final-buffer branch. */
        cst_wave_set_frameIndex(data, framesPerBuffer);
        finished = paContinue;
    }
    return finished;
}

/*
 * Plays the wave w on the default PortAudio output device and polls until the
 * stream is no longer active.
 *
 * Returns -5 when there is no default output device.
 * NOTE(review): every other path reaches the end of the function without a
 * return statement, although the function is declared to return int.
 */
int play_wave(cst_wave *w)
{
    PaStream* stream;
    PaStreamParameters outputParameters;
    cst_wave_set_frameIndex(w, 0);
    /* NOTE(review): num_samples / sample_rate is an integer division and is
     * evaluated before the multiplications. */
    cst_wave_set_maxFrameIndex(w, (cst_wave_num_samples(w) / cst_wave_sample_rate(w)) * cst_wave_num_channels(w) * sizeof(short));
    int err = 0;
    err = Pa_Initialize();
    outputParameters.device = Pa_GetDefaultOutputDevice();
    if (outputParameters.device == paNoDevice)
    {
        fprintf(stderr,"Error: No default output device.\n");
        return -5;
    }
    /* Debug dump of the wave fields before playback starts. */
    printf("frameIndex: %d\n", cst_wave_frameIndex(w));
    printf("maxFrameIndex: %d\n", cst_wave_maxFrameIndex(w));
    printf("numChannels: %d\n", cst_wave_num_channels(w));
    printf("numSamples: %d\n", cst_wave_num_samples(w));
    printf("sampleRate: %d\n", cst_wave_sample_rate(w));

    /* Output format: the wave's channel count, 16-bit integer samples. */
    outputParameters.channelCount = cst_wave_num_channels(w);
    outputParameters.sampleFormat = paInt16;
    outputParameters.suggestedLatency = Pa_GetDeviceInfo( outputParameters.device )->defaultLowOutputLatency;
    outputParameters.hostApiSpecificStreamInfo = NULL;
    puts("=== Now playing back. ===");
    /* NOTE(review): &w is the address of the local pointer (a cst_wave**),
     * while playCallback casts its userData to cst_wave*. */
    err = Pa_OpenStream(&stream,
                        NULL, /* no input */
                        &outputParameters,
                        cst_wave_sample_rate(w),
                        512,
                        paClipOff,
                        playCallback,
                        &w);
    if( stream )
    {
        err = Pa_StartStream( stream );
        if( err != paNoError ) goto done;

        puts("Waiting for playback to finish.");

        /* Poll every 100 ms while the stream reports itself active. */
        while((err = Pa_IsStreamActive(stream)) == 1) Pa_Sleep(100);
        if( err < 0 ) goto done;

        err = Pa_CloseStream( stream );
        if( err != paNoError ) goto done;

        puts("Done.");
    }
done:
    Pa_Terminate();
    /* NOTE(review): frees the caller's sample buffer; presumably the caller
     * still owns it -- confirm who is responsible for releasing w->samples. */
    free(cst_wave_samples(w));
}

Because it is relevant, I also slightly modified the cst_wave structure in cst_wave.h so that it contains my data I have to store, as well as adding a few #defines to the ones that were already present:

/* Wave data, extended with the playback-position fields used by playCallback. */
typedef struct  cst_wave_struct {
    const char *type;
    int frameIndex;     /* index of the next frame to play; advanced by playCallback */
    int maxFrameIndex;  /* frame index at which playback ends; set by play_wave */
    int sample_rate;    /* passed to Pa_OpenStream as the stream sample rate */
    int num_samples;
    int num_channels;   /* used as the PortAudio output channel count */
    short *samples;     /* 16-bit sample data read by playCallback */
} cst_wave;

/*
 * Field accessors for cst_wave.
 *
 * Every macro argument and every expansion is parenthesized so that the
 * macros work with arbitrary pointer expressions (for example &wave or p + 1)
 * and so that a setter can be used inside a larger expression.
 *
 * The num_samples, num_channels and sample_rate getters yield 0 for a null
 * wave; the remaining macros dereference w unconditionally.
 */
#define cst_wave_num_samples(w) ((w)?(w)->num_samples:0)
#define cst_wave_num_channels(w) ((w)?(w)->num_channels:0)
#define cst_wave_sample_rate(w) ((w)?(w)->sample_rate:0)
#define cst_wave_samples(w) ((w)->samples)
#define cst_wave_frameIndex(w) ((w)->frameIndex)
#define cst_wave_maxFrameIndex(w) ((w)->maxFrameIndex)

/* Setters: each evaluates to the value that was stored. */
#define cst_wave_set_num_samples(w,s) ((w)->num_samples=(s))
#define cst_wave_set_num_channels(w,s) ((w)->num_channels=(s))
#define cst_wave_set_sample_rate(w,s) ((w)->sample_rate=(s))
#define cst_wave_set_frameIndex(w,s) ((w)->frameIndex=(s))
#define cst_wave_set_maxFrameIndex(w,s) ((w)->maxFrameIndex=(s))

Update 1:

Following the advice of @Rohan now gives me this output:

$ ./bin/flite -t "test"
frameIndex: 0
maxFrameIndex: 0
numChannels: 1
numSamples: 7225
sampleRate: 8000
=== Now playing back. ===
Waiting for playback to finish.
frameIndex in callback: 0
maxFrameIndex in callback: 0
numChannels in callback: 1
numSamples in callback: 7225
sampleRate in callback: 8000

Done.
flite(68929,0x7fff71c0d310) malloc: *** error for object 0x7fd6e2809800: pointer being freed was not allocated
*** set a breakpoint in malloc_error_break to debug
Abort trap: 6

To fix that, I removed the free(cst_wave_samples(w));. Now the program executes normally with no visible errors, but there is still no audio output on my Mac. Any suggestions?


Solution

  • You're in luck. I was able to compile both PortAudio and flite on my own Mac, and solve your problem.

    You have several issues other than those mentioned before, all of which I have addressed in the code dump below.

    • Minor: You don't use your own API for cst_wave consistently.
    • Minor: I prefer to enclose my while and if blocks with {} always. This has a habit of preventing mysterious bugs.
    • Max Frames was being set to zero. That's because in (cst_wave_num_samples(w) / cst_wave_sample_rate(w)) * cst_wave_num_channels(w) * sizeof(short), you were dividing by the sample rate, which was greater than your number of samples. Integer division truncates, so 7225 / 8000 evaluates to 0, and multiplying that 0 by the remaining factors leaves the whole expression at zero.
    • Max Frames is still wrong, as a frame includes all channel samples. The number of frames is thus agnostic to both number of channels and the size of the samples themselves. Allowing myself to guess that flite misuses sample to mean frame, your max frame index is just cst_wave_num_samples(w). Else it will be cst_wave_num_samples(w) / cst_wave_num_channels(w).
    • PortAudio's documentation states you should call Pa_StopStream(stream) after the stream becomes inactive, whether or not you were waiting until it became so.
    • I simplified the callback, and corrected it for
      • Minor: Consistent use of your API
      • MAJOR: Ehm... cst_wave_set_frameIndex(data, framesPerBuffer); is definitely wrong. You're pinning yourself at frame index 512 instead of incrementing! That's because you asked for 512 frames per buffer when opening the stream and you're not incrementing the frame index by framesPerBuffer, you're setting the frame index to framesPerBuffer. You hadn't made it that far because your maxFrameIndex was 0 anyways so you were exiting. I fixed it so that the frame index increments - with your API of course.

    Here is the code, which I took the liberty of documenting and cleaning up until it approached my standards of elegance. Enjoy!

    #include <stdio.h>
    #include <string.h>
    
    /**
     * Audio play callback.
     * 
     * Follows the PaStreamCallback signature. Copies the next run of
     * interleaved 16-bit samples from the cst_wave passed as userData into
     * outputBuffer, advances the wave's frame index by the number of frames
     * copied, and fills whatever the wave cannot supply with zeros (silence).
     * 
     * @param inputBuffer     Unused; the stream is opened for output only.
     * @param outputBuffer    Destination for framesPerBuffer frames of
     *                        interleaved short samples.
     * @param framesPerBuffer The number of sample frames to be produced by this
     *                        invocation of the callback.
     * @param timeInfo        Unused.
     * @param statusFlags     Unused.
     * @param userData        The cst_wave being played, as passed to
     *                        Pa_OpenStream().
     * 
     * @return paContinue while frames remain after this buffer, paComplete once
     *         the last frame has been written. A null userData also yields
     *         paComplete; in that case the buffer is left untouched because the
     *         channel count, and therefore the buffer size, is unknown.
     */
    
    static int  playCallback(const void*                     inputBuffer,
                             void*                           outputBuffer,
                             unsigned long                   framesPerBuffer,
                             const PaStreamCallbackTimeInfo* timeInfo,
                             PaStreamCallbackFlags           statusFlags,
                             void*                           userData){
        cst_wave*    data;
        short*       wptr;
        int          remaining;  /* Signed distance from the frame index to the end of the wave. */
        unsigned int channels,   /* Samples per frame. */
                     framesLeft, /* Number of frames of data remaining within the stream ***as a whole*** */
                     frames,     /* Number of frames of data to be written for this buffer. */
                     framesPad,  /* Number of frames of padding required within the final buffer. */
                     samples,    /* Number of samples of data to be written for this buffer. */
                     samplesPad; /* Number of samples of padding required within the final buffer. */
        int          finalBuffer;/* Stores whether or not this is the final buffer. */
    
        (void) inputBuffer; /* Prevent unused variable warnings. */
        (void) timeInfo;
        (void) statusFlags;
    
    
        /**
         * Compute current processing state.
         */
    
        data = (cst_wave*)userData;
        wptr = (short*)outputBuffer;
        if(!data){
            return paComplete;
        }
    
        /*
         * The difference is taken as a signed value and clamped at zero. Stored
         * directly into an unsigned it would wrap to a huge count if the index
         * ever passed the end, and the copy below would run off the sample
         * array.
         */
        remaining    = cst_wave_maxFrameIndex(data) - cst_wave_frameIndex(data);
        framesLeft   = remaining > 0 ? (unsigned int)remaining : 0;
        finalBuffer  = framesLeft      <= framesPerBuffer;
        frames       = finalBuffer     ?  framesLeft     : (unsigned int)framesPerBuffer;
        framesPad    = (unsigned int)framesPerBuffer -  frames;
        channels     = (unsigned int)cst_wave_num_channels(data);
        samples      = frames     * channels;
        samplesPad   = framesPad  * channels;
    
    
        /**
         * Debug code. Compiled in only when PLAY_WAVE_DEBUG is defined, because
         * the callback runs on the audio thread, where stdio output can stall
         * playback.
         */
    
    #ifdef PLAY_WAVE_DEBUG
        printf("framesLeft in callback: %u\n", framesLeft);
        printf("framesPerBuffer in callback: %lu\n", framesPerBuffer);
        printf("frames in callback: %u\n", frames);
    
        printf("frameIndex in callback: %d\n", cst_wave_frameIndex(data));
        printf("maxFrameIndex in callback: %d\n", cst_wave_maxFrameIndex(data));
        printf("numChannels in callback: %d\n", cst_wave_num_channels(data));
        printf("numSamples in callback: %d\n", cst_wave_num_samples(data));
        printf("sampleRate in callback: %d\n\n", cst_wave_sample_rate(data));
    #endif
    
    
        /**
         * Output data. We handle the final buffer specially, padding it with zeros.
         * The source address is only formed when there is something to copy, so
         * no pointer beyond the sample array is ever computed.
         */
    
        if(samples > 0){
            memcpy(wptr,
                   &data->samples[cst_wave_frameIndex  (data) *
                                  cst_wave_num_channels(data)],
                   samples * sizeof(short));
            cst_wave_set_frameIndex(data, cst_wave_frameIndex(data) + (int)frames);
        }
        if(samplesPad > 0){
            memset(wptr + samples, 0, samplesPad * sizeof(short));
        }
    
    
        /**
         * Return a completion or continue code depending on whether this was the
         * final buffer or not respectively.
         */
    
        return finalBuffer ? paComplete : paContinue;
    }
    
    /**
     * Play wave function.
     * 
     * Plays the given cst_wave data as audio on the default output device,
     * blocking until this is done.
     * 
     * @param w  The wave to play. Its frameIndex and maxFrameIndex fields are
     *           overwritten; the sample data itself is only read.
     * 
     * @return 0 on success, -1 if w is NULL, -5 if there is no default output
     *         device, otherwise the (negative) error code of the first
     *         PortAudio call that failed.
     */
    
    int play_wave(cst_wave *w){
        enum { FRAMES_PER_BUFFER = 512 };
    
        PaStream*          stream = NULL;
        PaStreamParameters outputParameters;
        int                err;
        int                result = 0;
    
        if(!w){
            return -1;
        }
    
        /**
         * Initialize custom fields in cst_wave struct. The end of playback is
         * measured in frames, which is taken here to be num_samples.
         */
    
        cst_wave_set_frameIndex(w, 0);
        cst_wave_set_maxFrameIndex(w, cst_wave_num_samples(w));
    
    
        /**
         * Initialize Port Audio device and stream parameters. Pa_Terminate()
         * must only be called after a successful Pa_Initialize(), so a failure
         * here returns directly.
         */
    
        err = Pa_Initialize();
        if(err != paNoError){
            fprintf(stderr,"Error: PortAudio initialization failed (%d).\n", err);
            return err;
        }
    
        outputParameters.device = Pa_GetDefaultOutputDevice();
        if (outputParameters.device == paNoDevice){
            fprintf(stderr,"Error: No default output device.\n");
            result = -5;
            goto done;
        }
    
        printf("frameIndex: %d\n", cst_wave_frameIndex(w));
        printf("maxFrameIndex: %d\n", cst_wave_maxFrameIndex(w));
        printf("numChannels: %d\n", cst_wave_num_channels(w));
        printf("numSamples: %d\n", cst_wave_num_samples(w));
        printf("sampleRate: %d\n", cst_wave_sample_rate(w));
    
        outputParameters.channelCount = cst_wave_num_channels(w);
        outputParameters.sampleFormat = paInt16;
        outputParameters.suggestedLatency = Pa_GetDeviceInfo( outputParameters.device )->defaultLowOutputLatency;
        outputParameters.hostApiSpecificStreamInfo = NULL;
    
    
        /**
         * Open the stream for playback. The wave pointer itself is the user
         * data handed to playCallback.
         */
    
        puts("=== Now playing back. ===");
        err = Pa_OpenStream(&stream,
                            NULL, /* no input */
                            &outputParameters,
                            cst_wave_sample_rate(w),
                            FRAMES_PER_BUFFER,
                            paClipOff,
                            playCallback,
                            w);
        if(err != paNoError){
            /* Rely on the error code, not on the stream pointer, to detect failure. */
            result = err;
            goto done;
        }
    
        /**
         * Start the stream.
         */
    
        err = Pa_StartStream(stream);
        if(err != paNoError){
            result = err;
            goto close_stream;
        }
    
        /**
         * Block while it plays.
         */
    
        puts("Waiting for playback to finish.");
        while((err = Pa_IsStreamActive(stream)) == 1){
            Pa_Sleep(100);
        }
        if(err < 0){
            result = err;
            goto close_stream;
        }
    
        /**
         * Stop and close the stream. Both are necessary.
         */
    
        err = Pa_StopStream(stream);
        if(err != paNoError){
            result = err;
        }
    
    close_stream:
        err = Pa_CloseStream(stream);
        if(err != paNoError && result == 0){
            result = err;
        }
        if(result == 0){
            puts("Done.");
        }
    
        /**
         * Terminate and leave.
         */
    done:
        Pa_Terminate();
        return result;
    }