Tags: ios, audio, core-audio, audiounit, openal

How to play live audio on iOS?


I have an IP camera that requires a custom library for connection and communication. I have the video side taken care of, but I also want to give the user the option to listen to the audio recorded by the camera.

I receive the audio in the form of a byte stream (the audio is PCM u-law).

Since I don't read the data from a file or have a URL I can connect to, I think I have to use something like Audio Units or OpenAL to play my audio.

I tried to implement it with Audio Units based on examples I found online; this is what I have so far:

-(void) audioThread
{
    char buffer[1024];
    int size = 0;
    BOOL audioConfigured = NO;
    AudioComponentInstance audioUnit;

    while (running) {
        getAudioData(buffer, &size);    // fill buffer with audio from the camera; sets size to the bytes read

        // decode the 8-bit µ-law samples into 16-bit linear PCM
        int decodedSize = size * sizeof(int16_t);
        int16_t *tempChar = (int16_t *)calloc(size, sizeof(int16_t));
        for (int i = 0; i < size; i++) {
            tempChar[i] = MuLaw_Decode(buffer[i]);
        }

        uint8_t *data = malloc(decodedSize);
        memcpy(data, tempChar, decodedSize);
        free(tempChar);

        CMBlockBufferRef blockBuffer = NULL;
        OSStatus status = CMBlockBufferCreateWithMemoryBlock(NULL, data,
                                                             decodedSize,
                                                             kCFAllocatorNull, NULL,
                                                             0,
                                                             decodedSize,
                                                             0, &blockBuffer);

        CMSampleBufferRef sampleBuffer = NULL;
        // now I create my sample buffer from the block buffer
        // (formatDesc is a CMAudioFormatDescriptionRef built from the ASBD; see the sketch below)
        if(status == noErr)
        {
            const size_t sampleSize = decodedSize;
            status = CMSampleBufferCreate(kCFAllocatorDefault,
                                          blockBuffer, true, NULL, NULL,
                                          formatDesc, 1, 0, NULL, 1,
                                          &sampleSize, &sampleBuffer);
        }

        AudioStreamBasicDescription audioBasic;
        audioBasic.mBitsPerChannel = 16;
        audioBasic.mBytesPerPacket = 2;
        audioBasic.mBytesPerFrame = 2;
        audioBasic.mChannelsPerFrame = 1;
        audioBasic.mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked;
        audioBasic.mFormatID = kAudioFormatLinearPCM;
        audioBasic.mFramesPerPacket = 1;
        audioBasic.mSampleRate = 48000;
        audioBasic.mReserved = 0;

        if(!audioConfigured)
        {
            // initialize the circular buffer
            if(self.decodingBuffer == NULL)
                self.decodingBuffer = malloc(sizeof(TPCircularBuffer));
            if(!TPCircularBufferInit(self.decodingBuffer, 1024))
                continue;

            AudioComponentDescription componentDescription;
            componentDescription.componentType = kAudioUnitType_Output;
            componentDescription.componentSubType = kAudioUnitSubType_RemoteIO;
            componentDescription.componentManufacturer = kAudioUnitManufacturer_Apple;
            componentDescription.componentFlags = 0;
            componentDescription.componentFlagsMask = 0;

            AudioComponent component = AudioComponentFindNext(NULL, &componentDescription);
            if(AudioComponentInstanceNew(component, &audioUnit) != noErr) {
                NSLog(@"Failed to initialize the AudioComponent");
                continue;
            }

            //enable IO for playback
            UInt32 flag = 1;
            if(AudioUnitSetProperty(audioUnit, kAudioOutputUnitProperty_EnableIO, kAudioUnitScope_Output, 0, &flag, sizeof(flag)) != noErr) {
                NSLog(@"Failed to enable IO for playback");
                continue;
            }

            // set the format for the outputstream
            if(AudioUnitSetProperty(audioUnit, kAudioUnitProperty_StreamFormat,
                                    kAudioUnitScope_Output, 1, &audioBasic, sizeof(audioBasic)) != noErr) {
                NSLog(@"Failed to set the format for the outputstream");
                continue;
            }

            // set output callback
            AURenderCallbackStruct callbackStruct;
            callbackStruct.inputProc = playbackCallback;
            callbackStruct.inputProcRefCon = (__bridge void*) self;
            if(AudioUnitSetProperty(audioUnit, kAudioUnitProperty_SetRenderCallback, kAudioUnitScope_Global, 0, &callbackStruct, sizeof(callbackStruct))!= noErr) {
                NSLog(@"Failed to Set output callback");
                continue;
            }

            // Disable buffer allocation for the recorder (optional - do this if we want to pass in our own)
            flag = 0;
            status = AudioUnitSetProperty(audioUnit, kAudioUnitProperty_ShouldAllocateBuffer, kAudioUnitScope_Output, 1, &flag, sizeof(flag));

            if(AudioUnitInitialize(audioUnit) != noErr) {
                NSLog(@"Failed to initialize audioUnits");
            }

            if(AudioOutputUnitStart(audioUnit)!= noErr) {
                NSLog(@"[thread_ReceiveAudio] Failed to start audio");
            }
            audioConfigured = true;
        }

        AudioBufferList bufferList;
        if (sampleBuffer != NULL) {
            CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(sampleBuffer, NULL, &bufferList, sizeof(bufferList), NULL, NULL, kCMSampleBufferFlag_AudioBufferList_Assure16ByteAlignment, &blockBuffer);
            size_t totalSize = CMSampleBufferGetTotalSampleSize(sampleBuffer);

            // Put the decoded audio into the circular buffer
            TPCircularBufferProduceBytes(self.decodingBuffer, bufferList.mBuffers[0].mData, (int32_t)totalSize);
            //TPCircularBufferCopyAudioBufferList(self.decodingBuffer, &bufferList, NULL, kTPCircularBufferCopyAll, NULL);
            CFRelease(sampleBuffer);
            CFRelease(blockBuffer);
        }
    }

    //stop playing audio
    if(audioConfigured){
        if(AudioOutputUnitStop(audioUnit)!= noErr) {
            NSLog(@"[thread_ReceiveAudio] Failed to stop audio");
        }
        else{
            //clean up audio
            AudioComponentInstanceDispose(audioUnit);
        }
    }
}

int16_t MuLaw_Decode(int8_t number)
{
    const uint16_t MULAW_BIAS = 33;
    uint8_t sign = 0, position = 0;
    int16_t decoded = 0;
    number = ~number;
    if (number & 0x80)
    {
        number &= ~(1 << 7);
        sign = -1;
    }
    position = ((number & 0xF0) >> 4) + 5;
    decoded = ((1 << position) | ((number & 0x0F) << (position - 4))
               | (1 << (position - 5))) - MULAW_BIAS;
    return (sign == 0) ? (decoded) : (-(decoded));
}

static OSStatus playbackCallback(void *inRefCon,
                              AudioUnitRenderActionFlags *ioActionFlags,
                              const AudioTimeStamp *inTimeStamp,
                              UInt32 inBusNumber,
                              UInt32 inNumberFrames,
                              AudioBufferList *ioData) {

    // inRefCon is the bridged self set via inputProcRefCon
    // (the owning class is called AudioStreamer here; substitute your own class name)
    AudioStreamer *streamer = (__bridge AudioStreamer *)inRefCon;

    int bytesToCopy = ioData->mBuffers[0].mDataByteSize;
    SInt16 *targetBuffer = (SInt16*)ioData->mBuffers[0].mData;

    int32_t availableBytes;
    SInt16 *buffer = TPCircularBufferTail(streamer.decodingBuffer, &availableBytes);
    int bytesCopied = MIN(bytesToCopy, availableBytes);
    memcpy(targetBuffer, buffer, bytesCopied);
    TPCircularBufferConsume(streamer.decodingBuffer, bytesCopied);

    return noErr;
}
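
(formatDesc above is a CMAudioFormatDescriptionRef whose creation I didn't show; a minimal sketch of building one from the same ASBD, assuming the 16-bit mono format described above:)

CMAudioFormatDescriptionRef formatDesc = NULL;
OSStatus fmtErr = CMAudioFormatDescriptionCreate(kCFAllocatorDefault,
                                                 &audioBasic, // the ASBD from above
                                                 0, NULL,     // no channel layout
                                                 0, NULL,     // no magic cookie
                                                 NULL,        // no extensions
                                                 &formatDesc);
if (fmtErr != noErr) {
    NSLog(@"Failed to create the audio format description");
}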

The code above doesn't produce any errors, but won't play any sound. I thought I could supply the audio through the bufferList in the render callback, but it is never called.

So my question is: How do I play audio from a byte stream on iOS?


Solution

  • I decided to look at the project with fresh eyes. I got rid of most of the code and got it to work. It is not pretty, but at least it runs for now. For example: I had to set my sample rate to 4000, otherwise it would play too fast, and I still have performance issues (see the note after the code). Anyway, this is what I came up with:

    #define BUFFER_SIZE 1024
    #define NUM_CHANNELS 2
    #define kOutputBus 0
    #define kInputBus 1
    
    -(void) main
    {
        char buf[BUFFER_SIZE];
        int size = 0;

        while (self.running) {
            getAudioData(buf, &size);    // fill buf from the camera; sets size to the number of bytes read
    
            if(!self.configured) {
    
                if(![self activateAudioSession])
                    continue;
    
                self.configured = true;
            }
    
            TPCircularBufferProduceBytes(self.decodingBuffer, buf, size);
        }
        //stop audiounits
        AudioOutputUnitStop(self.audioUnit);
        AudioComponentInstanceDispose(self.audioUnit);
        if (self.decodingBuffer != NULL) {
            TPCircularBufferCleanup(self.decodingBuffer);
        }
    }
    
    static void audioSessionInterruptionCallback(void *inUserData, UInt32 interruptionState) {
        // inUserData is the bridged self we passed to AudioSessionInitialize
        AudioStreamer *streamer = (__bridge AudioStreamer *)inUserData;   // class name assumed

        if (interruptionState == kAudioSessionEndInterruption) {
            AudioSessionSetActive(YES);
            AudioOutputUnitStart(streamer.audioUnit);
        }

        if (interruptionState == kAudioSessionBeginInterruption) {
            AudioOutputUnitStop(streamer.audioUnit);
        }
    }
    
    static OSStatus playbackCallback(void *inRefCon,
                                     AudioUnitRenderActionFlags *ioActionFlags,
                                     const AudioTimeStamp *inTimeStamp,
                                     UInt32 inBusNumber,
                                     UInt32 inNumberFrames,
                                     AudioBufferList *ioData) {
        // Notes: ioData contains buffers (may be more than one!)
        // Fill them up as much as you can; zero-fill whatever you can't,
        // so an underrun plays silence instead of garbage.
        AudioStreamer *streamer = (__bridge AudioStreamer *)inRefCon;   // bridged self (class name assumed)
        if (!streamer.running) {
            return -1;
        }

        int bytesToCopy = ioData->mBuffers[0].mDataByteSize;
        SInt16 *targetBuffer = (SInt16*)ioData->mBuffers[0].mData;

        // Pull audio from the playthrough buffer
        int32_t availableBytes;
        if(streamer.decodingBuffer == NULL || streamer.decodingBuffer->length < 1) {
            NSLog(@"buffer is empty");
            return 0;
        }
        SInt16 *buffer = TPCircularBufferTail(streamer.decodingBuffer, &availableBytes);
        int bytesToConsume = MIN(bytesToCopy, availableBytes);
        memcpy(targetBuffer, buffer, bytesToConsume);
        memset((char *)targetBuffer + bytesToConsume, 0, bytesToCopy - bytesToConsume);
        TPCircularBufferConsume(streamer.decodingBuffer, bytesToConsume);

        return noErr;
    }
    
    - (BOOL) activateAudioSession {
    
        if (!self.activated_) {
    
            OSStatus result;
    
            result = AudioSessionInitialize(NULL,
                                            NULL,
                                            audioSessionInterruptionCallback,
                                            (__bridge void *)(self));
            if (kAudioSessionAlreadyInitialized != result)
                [self checkError:result message:@"Couldn't initialize audio session"];
    
        [self setupAudio];
            self.activated_ = YES;
    
        }
        return self.activated_;
    }
    
    - (void) setupAudio
    {
    
        OSStatus status;
    
        // Describe audio component
        AudioComponentDescription desc;
        desc.componentType = kAudioUnitType_Output;
        desc.componentSubType = kAudioUnitSubType_RemoteIO;
        desc.componentFlags = 0;
        desc.componentFlagsMask = 0;
        desc.componentManufacturer = kAudioUnitManufacturer_Apple;
    
        // Get component
        AudioComponent inputComponent = AudioComponentFindNext(NULL, &desc);
    
        // Get audio units
        AudioComponentInstanceNew(inputComponent, &_audioUnit);
    
        //    // Enable IO for recording
        //    UInt32 flag = 1;
        //    status = AudioUnitSetProperty(audioUnit,
        //                                  kAudioOutputUnitProperty_EnableIO,
        //                                  kAudioUnitScope_Input,
        //                                  kInputBus,
        //                                  &flag,
        //                                  sizeof(flag));
    
    
        // Enable IO for playback
        UInt32 flag = 1;
        AudioUnitSetProperty(_audioUnit,
                             kAudioOutputUnitProperty_EnableIO,
                             kAudioUnitScope_Output,
                             kOutputBus,
                             &flag,
                             sizeof(flag));
    
        // Describe format
        AudioStreamBasicDescription format;
        format.mSampleRate       = 4000;
        format.mFormatID         = kAudioFormatULaw;
        format.mFormatFlags      = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked;
        format.mBitsPerChannel   = 8 * sizeof(char);
        format.mChannelsPerFrame = NUM_CHANNELS;
        format.mBytesPerFrame    = sizeof(char) * NUM_CHANNELS;
        format.mFramesPerPacket  = 1;
        format.mBytesPerPacket   = format.mBytesPerFrame * format.mFramesPerPacket;
        format.mReserved         = 0;
        self.audioFormat = format;
    
        // Apply format
        // (input scope of the output bus is the format we feed to the speaker;
        // output scope of the input bus would only matter for recording)
        AudioUnitSetProperty(_audioUnit,
                             kAudioUnitProperty_StreamFormat,
                             kAudioUnitScope_Output,
                             kInputBus,
                             &_audioFormat,
                             sizeof(_audioFormat));

        AudioUnitSetProperty(_audioUnit,
                             kAudioUnitProperty_StreamFormat,
                             kAudioUnitScope_Input,
                             kOutputBus,
                             &_audioFormat,
                             sizeof(_audioFormat));
    
        //    // Set input callback
        //    AURenderCallbackStruct callbackStruct;
        //    callbackStruct.inputProc = recordingCallback;
        //    callbackStruct.inputProcRefCon = self;
        //    status = AudioUnitSetProperty(audioUnit,
        //                                  kAudioOutputUnitProperty_SetInputCallback,
        //                                  kAudioUnitScope_Global,
        //                                  kInputBus,
        //                                  &callbackStruct,
        //                                  sizeof(callbackStruct));
        //    checkStatus(status);
    
        // Set output callback
        AURenderCallbackStruct callbackStruct;
        callbackStruct.inputProc = playbackCallback;
        callbackStruct.inputProcRefCon = (__bridge void * _Nullable)(self);
        AudioUnitSetProperty(_audioUnit,
                             kAudioUnitProperty_SetRenderCallback,
                             kAudioUnitScope_Global,
                             kOutputBus,
                             &callbackStruct,
                             sizeof(callbackStruct));

        // Disable buffer allocation for the recorder (optional - do this if we want to pass in our own)
        flag = 0;
        status = AudioUnitSetProperty(_audioUnit,
                                      kAudioUnitProperty_ShouldAllocateBuffer,
                                      kAudioUnitScope_Output,
                                      kInputBus,
                                      &flag,
                                      sizeof(flag));
    
        // initialize the circular buffer
        if(self.decodingBuffer == NULL)
            self.decodingBuffer = malloc(sizeof(TPCircularBuffer));

        if(!TPCircularBufferInit(self.decodingBuffer, 512*1024))
            return;   // bail out; setupAudio returns void
        // Initialise
        status = AudioUnitInitialize(self.audioUnit);
    
        AudioOutputUnitStart(self.audioUnit);
    }
    

    I found most of this by looking through GitHub and on A Tasty Pixel (the blog of the TPCircularBuffer author).
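
    A note on the 4000 Hz workaround: IP cameras typically deliver G.711 µ-law at 8000 Hz mono. Because the format above declares NUM_CHANNELS = 2, the unit consumes 2 bytes per frame, so a declared rate of 4000 frames/s consumes 8000 µ-law bytes per second, which would explain why 4000 sounds right while higher rates played too fast. Declaring mono at 8000 Hz may be the cleaner fix.

    Also, the AudioSessionInitialize / AudioSessionSetActive C API used above is deprecated on current iOS. A minimal sketch of the equivalent session setup with AVAudioSession (this only replaces the session handling, not the audio unit code):

    #import <AVFoundation/AVFoundation.h>

    NSError *error = nil;
    AVAudioSession *session = [AVAudioSession sharedInstance];
    [session setCategory:AVAudioSessionCategoryPlayback error:&error];
    [session setActive:YES error:&error];

    // interruptions arrive as notifications instead of a C callback
    [[NSNotificationCenter defaultCenter] addObserverForName:AVAudioSessionInterruptionNotification
                                                      object:session
                                                       queue:nil
                                                  usingBlock:^(NSNotification *note) {
        NSUInteger type = [note.userInfo[AVAudioSessionInterruptionTypeKey] unsignedIntegerValue];
        if (type == AVAudioSessionInterruptionTypeBegan) {
            AudioOutputUnitStop(self.audioUnit);
        } else {
            [[AVAudioSession sharedInstance] setActive:YES error:NULL];
            AudioOutputUnitStart(self.audioUnit);
        }
    }];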