swift · macos · avfoundation · core-audio

AirPods gestures not sending AVAudioApplication mute state notifications


I have a macOS Swift app that captures audio from the user's microphone. I'm attempting to use the new (macOS 14+) APIs on AVAudioApplication that let the user mute the audio input with gestures (pressing the AirPods stem).

According to WWDC, there are two "levels" of doing this: getting a notification and handling it at the app level, or using a lower-level CoreAudio API. I'm trying to do the former in this case.
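
(For reference, here's a rough, untested sketch of what I understand the lower-level CoreAudio route to look like. The kAudioHardwarePropertyProcessInputMute selector is my reading of the macOS 14 AudioHardware.h headers; verify it against your SDK before relying on it.)

import CoreAudio

// Untested sketch of the lower-level CoreAudio route (not what I'm doing below).
// Assumes macOS 14's kAudioHardwarePropertyProcessInputMute selector.
func setProcessInputMuted(_ muted: Bool) -> OSStatus {
    var address = AudioObjectPropertyAddress(
        mSelector: kAudioHardwarePropertyProcessInputMute,
        mScope: kAudioObjectPropertyScopeGlobal,
        mElement: kAudioObjectPropertyElementMain
    )
    var value: UInt32 = muted ? 1 : 0
    return AudioObjectSetPropertyData(
        AudioObjectID(kAudioObjectSystemObject),
        &address,
        0,
        nil,
        UInt32(MemoryLayout<UInt32>.size),
        &value
    )
}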

Here's my example code (the relevant part is just Manager; the rest is boilerplate to get microphone input via CoreAudio so that this works as a minimal reproducible example).

import AVFoundation
import AudioToolbox
import Combine
import CoreAudio
import SwiftUI

class Manager: ObservableObject {
    private var controller: AudioInputController?
    private var cancellable: AnyCancellable?

    init() {
        cancellable = NotificationCenter.default.publisher(for: AVAudioApplication.inputMuteStateChangeNotification)
            .sink { notification in
                print("Notification", notification)
            }

        do {
            try AVAudioApplication.shared.setInputMuteStateChangeHandler { isMuted in
                print("Mute state", isMuted, Date())
                return true
            }
        } catch {
            assertionFailure()
            print("Error setting up handler", error)
        }

        controller = AudioInputController()
        controller?.start()
    }
}

struct ContentView: View {
    @StateObject private var manager = Manager()

    var body: some View {
        VStack {
            Image(systemName: "globe")
                .imageScale(.large)
                .foregroundStyle(.tint)
        }
        .padding()
    }
}

func getDefaultAudioDeviceID() -> AudioDeviceID? {
    var deviceID = AudioDeviceID()
    var dataSize = UInt32(MemoryLayout<AudioDeviceID>.size)

    var propertyAddress = AudioObjectPropertyAddress(
        mSelector: kAudioHardwarePropertyDefaultInputDevice,
        mScope: kAudioObjectPropertyScopeGlobal,
        mElement: kAudioObjectPropertyElementMain
    )

    let status = AudioObjectGetPropertyData(
        AudioObjectID(kAudioObjectSystemObject),
        &propertyAddress,
        0,
        nil,
        &dataSize,
        &deviceID
    )

    guard status == noErr else {
        assertionFailure()
        return nil
    }

    return deviceID
}

private final class AudioInputController {
    private var auHAL: AudioComponentInstance?
    private var inputBufferList: UnsafeMutableAudioBufferListPointer?
    private var sampleRate: Float = 0.0

    init?() {
        guard let audioDeviceID = getDefaultAudioDeviceID() else {
            assertionFailure()
            return nil
        }
        var osStatus: OSStatus = noErr

        // Create an AUHAL instance.
        var description = AudioComponentDescription(
            componentType: kAudioUnitType_Output,
            componentSubType: kAudioUnitSubType_HALOutput,
            componentManufacturer: kAudioUnitManufacturer_Apple,
            componentFlags: 0,
            componentFlagsMask: 0
        )
        guard let component = AudioComponentFindNext(nil, &description) else {
            assertionFailure()
            return nil
        }

        osStatus = AudioComponentInstanceNew(component, &auHAL)

        guard osStatus == noErr, let auHAL else {
            return nil
        }

        // Enable the input bus, and disable the output bus.
        let kInputElement: UInt32 = 1
        let kOutputElement: UInt32 = 0
        var kInputData: UInt32 = 1
        var kOutputData: UInt32 = 0
        let ioDataSize: UInt32 = UInt32(MemoryLayout<UInt32>.size)

        osStatus = AudioUnitSetProperty(
            auHAL,
            kAudioOutputUnitProperty_EnableIO,
            kAudioUnitScope_Input,
            kInputElement,
            &kInputData,
            ioDataSize
        )

        guard osStatus == noErr else {
            assertionFailure()
            return nil
        }

        osStatus = AudioUnitSetProperty(
            auHAL,
            kAudioOutputUnitProperty_EnableIO,
            kAudioUnitScope_Output,
            kOutputElement,
            &kOutputData,
            ioDataSize
        )

        guard osStatus == noErr else {
            assertionFailure()
            return nil
        }

        var inputDevice: AudioDeviceID = audioDeviceID
        let inputDeviceSize: UInt32 = UInt32(MemoryLayout<AudioDeviceID>.size)

        osStatus = AudioUnitSetProperty(
            auHAL,
            AudioUnitPropertyID(kAudioOutputUnitProperty_CurrentDevice),
            AudioUnitScope(kAudioUnitScope_Global),
            0,
            &inputDevice,
            inputDeviceSize
        )

        guard osStatus == noErr else {
            assertionFailure()
            return nil
        }

        // Adopt the stream format.
        var deviceFormat = AudioStreamBasicDescription()
        var desiredFormat = AudioStreamBasicDescription()
        var ioFormatSize: UInt32 = UInt32(MemoryLayout<AudioStreamBasicDescription>.size)

        osStatus = AudioUnitGetProperty(
            auHAL,
            AudioUnitPropertyID(kAudioUnitProperty_StreamFormat),
            AudioUnitScope(kAudioUnitScope_Input),
            kInputElement,
            &deviceFormat,
            &ioFormatSize
        )

        guard osStatus == noErr else {
            assertionFailure()
            return nil
        }

        osStatus = AudioUnitGetProperty(
            auHAL,
            AudioUnitPropertyID(kAudioUnitProperty_StreamFormat),
            AudioUnitScope(kAudioUnitScope_Output),
            kInputElement,
            &desiredFormat,
            &ioFormatSize
        )

        guard osStatus == noErr else {
            assertionFailure()
            return nil
        }

        // Same sample rate, same number of channels.
        desiredFormat.mSampleRate = deviceFormat.mSampleRate
        desiredFormat.mChannelsPerFrame = deviceFormat.mChannelsPerFrame

        // Canonical audio format.
        desiredFormat.mFormatID = kAudioFormatLinearPCM
        desiredFormat
            .mFormatFlags = kAudioFormatFlagIsFloat | kAudioFormatFlagsNativeEndian | kAudioFormatFlagIsPacked | kAudioFormatFlagIsNonInterleaved
        desiredFormat.mFramesPerPacket = 1
        desiredFormat.mBytesPerFrame = UInt32(MemoryLayout<Float32>.size)
        desiredFormat.mBytesPerPacket = UInt32(MemoryLayout<Float32>.size)
        desiredFormat.mBitsPerChannel = 8 * UInt32(MemoryLayout<Float32>.size)

        osStatus = AudioUnitSetProperty(
            auHAL,
            AudioUnitPropertyID(kAudioUnitProperty_StreamFormat),
            AudioUnitScope(kAudioUnitScope_Output),
            kInputElement,
            &desiredFormat,
            UInt32(MemoryLayout<AudioStreamBasicDescription>.size)
        )

        guard osStatus == noErr else {
            assertionFailure()
            return nil
        }

        // Store the format information.
        sampleRate = Float(desiredFormat.mSampleRate)

        // Get the buffer frame size.
        var bufferSizeFrames: UInt32 = 0
        var bufferSizeFramesSize = UInt32(MemoryLayout<UInt32>.size)

        osStatus = AudioUnitGetProperty(
            auHAL,
            AudioUnitPropertyID(kAudioDevicePropertyBufferFrameSize),
            AudioUnitScope(kAudioUnitScope_Global),
            0,
            &bufferSizeFrames,
            &bufferSizeFramesSize
        )

        guard osStatus == noErr else {
            assertionFailure()
            return nil
        }

        let bufferSizeBytes: UInt32 = bufferSizeFrames * UInt32(MemoryLayout<Float32>.size)
        let channels: UInt32 = deviceFormat.mChannelsPerFrame

        inputBufferList = AudioBufferList.allocate(maximumBuffers: Int(channels))
        for i in 0 ..< Int(channels) {
            // Non-interleaved format: one single-channel buffer per channel.
            inputBufferList?[i] = AudioBuffer(
                mNumberChannels: 1,
                mDataByteSize: bufferSizeBytes,
                mData: malloc(Int(bufferSizeBytes))
            )
        }

        var callbackStruct = AURenderCallbackStruct(
            inputProc: { (
                inRefCon: UnsafeMutableRawPointer,
                ioActionFlags: UnsafeMutablePointer<AudioUnitRenderActionFlags>,
                inTimeStamp: UnsafePointer<AudioTimeStamp>,
                inBusNumber: UInt32,
                inNumberFrame: UInt32,
                _: UnsafeMutablePointer<AudioBufferList>?
            ) -> OSStatus in

                let owner = Unmanaged<AudioInputController>.fromOpaque(inRefCon).takeUnretainedValue()
                owner.inputCallback(
                    ioActionFlags: ioActionFlags,
                    inTimeStamp: inTimeStamp,
                    inBusNumber: inBusNumber,
                    inNumberFrame: inNumberFrame
                )
                return noErr
            },
            inputProcRefCon: Unmanaged.passUnretained(self).toOpaque()
        )

        osStatus = AudioUnitSetProperty(
            auHAL,
            AudioUnitPropertyID(kAudioOutputUnitProperty_SetInputCallback),
            AudioUnitScope(kAudioUnitScope_Global),
            0,
            &callbackStruct,
            UInt32(MemoryLayout<AURenderCallbackStruct>.size)
        )

        guard osStatus == noErr else {
            assertionFailure()
            return nil
        }

        osStatus = AudioUnitInitialize(auHAL)

        guard osStatus == noErr else {
            assertionFailure()
            return nil
        }
    }

    deinit {
        if let auHAL {
            AudioOutputUnitStop(auHAL)
            AudioComponentInstanceDispose(auHAL)
        }
        if let inputBufferList {
            for buffer in inputBufferList {
                free(buffer.mData)
            }
        }
    }

    private func inputCallback(
        ioActionFlags: UnsafeMutablePointer<AudioUnitRenderActionFlags>,
        inTimeStamp: UnsafePointer<AudioTimeStamp>,
        inBusNumber: UInt32,
        inNumberFrame: UInt32
    ) {
        guard let inputBufferList,
              let auHAL
        else {
            assertionFailure()
            return
        }

        let err = AudioUnitRender(
            auHAL,
            ioActionFlags,
            inTimeStamp,
            inBusNumber,
            inNumberFrame,
            inputBufferList.unsafeMutablePointer
        )
        guard err == noErr else {
            assertionFailure()
            return
        }
    }

    func start() {
        guard let auHAL else {
            assertionFailure()
            return
        }
        let status: OSStatus = AudioOutputUnitStart(auHAL)
        if status != noErr {
            assertionFailure()
        }
    }

    func stop() {
        guard let auHAL else {
            assertionFailure()
            return
        }
        let status: OSStatus = AudioOutputUnitStop(auHAL)
        if status != noErr {
            assertionFailure()
        }
    }
}

Note: if you attempt to run this, make sure to add the Audio Input capability to the app and the NSMicrophoneUsageDescription key to the Info.plist.
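
(If the permission prompt never appears, it can also help to request microphone access explicitly before starting the audio unit. A minimal sketch using AVCaptureDevice, which I believe is the usual way to do this on macOS:)

import AVFoundation

// Sketch: request microphone access up front so the audio unit doesn't
// start rendering while authorization is still undetermined.
func requestMicrophoneAccess(_ completion: @escaping (Bool) -> Void) {
    switch AVCaptureDevice.authorizationStatus(for: .audio) {
    case .authorized:
        completion(true)
    case .notDetermined:
        AVCaptureDevice.requestAccess(for: .audio, completionHandler: completion)
    default:
        completion(false)
    }
}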

When I press the stem on my AirPods Pro (2nd generation), I get this: [screenshot]

How can I make sure that either the AVAudioApplication.inputMuteStateChangeNotification fires or the handler passed to AVAudioApplication.shared.setInputMuteStateChangeHandler actually gets called when the stem is pressed?


Solution

  • It looks like the order of registration matters here. inputMuteStateChangeNotification must be subscribed to after setInputMuteStateChangeHandler, which seems strange, because I would've assumed that registering for the notification was order-independent. The following change to Manager works:

    class Manager: ObservableObject {
        private var controller: AudioInputController?
        private var cancellable: AnyCancellable?
    
        init() {
            do {
                try AVAudioApplication.shared.setInputMuteStateChangeHandler { isMuted in
                    print("Mute state", isMuted, Date())
                    return true
                }
            } catch {
                assertionFailure()
                print("Error setting up handler", error)
            }
    
            cancellable = NotificationCenter.default.publisher(for: AVAudioApplication.inputMuteStateChangeNotification)
            .sink { notification in
                print("Notification", notification)
            }
            
            controller = AudioInputController()
            controller?.start()
        }
    }
    

    Thanks to Kuvonchbek Yakubov's answer for inspiring the idea that where (and therefore in what order) the registration happens matters.
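
    For completeness, the notification itself should also carry the new mute state in its userInfo. A small sketch of reading it (assuming the AVAudioApplication.muteStateKey key that the docs list for this notification; verify against your SDK):

        cancellable = NotificationCenter.default.publisher(for: AVAudioApplication.inputMuteStateChangeNotification)
            .sink { notification in
                // The new mute state should arrive in userInfo under muteStateKey.
                if let isMuted = notification.userInfo?[AVAudioApplication.muteStateKey] as? Bool {
                    print("Notification mute state:", isMuted)
                }
            }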