Search code examples
swift audiokit avaudioengine

AudioKit down sample audio


I have some existing code which uses AVAudioEngine to take input from the microphone, downsample it, and write it to an AVAudioFile.

/// Builds the microphone capture chain on `audioEngine` and installs the recording tap.
///
/// Resulting graph:
///
///     input -> inputVolumeMixerNode -> downSampleMixerNode -> silenceNode -> output
///
/// The first two connections use the input node's own format; the connections after
/// `downSampleMixerNode` use an 8 kHz mono format, so that mixer is where the
/// down-sampling is expected to happen. Buffers tapped from `downSampleMixerNode`
/// are forwarded to `audioTap`.
///
/// Does nothing when `audioEngine` is nil.
internal func setupNodeChain() {
    guard let audioEngine = audioEngine else { return } // Fatal error ?

    let bus = 0
    let engineInputNode = audioEngine.inputNode
    let engineInputNodeFormat = engineInputNode.outputFormat(forBus: bus)

    // Target format for everything downstream of the down-sample mixer (8 kHz, mono).
    // The initializer is failable, so this is an optional; `connect` and `installTap`
    // both accept an optional format.
    let mixerOutputFormat = AVAudioFormat(standardFormatWithSampleRate: 8000, channels: 1)

    // Input -> (volume) -> down sample -> (volume) -> Output

    let inputVolumeMixerNode = AVAudioMixerNode()
    inputVolumeMixerNode.volume = Float(10 * microphoneVolume)

    // This attempts to down sample the audio from the microphone
    let downSampleMixerNode = AVAudioMixerNode()

    // Muted mixer placed in front of the output node.
    let silenceNode = AVAudioMixerNode()
    silenceNode.outputVolume = 0

    audioEngine.attach(inputVolumeMixerNode)
    audioEngine.attach(downSampleMixerNode)

    self.downSampleMixerNode = downSampleMixerNode
    self.inputVolumeMixerNode = inputVolumeMixerNode
    self.silenceNode = silenceNode

    audioEngine.connect(engineInputNode, to: inputVolumeMixerNode, format: engineInputNodeFormat)
    audioEngine.connect(inputVolumeMixerNode, to: downSampleMixerNode, format: engineInputNodeFormat)

    // Try and stop the microphone audio from going through to the speaker
    audioEngine.attach(silenceNode)
    audioEngine.connect(downSampleMixerNode, to: silenceNode, format: mixerOutputFormat)
    audioEngine.connect(silenceNode, to: audioEngine.outputNode, format: mixerOutputFormat)

    // `self` is captured weakly: the node is stored on `self` and the node retains the
    // tap block, so a strong capture would keep `self` alive for as long as the tap
    // stays installed.
    downSampleMixerNode.installTap(onBus: bus, bufferSize: 1024 * 16, format: mixerOutputFormat) { [weak self] (buffer: AVAudioPCMBuffer, time: AVAudioTime) in
        guard let tap = self?.audioTap else { return }
        // Write buffer to AVAudioFile
        tap.drip(buffer: buffer, time: time)
    }
}

This mostly works, but I'm investigating replacing it with AudioKit and I'm having issues: I don't know how to create a mechanism to downsample the audio from the microphone on its way to the recorder.

    // Configure AudioKit's global settings before the session and engine are started.
    AKSettings.enableEchoCancellation = true
    AKSettings.allowAirPlay = true
    AKSettings.useBluetooth = true
    
    do {
        // Request a play-and-record session with Bluetooth A2DP allowed.
        try AKSettings.setSession(category: .playAndRecord,
                                                            with: [
                                                                .allowBluetoothA2DP,
        ])
        
        // NOTE(review): this is set after setSession(...) — confirm it still takes effect for the session.
        AKSettings.defaultToSpeaker = true
        
        // NOTE(review): `audioFile` is not used again in this snippet — confirm whether the recorder should be given it.
        let audioFile = try self.makeAudioFile(named: "Recording")
        
        // Desired target format (8 kHz, mono). It is only referenced by the commented-out line below,
        // so nothing in this snippet actually applies it.
        let mixerOutputFormat = AVAudioFormat(standardFormatWithSampleRate: 8000, channels: 1)!

        let microphone = AKMicrophone()
        let microphoneBooster = AKBooster(microphone)
        // NOTE(review): the recorder below records from this booster; with gain 0 it presumably captures silence — confirm.
        microphoneBooster.gain = 0
        
        let recorder = try AKNodeRecorder(node: microphoneBooster)
        //recorder.recordFormat = mixerOutputFormat
        
        // Zero-volume mixer used as the engine output so the microphone is not played back.
        let silence = AKMixer(microphoneBooster)
        silence.volume = 0
        
        // Keep the nodes and the recorder on self.
        self.microphone = microphone
        self.microphoneBooster = microphoneBooster
        self.recorder = recorder
        self.silence = silence
        
        AKManager.output = silence
        
        log(debug: "Start")
        try AKManager.start()
        
        log(debug: "Record")
        try recorder.record()

        // State and UI changes are dispatched to the main queue.
        DispatchQueue.main.async {
            self.state = .recording
            self.plot?.node = microphone
            self.callButton.setImage(#imageLiteral(resourceName: "EndCall"), for: [])
        }
    } catch let error {
        // Any throwing call above (session, file creation, engine start, record) ends up here.
        log(error: "Failed to establish play and record session: \(error)")
    }

So, the question is: how would I go about creating a "down sampling" node/workflow, which would link the microphone to the "node" with the "default" format and then link the "node" to the next node in the chain with the desired AVAudioFormat?

Microphone -> Down sample (default format)

Down sample -> Next node (target format) -> recorder


Solution

  • Essentially, I had to create my own "tap" to tap into the data

    First, I had a "converter". This basically takes audio coming from another mixer (via a "tap"), converts it to a target format, and writes it out to an audio file.

    /// Receives tapped audio buffers, converts them to `audioConfig.audioFormat`
    /// and appends the result to `audioConfig.audioFile`.
    ///
    /// Usage: call `open(format:)` with the format of the tapped node, feed buffers
    /// through `drip(buffer:time:)`, then call `close()`. Failures that occur while
    /// dripping are logged and reported through `onError`.
    class TapConverter: NodeTapperDelegate {
        
        /// Failures raised by the converter itself (as opposed to errors surfaced by AVFoundation).
        enum ConversionError: Error {
            /// No `AVAudioConverter` could be created for the given format pair.
            case unsupportedFormat(from: AVAudioFormat, to: AVAudioFormat)
            /// The buffer that receives the converted audio could not be created.
            case outputBufferUnavailable
            /// The converter reported an error status without providing an `NSError`.
            case conversionFailed
        }
        
        let audioConfig: AudioConfig
        
        /// Format of the incoming buffers; set by `open(format:)`, cleared by `close()`.
        internal var inputFormat: AVAudioFormat?
        /// Converter from `inputFormat` to `audioConfig.audioFormat`; nil while closed.
        internal var converter: AVAudioConverter?
        
        /// Called when converting or writing a buffer fails.
        var onError: ((Error) -> Void)?
        
        init(audioConfig: AudioConfig) {
            self.audioConfig = audioConfig
        }
        
        /// Prepares the converter for buffers arriving in `format`.
        ///
        /// - Parameter format: Format of the buffers that will be passed to `drip(buffer:time:)`.
        /// - Throws: `ConversionError.unsupportedFormat` when no converter exists for the format pair,
        ///   instead of silently leaving the converter unset and dropping every buffer.
        func open(format: AVAudioFormat) throws {
            guard let newConverter = AVAudioConverter(from: format, to: audioConfig.audioFormat) else {
                throw ConversionError.unsupportedFormat(from: format, to: audioConfig.audioFormat)
            }
            inputFormat = format
            converter = newConverter
        }
        
        /// Converts one tapped buffer and appends it to the audio file.
        ///
        /// Does nothing when the converter has not been opened.
        ///
        /// - Parameters:
        ///   - buffer: Audio in the format passed to `open(format:)`.
        ///   - time: Timestamp of the buffer (unused).
        func drip(buffer: AVAudioPCMBuffer, time: AVAudioTime) {
            guard let converter = converter, let inputFormat = inputFormat else {
                return
            }
            guard let convertedBuffer = makeOutputBuffer(for: buffer, inputFormat: inputFormat) else {
                report(ConversionError.outputBufferUnavailable, context: "Failed to create conversion buffer")
                return
            }
            
            // The converter may call the input block more than once while filling the
            // output buffer. Hand the tapped buffer over exactly once; afterwards report
            // that no more data is available right now, otherwise the same audio would be
            // fed in again.
            var inputConsumed = false
            var conversionError: NSError? = nil
            let status = converter.convert(to: convertedBuffer, error: &conversionError) { _, outStatus in
                if inputConsumed {
                    outStatus.pointee = .noDataNow
                    return nil
                }
                inputConsumed = true
                outStatus.pointee = .haveData
                return buffer
            }
            
            if status == .error || conversionError != nil {
                let failure: Error = conversionError ?? ConversionError.conversionFailed
                report(failure, context: "Failed to convert buffer")
                return
            }
            
            // Nothing was produced for this input (e.g. the converter is still priming).
            guard convertedBuffer.frameLength > 0 else {
                return
            }
            
            let audioFile = audioConfig.audioFile
            do {
                log(debug: "Write buffer")
                try audioFile.write(from: convertedBuffer)
            } catch let error {
                log(error: "Failed to write buffer to audio file: \(error)")
                onError?(error)
            }
        }
        
        /// Releases the converter; buffers dripped afterwards are ignored.
        func close() {
            converter = nil
            inputFormat = nil
            // 🤞 we close the audio file
        }
        
        /// Creates a buffer in the target format sized for the converted contents of `buffer`.
        /// Returns nil when the sample rates are unusable or the buffer cannot be created.
        private func makeOutputBuffer(for buffer: AVAudioPCMBuffer, inputFormat: AVAudioFormat) -> AVAudioPCMBuffer? {
            let targetFormat = audioConfig.audioFormat
            let sampleRateRatio = inputFormat.sampleRate / targetFormat.sampleRate
            guard sampleRateRatio.isFinite, sampleRateRatio > 0 else {
                return nil
            }
            
            // Sized from the input capacity (an upper bound on its length), rounded up.
            let capacity = (Double(buffer.frameCapacity) / sampleRateRatio).rounded(.up)
            guard capacity >= 1, capacity <= Double(AVAudioFrameCount.max) else {
                return nil
            }
            return AVAudioPCMBuffer(pcmFormat: targetFormat, frameCapacity: AVAudioFrameCount(capacity))
        }
        
        /// Logs `error` and forwards it to `onError`.
        private func report(_ error: Error, context: String) {
            log(error: "\(context): \(error)")
            onError?(error)
        }
    }
    

    AudioConfig is just a basic placeholder; it contains the audioFile which is being written to (it must already be created) and the target AVAudioFormat.

    /// Bundles the destination of a recording with the format its audio is converted to.
    struct AudioConfig {
        /// URL associated with the recording (presumably the location of `audioFile` — confirm against the caller).
        let url: URL
        /// File that converted buffers are written to; it must already have been created.
        let audioFile: AVAudioFile
        /// Format that incoming audio is converted to before it is written.
        let audioFormat: AVAudioFormat
    }
    

    Creation might look something like...

    // Settings for the destination file: AAC, 8 kHz, one channel, minimum encoder quality.
    let settings: [String: Any] = [
        AVFormatIDKey: NSNumber(value: kAudioFormatMPEG4AAC),
        AVSampleRateKey: NSNumber(value: 8000),
        AVNumberOfChannelsKey: NSNumber(value: 1),
        // NOTE(review): 16 looks like a bit depth rather than a per-channel bit rate — confirm the intended value.
        AVEncoderBitRatePerChannelKey: NSNumber(value: 16),
        AVEncoderAudioQualityKey: NSNumber(value: AVAudioQuality.min.rawValue)
    ]
    // `sourceURL` is not defined in this snippet; it must be supplied by the surrounding code.
    let audioFile = try AVAudioFile(forWriting: sourceURL, settings: settings)
    
    // `audioFormat` is not defined in this snippet either.
    // NOTE(review): presumably the format buffers are converted to before being written — confirm it is compatible with `audioFile`.
    let audioConfig = AudioConfig(url: sourceURL, audioFile: audioFile, audioFormat: audioFormat)
    

    From there, I needed a way to tap the node (get its data) and pass it on to my converter. For that, I used something like...

    import Foundation
    import AudioKit
    
    /// Receives the audio captured by a `NodeTapper`.
    ///
    /// Constrained to class types (`AnyObject`, the current spelling of the deprecated
    /// `class` constraint) so that the tapper can hold its delegate weakly.
    protocol NodeTapperDelegate: AnyObject {
        /// Called before the tap is installed.
        /// - Parameter format: Output format of the node that is about to be tapped.
        /// - Throws: Any error that prevents the delegate from accepting buffers.
        func open(format: AVAudioFormat) throws
        
        /// Called for every buffer delivered by the tap.
        /// - Parameters:
        ///   - buffer: The captured audio.
        ///   - time: The time associated with `buffer`.
        func drip(buffer: AVAudioPCMBuffer, time: AVAudioTime)
        
        /// Called after the tap has been removed.
        func close()
    }
    
    /// Installs a tap on an AudioKit node and forwards every captured buffer to a delegate.
    class NodeTapper: NSObject {
        // MARK: - Properties
        
        /// The node the tap is installed on.
        private(set) var node: AKNode?
        
        /// True while a tap is installed.
        @objc private(set) dynamic var isTapping = false
        
        /// The bus to install the tap on. Default is 0.
        private var bus: Int = 0
        
        /// Used for fixing recordings being truncated: the duration of a 16 384-frame
        /// buffer at `AKSettings.sampleRate`, which `stop()` waits for before removing the tap.
        private var recordBufferDuration: Double = 16_384 / AKSettings.sampleRate
        
        /// Receiver of the tapped buffers. Held weakly; `start()` does nothing without it.
        weak var delegate: NodeTapperDelegate?
        
        // MARK: - Initialization
        
        /// Initialize the node tapper
        ///
        /// The tap buffer size is taken from `AKSettings.recordingBufferLength` when the tap is started.
        ///
        /// - Parameters:
        ///   - node: Node to tap
        ///   - bus: Integer index of the bus to use
        ///
        @objc init(node: AKNode? = AKManager.output,
                   bus: Int = 0) throws {
            self.bus = bus
            self.node = node
        }
        
        // MARK: - Methods
        
        /// Start tapping
        ///
        /// Does nothing when a tap is already installed, or when there is no node or no delegate.
        ///
        /// - Throws: Whatever the delegate's `open(format:)` throws. In that case no tap is
        ///   installed and `isTapping` stays false, so `start()` can be retried.
        @objc func start() throws {
            guard !isTapping, let node = node, let delegate = delegate else {
                return
            }
            
            let bufferLength: AVAudioFrameCount = AKSettings.recordingBufferLength.samplesCount
            
            // Query the format on the same node and bus the tap is installed on.
            let tappedNode = node.avAudioUnitOrNode
            let nodeFormat = tappedNode.outputFormat(forBus: bus)
            try delegate.open(format: nodeFormat)
            
            // Note: if you install a tap on a bus that already has a tap it will crash your application.
            //
            // note, format should be nil as per the documentation for installTap:
            // "If non-nil, attempts to apply this as the format of the specified output bus. This should
            // only be done when attaching to an output bus which is not connected to another node"
            // In most cases AudioKit nodes will be attached to something else.
            //
            // `self` is captured weakly: the node retains the tap block and `self` retains the
            // node, so a strong capture would form a retain cycle while the tap is installed.
            tappedNode.installTap(onBus: bus,
                                  bufferSize: bufferLength,
                                  format: nil) { [weak self] buffer, time in
                self?.process(buffer: buffer, time: time)
            }
            
            // Only flag the tapper as running once the tap is really in place.
            isTapping = true
        }
        
        /// Forwards one tapped buffer to the delegate, if there still is one.
        private func process(buffer: AVAudioPCMBuffer, time: AVAudioTime) {
            guard let sink = delegate else { return }
            sink.drip(buffer: buffer, time: time)
        }
        
        /// Stop tapping
        ///
        /// Removes the tap and closes the delegate. When `AKSettings.fixTruncatedRecordings`
        /// is set, this blocks the calling thread for `recordBufferDuration` first.
        @objc func stop() {
            guard isTapping else {
                return
            }
            
            isTapping = false
            
            if AKSettings.fixTruncatedRecordings {
                //  delay before stopping so the recording is not truncated.
                let delay = UInt32(recordBufferDuration * 1_000_000)
                usleep(delay)
            }
            node?.avAudioUnitOrNode.removeTap(onBus: bus)
            delegate?.close()
        }
    }
    

    And then, somehow, bind it all together

    // Microphone input; the optional chaining below shows that `microphone` is optional here.
    let microphone = AKMicrophone()
    microphone?.volume = 10 * volume
    
    // NOTE(review): named as a mono-to-stereo step — confirm that `amount: 1` has that effect.
    let monoToStereo = AKStereoFieldLimiter(microphone, amount: 1)
    // Mixer that the tap is installed on.
    let microphoneMixer = AKMixer(monoToStereo)
    
    // This is where we're converting the audio from
    // the microphone and dripping it into the audio file
    let converter = TapConverter(audioConfig: audioConfig)
    // handleError is basically just a func in this case
    converter.onError = handleError
    // Here we tap the mixer/node and output to the converter
    let tapper = try NodeTapper(node: microphoneMixer)
    // The tapper holds its delegate weakly, so the converter is also stored on self below.
    tapper.delegate = converter
    
    // Silence the output from the microphone, so it's not
    // fed back into the microphone
    let silence = AKMixer(microphoneMixer)
    silence.volume = 0
    
    // Keep the nodes, the converter and the tapper on self.
    self.microphoneMixer = microphoneMixer
    self.converter = converter
    self.tapper = tapper
    self.microphone = microphone
    self.silence = silence
    
    AKManager.output = silence
    
    log(debug: "Start")
    try AKManager.start()
    
    // The tap is started only after the engine has been started.
    log(debug: "Record")
    try tapper.start()
    

    So much of this came from scraps of different ideas from different posts around the web. So, is it the best option? I don't know, but it does what I need it to do.