Search code examples
macosavaudioengine

macOS console application to slow down or speed up speech without losing quality


I have an .mp3 file that contains only voice, and I would like to adjust its speed (slower or faster) while maintaining the same clarity. No chipmunks! I then want to write the modified file to disk. To do this, I am trying to use AVAudioEngine, but I am completely new to the framework. All the examples I have found are geared toward modifying music, or toward recording voice and then playing it back audibly. I just want to convert the file to a different speed and write the modified file out. This is all I have been able to figure out so far; I have documented my confusion in the comments of the code.

import Foundation
import AVFoundation
import AVKit

// Incomplete attempt at changing the speed of a speech recording and saving
// the result. As written, nothing below connects the engine nodes, starts the
// engine, or renders any audio; the open questions are kept inline.
print("Change Voice Speed")

let engine = AVAudioEngine()
// NOTE(review): a varispeed-style rate change normally shifts pitch along with
// speed, which is the "chipmunk" effect to avoid — confirm against the
// AVAudioUnitVarispeed documentation.
let speedControl = AVAudioUnitVarispeed()
let originalFile = URL(fileURLWithPath: "/Users/User/Desktop/speech.mp3")

do {
   // Open question: what should be done with the input file?
   // `file` is scoped to this `do` block and is never used after it is opened.
   let file = try AVAudioFile(forReading: originalFile)
} catch let error as NSError {
    print("There's an error: \(error)")
}

// Adds 0.1 to the node's current rate (presumably 1.0 by default — TODO confirm).
speedControl.rate += 0.1
// The node is attached here but never connected to any other node.
engine.attach(speedControl)
// Open question: does an AVAudioRecorder need to be attached to record here?

print("Write New File")

let outputFile = URL(fileURLWithPath: "/Users/User/Desktop/modifiedSpeech.mp3")


// Open question: which settings are needed for an .mp3 file? It is also
// unclear whether this actually writes the file to disk.

do {
   // `<#T##[String : Any]#>` is an Xcode editor placeholder; it has to be
   // replaced with a real settings dictionary before this compiles.
   // The created AVAudioFile is discarded and no audio is ever written to it.
   try AVAudioFile(forWriting: outputFile, settings: <#T##[String : Any]#>)
} catch let error as NSError {
    print("There's an error: \(error)")
}


Solution

  • Here is proof-of-concept code that speeds up a stereo M4A file and saves it as a WAVE file. There is minimal error handling and no attempt at handling different channel layouts, but hopefully it should get you started:

    /// Setup failures that the AVFoundation initializers report by returning `nil`.
    enum SpeedChangeError: Error {
        case invalidRenderFormat
        case bufferAllocationFailed
    }

    // Source and destination of the conversion
    let inputURL = URL(fileURLWithPath: "/tmp/test.m4a")
    let outputURL = URL(fileURLWithPath: "/tmp/foo.wav")

    // Playback-rate multiplier: 1.5 speeds the file up 1.5x
    let rate: Float = 1.5

    // Run the engine in manual rendering mode using chunks of 512 frames
    let renderSize: AVAudioFrameCount = 512

    let engine = AVAudioEngine()

    // Use AVAudioUnitTimePitch to avoid pitch shifts
    let timePitch = AVAudioUnitTimePitch()
    let player = AVAudioPlayerNode()

    do {
        // Open the input first so the graph can be connected using its format
        let file = try AVAudioFile(forReading: inputURL)
        let sourceFormat = file.processingFormat

        engine.attach(timePitch)
        engine.attach(player)

        // Connect with the file's processing format so the buffers scheduled on
        // the player match the player's output format; the main mixer converts
        // to the render format.
        engine.connect(player, to: timePitch, format: sourceFormat)
        engine.connect(timePitch, to: engine.mainMixerNode, format: sourceFormat)

        timePitch.rate = rate

        guard let renderFormat = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: 44100.0, channels: 2, interleaved: true) else {
            throw SpeedChangeError.invalidRenderFormat
        }
        guard let renderBuffer = AVAudioPCMBuffer(pcmFormat: renderFormat, frameCapacity: renderSize) else {
            throw SpeedChangeError.bufferAllocationFailed
        }

        try engine.enableManualRenderingMode(.offline, format: renderFormat, maximumFrameCount: renderBuffer.frameCapacity)

        try engine.start()
        // Stop the player and the engine on every exit path, including errors
        defer {
            player.stop()
            engine.stop()
        }
        player.play()

        // The render format is also the output format.
        // `output` lives only inside this scope; presumably AVAudioFile finalizes
        // the file on disk when it is released — TODO confirm.
        let output = try AVAudioFile(forWriting: outputURL, settings: renderFormat.settings, commonFormat: renderFormat.commonFormat, interleaved: renderFormat.isInterleaved)

        // Input frames consumed per rendered frame: the time-pitch rate, scaled
        // by the ratio between the source and render sample rates.
        let inputFramesPerOutputFrame = Double(rate) * sourceFormat.sampleRate / renderFormat.sampleRate

        // Read using a buffer sized to produce `renderSize` frames of output
        let readSize = max(1, AVAudioFrameCount(Double(renderSize) * inputFramesPerOutputFrame))
        guard let readBuffer = AVAudioPCMBuffer(pcmFormat: sourceFormat, frameCapacity: readSize) else {
            throw SpeedChangeError.bufferAllocationFailed
        }

        // Process the file; processing is finished once all frames have been read
        while file.framePosition < file.length {
            try file.read(into: readBuffer)
            if readBuffer.frameLength == 0 {
                break
            }

            // NOTE(review): the same buffer is rescheduled every iteration; if the
            // player has not consumed it fully before the next read overwrites it,
            // allocate a fresh buffer per chunk instead — verify.
            player.scheduleBuffer(readBuffer, completionHandler: nil)

            let result = try engine.renderOffline(renderBuffer.frameCapacity, to: renderBuffer)

            // Only write audio from a successful render
            guard result == .success else {
                print("Offline render stopped with status \(result.rawValue)")
                break
            }

            // Try to avoid adding silence after a short final read
            let expectedFrames = AVAudioFrameCount(Double(readBuffer.frameLength) / inputFramesPerOutputFrame)
            if expectedFrames < renderBuffer.frameLength {
                renderBuffer.frameLength = expectedFrames
            }

            // Write the adjusted-rate audio
            if renderBuffer.frameLength > 0 {
                try output.write(from: renderBuffer)
            }
        }
    } catch {
        // Report the failure instead of silently stopping
        print("Failed to change speed: \(error)")
    }