Search code examples
iosobjective-cswiftspeech-recognitionspeech-to-text

How do I convert voice to text in iOS?


As far as I know, Apple's native frameworks don't have APIs for converting voice to text, so we have to use a third-party framework to do that, and it has many drawbacks, such as the user having to use the microphone to convert voice to text.

I can find lots of information about converting text to voice, but not the other way around.

I couldn't find any clear information about this, and most of what I did find leaves many things uncertain.

If someone could shed some light on this, it'd be really great!


Solution

  • Here is the full code for converting speech to text with Apple's Speech framework:

    import UIKit
    import Speech
    
    public class ViewController: UIViewController, SFSpeechRecognizerDelegate {
        // MARK: Properties
    
        private let speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US"))!
    
        private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    
        private var recognitionTask: SFSpeechRecognitionTask?
    
        private let audioEngine = AVAudioEngine()
    
        @IBOutlet var textView : UITextView!
    
        @IBOutlet var recordButton : UIButton!
    
        // MARK: UIViewController
    
        public override func viewDidLoad() {
            super.viewDidLoad()
    
            // Disable the record buttons until authorization has been granted.
            recordButton.isEnabled = false
        }
    
        override public func viewDidAppear(_ animated: Bool) {
            speechRecognizer.delegate = self
    
            SFSpeechRecognizer.requestAuthorization { authStatus in
                /*
                    The callback may not be called on the main thread. Add an
                    operation to the main queue to update the record button's state.
                */
                OperationQueue.main.addOperation {
                    switch authStatus {
                        case .authorized:
                            self.recordButton.isEnabled = true
    
                        case .denied:
                            self.recordButton.isEnabled = false
                            self.recordButton.setTitle("User denied access to speech recognition", for: .disabled)
    
                        case .restricted:
                            self.recordButton.isEnabled = false
                            self.recordButton.setTitle("Speech recognition restricted on this device", for: .disabled)
    
                        case .notDetermined:
                            self.recordButton.isEnabled = false
                            self.recordButton.setTitle("Speech recognition not yet authorized", for: .disabled)
                    }
                }
            }
        }
    
        private func startRecording() throws {
    
            // Cancel the previous task if it's running.
            if let recognitionTask = recognitionTask {
                recognitionTask.cancel()
                self.recognitionTask = nil
            }
    
            let audioSession = AVAudioSession.sharedInstance()
            try audioSession.setCategory(AVAudioSessionCategoryRecord)
            try audioSession.setMode(AVAudioSessionModeMeasurement)
            try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
    
            recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
    
            guard let inputNode = audioEngine.inputNode else { fatalError("Audio engine has no input node") }
            guard let recognitionRequest = recognitionRequest else { fatalError("Unable to created a SFSpeechAudioBufferRecognitionRequest object") }
    
            // Configure request so that results are returned before audio recording is finished
            recognitionRequest.shouldReportPartialResults = true
    
            // A recognition task represents a speech recognition session.
            // We keep a reference to the task so that it can be cancelled.
            recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
                var isFinal = false
    
                if let result = result {
                    self.textView.text = result.bestTranscription.formattedString
                    isFinal = result.isFinal
                }
    
                if error != nil || isFinal {
                    self.audioEngine.stop()
                    inputNode.removeTap(onBus: 0)
    
                    self.recognitionRequest = nil
                    self.recognitionTask = nil
    
                    self.recordButton.isEnabled = true
                    self.recordButton.setTitle("Start Recording", for: [])
                }
            }
    
            let recordingFormat = inputNode.outputFormat(forBus: 0)
            inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
                self.recognitionRequest?.append(buffer)
            }
    
            audioEngine.prepare()
    
            try audioEngine.start()
    
            textView.text = "(Go ahead, I'm listening)"
        }
    
        // MARK: SFSpeechRecognizerDelegate
    
        public func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {
            if available {
                recordButton.isEnabled = true
                recordButton.setTitle("Start Recording", for: [])
            } else {
                recordButton.isEnabled = false
                recordButton.setTitle("Recognition not available", for: .disabled)
            }
        }
    
        // MARK: Interface Builder actions
    
        @IBAction func recordButtonTapped() {
            if audioEngine.isRunning {
                audioEngine.stop()
                recognitionRequest?.endAudio()
                recordButton.isEnabled = false
                recordButton.setTitle("Stopping", for: .disabled)
            } else {
                try! startRecording()
                recordButton.setTitle("Stop recording", for: [])
            }
        }
    }