Search code examples
node.jsazureasynchronousspeech-to-text

Promises and Azure Speech to Text


I want the following code to output the actual value of `response` from `inner()` rather than a promise. I thought that `.then()` would wait to run until the promise is resolved. What should I change? As of now the output is `undefined` rather than the text recognized from YourAudioFile.wav. I know the code works otherwise, because if I `console.log(response)` inside `inner()`, the actual value of `response` is printed to the terminal rather than `undefined` or `Promise<>`.

// Node's file-system module and the Azure Speech SDK.
const fs = require('fs');
const sdk = require("microsoft-cognitiveservices-speech-sdk");
// Credentials are read from the SPEECH_KEY and SPEECH_REGION environment variables.
const speechConfig = sdk.SpeechConfig.fromSubscription(process.env.SPEECH_KEY, process.env.SPEECH_REGION);
// Recognition language is set to "he-IL".
speechConfig.speechRecognitionLanguage = "he-IL";
// The whole WAV file is read synchronously and handed to the SDK as the audio input.
const audioConfig = sdk.AudioConfig.fromWavFileInput(fs.readFileSync("YourAudioFile.wav"));
// Module-level recognizer shared by inner() below.
const speechRecognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);

/**
 * Starts continuous recognition on the module-level `speechRecognizer` and
 * attaches handlers that accumulate recognized text into a local `response`.
 *
 * NOTE: this function's own body contains no `return` and no `await`, so the
 * promise it returns is fulfilled with `undefined` as soon as the handlers have
 * been assigned. The `return response` near the bottom belongs to the
 * `sessionStopped` arrow function, not to `inner()`.
 */
async function inner() {
    // Accumulates the text of every recognized phrase.
    let response = ""

// Recognition is started before the event handlers below are attached.
speechRecognizer.startContinuousRecognitionAsync();

// Appends each recognized phrase (plus a trailing space) to `response`.
speechRecognizer.recognized = (s, e) => {
    if (e.result.reason == sdk.ResultReason.RecognizedSpeech) {
        //console.log(`RECOGNIZED: Text=${e.result.text}`);
        response += (e.result.text + " ")
    }
    else if (e.result.reason == sdk.ResultReason.NoMatch) {
        console.log("NOMATCH: Speech could not be recognized.");
    }
};

// Logs the cancellation (with error details when the reason is Error) and
// asks the recognizer to stop.
speechRecognizer.canceled = (s, e) => {
    console.log(`CANCELED: Reason=${e.reason}`);

    if (e.reason == sdk.CancellationReason.Error) {
        console.log(`"CANCELED: ErrorCode=${e.errorCode}`);
        console.log(`"CANCELED: ErrorDetails=${e.errorDetails}`);
        console.log("CANCELED: Did you set the speech resource key and region values?");
    }

    speechRecognizer.stopContinuousRecognitionAsync();
};

// Asks the recognizer to stop when the session ends.
speechRecognizer.sessionStopped = (s, e) => {
    speechRecognizer.stopContinuousRecognitionAsync();
    // This returns from the handler only; the value goes to whatever invokes
    // the handler and never reaches the caller of inner().
    return response
};

}

// Logs the value inner()'s promise is fulfilled with, or the rejection reason.
// inner() has no return statement of its own, so `msg` here is `undefined`.
inner().then((msg) => {
    console.log(msg)
}).catch((err) => {
    console.log(err)
})

Solution

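  • In your code, `return response` sits inside the `sessionStopped` callback, so it returns from that callback rather than from `inner()`; `inner()` itself returns nothing and its promise resolves immediately with `undefined`. I changed the code so that `inner()` returns a `new Promise` that is resolved from the `sessionStopped` handler, and with that change I got the text output for the input speech.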

    Code:

    // Node's file-system module and the Azure Speech SDK.
    const fs = require('fs');
    const sdk = require("microsoft-cognitiveservices-speech-sdk");
    
    // Placeholders: replace with your Speech resource key and region.
    const speechKey = "<speech-key>";
    const speechRegion = "<speech-region>";
    
    // Build the recognizer: credentials, recognition language, and a WAV file
    // that is read synchronously in full and used as the audio input.
    const speechConfig = sdk.SpeechConfig.fromSubscription(speechKey, speechRegion);
    speechConfig.speechRecognitionLanguage = "en-US";
    const audioConfig = sdk.AudioConfig.fromWavFileInput(fs.readFileSync("<audio-wav-file>"));
    // Module-level recognizer used by inner() below.
    const speechRecognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);
    
    /**
     * Transcribes the audio attached to the module-level `speechRecognizer`
     * using continuous recognition.
     *
     * Only final (`recognized`) results are collected. Interim (`recognizing`)
     * hypotheses are logged for progress but never added to the transcript,
     * because each one repeats text that the final result already contains.
     *
     * @returns {Promise<string>} Resolves, once the session has stopped, with
     *   the recognized phrases joined by a single space (an empty string when
     *   nothing was recognized). Rejects with an `Error` when the service
     *   cancels recognition with an error or recognition cannot be started.
     */
    function inner() {
      return new Promise((resolve, reject) => {
        const phrases = [];
        let settled = false;

        // Stops recognition and settles the promise exactly once, so that a
        // cancellation followed by a session stop cannot settle it twice.
        const finish = (error) => {
          if (settled) {
            return;
          }
          settled = true;
          speechRecognizer.stopContinuousRecognitionAsync();
          if (error) {
            reject(error);
          } else {
            resolve(phrases.join(" "));
          }
        };

        // Interim hypotheses: progress output only.
        speechRecognizer.recognizing = (s, e) => {
          if (e.result.reason === sdk.ResultReason.RecognizingSpeech) {
            console.log(`RECOGNIZING: Text=${e.result.text}`);
          }
        };

        // Final result for one utterance: this is what ends up in the transcript.
        speechRecognizer.recognized = (s, e) => {
          if (e.result.reason === sdk.ResultReason.RecognizedSpeech) {
            console.log(`RECOGNIZED: Text=${e.result.text}`);
            phrases.push(e.result.text);
          } else if (e.result.reason === sdk.ResultReason.NoMatch) {
            console.log("NOMATCH: Speech could not be recognized.");
          }
        };

        // Surface service errors (e.g. wrong key or region) as a rejection
        // instead of resolving with an empty transcript or never settling.
        speechRecognizer.canceled = (s, e) => {
          if (e.reason === sdk.CancellationReason.Error) {
            finish(
              new Error(
                `Speech recognition canceled: ${e.errorDetails} (ErrorCode=${e.errorCode})`,
              ),
            );
            return;
          }
          // Non-error cancellation: request a stop and let `sessionStopped`
          // resolve the promise.
          speechRecognizer.stopContinuousRecognitionAsync();
        };

        speechRecognizer.sessionStopped = () => {
          console.log("Session stopped.");
          finish();
        };

        // Handlers are attached before starting so that no event is missed.
        speechRecognizer.startContinuousRecognitionAsync(
          undefined,
          (err) => finish(new Error(`Failed to start speech recognition: ${err}`)),
        );
      });
    }
    
    // Prints the final transcript once recognition has finished.
    const reportTranscript = (msg) => {
      console.log("Recognition completed. Final response:");
      console.log(msg);
    };

    // Prints the failure if recognition (or the reporting above) throws.
    const reportFailure = (err) => {
      console.log("Recognition error:");
      console.error(err);
    };

    inner().then(reportTranscript).catch(reportFailure);
    

    package.json:

    {
      "name": "speech-to-text-example",
      "version": "1.0.0",
      "description": "Speech to Text Example",
      "main": "index.js",
      "scripts": {
        "start": "node index.js"
      },
      "dependencies": {
        "microsoft-cognitiveservices-speech-sdk": "^1.20.0"
      }
    }
    

    Output:

    It runs successfully and prints the text recognized from the input speech, as shown below:

    enter image description here