node.js azure asynchronous speech-to-text

Promises and Azure Speech to Text

I want the following code to output the actual value that inner() produces rather than a promise. I thought that .then() would wait to run until the promise is resolved. What should I change? As of now the output is undefined rather than the text transcribed from YourAudioFile.wav. I know the code works otherwise, because if I console.log(response) inside inner(), the actual value of response is printed to the terminal rather than undefined or Promise { <pending> }.

const fs = require('fs');
const sdk = require("microsoft-cognitiveservices-speech-sdk");
// Build the Speech service configuration from the SPEECH_KEY and SPEECH_REGION
// environment variables.
const speechConfig = sdk.SpeechConfig.fromSubscription(process.env.SPEECH_KEY, process.env.SPEECH_REGION);
// Recognition language is set to the "he-IL" locale tag.
speechConfig.speechRecognitionLanguage = "he-IL";
// The whole WAV file is read synchronously and its contents are handed to the
// SDK as the audio input.
const audioConfig = sdk.AudioConfig.fromWavFileInput(fs.readFileSync("YourAudioFile.wav"));
// Single recognizer instance used by inner() below.
const speechRecognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);

/**
 * Starts continuous recognition on the module-level `speechRecognizer` and
 * accumulates the recognized text in a local `response` string.
 *
 * NOTE: this function body contains no `return` statement of its own, so the
 * promise produced by the `async` function resolves to `undefined` as soon as
 * the event handlers below have been assigned. The `return response` further
 * down belongs to the `sessionStopped` arrow function, not to `inner()`.
 */
async function inner() {
    // Transcript buffer; only the `recognized` handler appends to it.
    let response = ""

// Recognition is started before any of the handlers below are assigned.
speechRecognizer.startContinuousRecognitionAsync();

// Appends each recognized result's text, followed by a space, to `response`.
speechRecognizer.recognized = (s, e) => {
    if (e.result.reason == sdk.ResultReason.RecognizedSpeech) {
        //console.log(`RECOGNIZED: Text=${e.result.text}`);
        response += (e.result.text + " ")
    }
    else if (e.result.reason == sdk.ResultReason.NoMatch) {
        console.log("NOMATCH: Speech could not be recognized.");
    }
};

// Logs the cancellation reason (plus error details when the reason is Error)
// and then asks the recognizer to stop.
speechRecognizer.canceled = (s, e) => {
    console.log(`CANCELED: Reason=${e.reason}`);

    if (e.reason == sdk.CancellationReason.Error) {
        // NOTE(review): the leading `"` inside the next two template literals
        // is part of the string and is printed verbatim.
        console.log(`"CANCELED: ErrorCode=${e.errorCode}`);
        console.log(`"CANCELED: ErrorDetails=${e.errorDetails}`);
        console.log("CANCELED: Did you set the speech resource key and region values?");
    }

    speechRecognizer.stopContinuousRecognitionAsync();
};

// Asks the recognizer to stop when the session ends.
speechRecognizer.sessionStopped = (s, e) => {
    speechRecognizer.stopContinuousRecognitionAsync();
    // This value is returned to whoever invokes the handler; it does not
    // become the result of inner().
    return response
};

}

// Run inner() and print whatever it resolves with; a rejection is printed
// through the same console.log channel.
(async () => {
    try {
        const msg = await inner();
        console.log(msg);
    } catch (err) {
        console.log(err);
    }
})();

Solution

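In your code, the `return response` statement is inside the `sessionStopped` callback, so it returns from that callback rather than from `inner()`. Because `inner()` itself never returns anything, its promise resolves to `undefined` immediately. I changed your code so that `inner()` returns a `new Promise` that is resolved from the `sessionStopped` handler, and with that change I got the text output for the input speech.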

Code:

const fs = require('fs');
const sdk = require("microsoft-cognitiveservices-speech-sdk");

// Credentials are taken from the SPEECH_KEY / SPEECH_REGION environment
// variables so that the key does not have to be committed with the source.
// The placeholders are only a fallback for quick local experiments and must be
// replaced with real values if the environment variables are not set.
const speechKey = process.env.SPEECH_KEY || "<speech-key>";
const speechRegion = process.env.SPEECH_REGION || "<speech-region>";

const speechConfig = sdk.SpeechConfig.fromSubscription(speechKey, speechRegion);
// Language of the audio being transcribed.
speechConfig.speechRecognitionLanguage = "en-US";
// The whole WAV file is read synchronously and passed to the SDK as the audio input.
const audioConfig = sdk.AudioConfig.fromWavFileInput(fs.readFileSync("<audio-wav-file>"));
// Single recognizer instance used by inner() below.
const speechRecognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);

/**
 * Runs continuous recognition on the module-level `speechRecognizer` and
 * resolves with the complete transcript once the session has stopped.
 *
 * Only final (`recognized`) results contribute to the transcript. Interim
 * (`recognizing`) hypotheses are logged for progress but not accumulated,
 * because each of them is later superseded by a final result for the same
 * utterance; appending them would duplicate text in the output.
 *
 * @returns {Promise<string>} The final phrases joined with single spaces, or
 *   an empty string when nothing was recognized.
 *   The promise rejects with an `Error` when the service cancels recognition
 *   with `CancellationReason.Error`, or when recognition cannot be started.
 */
async function inner() {
  return new Promise((resolve, reject) => {
    const phrases = [];
    let stopRequested = false;

    // Several events may ask for a stop; forward only the first request.
    const stopOnce = () => {
      if (stopRequested) {
        return;
      }
      stopRequested = true;
      speechRecognizer.stopContinuousRecognitionAsync();
    };

    speechRecognizer.recognizing = (s, e) => {
      // Progress output only: interim text is not part of the transcript.
      if (e.result.reason === sdk.ResultReason.RecognizingSpeech) {
        console.log(`RECOGNIZING: Text=${e.result.text}`);
      }
    };

    speechRecognizer.recognized = (s, e) => {
      if (e.result.reason === sdk.ResultReason.RecognizedSpeech) {
        console.log(`RECOGNIZED: Text=${e.result.text}`);
        // Skip empty results so they do not produce doubled separators.
        if (e.result.text) {
          phrases.push(e.result.text);
        }
      } else if (e.result.reason === sdk.ResultReason.NoMatch) {
        console.log("NOMATCH: Speech could not be recognized.");
      }
    };

    speechRecognizer.canceled = (s, e) => {
      console.log(`CANCELED: Reason=${e.reason}`);

      // A cancellation that is not an error is left to the sessionStopped
      // handler, which resolves with whatever has been recognized so far.
      if (e.reason === sdk.CancellationReason.Error) {
        stopOnce();
        reject(
          new Error(
            `Recognition canceled: ErrorCode=${e.errorCode}, ErrorDetails=${e.errorDetails}`
          )
        );
      }
    };

    speechRecognizer.sessionStopped = (s, e) => {
      console.log("Session stopped.");
      stopOnce();
      // No effect if the promise was already rejected by the canceled handler.
      resolve(phrases.join(" "));
    };

    // Handlers are attached before starting so that no event can be missed.
    speechRecognizer.startContinuousRecognitionAsync(
      undefined,
      (startError) => reject(new Error(`Could not start recognition: ${startError}`))
    );
  });
}

// Run the recognition and report either the final transcript or the failure.
(async () => {
  try {
    const msg = await inner();
    console.log("Recognition completed. Final response:");
    console.log(msg);
  } catch (err) {
    console.log("Recognition error:");
    console.error(err);
  }
})();

package.json:

{
  "name": "speech-to-text-example",
  "version": "1.0.0",
  "description": "Speech to Text Example",
  "main": "index.js",
  "scripts": {
    "start": "node index.js"
  },
  "dependencies": {
    "microsoft-cognitiveservices-speech-sdk": "^1.20.0"
  }
}

Output:

The code ran successfully and printed the text recognized from the input speech, as shown below:

enter image description here