I am trying to implement a pronunciation assessment system using Azure's JS SDK (see doc).
I get the following error in the console:
"Could not deserialize speech context. websocket error code: 1007"
Here is my implementation:
assessPronunciation(fileUrl) {
const speechConfig = window.SpeechSDK.SpeechConfig.fromSubscription("xxx", "westeurope");
speechConfig.speechRecognitionLanguage = "en-GB";
// Fetch the WAV file and create an AudioConfig
fetch(fileUrl)
.then(response => response.blob())
.then(blob => {
// Convert the blob to a File object
const file = new File([blob], "audio.wav", { type: "audio/wav" });
// Create an AudioConfig using the File object
const audioConfig = window.SpeechSDK.AudioConfig.fromWavFileInput(file);
var pronunciationAssessmentConfig = new window.SpeechSDK.PronunciationAssessmentConfig({
referenceText: "Hello this is a test",
gradingSystem: "HundredMark",
granularity: "Phoneme"
});
var speechRecognizer = new window.SpeechSDK.SpeechRecognizer(speechConfig, audioConfig);
pronunciationAssessmentConfig.applyTo(speechRecognizer);
speechRecognizer.sessionStarted = (s, e) => {
console.log(`SESSION ID: ${e.sessionId}`);
};
pronunciationAssessmentConfig.applyTo(speechRecognizer);
speechRecognizer.recognizeOnceAsync(
function(speechRecognitionResult) {
if (speechRecognitionResult.reason === window.SpeechSDK.ResultReason.RecognizedSpeech) {
// The pronunciation assessment result as a Speech SDK object
var pronunciationAssessmentResult = SpeechSDK.PronunciationAssessmentResult.fromResult(speechRecognitionResult);
console.log("pronunciationAssessmentResult", pronunciationAssessmentResult);
// The pronunciation assessment result as a JSON string
var pronunciationAssessmentResultJson = speechRecognitionResult.properties.getProperty(SpeechSDK.PropertyId.SpeechServiceResponse_JsonResult);
console.log("pronunciationAssessmentResultJson", pronunciationAssessmentResultJson);
} else {
console.error("Speech not recognized. Reason:", speechRecognitionResult);
}
},
function(error) {
console.error("Error during recognition:", error);
if (error instanceof window.SpeechSDK.SpeechRecognitionCanceledEventArgs) {
console.error("Recognition canceled. Reason:", error.reason);
console.error("Error details:", error.errorDetails);
}
}
);
})
.catch(error => {
console.error("Error fetching WAV file:", error);
});
}
I checked the recording (fileUrl), and it is a valid WAV file, as expected.
Recording configuration:
startRecording(event) {
event.preventDefault();
if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
this.recorder = new RecordRTC(stream, {
type: 'audio',
mimeType: 'audio/wav',
recorderType: RecordRTC.StereoAudioRecorder,
desiredSampRate: 16000,
numberOfAudioChannels: 1,
audioBitsPerSecond: 128000
});
this.startRecorder(event);
}).catch((error) => {
console.log("The following error occurred: " + error);
alert("Please grant permission for microphone access");
});
} else {
alert("Your browser does not support audio recording, please use a different browser or update your current browser");
}
}
Any idea what the issue is? Thanks.
SOLUTION
// Fragment: builds the audio input and the pronunciation assessment settings.
// `pushStream` is not defined here; it must already exist in scope.
var audioConfig = window.SpeechSDK.AudioConfig.fromStreamInput(pushStream);
// The constructor is called with three positional arguments (reference text,
// grading system, granularity) rather than a single options object; the last
// two are taken from the SDK's enums instead of being written as strings.
var pronunciationAssessmentConfig = new window.SpeechSDK.PronunciationAssessmentConfig(
"My voice is my passport, verify me.",
window.SpeechSDK.PronunciationAssessmentGradingSystem.HundredMark,
window.SpeechSDK.PronunciationAssessmentGranularity.Phoneme
);
Try this code block out:
// Node.js sample: runs a one-shot pronunciation assessment on a local WAV file
// with the Azure Speech SDK and logs the result (or the failure) to the console.
const sdk = require("microsoft-cognitiveservices-speech-sdk");
const fs = require("fs");

// AudioConfig.fromWavFileInput with a file name is reported as unsupported in
// Node, so the file is streamed into a push stream instead (workaround).
const filename = "myVoiceIsMyPassportVerifyMe01.wav"; // 16000 Hz, Mono
const pushStream = sdk.AudioInputStream.createPushStream();
fs.createReadStream(filename)
  .on("data", (chunk) => {
    pushStream.write(chunk.slice());
  })
  .on("error", (err) => {
    // Without this handler a missing or unreadable file crashes the process
    // with an unhandled 'error' event; close the stream so recognition ends.
    console.error("Error reading WAV file:", err);
    pushStream.close();
  })
  .on("end", () => {
    pushStream.close();
  });

const audioConfig = sdk.AudioConfig.fromStreamInput(pushStream);
const conf = sdk.SpeechConfig.fromSubscription(
  "xxxx",
  "eastus"
);
conf.speechRecognitionLanguage = "en-GB";

const speechRecognizer = new sdk.SpeechRecognizer(conf, audioConfig);

// JavaScript has no named arguments: `Name = value` inside a call is an
// assignment that leaks an implicit global. Pass the values positionally and
// use the SDK enums for grading system and granularity.
const pronunciationAssessmentConfig = new sdk.PronunciationAssessmentConfig(
  "My voice is my passport, verify me.",
  sdk.PronunciationAssessmentGradingSystem.HundredMark,
  sdk.PronunciationAssessmentGranularity.Phoneme
);
pronunciationAssessmentConfig.applyTo(speechRecognizer);

speechRecognizer.sessionStarted = (s, e) => {
  console.log(`SESSION ID:${e.sessionId}`);
};

speechRecognizer.recognizeOnceAsync(
  (speechRecognitionResult) => {
    if (speechRecognitionResult.reason === sdk.ResultReason.RecognizedSpeech) {
      // The pronunciation assessment result as a Speech SDK object
      const pronunciationAssessmentResult = sdk.PronunciationAssessmentResult.fromResult(speechRecognitionResult);
      console.log("pronunciationAssessmentResult", pronunciationAssessmentResult);
      // The pronunciation assessment result as a JSON string
      const pronunciationAssessmentResultJson = speechRecognitionResult.properties.getProperty(
        sdk.PropertyId.SpeechServiceResponse_JsonResult
      );
      console.log("pronunciationAssessmentResultJson", pronunciationAssessmentResultJson);
    } else {
      console.error("Speech not recognized. Reason:", speechRecognitionResult);
      // A canceled recognition arrives here as a result, so the cancellation
      // reason and error text are read from the result.
      if (speechRecognitionResult.reason === sdk.ResultReason.Canceled) {
        const cancellation = sdk.CancellationDetails.fromResult(speechRecognitionResult);
        console.error("Recognition canceled. Reason:", cancellation.reason);
        console.error("Error details:", cancellation.errorDetails);
      }
    }
    // Release the recognizer and its connection so the process can exit.
    speechRecognizer.close();
  },
  (error) => {
    console.error("Error during recognition:", error);
    speechRecognizer.close();
  }
);
A few catches:
AudioConfig.fromWavFileInput might not be supported in Node. I used the workaround mentioned in this issue, and it worked: https://github.com/Azure-Samples/cognitive-services-speech-sdk/issues/813
The PronunciationAssessmentConfig constructor arguments need to be passed as individual parameter values, not as a JSON object.
I used a sample WAV file from here; you can change it to your own: https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/master/sampledata/audiofiles/myVoiceIsMyPassportVerifyMe01.wav