java azure registration speech-to-text http-status-code-400

Azure STT is giving error HttpResponseProxy{HTTP/1.1 400 Bad Request

Azure STT is returning the following error:

HttpResponseProxy{HTTP/1.1 400 Bad Request [Content-Type: text/plain; charset=utf-8, Date: Fri, 02 Feb 2024 14:33:46 GMT, Server: Kestrel, Transfer-Encoding: chunked, Strict-Transport-Security: max-age=31536000; includeSubDomains] ResponseEntityProxy{[Content-Type: text/plain; charset=utf-8,Chunked: true]}}

This is the code that sends the request:

    HttpClient httpclient = HttpClients.createDefault();
    URIBuilder builder = new URIBuilder(env.getProperty("voice.text.api"));
    BASE64DecodedMultipartFile audio = new BASE64DecodedMultipartFile(Base64.getDecoder().decode(audioReq));

    // Add the recognition language as the "language" query parameter of the request URI.
    builder.setParameter("language", sourceLang);
    URI uri = builder.build();
    HttpPost request = new HttpPost(uri);
    // NOTE(review): the Content-Type header is set from AUDIO_WAV_TYPE here, while the entity
    // below is created with ContentType.APPLICATION_OCTET_STREAM — confirm which one is intended.
    request.setHeader(CONTENT_TYPE, AUDIO_WAV_TYPE);
    // The subscription key is read from the "Voice.SubKey1" property.
    request.setHeader("Ocp-Apim-Subscription-Key", env.getProperty("Voice.SubKey1"));
    //request.setHeader("Accept","application/json");
    // The request body is the file returned by convert(audio); convert() is not shown here.
    request.setEntity(new FileEntity(convert(audio), ContentType.APPLICATION_OCTET_STREAM));
    HttpResponse response = null;
    try {
        response = httpclient.execute(request);
    } catch (IOException e) {
        // Rethrow I/O failures as unchecked, keeping the original exception as the cause.
        throw new RuntimeException(e);
    }

Solution

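The 400 Bad Request response (HttpResponseProxy is only Apache HttpClient's response wrapper, not a network proxy) means the Azure Speech-to-Text service rejected the request, typically because of an incorrect configuration or invalid data being sent to it.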

To fix the error, ensure that the correct endpoint URI for the Azure Speech-to-Text service is used, along with a valid speech key and properly formatted audio data in a .wav file.

Below is the correct endpoint URI:

https://<speech_region>.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1?language=en-US

The following code uses the correct endpoint URI and a speech key, enabling it to convert speech to text without any errors. Replace <speech_region>, <speech_key>, and the .wav file path with your own values.

Code:

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Scanner;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.FileEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;

/**
 * Minimal example that posts a WAV file to the Azure Speech-to-Text REST endpoint
 * and prints the response body to standard output.
 *
 * <p>Replace {@code <speech_region>}, {@code <speech_key>} and the audio file path
 * with real values before running.
 */
public class SpeechToTextExample {

    /** Recognition endpoint; the {@code language} query parameter is added at runtime. */
    private static final String ENDPOINT =
            "https://<speech_region>.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1";

    /** Media type used for both the request header and the request entity. */
    private static final String AUDIO_WAV = "audio/wav";

    /**
     * Sends the audio file to the speech service and prints the response.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String filePath = "path/to/.wav file";
        String sourceLang = "en-US";

        // Build the URI once; "language" is set here only, not duplicated in the endpoint literal.
        URI uri;
        try {
            uri = new URIBuilder(ENDPOINT).setParameter("language", sourceLang).build();
        } catch (URISyntaxException e) {
            System.err.println("Invalid endpoint URI: " + ENDPOINT);
            e.printStackTrace();
            return;
        }

        // Fail early with a clear message instead of a generic I/O error during the upload.
        File audioFile = new File(filePath);
        if (!audioFile.isFile()) {
            System.err.println("Audio file not found: " + audioFile.getAbsolutePath());
            return;
        }

        HttpPost request = new HttpPost(uri);
        request.setHeader("Content-Type", AUDIO_WAV);
        request.setHeader("Ocp-Apim-Subscription-Key", "<speech_key>");
        request.setEntity(new FileEntity(audioFile, ContentType.create(AUDIO_WAV)));

        // try-with-resources closes the response and the client even when reading fails.
        try (CloseableHttpClient httpclient = HttpClients.createDefault();
             CloseableHttpResponse response = httpclient.execute(request)) {

            // Surface non-success statuses (e.g. 400 Bad Request); the body printed below
            // still carries whatever detail the service returned.
            if (response.getStatusLine().getStatusCode() != 200) {
                System.err.println("Speech service returned: " + response.getStatusLine());
            }

            HttpEntity entity = response.getEntity();
            if (entity != null) {
                // Decode explicitly as UTF-8 rather than relying on the platform default charset.
                try (Scanner scanner =
                             new Scanner(entity.getContent(), StandardCharsets.UTF_8.name())) {
                    while (scanner.hasNextLine()) {
                        System.out.println(scanner.nextLine());
                    }
                }
            }
        } catch (IOException e) {
            System.err.println("Speech-to-text request failed");
            e.printStackTrace();
        }
    }
}

Output :

It ran successfully, and the speech was converted to text as shown below.

{"RecognitionStatus":"Success","Offset":1100000,"Duration":72600000,"DisplayText":"Hello, this is a test of the speech synthesis service."}

enter image description here