Search code examples
c#jsonrestunity-game-enginegoogle-speech-api

Bad Request Error with Google Speech Recognition REST API with C#


I would like to use the Google Speech Recognition REST API from C#. I want to use the REST API instead of a client library because I am targeting Unity3D, which doesn't currently support the client library.

I use HttpClient to connect and Newtonsoft Json to serialize the json (I'm working right now with windows forms and will move to Unity when it works in Winforms).

I always get a Bad Request (400) response from Google, and it doesn't give any more detail. I have also noticed that I get the same result if I change my API key to an invalid value.

Here is my code:

The classes:

/// <summary>
/// Request payload for a speech recognition call. The object itself is
/// serialized to JSON (its <c>config</c> and <c>audio</c> properties become the
/// top-level keys) and POSTed by <see cref="sendToApi"/>.
/// </summary>
class Speech
{
    /// <summary>Recognition settings; serialized as the "config" JSON object.</summary>
    public RecognitionConfig config { get; set; }

    /// <summary>Audio payload; serialized as the "audio" JSON object.</summary>
    public RecognitionAudio audio { get; set; }

    /// <summary>
    /// Serializes this object to JSON and POSTs it to <paramref name="url"/>,
    /// resolved against <paramref name="baseUri"/>.
    /// </summary>
    /// <param name="baseUri">Absolute base address of the service.</param>
    /// <param name="url">Relative request URL, including any query string (e.g. the API key).</param>
    /// <param name="apiResponse">
    /// Receives the response body on success; "ERROR " followed by the HTTP status
    /// and the response body on a non-success status; or the exception message
    /// when the request could not be performed.
    /// </param>
    /// <returns>
    /// <c>true</c> when an HTTP response was received (even one with an error
    /// status); <c>false</c> when an exception prevented the exchange.
    /// </returns>
    public bool sendToApi(string baseUri, string url, ref string apiResponse)
    {
        try
        {
            // Dispose the client, content and response so sockets and buffers
            // are released as soon as the call completes.
            using (HttpClient client = new HttpClient())
            {
                client.BaseAddress = new Uri(baseUri);
                client.DefaultRequestHeaders.Accept.Clear();
                client.DefaultRequestHeaders.Accept.Add(new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue("application/json"));

                string speechJson = JsonConvert.SerializeObject(this);

                // The method signature is synchronous (ref parameter), so the
                // asynchronous HttpClient calls are blocked on with .Result.
                using (var contenido = new StringContent(speechJson, Encoding.UTF8, "application/json"))
                using (HttpResponseMessage response = client.PostAsync(url, contenido).Result)
                {
                    string responseBody = response.Content == null
                        ? string.Empty
                        : response.Content.ReadAsStringAsync().Result;

                    if (response.IsSuccessStatusCode)
                    {
                        apiResponse = responseBody;
                    }
                    else
                    {
                        // Report the response BODY, not a dump of the response
                        // object: the body is where the server puts the reason
                        // for the failure.
                        apiResponse = "ERROR " + (int)response.StatusCode + " " + response.ReasonPhrase + ": " + responseBody;
                    }
                }
            }

            return true;
        }
        catch (Exception e)
        {
            // Failures raised through .Result arrive wrapped in an
            // AggregateException whose own message is generic; report the
            // innermost cause instead.
            apiResponse = e.GetBaseException().Message;
            return false;
        }
    }
}

/// <summary>
/// Recognition settings sent as the "config" object of the request JSON.
/// Member names are lower camel case because they are serialized as-is and
/// become the JSON keys.
/// NOTE(review): presumably mirrors the RecognitionConfig resource of the
/// Google Speech REST API — confirm field names against the API reference.
/// </summary>
class RecognitionConfig
{
    /// <summary>Audio encoding name; the sample caller sets "FLAC".</summary>
    public string encoding { get; set; }
    /// <summary>Sample rate of the audio in hertz; the sample caller sets 44100.</summary>
    public int sampleRateHertz { get; set; }
    /// <summary>Language tag of the spoken audio; the sample caller sets "en-US".</summary>
    public string languageCode { get; set; }
    // The members below are disabled, so they are not part of the serialized request.
 //   public int maxAlternatives { get; set; }
 //   public bool profanityFilter { get; set; }
 //  public List<SpeechContext> speechContexts { get; set; }
 //   public bool enableWordTimeOffsets { get; set; }

}

/// <summary>
/// Holds a list of phrases. In the visible code it is only referenced by the
/// commented-out <c>speechContexts</c> member of <c>RecognitionConfig</c>, so it
/// is currently never serialized.
/// NOTE(review): presumably these are hint phrases for the recognizer — confirm
/// against the API reference.
/// </summary>
class SpeechContext
{
    /// <summary>The phrases carried by this context.</summary>
    public List<string> phrases { get; set; }

}
/// <summary>
/// Audio payload sent as the "audio" object of the request JSON.
/// </summary>
class RecognitionAudio
{
    /// <summary>Base64-encoded bytes of the audio file; serialized as "content".</summary>
    public string content { get; set; }
   // public string uri { get; set; }

    /// <summary>
    /// Reads the file at <paramref name="path"/> and stores its bytes,
    /// Base64-encoded, in <see cref="content"/>.
    /// </summary>
    /// <param name="path">Path of the audio file to load.</param>
    /// <returns>
    /// <c>true</c> when the file was read and encoded; <c>false</c> when any
    /// exception occurred (missing file, access denied, ...), in which case
    /// <see cref="content"/> is left unchanged.
    /// </returns>
    public bool setContentBase64FromAudio(string path)
    {
        try
        {
            // File.ReadAllBytes returns exactly the file's bytes and closes the
            // file afterwards. Encoding MemoryStream.GetBuffer() instead would
            // include the unused tail of the backing array (it is at least 256
            // bytes long), corrupting small files with trailing zero bytes.
            byte[] audioBytes = File.ReadAllBytes(path);
            this.content = System.Convert.ToBase64String(audioBytes);

            return true;
        }
        catch (Exception)
        {
            // Failure is reported through the return value by design.
            return false;
        }

    }
}

The call:

   // Click handler: loads a FLAC file, sends it for recognition and shows the
   // API response (or an error message) in textBox1.
   private void button1_Click(object sender, EventArgs e)
    {
        const string audioPath = "C:\\Users\\Manena\\Downloads\\good-morning-google.flac";

        // Load the audio first and stop if that fails; otherwise a request
        // with no audio content would be sent and rejected by the server.
        RecognitionAudio audio = new RecognitionAudio();
        if (!audio.setContentBase64FromAudio(audioPath))
        {
            textBox1.Text = "ERROR could not read audio file: " + audioPath;
            return;
        }

        // NOTE(review): these settings must describe the actual file; per the
        // resolution of this question the audio also has to be single channel.
        Speech speech = new Speech();
        speech.config = new RecognitionConfig();
        speech.config.encoding = "FLAC";
        speech.config.sampleRateHertz = 44100;
        speech.config.languageCode = "en-US";
        speech.audio = audio;

        // sendToApi fills 'response' with either the result or an error
        // description, so the text is shown regardless of the return value.
        string response = "";
        speech.sendToApi("https://speech.googleapis.com/", "v1/speech:recognize?key=<mykey>", ref response);
        textBox1.Text = response;

    }
}

Edit: Here is the Json I send:

{  
       "config":{  
          "encoding":"FLAC",
          "sampleRateHertz":44100,
          "languageCode":"en-US"
       },
       "audio":{  
          "content":"base64 audio"
       }
    }

And what I receive:

    {
      "Version": {
        "Major": 1,
        "Minor": 1,
        "Build": -1,
        "Revision": -1,
        "MajorRevision": -1,
        "MinorRevision": -1
      },
      "Content": {
        "Headers": [
          {
            "Key": "Content-Type",
            "Value": [
              "application/json; charset=UTF-8"
            ]
          }
        ]
      },
      "StatusCode": 400,
      "ReasonPhrase": "Bad Request",
      "Headers": [
        {
          "Key": "Vary",
          "Value": [
            "X-Origin",
            "Referer",
            "Origin",
            "Accept-Encoding"
          ]
        },
        {
          "Key": "X-XSS-Protection",
          "Value": [
            "1; mode=block"
          ]
        },
        {
          "Key": "X-Frame-Options",
          "Value": [
            "SAMEORIGIN"
          ]
        },
        {
          "Key": "X-Content-Type-Options",
          "Value": [
            "nosniff"
          ]
        },
        {
          "Key": "Alt-Svc",
          "Value": [
            "hq=\":443\"; ma=2592000; quic=51303431; quic=51303339; quic=51303338; quic=51303337; quic=51303335,quic=\":443\"; ma=2592000; v=\"41,39,38,37,35\""
          ]
        },
        {
          "Key": "Transfer-Encoding",
          "Value": [
            "chunked"
          ]
        },
        {
          "Key": "Accept-Ranges",
          "Value": [
            "none"
          ]
        },
        {
          "Key": "Cache-Control",
          "Value": [
            "private"
          ]
        },
        {
          "Key": "Date",
          "Value": [
            "Sat, 30 Dec 2017 09:06:19 GMT"
          ]
        },
        {
          "Key": "Server",
          "Value": [
            "ESF"
          ]
        }
      ],
      "RequestMessage": {
        "Version": {
          "Major": 1,
          "Minor": 1,
          "Build": -1,
          "Revision": -1,
          "MajorRevision": -1,
          "MinorRevision": -1
        },
        "Content": {
          "Headers": [
            {
              "Key": "Content-Type",
              "Value": [
                "application/json; charset=utf-8"
              ]
            },
            {
              "Key": "Content-Length",
              "Value": [
                "106"
              ]
            }
          ]
        },
        "Method": {
          "Method": "POST"
        },
        "RequestUri": "https://speech.googleapis.com/v1/speech:recognize?key=mykey",
        "Headers": [
          {
            "Key": "Accept",
            "Value": [
              "application/json"
            ]
          }
        ],
        "Properties": {}
      },
      "IsSuccessStatusCode": false
    }

I know my code is maybe not the most elegant, but right now I'm only interested in getting a good response from the Google API. Any clue?


Solution

  • I have solved the issue.

    The problem was that I was using a 2-channel (stereo) audio file, and the Google Speech API currently accepts only mono audio.

    So the code in the question works for 1-channel (mono) audio files. It could be useful for someone.

    Thanks