I am a junior Java developer and not too great with C#, so I would like some help in solving a problem. What I am trying to do is use existing code shared on GitHub that takes a microphone input, uploads it to the Google Speech-to-Text API, and then returns a JSON response with the transcript. This part is all working fine.
What isn't working is storing the value of the "transcript" field returned in the JSON into a string variable.
I have tried to do this multiple ways, I have looked for ways to convert the JSON response to an object using SimpleJSON and Newtonsoft, and I have also tried using a regular expression to read the JSON line that starts with "transcript." I saw an example of what I wanted done (linked below) but I was getting compiler errors. If someone could either help me with this, or point me to a better result I would appreciate it.
Here is the code I am using.
// Acquired from https://github.com/steelejay/LowkeySpeech
using UnityEngine;
using System;
using System.IO;
using System.Collections;
using System.Collections.Generic;
using System.Net;
using System.Web;
// Records a short clip from the default microphone, uploads it to the Google
// Cloud Speech-to-Text REST endpoint and exposes the recognized transcript
// both through the Transcript property and (optionally) the TextBox element.
[RequireComponent(typeof(AudioSource))]
public class GoogleVoiceSpeech : MonoBehaviour {
    // Optional on-screen text element that receives the transcript.
    public GUIText TextBox;

    // Not referenced anywhere in this class; kept from the upstream project.
    struct ClipData {
        public int samples;
    }

    // Not referenced anywhere in this class; kept from the upstream project.
    const int HEADER_SIZE = 44;

    // REST endpoint for synchronous recognition; the key is appended per request.
    const string RecognizeEndpoint = "https://speech.googleapis.com/v1/speech:recognize";

    // Sentinel returned by HttpUploadFile when the request fails.
    const string FailedResponse = "empty";

    // Length in seconds of the AudioClip handed to Microphone.Start.
    const int RecordSeconds = 7;

    private int minFreq;
    private int maxFreq;
    private bool micConnected = false;

    // A handle to the attached AudioSource.
    private AudioSource goAudioSource;

    // Ensures the certificate callback is registered once instead of once per upload.
    private static bool certificateCallbackInstalled;

    // Google Cloud API key; set it in the Inspector rather than in source code.
    public string apiKey;

    // The most recently recognized transcript, or null if nothing has been recognized yet.
    public string Transcript { get; private set; }

    // Detects the microphone and caches its recording capabilities.
    void Start() {
        if (Microphone.devices.Length <= 0) {
            Debug.LogWarning("Microphone not connected!");
            return;
        }

        micConnected = true;

        // Get the default microphone recording capabilities.
        Microphone.GetDeviceCaps(null, out minFreq, out maxFreq);

        // Both values being zero means the microphone supports any frequency,
        // so 44100 Hz is used as the recording sampling rate.
        if (minFreq == 0 && maxFreq == 0) {
            maxFreq = 44100;
        }

        goAudioSource = this.GetComponent<AudioSource>();
    }

    // Draws the record / stop button and starts the transcription when the
    // recording is stopped.
    void OnGUI() {
        if (!micConnected) {
            // Print a red "Microphone not connected!" message at the center of the screen.
            GUI.contentColor = Color.red;
            GUI.Label(new Rect(Screen.width / 2 - 100, Screen.height / 2 - 25, 200, 50), "Microphone not connected!");
            return;
        }

        Rect buttonRect = new Rect(Screen.width / 2 - 100, Screen.height / 2 - 25, 200, 50);

        if (!Microphone.IsRecording(null)) {
            if (GUI.Button(buttonRect, "Record")) {
                // Store the audio captured from the microphone in the AudioSource's clip.
                goAudioSource.clip = Microphone.Start(null, true, RecordSeconds, maxFreq);
            }
            return;
        }

        if (GUI.Button(buttonRect, "Stop and Play!")) {
            Microphone.End(null);
            Debug.Log("Recording Stopped");
            TranscribeRecording();
        }

        GUI.Label(new Rect(Screen.width / 2 - 100, Screen.height / 2 + 25, 200, 50), "Recording in progress...");
    }

    // Saves the recorded clip to a temporary WAV file, uploads it and stores
    // the recognized transcript. The temporary file is removed even on failure.
    // NOTE(review): the upload is a blocking call made from OnGUI, so the UI
    // freezes until the service answers.
    private void TranscribeRecording() {
        if (string.IsNullOrEmpty(apiKey) || apiKey.Trim().Length == 0) {
            Debug.LogWarning("GoogleVoiceSpeech: apiKey is not set; skipping upload.");
            return;
        }

        string filename = "testing" + UnityEngine.Random.Range(0.0f, 10.0f) + ".wav";
        string filePath = Path.Combine(Application.persistentDataPath, Path.Combine("testing/", filename));
        Debug.Log("Created filepath string: " + filePath);

        // Make sure the sub directory exists before writing into it.
        Directory.CreateDirectory(Path.GetDirectoryName(filePath));

        try {
            SavWav.Save(filePath, goAudioSource.clip); // Save a temporary WAV file.
            Debug.Log("Saving @ " + filePath);

            string apiURL = RecognizeEndpoint + "?key=" + Uri.EscapeDataString(apiKey.Trim());
            string response = HttpUploadFile(apiURL, filePath, "file", "audio/wav; rate=44100");
            Debug.Log("Response String: " + response);

            string transcript = ExtractTranscript(response);
            if (transcript == null) {
                Debug.LogWarning("GoogleVoiceSpeech: the response did not contain a transcript.");
                return;
            }

            Transcript = transcript;
            Debug.Log("transcript string: " + transcript);
            if (TextBox != null) {
                TextBox.text = transcript;
            }
        } finally {
            File.Delete(filePath); // Delete the temporary WAV file.
        }
    }

    // Returns results[0].alternatives[0].transcript from a recognize response,
    // or null when the text is not such a response (failed request, error
    // payload, or a reply without any results).
    private static string ExtractTranscript(string json) {
        if (string.IsNullOrEmpty(json) || json == FailedResponse) {
            return null;
        }

        SimpleJSON.JSONNode node;
        try {
            node = SimpleJSON.JSON.Parse(json);
        } catch (Exception ex) {
            Debug.LogWarning("GoogleVoiceSpeech: could not parse response: " + ex.Message);
            return null;
        }

        // Walk down one level at a time so a missing level yields null
        // instead of an exception.
        if (node == null) {
            return null;
        }
        node = node["results"];
        if (node == null) {
            return null;
        }
        node = node[0];
        if (node == null) {
            return null;
        }
        node = node["alternatives"];
        if (node == null) {
            return null;
        }
        node = node[0];
        if (node == null) {
            return null;
        }
        node = node["transcript"];
        if (node == null) {
            return null;
        }

        // Value is the raw string; ToString() would return it as quoted JSON text.
        string transcript = node.Value;
        return string.IsNullOrEmpty(transcript) ? null : transcript;
    }

    // Registers the accept-all certificate callback a single time.
    // NOTE(review): this disables TLS certificate validation for the whole
    // process. It is kept because the original code relied on it; confirm
    // whether the target Unity/Mono runtime still needs it and remove it if not.
    private static void InstallCertificateCallbackOnce() {
        if (certificateCallbackInstalled) {
            return;
        }
        System.Net.ServicePointManager.ServerCertificateValidationCallback += (o, certificate, chain, errors) => true;
        certificateCallbackInstalled = true;
    }

    // Posts the file at 'file' (base64-encoded inside a JSON recognize request)
    // to 'url' and returns the raw response body. Returns "empty" when the
    // request fails with a WebException; the failure details are logged.
    // 'paramName' and 'contentType' are accepted for compatibility but are not
    // used by this implementation.
    public string HttpUploadFile(string url, string file, string paramName, string contentType) {
        InstallCertificateCallbackOnce();

        // The URL is deliberately not logged because it carries the API key.
        Debug.Log("Uploading " + file);

        string file64 = Convert.ToBase64String(File.ReadAllBytes(file), Base64FormattingOptions.None);
        // Log only the size; the payload itself can be hundreds of kilobytes.
        Debug.Log("Encoded audio length (base64 chars): " + file64.Length);

        try {
            var request = (HttpWebRequest)WebRequest.Create(url);
            request.ContentType = "application/json";
            request.Method = "POST";

            using (var writer = new StreamWriter(request.GetRequestStream())) {
                writer.Write("{ \"config\": { \"languageCode\" : \"en-US\" }, \"audio\" : { \"content\" : \"" + file64 + "\"}}");
            }

            using (var response = (HttpWebResponse)request.GetResponse())
            using (var reader = new StreamReader(response.GetResponseStream())) {
                string result = reader.ReadToEnd();
                Debug.Log("Response:" + result);
                return result;
            }
        } catch (WebException ex) {
            // Response is null when no HTTP reply was received at all.
            if (ex.Response == null) {
                Debug.LogError("GoogleVoiceSpeech: request failed: " + ex.Message);
            } else {
                using (var reader = new StreamReader(ex.Response.GetResponseStream())) {
                    Debug.LogError(reader.ReadToEnd());
                }
            }
        }

        return FailedResponse;
    }
}
I am getting back the correct console message with the JSON result. I just need to get the "transcript" value into a String. Here is a sample of the response from the Google API.
Response:{
"results": [
{
"alternatives": [
{
"transcript": "this is a test",
"confidence": 0.98762906
}
]
}
]
}
The actual creator of the SimpleJSON framework responded to someone with a similar issue a few years back, but when I try to implement a similar fix, I get errors because my response is singular.
https://answers.unity.com/questions/1443367/get-json-array-object-string-value.html
I would appreciate anyone's help or guidance with this. I have been looking online for a few days trying to get this working, and asked co-workers (who couldn't help me due to their inexperience with C#) before posting here.
Newtonsoft is a better choice and I'll walk you through how to use it.
First, create the C# classes you'll need to hold the result of the parsing. In your example, they will look like this:
// Top-level object of the sample response: holds the "results" array.
public class SpeechResponse
{
// One entry per element of the "results" array in the JSON.
public Result[] results { get; set; }
}
// A single element of the "results" array in the sample response.
public class Result
{
// One entry per element of the "alternatives" array in the JSON.
public Alternative[] alternatives { get; set; }
}
// A single element of the "alternatives" array in the sample response.
public class Alternative
{
// The recognized text, e.g. "this is a test" in the sample.
public string transcript { get; set; }
// The numeric "confidence" value, e.g. 0.98762906 in the sample.
public float confidence { get; set; }
}
You already know how to obtain the JSON data, so let's assume it's been saved in a string variable named json. You can turn the string into the C# classes with this command:
// JsonConvert is Json.NET's static entry point (namespace Newtonsoft.Json).
var response = JsonConvert.DeserializeObject<SpeechResponse>( json );
And the specific piece of data that you're looking for can be obtained like this:
// Indexing with [0] throws if either array is null or empty, so check both
// before this line if the response might not contain a result.
string phrase = response.results[0].alternatives[0].transcript;
Bonus Tip
If you're using Visual Studio, you can easily create the class definitions by copying the JSON example data and selecting "Edit -> Paste Special -> Paste JSON as Classes" (Read More).