I am making a speech-to-text app as a C# Windows Forms application. It was working fine and running in Visual Studio. I'm using the code below to recognize speech with Microsoft Azure Cognitive Services. Once the speech has been recognized, can I get a confidence score in the Windows Forms app?
How can I solve this?
My code:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using System.IO;
using System.Threading;

namespace WindowsFormsApp2
{
    public partial class Form1 : Form
    {
        private bool isRecognizing = false;
        private SpeechRecognizer recognizer;

        public Form1()
        {
            InitializeComponent();
        }

        private void initRecognizer()
        {
            SpeechConfig config = SpeechConfig.FromSubscription("key", "region");
            if (Properties.Settings.Default.Punctuation)
            {
                config.SetServiceProperty("punctuation", "explicit", ServicePropertyChannel.UriQueryParameter);
            }
            //AudioConfig audioConfig = AudioConfig.FromMicrophoneInput();
            recognizer = new SpeechRecognizer(config/*, audioConfig*/);
            recognizer.Recognized += SpeechRecognizer_Recognized;
        }

        private void Form1_Load(object sender, EventArgs e)
        {
            initRecognizer();
        }

        private void SpeechRecognizer_Recognized(object sender, SpeechRecognitionEventArgs e)
        {
            if (e.Result.Reason == ResultReason.RecognizedSpeech)
            {
                if (e.Result.Text.ToLower().Equals("new line") || e.Result.Text.ToLower().Equals("newline"))
                {
                    SendKeys.SendWait(Environment.NewLine);
                }
                else
                {
                    SendKeys.SendWait(e.Result.Text);
                }
            }
        }

        private void Startstop()
        {
            if (isRecognizing)
            {
                recognizer.StopContinuousRecognitionAsync();
                picture_btn.Image = Properties.Resources.green;
                startToolStripMenuItem.Text = "Start";
                pictureBox1.Enabled = true;
                isRecognizing = false;
                timer1.Stop();
                timer1.Enabled = false;
            }
            else
            {
                picture_btn.Image = Properties.Resources.red;
                startToolStripMenuItem.Text = "Stop";
                pictureBox1.Enabled = false;
                recognizer.StartContinuousRecognitionAsync();
                isRecognizing = true;
                timer1.Interval = 600;
                timer1.Start();
                timer1.Enabled = true;
            }
        }

        private void pictureBox1_Click(object sender, EventArgs e)
        {
            Startstop();
        }

        private void Form1_Move(object sender, EventArgs e)
        {
            if (this.WindowState == FormWindowState.Normal)
            {
                ShowInTaskbar = true;
                notifyIcon1.Visible = true;
                this.Hide();
                notifyIcon1.ShowBalloonTip(1000);
            }
        }

        private void Form1_MouseDoubleClick(object sender, MouseEventArgs e)
        {
            ShowInTaskbar = true;
            notifyIcon1.Visible = false;
            WindowState = FormWindowState.Normal;
            this.WindowState = FormWindowState.Normal;
            notifyIcon1.Visible = false;
        }

        private void exitToolStripMenuItem_Click(object sender, EventArgs e)
        {
            Application.Exit();
        }

        void SettingFormClosed(object sender, FormClosedEventArgs e)
        {
            initRecognizer();
        }

        private void startToolStripMenuItem_Click(object sender, EventArgs e)
        {
            Startstop();
        }

        private void timer1_Tick(object sender, EventArgs e)
        {
            if (picture_btn.Tag.Equals("red"))
            {
                picture_btn.Image = Properties.Resources.grey;
                picture_btn.Tag = "grey";
            }
            else
            {
                picture_btn.Image = Properties.Resources.red;
                picture_btn.Tag = "red";
            }
        }

        private void pictureBox1_Click_1(object sender, EventArgs e)
        {
            var myForm = new Form2();
            myForm.FormClosed += SettingFormClosed;
            myForm.Show();
        }

        private void notifyIcon1_MouseDoubleClick(object sender, MouseEventArgs e)
        {
            this.Show();
        }

        private void Form1_FormClosing(object sender, FormClosingEventArgs e)
        {
            if (e.CloseReason == CloseReason.UserClosing)
            {
                notifyIcon1.Visible = true;
                this.Hide();
                e.Cancel = true;
            }
        }
    }
}
You should always read through the documentation of a service before deep diving into it. The documentation will cover important configuration aspects and should detail limitations.
Azure - Cognitive Services - Speech Service - Speech-to-text documentation
This is the landing page for all the standard resources for the speech-to-text service; read through these to understand how the service was designed to work and to find code examples for common scenarios.
- Get speech recognition results
  This is the guidance specific to the SpeechRecognizer events and Results.
- Recognized offset and duration
  This section explains how to get the detailed results.
You need to configure your SpeechRecognizer instance to return Detailed output:
SpeechConfig config = SpeechConfig.FromSubscription("key", "region");
// Detailed output will include the confidence factor
config.OutputFormat = OutputFormat.Detailed;
if (Properties.Settings.Default.Punctuation)
{
    config.SetServiceProperty("punctuation", "explicit", ServicePropertyChannel.UriQueryParameter);
}
Best() extension method
This is documented in the Recognized offset and duration section; the general idea is that we call the e.Result.Best() extension method to retrieve the details about the result.
Make sure you have the following using statement to make the Best() method available:
using Microsoft.CognitiveServices.Speech;
private void SpeechRecognizer_Recognized(object sender, SpeechRecognitionEventArgs e)
{
    if (e.Result.Reason == ResultReason.RecognizedSpeech)
    {
        if (e.Result.Text.ToLower().Equals("new line") || e.Result.Text.ToLower().Equals("newline"))
            SendKeys.SendWait(Environment.NewLine);
        else
            SendKeys.SendWait(e.Result.Text);

        // Get the detailed results
        var detailedResults = e.Result.Best();
        if (detailedResults != null && detailedResults.Any())
        {
            // The first item in detailedResults corresponds to the recognized text.
            // This is not necessarily the item with the highest confidence number.
            var bestResults = detailedResults.ToList()[0];
            Console.WriteLine(String.Format("\tConfidence: {0}\n\tText: {1}\n\tLexicalForm: {2}\n\tNormalizedForm: {3}\n\tMaskedNormalizedForm: {4}",
                bestResults.Confidence,
                bestResults.Text,
                bestResults.LexicalForm,
                bestResults.NormalizedForm,
                bestResults.MaskedNormalizedForm));
        }
    }
}
As an example, you can get this level of information back from the service; I have also enabled config.RequestWordLevelTimestamps() to get word-level timestamps:
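For reference, here is a minimal sketch of that configuration. The raw JSON dump via PropertyId.SpeechServiceResponse_JsonResult is my own addition to show where the word-level timings end up; it is not required for the confidence score itself.

SpeechConfig config = SpeechConfig.FromSubscription("key", "region");
config.OutputFormat = OutputFormat.Detailed;  // needed so Best() exposes Confidence
config.RequestWordLevelTimestamps();          // adds per-word Offset/Duration to the NBest entries

// Inside the Recognized handler, the full detailed payload returned by the
// service (including the word-level timings) is also available as raw JSON:
string json = e.Result.Properties.GetProperty(PropertyId.SpeechServiceResponse_JsonResult);
Console.WriteLine(json);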