I recently tried my hand at a simple search engine that uses TTS (text-to-speech). However, when I tried integrating speech-to-text for the search field in PySimpleGUI, I hit a roadblock.
I'm able to get PySimpleGUI to recognize the words that I say and run a search. However, the search mechanism doesn't use the full speech-to-text result as the input value; it only searches for the first letter of what I said.
For example, I said "What is the weather like today" and it returned the definition of the letter "W".
These are the packages that I used:
# Third-party packages used by the assistant.
import wolframalpha
import wikipedia
import speech_recognition as sr
# Speech recognizer and microphone input used by the 'Speak' handler below.
r = sr.Recognizer()
m = sr.Microphone()
import PySimpleGUI as sg
import pyttsx3
# Window layout: a greeting, a prompt with a text input, and the
# Speak / Ok / Cancel buttons. The input has no explicit key; the event loop
# reads it as values[0].
layout = [ [sg.Text('Welcome Back Sir')],
[sg.Text('How can I be of assistance'), sg.InputText()],
[sg.ReadButton('Speak'), sg.Button('Ok'), sg.Button('Cancel')]]
window = sg.Window('Pybot', layout)
# Text-to-speech engine used to read results aloud.
engine = pyttsx3.init()
# Event loop of the assistant (indentation was lost in this listing).
while True:
event, values = window.read()
# Window closed or 'Cancel' pressed: leave the loop.
if event in (None, 'Cancel'):
break
if event == 'Speak':
with m as source:
r.adjust_for_ambient_noise(source)
audio = r.listen(source)
# NOTE: this rebinds `values` (set by window.read() above) to the recognized
# text, so after a 'Speak' event every values[0] below indexes that string.
values = r.recognize_google(audio, language='en-US')
print(values)
try:
wiki_res = wikipedia.summary(values[0], sentences=2)
# NOTE(review): `client` is not defined anywhere in this listing --
# presumably a wolframalpha client created elsewhere; confirm.
wolfram_res = next(client.query(values[0]).results).text
engine.say(wolfram_res)
sg.PopupNonBlocking("Wolfram Result: "+wolfram_res,"Wikipedia Result: "+wiki_res)
# Wikipedia lookup failed: fall back to the Wolfram result only.
except wikipedia.exceptions.DisambiguationError:
wolfram_res = next(client.query(values[0]).results).text
engine.say(wolfram_res)
sg.PopupNonBlocking(wolfram_res)
except wikipedia.exceptions.PageError:
wolfram_res = next(client.query(values[0]).results).text
engine.say(wolfram_res)
sg.PopupNonBlocking(wolfram_res)
# Any other failure: fall back to the Wikipedia summary only.
except:
wiki_res = wikipedia.summary(values[0], sentences=2)
engine.say(wiki_res)
sg.PopupNonBlocking(wiki_res)
# Speak whatever was queued with engine.say() above.
engine.runAndWait()
print (values[0])
window.close()
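You use the same variable, `values`, for both the result of `window.read()` and the recognized text, so `values[0]` is just the first character of the recognized string (`W` in your example).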
while True:
# `values` first holds what window.read() returned ...
event, values = window.read()
...
# ... and is then rebound to the recognized text (a plain string).
values = r.recognize_google(audio, language='en-US')
...
Revised code:
# Main event loop.
#   'Speak': record from the microphone, put the recognized text into the
#            input field, then trigger the 'Ok' handler.
#   'Ok':    look the input text up on Wikipedia and Wolfram|Alpha, show the
#            result(s) in a popup and read the answer aloud.
while True:
    event, values = window.read()
    if event in (None, 'Cancel'):
        break

    if event == 'Speak':
        with m as source:
            r.adjust_for_ambient_noise(source)
            audio = r.listen(source)
        try:
            # Keep the recognized text in its own name: rebinding `values`
            # would turn values[0] into the first character of the string.
            value = r.recognize_google(audio, language='en-US')
        except sr.UnknownValueError:
            # The speech could not be understood; keep the GUI alive.
            sg.PopupNonBlocking("Sorry, I could not understand that.")
            continue
        except sr.RequestError as err:
            # The recognition service could not be reached or refused the request.
            sg.PopupNonBlocking("Speech recognition request failed: " + str(err))
            continue
        print(value)
        window[0].update(value)
        # Re-enter the loop as if 'Ok' had been pressed, so the next
        # window.read() returns the updated input text in values[0].
        window.write_event_value('Ok', '')

    elif event == 'Ok':
        query = values[0]
        if not query:
            continue

        # Query each source independently so that a failure of one never
        # prevents the other from being shown.
        try:
            wiki_res = wikipedia.summary(query, sentences=2)
        except (wikipedia.exceptions.DisambiguationError,
                wikipedia.exceptions.PageError):
            wiki_res = None

        try:
            # next(..., None) avoids StopIteration when there are no results.
            pod = next(client.query(query).results, None)
            wolfram_res = pod.text if pod is not None else None
        except Exception:
            # Best effort: any Wolfram failure falls back to Wikipedia only.
            wolfram_res = None

        if wolfram_res is not None and wiki_res is not None:
            engine.say(wolfram_res)
            sg.PopupNonBlocking("Wolfram Result: "+wolfram_res,"Wikipedia Result: "+wiki_res)
        elif wolfram_res is not None:
            engine.say(wolfram_res)
            sg.PopupNonBlocking(wolfram_res)
        elif wiki_res is not None:
            engine.say(wiki_res)
            sg.PopupNonBlocking(wiki_res)
        else:
            sg.PopupNonBlocking("Sorry, I could not find an answer.")
            continue

        # Speak whatever was queued with engine.say() above.
        engine.runAndWait()

window.close()
Note that the GUI blocks while the audio is being recorded and recognized. Consider moving that work to a separate thread; otherwise the GUI may stop responding at times.