python speech-recognition voice google-cloud-speech

How to make voice assistant wait for a command

I'm building my own personal voice assistant in Python.

Here is the code

import pyttsx3
import speech_recognition as sr
import sys
import subprocess

# Text-to-speech engine used for every spoken reply.
engine = pyttsx3.init()

# Windows registry token paths identifying installed Microsoft speech voices
# (judging by the names: US male "David", US female "Zira", GB female "Hazel").
# Only en_voice_id_f is used below.
en_voice_id_m = "HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_DAVID_11.0"
en_voice_id_f = "HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_ZIRA_11.0"
gb_voice_id_f = "HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-GB_HAZEL_11.0"

# `voices` is fetched but never read again in this script.
voices = engine.getProperty('voices')
engine.setProperty('voice', en_voice_id_f)
engine.setProperty('rate', 195)
#engine.say("Hello. I'm Elsi, your voice assistant. I can do anything u want")
# The greeting above is commented out, so nothing is queued when this runs.
engine.runAndWait()
# Main loop: record one phrase, then compare its transcription against each
# known command in turn.
while True:
    # A new Recognizer and Microphone are created on every iteration.
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("Say something!")
        audio = r.listen(source)
        # NOTE: r.recognize_google(audio) is called again in every branch
        # below instead of being stored once, and none of the calls is wrapped
        # in try/except. The UnknownValueError shown in the traceback below
        # therefore propagates from this first call and ends the script.
        if r.recognize_google(audio).lower() == 'hello':
            engine = pyttsx3.init()
            engine.say("Hi! How are you?")
            engine.runAndWait()
        elif r.recognize_google(audio).lower() == 'who are you':
            engine = pyttsx3.init()
            engine.say("I am Elsi, voice assistant")
            engine.runAndWait()
        elif r.recognize_google(audio).lower() == 'what can you do':
            engine = pyttsx3.init()
            engine.say("I can turn on film or music, open application and that's all :)")
            engine.runAndWait()
        # NOTE: the literal contains an upper-case 'I', so a lower-cased
        # transcription can never equal it; this branch is unreachable.
        elif r.recognize_google(audio).lower() == 'how can I call you?':
            engine = pyttsx3.init()
            engine.say("You can call me Elsi")
            engine.runAndWait()
        # 'stop', 'exit' and 'turn off' all speak a reply and then terminate.
        elif r.recognize_google(audio).lower() == 'stop':
            engine = pyttsx3.init()
            engine.say("Turning off")
            engine.runAndWait()
            sys.exit()
        elif r.recognize_google(audio).lower() == 'exit':
            engine = pyttsx3.init()
            engine.say("Goodbye ;)")
            engine.runAndWait()
            sys.exit()
        elif r.recognize_google(audio).lower() == 'turn off':
            engine = pyttsx3.init()
            engine.say("One moment please...")
            engine.runAndWait()
            sys.exit()
        # NOTE: `telegram` (and `viber` further down) are not defined anywhere
        # in this snippet; presumably they hold the shell command that starts
        # the application -- they must be defined before these branches run.
        elif r.recognize_google(audio).lower() == 'telegram':
            engine = pyttsx3.init()
            engine.say("One moment")
            engine.runAndWait()
            subprocess.check_output(telegram, shell=True)
        elif r.recognize_google(audio).lower() == 'open telegram':
            engine = pyttsx3.init()
            engine.say("Opening....")
            engine.runAndWait()
            subprocess.check_output(telegram, shell=True)
        # NOTE: the literal starts with an upper-case 'E', so it can never
        # equal a lower-cased transcription; this branch is unreachable.
        elif r.recognize_google(audio).lower() == 'Elsi open telegram':
            engine = pyttsx3.init()
            engine.say("Yes sir")
            engine.runAndWait()
            subprocess.check_output(telegram, shell=True)
        elif r.recognize_google(audio).lower() == 'viber':
            engine = pyttsx3.init()
            engine.say("One moment")
            engine.runAndWait()
            subprocess.check_output(viber, shell=True)
        elif r.recognize_google(audio).lower() == 'open viber':
            engine = pyttsx3.init()
            engine.say("Opening....")
            engine.runAndWait()
            subprocess.check_output(viber, shell=True)
        # Fallback for any transcription that matched none of the commands.
        else:
            engine = pyttsx3.init()
            engine.say("Didn't catch it, repeat please")
            engine.runAndWait()

When I run the program and sit silently it gives this error -

    Traceback (most recent call last):
      File "C:\Users\___\!Python!\Elsi\version#2.py", line 38, in <module>
    
    if r.recognize_google(audio).lower() == 'hello':


File "C:\Users\___\AppData\Local\Programs\Python\Python36-32\lib\site-packages\speech_recognition\__init__.py", line 858, in recognize_google

        if not isinstance(actual_result, dict) or len(actual_result.get("alternative", [])) == 0: raise UnknownValueError()
    speech_recognition.UnknownValueError

These are my attempts at fixing this:

r = sr.Recognizer()
with sr.Microphone() as source:
    r.adjust_for_ambient_noise(source, 1)
    audio = r.listen(source, 1)
    recognize_sphinx()  offline
    while r.recognize_google(audio).lower() != 'Elsi' or 'Elsea' or 'Elsa' or 'Elsia' or 'Chelsea':

This gives the same error.

Solution

Remove all if, elif, and else conditions associated with recognizing a phrase.
Place the phrases in a dict as the keys. Each value can be a list, where index 0 is the text response and index 1 is a command ('exit', viber, telegram) or None.
- Use the .get() method to retrieve values or return unknown_command_phrase if there is no key match.
Assign r.recognize_google(audio).lower() to the variable pattern.
Add a function to deal with the response from the speech recognition engine.
- See Real Python: The Ultimate Guide To Speech Recognition With Python for additional resources.
  - Working With Microphones
  - Putting It All Together

import pyttsx3
import speech_recognition as sr
import sys
import subprocess


# This function is from Real Python: https://realpython.com/python-speech-recognition/#putting-it-all-together-a-guess-the-word-game
def recognize_speech_from_mic(recognizer, microphone) -> dict:
    """Record one phrase from `microphone` and transcribe it via Google.

    Args:
        recognizer: an `sr.Recognizer` instance used to record and transcribe.
        microphone: an `sr.Microphone` instance used as the audio source.

    Returns:
        A dictionary with three keys:
        "success": `False` only when the API request failed
            (`sr.RequestError`), otherwise `True`.
        "error": `None` if no error occurred, otherwise a short message
            ("API unavailable" or "Unable to recognize speech").
        "transcription": the transcribed text, or `None` if the speech could
            not be transcribed.

    Raises:
        TypeError: if either argument is not of the expected type.
    """
    # Validate both arguments up front, recognizer first.
    expectations = (
        (recognizer, sr.Recognizer, "`recognizer` must be `Recognizer` instance"),
        (microphone, sr.Microphone, "`microphone` must be `Microphone` instance"),
    )
    for candidate, expected_type, message in expectations:
        if not isinstance(candidate, expected_type):
            raise TypeError(message)

    # Calibrate against ambient noise, then capture a single phrase.
    with microphone as mic_source:
        recognizer.adjust_for_ambient_noise(mic_source)
        recording = recognizer.listen(mic_source)

    try:
        text = recognizer.recognize_google(recording)
    except sr.RequestError:
        # The API was unreachable or unresponsive.
        return {"success": False,
                "error": "API unavailable",
                "transcription": None}
    except sr.UnknownValueError:
        # The request went through, but the speech was unintelligible.
        return {"success": True,
                "error": "Unable to recognize speech",
                "transcription": None}

    return {"success": True,
            "error": None,
            "transcription": text}


# Phrase table: normalised spoken phrase -> [spoken reply, command].
# The command is None (reply only), 'exit' (quit the assistant) or a shell
# command string to execute.
# Keys are lower-case without trailing punctuation because the transcription
# is normalised the same way before the lookup.
# NOTE: `telegram` and `viber` are not defined in this snippet; they must be
# bound to the shell commands that launch those applications before this
# table is built, otherwise building it raises NameError.
my_phrases = {
    'hello': ['Hi!, How are you?', None],
    'who are you': ['I am Elsi, voice assistant', None],
    'what can you do': ["I can turn on film or music, open application and that's all :)", None],
    'how can i call you': ['You can call me Elsi', None],
    'stop': ['Turning off', 'exit'],
    'exit': ['Goodbye ;)', 'exit'],
    'turn off': ['One moment please...', 'exit'],
    'telegram': ['One moment', telegram],
    'open telegram': ['Opening....', telegram],
    'elsi open telegram': ['Yes sir', telegram],
    'viber': ['One moment', viber],
    'open viber': ['Opening....', viber],
}

# Reply used when the transcription is missing or matches no known phrase.
unknown_command_phrase = ["Didn't catch it, repeat please", None]


def _normalize_phrase(text):
    """Return `text` lower-cased with surrounding blanks and trailing ?!. removed.

    `None` or an empty transcription (silence, unintelligible speech, API
    failure) becomes '' so that the lookup falls back to the unknown-command
    reply instead of raising.
    """
    if not text:
        return ''
    return text.lower().strip().rstrip('?!.').strip()


engine = pyttsx3.init()

# Raw strings: the registry paths contain backslashes that would otherwise be
# read as (invalid) escape sequences. The resulting values are unchanged.
en_voice_id_m = r"HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_DAVID_11.0"
en_voice_id_f = r"HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_ZIRA_11.0"
gb_voice_id_f = r"HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-GB_HAZEL_11.0"

voices = engine.getProperty('voices')
engine.setProperty('voice', en_voice_id_f)
engine.setProperty('rate', 195)

# The recognizer and microphone are reusable, so create them once rather than
# on every loop iteration.
recognizer = sr.Recognizer()
microphone = sr.Microphone()

while True:
    print("Say something!")
    response = recognize_speech_from_mic(recognizer, microphone)
    transcription = response['transcription']  # None when nothing was recognised
    pattern = _normalize_phrase(transcription)
    say, command = my_phrases.get(pattern, unknown_command_phrase)

    # Speak the reply *before* acting on the command; otherwise the queued
    # text is never spoken on 'exit' and is delayed until after the launched
    # command returns.
    engine.say(say)
    engine.runAndWait()

    if command is None:
        print(f'The response returned by the speech recognition engine was:\n{transcription}.\n')
        if response['error'] is not None:
            # Surface why there is no transcription (API failure vs. unintelligible speech).
            print(f"Recognition error: {response['error']}\n")
    elif command == 'exit':
        sys.exit()
    else:
        # Runs the configured shell command and waits for it to finish.
        subprocess.check_output(command, shell=True)  # assumes you have these properly configured