Search code examples
Tags: python, speech-recognition, voice, google-cloud-speech

How to make voice assistant wait for a command


I'm building my personal voice assistant in Python.

Here is the code

import pyttsx3
import speech_recognition as sr
import sys
import subprocess

# Create the text-to-speech engine used for every spoken reply.
engine = pyttsx3.init()

# Registry paths of Microsoft Speech voice tokens (David en-US, Zira en-US, Hazel en-GB).
# NOTE(review): these are not raw strings; sequences such as "\S" and "\M" are
# invalid escapes that Python currently keeps verbatim but warns about.
en_voice_id_m = "HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_DAVID_11.0"
en_voice_id_f = "HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_ZIRA_11.0"
gb_voice_id_f = "HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-GB_HAZEL_11.0"

# The list of voices is fetched but not used afterwards in this snippet.
voices = engine.getProperty('voices')
# Select the Zira (en-US) voice and set the speech rate.
engine.setProperty('voice', en_voice_id_f)
engine.setProperty('rate', 195)
#engine.say("Hello. I'm Elsi, your voice assistant. I can do anything u want")
# Nothing is queued at this point because the greeting above is commented out.
engine.runAndWait()
# Main loop: record one utterance per iteration and answer the first phrase that matches.
# NOTE(review): r.recognize_google(audio) raises speech_recognition.UnknownValueError
# when no speech could be recognised, and nothing here catches it, so silence
# terminates the program. The call is also repeated in every branch that gets tested
# instead of being made once and stored.
# NOTE(review): `telegram` and `viber` are not defined anywhere in this snippet.
while True:
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("Say something!")
        audio = r.listen(source)
        if r.recognize_google(audio).lower() == 'hello':
            engine = pyttsx3.init()
            engine.say("Hi! How are you?")
            engine.runAndWait()
        elif r.recognize_google(audio).lower() == 'who are you':
            engine = pyttsx3.init()
            engine.say("I am Elsi, voice assistant")
            engine.runAndWait()
        elif r.recognize_google(audio).lower() == 'what can you do':
            engine = pyttsx3.init()
            engine.say("I can turn on film or music, open application and that's all :)")
            engine.runAndWait()
        # The literal below contains an uppercase "I", so a lower-cased string can never equal it.
        elif r.recognize_google(audio).lower() == 'how can I call you?':
            engine = pyttsx3.init()
            engine.say("You can call me Elsi")
            engine.runAndWait()
        # 'stop', 'exit' and 'turn off' all speak a farewell and then end the program.
        elif r.recognize_google(audio).lower() == 'stop':
            engine = pyttsx3.init()
            engine.say("Turning off")
            engine.runAndWait()
            sys.exit()
        elif r.recognize_google(audio).lower() == 'exit':
            engine = pyttsx3.init()
            engine.say("Goodbye ;)")
            engine.runAndWait()
            sys.exit()
        elif r.recognize_google(audio).lower() == 'turn off':
            engine = pyttsx3.init()
            engine.say("One moment please...")
            engine.runAndWait()
            sys.exit()
        # The remaining branches speak a reply and then run an external command through the shell.
        elif r.recognize_google(audio).lower() == 'telegram':
            engine = pyttsx3.init()
            engine.say("One moment")
            engine.runAndWait()
            subprocess.check_output(telegram, shell=True)
        elif r.recognize_google(audio).lower() == 'open telegram':
            engine = pyttsx3.init()
            engine.say("Opening....")
            engine.runAndWait()
            subprocess.check_output(telegram, shell=True)
        # The literal below starts with an uppercase "E", so a lower-cased string can never equal it.
        elif r.recognize_google(audio).lower() == 'Elsi open telegram':
            engine = pyttsx3.init()
            engine.say("Yes sir")
            engine.runAndWait()
            subprocess.check_output(telegram, shell=True)
        elif r.recognize_google(audio).lower() == 'viber':
            engine = pyttsx3.init()
            engine.say("One moment")
            engine.runAndWait()
            subprocess.check_output(viber, shell=True)
        elif r.recognize_google(audio).lower() == 'open viber':
            engine = pyttsx3.init()
            engine.say("Opening....")
            engine.runAndWait()
            subprocess.check_output(viber, shell=True)
        # Fallback: the transcription matched none of the phrases above.
        else:
            engine = pyttsx3.init()
            engine.say("Didn't catch it, repeat please")
            engine.runAndWait()

When I run the program and stay silent, it raises this error:

    Traceback (most recent call last):
      File "C:\Users\___\!Python!\Elsi\version#2.py", line 38, in <module>
    
    if r.recognize_google(audio).lower() == 'hello':


File "C:\Users\___\AppData\Local\Programs\Python\Python36-32\lib\site-packages\speech_recognition\__init__.py", line 858, in recognize_google

        if not isinstance(actual_result, dict) or len(actual_result.get("alternative", [])) == 0: raise UnknownValueError()
    speech_recognition.UnknownValueError

These are my attempts at fixing it:

r = sr.Recognizer()
with sr.Microphone() as source:
    r.adjust_for_ambient_noise(source, 1)
    audio = r.listen(source, 1)
    recognize_sphinx()  offline
    while r.recognize_google(audio).lower() != 'Elsi' or 'Elsea' or 'Elsa' or 'Elsia' or 'Chelsea':
  • This gives the same error.

Solution

    • Remove all if, elif, and else conditions associated with recognizing a phrase.
    • Place phrases in a dict as the key. The value can be a list, where index 0 is the text response and index 1 is a command ('exit', viber, telegram) or None.
      • Use the .get() method to retrieve values or return unknown_command_phrase if there is no key match.
    • Assign r.recognize_google(audio).lower() to the variable pattern.
    • Add a function to deal with the response from the speech recognition engine.
    import pyttsx3
    import speech_recognition as sr
    import sys
    import subprocess
    
    
    # This function is from Real Python: https://realpython.com/python-speech-recognition/#putting-it-all-together-a-guess-the-word-game
    def recognize_speech_from_mic(recognizer, microphone) -> dict:
        """Record one utterance from `microphone` and transcribe it.
    
        Returns a dictionary with three keys:
        "success": `False` only when the API request failed, otherwise `True`
        "error":   `None` if no error occurred, otherwise a message saying that the
                   API was unavailable or that the speech could not be recognized
        "transcription": the transcribed text, or `None` if nothing was transcribed
    
        Raises `TypeError` when either argument is not of the expected type.
        """
        # Reject wrongly typed arguments before touching the audio hardware.
        if not isinstance(recognizer, sr.Recognizer):
            raise TypeError("`recognizer` must be `Recognizer` instance")
        if not isinstance(microphone, sr.Microphone):
            raise TypeError("`microphone` must be `Microphone` instance")
    
        # Calibrate against ambient noise, then capture a single phrase.
        with microphone as mic_source:
            recognizer.adjust_for_ambient_noise(mic_source)
            recording = recognizer.listen(mic_source)
    
        # Start from the "everything worked" state and downgrade it on failure.
        succeeded, failure, text = True, None, None
        try:
            text = recognizer.recognize_google(recording)
        except sr.RequestError:
            # The API was unreachable or unresponsive.
            succeeded, failure = False, "API unavailable"
        except sr.UnknownValueError:
            # Audio was captured but was unintelligible; the request itself succeeded.
            failure = "Unable to recognize speech"
    
        return {"success": succeeded,
                "error": failure,
                "transcription": text}
    
    
    # Maps a recognised phrase to [spoken reply, action]. The action is None (reply
    # only), the string 'exit' (end the program) or a command that the main loop
    # passes to subprocess.check_output(..., shell=True).
    # NOTE(review): `telegram` and `viber` are not defined anywhere in this snippet;
    # presumably each holds the shell command that launches the app. They must be
    # assigned before this dict is built, otherwise a NameError is raised here.
    my_phrases = {'hello': ['Hi!, How are you?', None],
                  'who are you': ['I am Elsi, voice assistant', None],
                  'what can you do': ["I can turn on film or music, open application and that's all :)", None],
                  'how can I call you?': ['You can call me Elsi', None],
                  'stop': ['Turning off', 'exit'],
                  'exit': ['Goodbye ;)', 'exit'],
                  'turn off': ['One moment please...', 'exit'],
                  'telegram': ['One moment', telegram],
                  'open telegram': ['Opening....', telegram],
                  'Elsi open telegram': ['Yes sir', telegram],
                  'viber': ['One moment', viber],
                  'open viber': ['Opening....', viber]}
    
    # Fallback [reply, action] pair used when the transcription matches no key above.
    unknown_command_phrase = ["Didn't catch it, repeat please", None]
    
    # Create the text-to-speech engine used for every spoken reply.
    engine = pyttsx3.init()
    
    # Registry paths of Microsoft Speech voice tokens (David en-US, Zira en-US, Hazel en-GB).
    # Raw strings keep the backslashes literal; without the r-prefix, sequences such as
    # "\S" or "\M" are invalid escapes that newer Python versions warn about.
    en_voice_id_m = r"HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_DAVID_11.0"
    en_voice_id_f = r"HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_ZIRA_11.0"
    gb_voice_id_f = r"HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-GB_HAZEL_11.0"
    
    # The list of voices is fetched but not used afterwards in this snippet.
    voices = engine.getProperty('voices')
    # Select the Zira (en-US) voice and set the speech rate.
    engine.setProperty('voice', en_voice_id_f)
    engine.setProperty('rate', 195)
    #engine.say("Hello. I'm Elsi, your voice assistant. I can do anything u want")
    # The recognizer and microphone are reusable, so build them once instead of on
    # every pass through the loop.
    recognizer = sr.Recognizer()
    microphone = sr.Microphone()
    
    # Lower-cased view of the phrase table, so matching does not depend on how the
    # speech service capitalises its transcription.
    phrases_by_key = {phrase.lower(): reply for phrase, reply in my_phrases.items()}
    
    # Main loop: listen for one utterance, speak the matching reply, then run the
    # associated action (nothing, exit, or an external command).
    while True:
        print("Say something!")
        response = recognize_speech_from_mic(recognizer, microphone)
        pattern = response['transcription']  # None when nothing could be transcribed
        lookup_key = pattern.lower() if pattern is not None else None
        # Unknown or missing transcriptions fall back to the "repeat please" reply.
        say, command = phrases_by_key.get(lookup_key, unknown_command_phrase)
    
        # Speak the reply before acting on the command: say() only queues the text,
        # so without runAndWait() here the farewell would never be heard before
        # sys.exit(), and launch replies would be delayed until the command returned.
        engine.say(say)
        engine.runAndWait()
    
        if command is None:
            print(f'The response returned by the speech recognition engine was:\n{pattern}.\n')
        elif command == 'exit':
            sys.exit()
        else:
            # Blocks until the launched command finishes; assumes the command values
            # in my_phrases are properly configured.
            subprocess.check_output(command, shell=True)