Hi, I'm writing an application that provides a GUI together with speech-recognition commands, and the program should answer via text-to-speech (TTS). I wrote a small test program to learn threading with PyQt5 because, as far as I understand, a worker thread is needed to keep the GUI responsive. That part seems to work, except when the program tries to speak.
The problem: as long as I don't pass the recognized input to TTS, everything works fine. With pyttsx3, however, calling .runAndWait() terminates my program. This is the code in question (the GUI includes a slider so I can check that the threading works):
import sys
import speech_recognition as sr
import pyttsx3
from PyQt5.QtCore import *
from PyQt5.QtWidgets import *
# Shared speech recognizer. WorkerThread.run declares this name global and
# rebinds it to a fresh Recognizer after a failed recognition.
recognizer = sr.Recognizer()
# Text-to-speech engine, created once when the module is loaded.
speaker = pyttsx3.init()
# Voices reported by the TTS engine; only used by the disabled tuning below.
voices = speaker.getProperty('voices')
# Optional voice/rate tuning, currently disabled:
# speaker.setProperty('voice', voices[33].id)
# speaker.setProperty('rate', 190)
class DlgMain(QDialog):
    """Test dialog: a Start button plus a slider mirrored on an LCD display.

    The slider/LCD pair exists only to show that the GUI stays responsive
    while the worker thread is running.
    """

    def __init__(self):
        """Build the widgets, wire the signals and install the layouts."""
        super().__init__()
        self.setWindowTitle("Test")

        # No worker exists until the Start button is clicked.
        self.worker = None

        self.btnStart = QPushButton("Start")
        self.btnStart.clicked.connect(self.evt_btnStart_clicked)

        # Moving the slider updates the LCD directly on the GUI thread.
        self.dial = QSlider()
        self.lcd = QLCDNumber()
        self.dial.valueChanged.connect(self.lcd.display)

        self.lytLCD = QHBoxLayout()
        self.lytLCD.addWidget(self.dial)
        self.lytLCD.addWidget(self.lcd)

        self.lytMain = QVBoxLayout()
        self.lytMain.addWidget(self.btnStart)
        self.lytMain.addLayout(self.lytLCD)
        self.setLayout(self.lytMain)

    def evt_btnStart_clicked(self):
        """Start the background worker unless one is already running.

        Rebinding ``self.worker`` while the previous thread is still running
        would drop the only reference to a live QThread (Qt treats destroying
        a running QThread as fatal) and would start a second microphone loop.
        """
        if self.worker is not None and self.worker.isRunning():
            return

        # In this function create an instance of the worker class
        self.worker = WorkerThread()
        self.worker.start()
        # Catching our own "update" signal
        #self.worker.update_progress.connect(self.evt_update_progress)
# Whatever should be run in the thread must now run in the worker class!
class WorkerThread(QThread):
    """Background thread: listen on the microphone, transcribe, speak it back."""

    # Create our own signal to send current info to GUI
    #update_progress = pyqtSignal(int)

    def run(self):
        """Record a phrase, recognize it and echo it via TTS, repeatedly.

        The loop ends when ``requestInterruption()`` has been called on this
        thread; the flag is only checked between phrases, so a blocking
        ``listen()`` call finishes first.

        No exception is allowed to escape this method: PyQt5 aborts the whole
        application when a Python exception leaves ``QThread.run``.
        """
        global recognizer
        while not self.isInterruptionRequested():
            try:
                # Hold the microphone only while recording, so it is released
                # before the network request and the speech output.
                with sr.Microphone(device_index=0) as source:
                    recognizer.adjust_for_ambient_noise(source, duration=.2)
                    print("Listening...")
                    # NOTE(review): this assignment replaces whatever threshold
                    # the ambient-noise calibration above just set - confirm
                    # that a fixed value of 4000 is really intended.
                    recognizer.energy_threshold = 4000
                    audio = recognizer.listen(source)

                print("Recognizing...")
                message = recognizer.recognize_google(audio).lower()
                speaker.say(message)
                speaker.runAndWait()
                # print(message)
            except sr.UnknownValueError:
                # Speech was unintelligible: start over with a fresh recognizer.
                recognizer = sr.Recognizer()
            except sr.RequestError as err:
                # Recognition service unreachable or rejected the request;
                # report it and keep listening.
                print("Speech recognition request failed:", err)
            except Exception:
                # Thread boundary: report the failure (e.g. microphone or TTS
                # driver error) and end the worker instead of letting the
                # exception take down the application.
                import traceback
                traceback.print_exc()
                return
if __name__ == '__main__':
    # Script entry point: create the Qt application, show the test dialog,
    # then run the event loop and exit with its return code.
    app = QApplication(sys.argv)
    dlgMain = DlgMain()
    dlgMain.show()
    exit_code = app.exec_()
    sys.exit(exit_code)
Do I have to give the TTS its own thread, or can I somehow solve this with a pyqtSignal? I'm not sure what to even search for, and so far I have not found a similar question.
I hope someone can help me. Thanks in advance!
Ok, I was able to work around the problem myself. It was in fact the runAndWait() function from pyttsx3 that was breaking the program. Instead, I now use a combination of gTTS, pydub, soundfile, playsound, and pyrubberband.
The speak function now looks like this:
from gtts import gTTS
from pydub import AudioSegment
import playsound
import soundfile as sf
import pyrubberband as pyrb
def speakMsg(message, *, speed=1.4, pitch_steps=0, lang="en"):
    """Speak *message* aloud using gTTS, optionally sped up and pitch-shifted.

    The text is synthesized to MP3, converted to WAV so it can be edited,
    time-stretched, optionally pitch-shifted, and played back. All
    intermediate files live in a temporary directory that is removed when
    playback has finished, so overlapping calls cannot overwrite each
    other's files and nothing is left behind in the working directory.

    Args:
        message: Text to speak. Empty or whitespace-only text is ignored,
            because gTTS refuses to synthesize empty input.
        speed: Playback rate factor passed to ``pyrb.time_stretch``
            (values above 1 are faster).
        pitch_steps: Pitch shift in semitones passed to ``pyrb.pitch_shift``
            (negative lowers the voice). ``0`` skips the pitch step entirely.
        lang: Language code handed to gTTS.
    """
    import tempfile
    from pathlib import Path

    if not message or not message.strip():
        return

    with tempfile.TemporaryDirectory() as workdir:
        mp3_path = str(Path(workdir) / "clear_msg.mp3")
        wav_path = str(Path(workdir) / "clear_msg.wav")
        out_path = str(Path(workdir) / "edit_msg.wav")

        # Write the message to mp3
        tts = gTTS(text=message, lang=lang)
        tts.save(mp3_path)

        # Change the file to wav format so it can be edited.
        # export() returns the open output file; close it explicitly so the
        # temporary directory can be removed on every platform.
        sound = AudioSegment.from_mp3(mp3_path)
        sound.export(wav_path, format="wav").close()

        # Make playback faster (and optionally shift the pitch)
        data, samplerate = sf.read(wav_path)
        data = pyrb.time_stretch(data, samplerate, speed)
        if pitch_steps:
            # A shift of 0 semitones changes nothing audible, so the extra
            # rubberband pass is only run when a shift was requested.
            data = pyrb.pitch_shift(data, samplerate, pitch_steps)

        # Save it as a new file and play it.
        sf.write(out_path, data, samplerate, format='wav')
        playsound.playsound(out_path)
It looks unnecessarily complicated, but it is in fact surprisingly performant, and the program now works :D You could do this with gTTS alone, but I found the gTTS voice too slow, so I sped it up and lowered the pitch so it doesn't sound too "chipmunky". That is why there are so many extra steps.