I'm working on an application that uses the Microsoft Cognitive Services Speech-to-Text API. I'm trying to create a GUI in which the transcribed text shows up in a text box once a Start button is pushed, and the transcription stops once a Stop button is pressed. I'm pretty new to creating GUIs and have been using PyQt5. I have divided the application according to MVC (Model-View-Controller). The code for the GUI is as follows:
import sys
import time
from functools import partial
import azure.cognitiveservices.speech as speechsdk
from PyQt5.QtCore import *
from PyQt5.QtWidgets import *
from PyQt5.QtGui import *
class test_view(QMainWindow):
    """Main window holding a Start/Stop button row above a read-only text box."""

    def __init__(self):
        """Install the central widget and its vertical layout, then build the UI."""
        super().__init__()
        self._centralWidget = QWidget(self)
        self.generalLayout = QVBoxLayout()
        self.setCentralWidget(self._centralWidget)
        self._centralWidget.setLayout(self.generalLayout)
        self._createApp()

    def _createApp(self):
        """Create the two buttons and the text box and add them to the layout."""
        # Button row: both buttons share the same fixed width.
        button_row = QHBoxLayout()
        self.startButton = QPushButton('Start')
        self.stopButton = QPushButton('Stop')
        for button in (self.startButton, self.stopButton):
            button.setFixedWidth(220)
            button_row.addWidget(button)

        # Output area: read-only, fixed size, centred in its own row.
        self.text_box = QTextEdit()
        self.text_box.setReadOnly(True)
        self.text_box.setFixedSize(1500, 400)
        text_row = QHBoxLayout()
        text_row.addWidget(self.text_box)
        text_row.setAlignment(Qt.AlignCenter)

        # Buttons go on top, the text box below them.
        for row in (button_row, text_row):
            self.generalLayout.addLayout(row)

    def appendText(self, text):
        """Append ``text`` to the text box and give the text box focus."""
        box = self.text_box
        box.append(text)
        box.setFocus()

    def clearText(self):
        """Replace the text box contents with an empty string.

        Returns whatever ``QTextEdit.setText`` returns.
        """
        return self.text_box.setText('')
class test_ctrl:
    """Controller that keeps a reference to the view it was given."""

    def __init__(self, view):
        """Store ``view`` on the instance as ``_view``."""
        self._view = view
def main():
    """Create the Qt application, show the view maximized and run the event loop.

    The process exits with the status returned by the event loop.
    """
    app = QApplication(sys.argv)

    window = test_view()
    window.showMaximized()

    # The controller is constructed for the view but not kept in a variable.
    test_ctrl(view=window)

    exit_status = app.exec_()
    sys.exit(exit_status)


if __name__ == "__main__":
    main()
The speech-to-text transcription code is:
import azure.cognitiveservices.speech as speechsdk
import time
def setupSpeech(speech_key="speech_key", service_region="service_region"):
    """Build a speech recognizer for an Azure Speech resource.

    Args:
        speech_key: Subscription key passed to ``speechsdk.SpeechConfig``.
            The default is the placeholder string from the original snippet
            and must be replaced with a real key.
        service_region: Region passed to ``speechsdk.SpeechConfig``. The
            default is likewise a placeholder.

    Returns:
        A ``speechsdk.SpeechRecognizer`` built from that configuration. No
        audio configuration is passed, so the SDK's default audio input
        applies (per the SDK documentation, the default microphone).
    """
    speech_config = speechsdk.SpeechConfig(
        subscription=speech_key,
        region=service_region,
    )
    return speechsdk.SpeechRecognizer(speech_config=speech_config)
def main():
    """Run continuous recognition until the session stops, then print the results.

    Recognized phrases are collected by a callback. The calling thread polls a
    flag every half second; the flag is set by the stop callback, which is
    connected to both the ``session_stopped`` and ``canceled`` signals.
    """
    recognizer = setupSpeech()
    done = False
    transcripts = []

    def on_partial(evt):
        # Intermediate hypotheses are only echoed, not stored.
        print(evt)

    def on_final(evt):
        transcripts.append(evt.result.text)

    def on_stop(evt):
        nonlocal done
        print('CLOSING on {}'.format(evt))
        recognizer.stop_continuous_recognition()
        done = True

    wiring = (
        (recognizer.recognizing, on_partial),
        (recognizer.recognized, on_final),
        (recognizer.session_stopped, on_stop),
        (recognizer.canceled, on_stop),
    )
    for signal, callback in wiring:
        signal.connect(callback)

    recognizer.start_continuous_recognition()

    # Wait here until on_stop flips the flag.
    while True:
        if done:
            break
        time.sleep(.5)

    print(transcripts)


if __name__ == "__main__":
    main()
I know for sure that both pieces of code work on their own, but I'm not sure how to build the speech-to-text code into the MVC code. I think it should live in a model that is connected to the view through the controller. I have tried doing this in multiple ways, but I just can't figure it out. I also figure I need some kind of threading to keep the code from freezing the GUI. I hope someone can help me with this.
You need to replace this part
print(all_results)
and push all_results asynchronously to your code that processes the text.
Alternatively, expose a button in the UI that invokes speech_recognizer.start_continuous_recognition() from a separate function, and pick up the results there for processing. This way you can avoid freezing the UI.