Search code examples
pythonpython-3.xtext-to-speech

Pyttsx3 callbacks not triggering when using threading


I have used threading as a way to get around the "run and wait" default functionality of pyttsx3 so I can interrupt something being said mid-speech. However, this blocks the library's callbacks, so I cannot detect when a block of text has finished being said - I have to guess at it using its length in seconds.

The callback I wish to detect is finished-utterance, which is connected with self.engine.connect('finished-utterance', self.onEnd). This follows the example given in the documentation. When I use this in my real program, one text block can be hundreds of words long and the next can be only a few words long. I want to know when one is done being said so the program can automatically advance to the next "say" function.

I have read that one person successfully got around this by using multiprocessing, but I don't know how to do this. Is there a way to get callbacks to work with threading? I am using Windows 10.

from threading import Event, Thread
import pyttsx3

class Voice(object):
    """Speak text with pyttsx3 on a worker thread so speech can be interrupted.

    Each call to ``say`` stops any utterance still in progress, starts a new
    daemon thread that drives the engine loop, and then blocks the caller for
    up to ``length`` seconds or until the ``skip`` event is set.
    """

    def __init__(self, skip, play_beep):
        """Create the engine and register the end-of-utterance callback.

        Args:
            skip: Event-like object; ``say`` polls it with ``is_set()`` and
                ``wait()`` to end its wait early.
            play_beep: Accepted for interface compatibility; not used here.
        """
        self.t = None
        self._running = False
        self.engine = pyttsx3.init()
        self.skip = skip
        self.engine.connect('finished-utterance', self.onEnd)

    def onEnd(self, name, completed):
        """Handle 'finished-utterance': report it and stop the worker loop."""
        print('finishing : ', name, completed)
        self.stop()

    def on_finished_utterance(self, name, completed):
        """Alternative end-of-utterance handler (not connected in __init__)."""
        print('END')
        # NOTE(review): `killme` is not defined anywhere in this class, so this
        # handler raises AttributeError if it is ever connected - confirm what
        # it was meant to call.
        # `args` must be a tuple; `(self.engine)` is just the engine itself.
        t = Thread(target=self.killme, args=(self.engine,), daemon=True)
        t.start()

    def process_speech(self, text):
        """Queue ``text`` and pump the engine loop until ``stop`` is called.

        Runs on the worker thread created by ``say``.
        """
        self.engine.say(str(text))
        # startLoop(False) leaves loop iteration to the caller, hence the
        # explicit iterate() calls below.
        self.engine.startLoop(False)
        while self._running:
            self.engine.iterate()

    def say(self, text, length=2):
        """Speak ``text`` in the background and wait up to ``length`` seconds.

        Args:
            text: Object to speak; converted with ``str``.
            length: Maximum time in seconds to block, polled in 0.1 s steps.
                The wait ends early when ``self.skip`` is set.
        """
        # check if thread is running
        if self.t and self._running:
            # stop it if it is
            self.stop()
        # iterate speech in a thread

        self.t = Thread(target=self.process_speech, args=(text,), daemon=True)
        self._running = True
        self.t.start()

        elapsed_seconds = 0
        poll_interval = .1
        while not self.skip.is_set() and elapsed_seconds < length:
            self.skip.wait(poll_interval)
            elapsed_seconds += poll_interval

    def stop(self):
        """Stop the engine loop and wait for the worker thread to exit.

        Safe to call when nothing is being spoken, and safe to call from the
        worker thread itself (as ``onEnd`` does), in which case the join is
        skipped.
        """
        from threading import current_thread

        self._running = False
        try:
            self.engine.endLoop()
        except Exception:
            # Best effort: ending a loop that is not running is not an error
            # for our purposes. Unlike a bare `except`, this lets
            # KeyboardInterrupt and SystemExit propagate.
            pass

        worker = self.t
        # A thread cannot join itself, and there is nothing to join before the
        # first say().
        if worker is None or worker is current_thread():
            return
        try:
            worker.join()
        except RuntimeError:
            # Raised when the thread has been created but not started yet.
            pass

# Demo: speak two short phrases back to back, allowing 2 seconds for each.
skip = Event()
myVoice = Voice(skip, 0)
for phrase in ("test", "test two"):
    myVoice.say(phrase, 2)

Solution

  • Question: multiple say() to be fired / end based on the event 'finished-utterance'

    This implementation is based on running-a-driver-event-loop from pyttsx3.readthedocs.io.

    Note: The TTSThread starts when it is instantiated and runs forever!
    You have to call .terminate() at EXIT __main__ to prevent waiting forever!

    # TTS.py
    import threading, time, pyttsx3
    
    
    class TTSThread(threading.Thread):
        """Worker thread that owns a pyttsx3 engine and speaks queued text.

        The thread starts itself from ``__init__`` and keeps running until
        ``terminate()`` is called. Text passed to ``say()`` is queued and
        spoken one utterance at a time: each utterance runs the engine loop
        until the 'finished-utterance' callback ends it, after which
        ``on_finished_utterance`` is called as an overridable hook.

        NOTE: ``cancel()`` does not empty the queue. Items still queued when it
        is called are spoken first after the next ``say()``.
        """

        def __init__(self, rate=115, event=None):
            """Set up the queue and control events, then start the thread.

            Args:
                rate: Value assigned to the engine's 'rate' property.
                event: Optional attribute name; when given, a new
                    ``threading.Event`` is stored on the instance under it.
            """
            super().__init__()

            if event:
                setattr(self, event, threading.Event())

            self._cancel = threading.Event()
            self.rate = rate
            # The engine is created in run(); it stays None until then.
            self.engine = None

            self._say = threading.Event()
            self._text_lock = threading.Lock()
            self._text = []

            self._is_alive = threading.Event()
            self._is_alive.set()
            self.start()

        def _init_engine(self, rate):
            """Create the engine, set its rate and connect both callbacks."""
            engine = pyttsx3.init()
            engine.setProperty('rate', rate)  # setting up new voice rate
            engine.connect('finished-utterance', self._on_completed)
            engine.connect('started-word', self._on_cancel)
            return engine

        def say(self, text, stop=None):
            """Queue text to be spoken; ignored after ``terminate()``.

            Args:
                text: A string, or a list/tuple whose items are strings or
                    tuples. Each queued item is later unpacked into
                    ``engine.say(*item)``.
                stop: Second element queued alongside a single string
                    ``text``. Strings inside a list/tuple are queued with
                    ``None`` instead.
            """
            if not self._is_alive.is_set():
                return

            # A new request re-enables speaking after an earlier cancel().
            self._cancel.clear()

            if isinstance(text, str):
                text = [(text, stop)]

            if isinstance(text, (list, tuple)):
                for t in text:
                    if isinstance(t, str):
                        t = t, None

                    with self._text_lock:
                        self._text.append(t)

                    self._say.set()

        def cancel(self):
            """Request cancellation; acted on at the next 'started-word'."""
            self._cancel.set()

        def _on_cancel(self, name, location, length):
            """'started-word' callback: stop speaking if cancel was requested."""
            if self._cancel.is_set():
                self.stop()

        def stop(self):
            """Stop the current utterance and end the engine loop.

            Does nothing if the engine has not been created yet, i.e. ``run``
            has not reached ``_init_engine``.
            """
            engine = self.engine
            if engine is None:
                return
            engine.stop()
            # NOTE(review): presumably gives the driver time to settle before
            # the loop is ended - confirm the delay is required.
            time.sleep(0.5)
            engine.endLoop()

        def _on_completed(self, name, completed):
            """'finished-utterance' callback: end the loop, then call the hook.

            Only acts when ``completed`` is true.
            """
            if completed:
                self.engine.endLoop()
                self.on_finished_utterance(name, completed)

        def on_finished_utterance(self, name, completed):
            """Hook called after a completed utterance; override in subclasses."""
            pass

        def terminate(self):
            """Ask the thread to exit, cancel speech, and wait for it to end."""
            self._is_alive.clear()
            self._cancel.set()
            self.join()

        def run(self):
            """Thread body: create the engine and speak queued items."""
            self.engine = engine = self._init_engine(self.rate)
            while self._is_alive.is_set():
                # The 0.1 s timeout lets the outer loop notice terminate().
                while self._say.wait(0.1):
                    self._say.clear()

                    while not self._cancel.is_set() and self._text:
                        with self._text_lock:
                            engine.say(*self._text.pop(0))
                        # Returns once a callback has called endLoop().
                        engine.startLoop()
    

    Usage 1: Pass multiple sentences at once and automatically run engine.say(...) for all sentences.

    import time

    from TTS import TTSThread
    
    # Sentences handed to the voice in one call.
    SAY = [
        "Use your head to save your feet.",
        "Time will tell.",
        "Strike while the iron is hot.",
    ]
    
    class Voice(TTSThread):
        """TTSThread configured with a speech rate of 115."""

        def __init__(self):
            # TTSThread.__init__ also starts the worker thread.
            super().__init__(rate=115)
    
    if __name__ == "__main__":
        voice = Voice()
        voice.say(SAY)

        # Simulate __main__.is_alive: keep the main thread busy for
        # 100 polls of 0.1 s while the sentences are spoken, then shut the
        # voice thread down so the program can exit.
        for _ in range(100):
            time.sleep(0.1)
        voice.terminate()

    print('EXIT __main__')
    

    Usage 2: Pass one sentence after the other, depending on the event 'finished-utterance'.

    from TTS import TTSThread
    import time
    
    class Voice(TTSThread):
        """TTSThread that signals ``completed`` after each finished utterance."""

        def __init__(self):
            # Placeholder; the base initializer replaces it with a
            # threading.Event because of event='completed'.
            self.completed = None
            super().__init__(rate=115, event='completed')

        def on_finished_utterance(self, name, completed):
            """
            Overrides `TTSThread.on_finished_utterance`, the hook for the
            'finished-utterance' event.

            While sentences remain in the module-level `SAY`, wait briefly
            and set `completed` so the main loop submits the next one.
            """
            if not SAY:
                print('finishing')
                return

            pause = 1.5
            print('finishing[{}], delay next sentence {} sec.'.format(count, pause))
            time.sleep(pause)
            self.completed.set()
    
    
    if __name__ == "__main__":
        # This snippet reads SAY but never defines it; these are the sentences
        # from Usage 1, which match the Output shown below.
        SAY = [
            "Use your head to save your feet.",
            "Time will tell.",
            "Strike while the iron is hot.",
        ]

        voice = Voice()

        # Start simulation, `say(...)` while __main__ is running
        voice.completed.set()

        _terminate = 100
        # Kept as a float so the printed counters read "1.0", "20.0", ...
        count = 0.0

        while True:
            time.sleep(0.1)
            count += 1

            # Submit the next sentence once the previous one has finished.
            if voice.completed.is_set():
                voice.completed.clear()

                if SAY:
                    print('.say add[{}]: "{}..."'.format(count, SAY[0][:10]))
                    voice.say(SAY.pop(0))

            if count % 20 == 0:
                print('__main__ {}'.format(count))

            if count >= _terminate:
                voice.terminate()
                # Without this the loop never ends and EXIT is never printed.
                break

    print('EXIT __main__')
    

    Output:

    .say add[1.0]: "Use your h..."
    finishing[18.0], delay next sentence 1.5 sec.
    __main__ 20.0
    .say add[34.0]: "Time will ..."
    __main__ 40.0
    finishing[51.0], delay next sentence 1.5 sec.
    __main__ 60.0
    .say add[67.0]: "Strike whi..."
    __main__ 80.0
    finishing
    __main__ 100.0
    EXIT __main__
    

    Tested with Python: 3.5 - pyttsx3 Version: 2.71