Pyttsx3 callbacks not triggering when using threading

I have used threading as a way to get around the default "run and wait" behavior of pyttsx3 so that I can interrupt something being said mid-speech. However, this blocks the library's callbacks, so I cannot detect when a block of text has finished being said; I have to guess at it using a length in seconds.

The callback I wish to detect is finished-utterance, which is connected with self.engine.connect('finished-utterance', self.onEnd). This follows the example given in the documentation. When I use this in my real program, one text block can be hundreds of words long and the next can be only a few words long. I want to know when one is done being said so the program can automatically advance to the next "say" function.

I have read that one person successfully got around this by using multiprocessing, but I don't know how to do this. Is there a way to get callbacks to work with threading? I am using Windows 10.

from threading import Event, Thread
import pyttsx3

class Voice(object):
    """Speak text with pyttsx3 on a worker thread so speech can be interrupted.

    Each call to `say` starts a daemon thread that queues the text on the
    engine and pumps the engine's event loop by hand (`startLoop(False)` +
    `iterate()`), while the calling thread waits on the `skip` event.
    """

    def __init__(self, skip, play_beep):
        """Create the engine and hook the 'finished-utterance' event.

        Args:
            skip: Event-like object providing `is_set()` and `wait(timeout)`;
                `say` polls it so the caller can cut the wait short.
            play_beep: Accepted but not used anywhere in this class.
        """
        self.t = None            # worker thread of the utterance in progress
        self._running = False    # loop flag read by `process_speech`
        self.engine = pyttsx3.init()
        self.skip = skip
        self.engine.connect('finished-utterance', self.onEnd)

    def onEnd(self, name, completed):
        """Handle 'finished-utterance': log the event and stop the worker."""
        print('finishing : ', name, completed)
        self.stop()

    def on_finished_utterance(self, name, completed):
        """Alternative end-of-utterance handler (not connected in `__init__`).

        NOTE(review): `killme` is not defined in this class, so calling this
        handler as written raises AttributeError - confirm where it is meant
        to come from.
        """
        print('END')
        # `args` must be a tuple; `(self.engine)` is just the engine itself.
        t = Thread(target=self.killme, args=(self.engine,), daemon=True)
        t.start()

    def process_speech(self, text):
        """Thread target: queue `text` and iterate the engine loop manually.

        The loop keeps iterating until `stop` clears `_running`.
        """
        self.engine.say(str(text))
        self.engine.startLoop(False)
        while self._running:
            self.engine.iterate()

    def say(self, text, length=2):
        """Speak `text` in a worker thread and wait up to `length` seconds.

        Any utterance still in progress is stopped first. The wait ends early
        as soon as the `skip` event is set.

        Args:
            text: Value to speak; converted with `str()` by the worker.
            length: Maximum number of seconds to wait before returning.
        """
        # Interrupt the previous utterance if its worker is still active.
        if self.t and self._running:
            self.stop()

        # Iterate the speech loop in a thread so this call stays interruptible.
        self.t = Thread(target=self.process_speech, args=(text,), daemon=True)
        self._running = True
        self.t.start()

        elapsed_seconds = 0
        poll_interval = .1
        while not self.skip.is_set() and elapsed_seconds < length:
            self.skip.wait(poll_interval)
            elapsed_seconds += poll_interval

    def stop(self):
        """Stop the worker loop and, best effort, end the engine loop.

        Both clean-up steps are deliberately best-effort: ending a loop that
        is not running, or joining when there is no joinable thread, is
        ignored. Only `Exception` subclasses are suppressed so that
        KeyboardInterrupt and SystemExit still propagate.
        """
        self._running = False
        try:
            self.engine.endLoop()
        except Exception:
            pass
        try:
            self.t.join()
        except Exception:
            pass

# Demo: speak two phrases back to back, waiting up to 2 seconds for each.
skip = Event()
myVoice = Voice(skip=skip, play_beep=0)
myVoice.say(text="test", length=2)
myVoice.say(text="test two", length=2)

Solution

Question: How can multiple say() calls be fired / ended based on the event 'finished-utterance'?

This implementation is based on running-a-driver-event-loop from pyttsx3.readthedocs.io.

Note: The TTSThread starts when it is instantiated and runs forever!
You have to call .terminate() at EXIT __main__ to prevent waiting forever!

# TTS.py
import threading, time, pyttsx3


class TTSThread(threading.Thread):
    """Worker thread that owns a pyttsx3 engine and speaks queued text.

    The thread starts itself at the end of `__init__` and keeps running until
    `terminate()` is called. Text is queued with `say()`; the worker speaks
    one queued item at a time by calling `engine.say(...)` followed by
    `engine.startLoop()`. The loop is ended from the 'finished-utterance'
    callback, after which `on_finished_utterance` is invoked as a hook that
    subclasses may override.
    """

    def __init__(self, rate=115, event=None):
        """Set up the queue and control events, then start the thread.

        Args:
            rate: Value applied to the engine's 'rate' property.
            event: Optional attribute name; when given, a fresh
                `threading.Event` is stored on the instance under that name.
        """
        super().__init__()

        if event:
            setattr(self, event, threading.Event())

        self._cancel = threading.Event()   # set -> stop speaking queued text
        self.rate = rate
        self.engine = None                 # created inside `run()`

        self._say = threading.Event()      # set -> new text has been queued
        self._text_lock = threading.Lock() # guards mutation of `_text`
        self._text = []                    # FIFO of (text, name) tuples

        self._is_alive = threading.Event() # cleared by `terminate()`
        self._is_alive.set()
        self.start()

    def _init_engine(self, rate):
        """Create the engine, apply `rate` and connect the event callbacks."""
        engine = pyttsx3.init()
        engine.setProperty('rate', rate)  # setting up new voice rate
        engine.connect('finished-utterance', self._on_completed)
        engine.connect('started-word', self._on_cancel)
        return engine

    def say(self, text, stop=None):
        """Queue text to be spoken by the worker thread.

        Ignored once `terminate()` has been called. Any pending cancel
        request is cleared first.

        Args:
            text: A string, or a list/tuple whose items are either strings
                or tuples that are unpacked as the positional arguments of
                `engine.say`. Other types are silently ignored.
            stop: Only used when `text` is a single string; it is queued
                together with the text and forwarded as the second
                positional argument of `engine.say`.
        """
        if not self._is_alive.is_set():
            return

        self._cancel.clear()

        if isinstance(text, str):
            text = [(text, stop)]

        if isinstance(text, (list, tuple)):
            for item in text:
                if isinstance(item, str):
                    item = item, None

                with self._text_lock:
                    self._text.append(item)

                # Wake the worker after every queued item.
                self._say.set()

    def cancel(self):
        """Request cancellation; acted upon at the next 'started-word' event."""
        self._cancel.set()

    def _on_cancel(self, name, location, length):
        """'started-word' callback: stop speaking if a cancel is pending."""
        if self._cancel.is_set():
            self.stop()

    def stop(self):
        """Stop the current utterance and end the engine loop.

        Does nothing when the engine has not been created yet, which is the
        case until the worker thread has executed the first line of `run()`.
        """
        if self.engine is None:
            return
        self.engine.stop()
        time.sleep(0.5)
        self.engine.endLoop()

    def _on_completed(self, name, completed):
        """'finished-utterance' callback: end the loop and call the hook.

        Nothing is done for utterances reported as not completed.
        """
        if completed:
            self.engine.endLoop()
            self.on_finished_utterance(name, completed)

    def on_finished_utterance(self, name, completed):
        """Hook called after a completed utterance; no-op by default."""
        pass

    def terminate(self):
        """Ask the worker to finish and wait for the thread to exit."""
        self._is_alive.clear()
        self._cancel.set()
        self.join()

    def run(self):
        """Thread body: create the engine and speak queued text until terminated."""
        self.engine = engine = self._init_engine(self.rate)
        while self._is_alive.is_set():
            # Poll with a timeout so a cleared `_is_alive` is noticed promptly.
            while self._say.wait(0.1):
                self._say.clear()

                while not self._cancel.is_set() and self._text:
                    # Hold the lock only for the queue mutation, not while
                    # handing the text to the engine.
                    with self._text_lock:
                        utterance = self._text.pop(0)
                    engine.say(*utterance)
                    # Blocks until a callback calls `endLoop()`.
                    engine.startLoop()

Usage 1: Pass multiple sentences at once and automatically run engine.say(...) for all sentences.

from TTS import TTSThread

SAY = ["Use your head to save your feet.", "Time will tell.", "Strike while the iron is hot."]

class Voice(TTSThread):
    """TTSThread preconfigured with rate=115 and no extra event attribute."""

    def __init__(self):
        # `rate` is the first parameter of TTSThread.__init__.
        super().__init__(115)

if __name__ == "__main__":
    # `from TTS import TTSThread` does not bring `time` into scope, so it is
    # imported here for the sleep loop below.
    import time

    voice = Voice()
    try:
        voice.say(SAY)

        # Simulate __main__.is_alive: keep the main thread busy for
        # 100 ticks of 0.1 s while the worker speaks.
        for _ in range(100):
            time.sleep(0.1)
    finally:
        # The worker is a non-daemon thread that runs until terminated, so
        # always stop it, even if the wait above is interrupted.
        voice.terminate()

print('EXIT __main__')

Usage 2: Pass one sentence after the other, depending on the event 'finished-utterance'. (SAY is the same list of sentences as in Usage 1.)

from TTS import TTSThread
import time

class Voice(TTSThread):
    """TTSThread that signals `completed` after each finished utterance."""

    def __init__(self):
        # Declared before the base initialiser, which replaces it with a
        # `threading.Event` because of `event='completed'`.
        self.completed = None
        super().__init__(rate=115, event='completed')

    def on_finished_utterance(self, name, completed):
        """
        Overrides `TTSThread.on_finished_utterance`,
        which is called for the event 'finished-utterance'.

        While sentences remain in the module-level `SAY` list, wait 1.5 s
        and set `self.completed`; otherwise only report the end.
        """
        if not SAY:
            print('finishing')
            return

        print('finishing[{}], delay next sentence {} sec.'.format(count, 1.5))
        time.sleep(1.5)
        self.completed.set()


if __name__ == "__main__":
    voice = Voice()

    # Start simulation, `say(...)` while __main__ is running.
    # Setting the event up front makes the loop below queue the first sentence.
    voice.completed.set()

    _terminate = 100
    # Kept as a float so the tick values print as "1.0", "20.0", ...
    # `Voice.on_finished_utterance` reads this module-level name as well.
    count = 0.0

    while True:
        time.sleep(0.1)
        count += 1

        # The worker sets `completed` after each utterance; queue the next one.
        if voice.completed.is_set():
            voice.completed.clear()

            if SAY:
                print('.say add[{}]: "{}..."'.format(count, SAY[0][:10]))
                voice.say(SAY.pop(0))

        if count % 20 == 0:
            print('__main__ {}'.format(count))

        if count >= _terminate:
            voice.terminate()
            # Leave the loop once the worker has been stopped; without this
            # the script would never reach the final print.
            break

print('EXIT __main__')

Output:

.say add[1.0]: "Use your h..."
finishing[18.0], delay next sentence 1.5 sec.
__main__ 20.0
.say add[34.0]: "Time will ..."
__main__ 40.0
finishing[51.0], delay next sentence 1.5 sec.
__main__ 60.0
.say add[67.0]: "Strike whi..."
__main__ 80.0
finishing
__main__ 100.0
EXIT __main__

Tested with Python: 3.5 - pyttsx3 Version: 2.71