Search code examples
pythonmultithreadingsqliteqthreadpyside2

Concerned about race conditions while accessing SQLite3 database connection that's accessed in thread invoked by Pynput listener inside a QThread


I'm writing a Windows application with Pyside2. Due to the nature of how I'm using multithreading, I'm having to interact with the same Sqlite3 database in multiple threads. I've created a <100 line Minimal, complete, verifiable example that nearly identically replicates the problem.

The problem: I'm currently using the pynput module to monitor key activity in the background once the PushButton has been pressed, while the Qt GUI is out of focus for a hotkey combination of "j" + "k". Once the hot key combination is pressed, a screenshot is taken, the image is processed via OCR and saved to a database along with the OCR text. The image path is sent through a a series of connected signals to the main GUI thread. The key monitoring happens in another QThread to prevent the key monitoring and image processing from affecting the main Qt event loop from running. Once the QThread starts and emits it's start signal I call the monitor_for_hot_key_combo function in the key_monitor instance which instantiates listeneras a threading.Thread, which is assigned key_monitor member functions on_release and on_press as callbacks, which are called every time a key is pressed.

This is where the problem lies. Those callbacks interact with the imageprocessing_obj instance of the image_processclass in a different thread than the class was instantiated in. Therefore, when image_process member functions are interacted with that use the SQlite database, they do so in a separate thread than the database connection was created in. Now, SQLite "can be safely used by multiple threads provided that no single database connection is used simultaneously in two or more threads". To allow this you have to set the check_same_thread argument for sqlite3.connect() to False. However, Id rather avoid this mulithreaded access of the database if possible to prevent undefined behavior.

The Possible solution: I've been wondering if the two threads, both threading.Thread and QThread aren't necessary and it can all be done within the Pynput thread. However, I can't seem to figure out how to just use the Pynput thread while still being able to send signals back to the main Qt event loop.

qtui.py

from PySide2 import QtCore, QtWidgets
from PySide2.QtCore import *
import HotKeyMonitor

class Ui_Form(object):
    def __init__(self):
        self.worker = None

    def setupUi(self, Form):
        Form.setObjectName("Form")
        Form.resize(400, 300)
        self.pressbutton = QtWidgets.QPushButton(Form)
        self.pressbutton.setObjectName("PushButton")
        self.pressbutton.clicked.connect(self.RunKeyMonitor)
        self.retranslateUi(Form)
        QtCore.QMetaObject.connectSlotsByName(Form)

    def retranslateUi(self, Form):
        Form.setWindowTitle(QtWidgets.QApplication.translate("Form", "Form", None, -1))
        self.pressbutton.setText(QtWidgets.QApplication.translate("Form", "Press me", None, -1))

    def RunKeyMonitor(self):
        self.Thread_obj = QThread()
        self.HotKeyMonitor_Obj = HotKeyMonitor.key_monitor()
        self.HotKeyMonitor_Obj.moveToThread(self.Thread_obj)
        self.HotKeyMonitor_Obj.image_processed_km.connect(self.print_OCR_result)
        self.Thread_obj.started.connect(self.HotKeyMonitor_Obj.monitor_for_hotkey_combo)
        self.Thread_obj.start()

    def print_OCR_result(self, x):
        print("Slot being called to print image path string")
        print(x)
if __name__ == "__main__":
    import sys
    app = QtWidgets.QApplication(sys.argv)
    Form = QtWidgets.QWidget()
    ui = Ui_Form()
    ui.setupUi(Form)
    Form.show()
    sys.exit(app.exec_())

HotKeyMonitor.py

from pynput import keyboard
from PySide2.QtCore import QObject, Signal

import imageprocess
class key_monitor(QObject):
    image_processed_km = Signal(str)
    def __init__(self):
        super().__init__()
        self.prev_key = None
        self.listener = None
        self.imageprocessing_obj = imageprocess.image_process()
        self.imageprocessing_obj.image_processed.connect(self.image_processed_km.emit)


    def on_press(self,key):
        pass

    def on_release(self,key):
        if type(key) == keyboard._win32.KeyCode:

            if key.char.lower() == "j":
                self.prev_key = key.char.lower()
            elif key.char.lower() == "k" and self.prev_key == "j":
                print("key combination j+k pressed")
                self.prev_key = None
                self.imageprocessing_obj.process_image()
        else:
            self.prev_key = None

    def stop_monitoring(self):
        self.listener.stop()

    def monitor_for_hotkey_combo(self):
        with keyboard.Listener(on_press=self.on_press, on_release = self.on_release) as self.listener:self.listener.join()

imageprocess.py

import uuid,os,sqlite3,pytesseract
from PIL import ImageGrab
from PySide2.QtCore import QObject, Signal

class image_process(QObject):
    image_processed = Signal(str)
    def __init__(self):
        super().__init__()
        self.screenshot = None
        self.db_connection = sqlite3.connect("testdababase.db", check_same_thread=False)
        self.cursor = self.db_connection.cursor()
        self.cursor.execute("CREATE TABLE IF NOT EXISTS testdb (OCRstring text, filepath text)")

    def process_image(self):
        self.screenshot = ImageGrab.grab()
        self.screenshot_path =  os.getcwd() + "\\" + uuid.uuid4().hex + ".jpg"
        self.screenshot.save(self.screenshot_path )
        self.ocr_string = pytesseract.image_to_string(self.screenshot)
        self.cursor.execute("INSERT INTO testdb (OCRstring, filepath) VALUES (?,?)",(self.ocr_string, self.screenshot_path))
        self.image_processed.emit(self.screenshot_path)

Solution

  • First of all a QThread is not a Qt thread, that is, it is not a new type of thread, QThread is a class that manages the native threads of each platform. so the thread that handles QThread has the same characteristics of threading.Thread.

    On the other hand the goal of using threads in a GUI is not to block the main thread called GUI thread, in your pynput it already has its thread so there would be no problems. The other task that is blocking is that of the OCR, so we must execute it in a new thread. The task of the database is not expensive, so it is not necessary to create a thread.

    keymonitor.py

    from pynput import keyboard
    import time
    from PySide2 import QtCore
    
    class KeyMonitor(QtCore.QObject):
        letterPressed = QtCore.Signal(str)
    
        def __init__(self, parent=None):
            super().__init__(parent)
            self.listener = keyboard.Listener(on_release = self.on_release)
    
        def on_release(self,key):
            if type(key) == keyboard._win32.KeyCode:
                self.letterPressed.emit(key.char.lower())
    
        def stop_monitoring(self):
            self.listener.stop()
    
        def start_monitoring(self):
            self.listener.start()
    

    imageprocess.py

    import uuid
    import pytesseract
    
    from PIL import ImageGrab
    
    from PySide2 import QtCore
    
    class ProcessWorker(QtCore.QObject):
        processSignal = QtCore.Signal(str, str)
    
        def doProcess(self):
            screenshot = ImageGrab.grab()
            screenshot_path =  QtCore.QDir.current().absoluteFilePath(uuid.uuid4().hex+".jpg")
            screenshot.save(screenshot_path )
            print("start ocr")
            ocr_string = pytesseract.image_to_string(screenshot)
            print(ocr_string, screenshot_path)
            self.processSignal.emit(ocr_string, screenshot_path)
            self.thread().quit()
    

    main.py

    from keymonitor import KeyMonitor
    from imageprocess import ProcessWorker
    from PySide2 import QtCore, QtWidgets
    
    import sqlite3
    
    class Widget(QtWidgets.QWidget):
        def __init__(self, parent=None):
            super().__init__(parent)
    
            self.last_letter = ""
            self.current_letter = ""
    
            lay = QtWidgets.QVBoxLayout(self)
            button = QtWidgets.QPushButton("Start")
            button.clicked.connect(self.onClicked)
            lay.addWidget(button)
    
            self.keymonitor = KeyMonitor()
            self.keymonitor.letterPressed.connect(self.onLetterPressed)
    
            self.db_connection = sqlite3.connect("testdababase.db")
            self.cursor = self.db_connection.cursor()
            self.cursor.execute("CREATE TABLE IF NOT EXISTS testdb (OCRstring text, filepath text)")
            self.threads = []
    
        def onClicked(self):
            self.keymonitor.start_monitoring()
    
        def onLetterPressed(self, letter):
            if self.last_letter:
                if self.current_letter:
                    self.last_letter = self.current_letter
                self.current_letter = letter
            else:
                self.last_letter = letter
    
            if self.last_letter == "j" and self.current_letter == "k":
                print("j+k")
                self.start_processing()
    
        def start_processing(self):
            thread = QtCore.QThread()
            self.worker = ProcessWorker()
            self.worker.processSignal.connect(self.onProcessSignal)
            self.worker.moveToThread(thread)
            thread.started.connect(self.worker.doProcess)
            thread.finished.connect(self.worker.deleteLater)
            thread.finished.connect(lambda th=thread: self.threads.remove(th))
            thread.start()
            self.threads.append(thread)
    
        def onProcessSignal(self, ocr, path):
            print(ocr, path)
            self.cursor.execute("INSERT INTO testdb (OCRstring, filepath) VALUES (?,?)",(ocr, path))
            self.db_connection.commit()
    
    if __name__ == "__main__":
        import sys
        app = QtWidgets.QApplication(sys.argv)
        w = Widget()
        w.show()
        sys.exit(app.exec_())