Search code examples
pythonuser-interfacetkinterartificial-intelligencemediapipe

How do I print my MediaPipe model's output on Tkinter GUI? The gestureRecognizerResult prints in terminal and only shows as a NoneType variable


I'm making a GUI for a sign language interpreter, using a MediaPipe model built with an ASL dataset from Kaggle. This is my first time working with Tkinter, and I'm trying to take the result from print_result and display it in the result_label label in my Tkinter window. I created a result_string variable and a get_result function to update that variable and then update the label's text, but I'm getting an error:

WARNING: Logging before InitGoogleLogging() is written to STDERR
AttributeError: type object 'GestureRecognizerResult' has no attribute 'gestures'

I'm using a .task file from my mediapipe model imported in my workspace

UPDATED START FUNCTION:

'''
def start(self):
    # Get a frame from the video source
    #self.Signing = True
    while True:
        ret, frame = self.vid.get_frame()

        if ret:
            #cv2.imwrite("frame-" + time.strftime("%d-%m-%Y-%H-%M-%S") + ".jpg", cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
            SIZE = 64   #USING THE SNAPSHOT, RESIZING AND PUTTING THROUGH MODEL
            image = frame
            img = cv2.resize(image, (SIZE, SIZE))
            flip = cv2.flip(img, 1)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=flip)
            result_string = " "

            images = []
            results = []
            BaseOptions = mp.tasks.BaseOptions
            GestureRecognizer = mp.tasks.vision.GestureRecognizer
            GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
            GestureRecognizerResult = mp.tasks.vision.GestureRecognizerResult
            VisionRunningMode = mp.tasks.vision.RunningMode

            def print_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
                for gesture_category in result.gestures[0]:
                    category_name = gesture_category.category_name
                    return_string = category_name
                return result_string


        
            # Create a gesture recognizer instance with the live stream mode:
            options = GestureRecognizerOptions(
                base_options=BaseOptions(model_asset_path='C:\\Users\\HP\\Desktop\\asl_gui\\gesture_recognizer.task'),
                running_mode=VisionRunningMode.LIVE_STREAM,
                result_callback=print_result)
            with GestureRecognizer.create_from_options(options) as recognizer:
                # STEP 3: Recognize gestures in the input image.
                recognition_result = recognizer.recognize_async(mp_image, 100000)


                print(format(result_string)) #just to check

            if result_string:
                self.result_label.config(text=result_string)
            else:
                self.result_label.config(text="No gesture detected")

        time.sleep(2)

'''

COMPLETE CODE WITH OLD START FUNCTION:


import tkinter
import cv2
import PIL.Image, PIL.ImageTk
import time
import matplotlib as plt
import cv2
import mediapipe as mp
from mediapipe.tasks.python import vision
from mediapipe.tasks.python import text 
from mediapipe.tasks import python
from mediapipe.framework.formats import landmark_pb2

class App:
     """Tkinter window with a camera preview, Start/End buttons and a result label.

     Constructing an instance builds the widgets, starts the periodic
     ``update`` refresh and then enters ``window.mainloop()``.

     NOTE(review): this is the version that produces the reported
     ``AttributeError``; the review notes inside ``start`` mark the causes
     visible in the code.
     """

     def __init__(self, window, window_title, video_source=0):
        """Build the UI, open the video source and run the Tk main loop.

        Args:
            window: Tk root that the widgets are attached to.
            window_title: Text passed to ``window.title``.
            video_source: Value handed to ``MyVideoCapture`` (default ``0``).
        """
        self.window = window
        self.window.title(window_title)
        frame = tkinter.Frame(master=window,bg="skyblue",padx=10)
        frame.pack()
        self.video_source = video_source
        #self.Signing = False

        # Open the video source (MyVideoCapture raises ValueError if it cannot be opened)
        self.vid = MyVideoCapture(self.video_source)

        # Create a canvas sized from the width/height reported by the video source
        self.canvas = tkinter.Canvas(window, width = self.vid.width, height = self.vid.height)
        self.canvas.pack()

        # Start button: its command is self.start
        self.btn_start=tkinter.Button(window, text="Start Signing", width=50, command=self.start,state=tkinter.NORMAL)

        # End button: its command is self.stop
        self.btn_stop=tkinter.Button(window,text="End Message",width=50,command=self.stop)


        # Label whose text is meant to show the recognized gesture
        self.result_label = tkinter.Label(window,text="Result: ",font=('Calibri 15 bold'))

        self.result_label.pack(anchor=tkinter.CENTER,expand=True)
        self.btn_start.pack(anchor=tkinter.CENTER, expand=True)
        self.btn_stop.pack(anchor=tkinter.CENTER, expand=True)

        # update() re-schedules itself every self.delay milliseconds once called
        self.delay = 60
        self.update()
        self.window.mainloop()

     def start(self):
        """Handler for the "Start Signing" button: read frames and run the recognizer.

        NOTE(review): problems visible in this body:
          * ``while True`` has no exit and sleeps 2 s per pass; since this is a
            button command it presumably keeps ``mainloop`` from handling
            events while it runs -- confirm.
          * The recognizer is re-created on every pass of the loop.
          * ``recognize_async`` is always given the same timestamp (100000).
          * ``get_result`` is called with the ``GestureRecognizerResult`` class
            itself instead of a result instance.
        """
        # Get a frame from the video source
        #self.Signing = True
        while True:
            ret, frame = self.vid.get_frame()

            if ret:
                #cv2.imwrite("frame-" + time.strftime("%d-%m-%Y-%H-%M-%S") + ".jpg", cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
                SIZE = 64   # the frame is resized to SIZE x SIZE before it goes to the model
                image = frame
                img = cv2.resize(image, (SIZE, SIZE))
                flip = cv2.flip(img, 1)
                mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=flip)
                result_string = " "  # a single space is truthy, so the else branch below is never taken

                images = []   # never used below
                results = []  # never used below
                BaseOptions = mp.tasks.BaseOptions
                GestureRecognizer = mp.tasks.vision.GestureRecognizer
                GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
                GestureRecognizerResult = mp.tasks.vision.GestureRecognizerResult
                VisionRunningMode = mp.tasks.vision.RunningMode

                # Registered below as result_callback; it only prints the category names.
                def print_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
                    for gesture_category in result.gestures[0]:
                        category_name = gesture_category.category_name
                        return_string = category_name  # assigned but never read
                        print(format(category_name))

                # NOTE(review): the assignment to result_string here creates a new
                # local name; without `nonlocal` the outer result_string is unchanged.
                def get_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
                    for gesture_category in result.gestures[0]:
                        category_name = gesture_category.category_name
                        result_string = category_name


                # Create a gesture recognizer instance with the live stream mode:
                options = GestureRecognizerOptions(
                    base_options=BaseOptions(model_asset_path='C:\\Users\\HP\\Desktop\\asl_gui\\gesture_recognizer.task'),
                    running_mode=VisionRunningMode.LIVE_STREAM,result_callback=print_result)
                with GestureRecognizer.create_from_options(options) as recognizer:
                    # STEP 3: Recognize gestures in the input image.
                    # NOTE(review): in LIVE_STREAM mode the result is delivered to
                    # result_callback; recognition_result is presumably None -- confirm.
                    recognition_result = recognizer.recognize_async(mp_image, 100000)

                    # NOTE(review): this passes the GestureRecognizerResult class, not an
                    # instance, so `result.gestures` inside get_result matches the reported
                    # "type object 'GestureRecognizerResult' has no attribute 'gestures'".
                    get_result(GestureRecognizerResult,mp_image,100000)

                    if result_string:
                        # NOTE(review): result_string is a str here, so result_string[0]
                        # is a one-character str and has no `category_name` attribute.
                        category_name = result_string[0].category_name
                        self.result_label.config(text=category_name)
                    else:
                        self.result_label.config(text="No gesture detected")

            time.sleep(2)



     def stop(self):
            """Handler for the "End Message" button; it only disables the start button.

            NOTE(review): nothing here ends the loop running in ``start``.
            """
            self.btn_start['state'] = tkinter.DISABLED


     def update(self):
        """Draw the latest frame on the canvas and re-schedule itself after ``self.delay`` ms."""
        # Get a frame from the video source
        ret, frame = self.vid.get_frame()

        if ret:
            self.photo = PIL.ImageTk.PhotoImage(image = PIL.Image.fromarray(frame))
            self.canvas.create_image(0, 0, image = self.photo, anchor = tkinter.NW)

        self.window.after(self.delay, self.update)
    
       
              
 
class MyVideoCapture:
    """Small wrapper around ``cv2.VideoCapture`` that hands out RGB frames.

    Attributes:
        vid: The underlying ``cv2.VideoCapture`` object.
        width: Frame width as reported by ``CAP_PROP_FRAME_WIDTH``.
        height: Frame height as reported by ``CAP_PROP_FRAME_HEIGHT``.
    """

    def __init__(self, video_source=0):
        """Open ``video_source`` and read its frame dimensions.

        Raises:
            ValueError: If the video source cannot be opened.
        """
        # Open the video source
        self.vid = cv2.VideoCapture(video_source)
        if not self.vid.isOpened():
            raise ValueError("Unable to open video source", video_source)

        # Get video source width and height
        self.width = self.vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        self.height = self.vid.get(cv2.CAP_PROP_FRAME_HEIGHT)

    def get_frame(self):
        """Read one frame.

        Returns:
            ``(True, frame)`` with the frame converted from BGR to RGB on
            success, otherwise ``(False, None)`` -- including when the capture
            is no longer open.
        """
        if not self.vid.isOpened():
            # The original returned `ret` here before it was ever assigned,
            # which raised UnboundLocalError instead of reporting failure.
            return (False, None)

        ret, frame = self.vid.read()
        if not ret:
            return (ret, None)
        # OpenCV delivers BGR; PIL/Tk and MediaPipe SRGB images expect RGB.
        return (ret, cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    # Release the video source when the object is destroyed
    def __del__(self):
        # `vid` is missing if __init__ failed before assigning it.
        vid = getattr(self, "vid", None)
        if vid is not None and vid.isOpened():
            vid.release()

    
 #Create a window and pass it to the Application object
# Only launch the GUI when run as a script, so importing this module does
# not open the webcam and a window as a side effect.
if __name__ == "__main__":
    App(tkinter.Tk(), "ASL Interpreter")

Solution

  • Try out this modified script; make sure to change the path to gesture_recognizer.task:

    import tkinter
    import cv2
    import PIL.Image, PIL.ImageTk
    import cv2
    import mediapipe as mp
    
    
    class App:
        """Tkinter front end for live MediaPipe gesture recognition.

        The window shows the camera preview, Start/End buttons and a label
        with the most recent gesture. Constructing an instance builds the
        widgets, creates the recognizer once and then blocks in
        ``window.mainloop()``.
        """

        def __init__(self, window, window_title, video_source=0,
                     model_path='gesture_recognizer.task'):
            """Build the UI, open the camera, create the recognizer and run Tk.

            Args:
                window: Tk root that the widgets are attached to.
                window_title: Text passed to ``window.title``.
                video_source: Value handed to ``MyVideoCapture`` (default ``0``).
                model_path: Path to the ``.task`` model file; defaults to the
                    previously hard-coded ``'gesture_recognizer.task'``.
            """
            self.window = window
            self.window.title(window_title)
            frame = tkinter.Frame(master=window, bg="skyblue", padx=10)
            frame.pack()
            self.video_source = video_source
            self.model_path = model_path
            self.is_recognition_enabled = False
            self.recognized_gesture = None
            # Open the video source (raises ValueError if it cannot be opened)
            self.vid = MyVideoCapture(self.video_source)

            # Create a canvas sized from the video source dimensions
            self.canvas = tkinter.Canvas(window, width=self.vid.width, height=self.vid.height)
            self.canvas.pack()

            # Buttons that switch recognition on and off
            self.btn_start = tkinter.Button(window, text="Start Signing", width=50, command=self.start, state=tkinter.NORMAL)

            self.btn_stop = tkinter.Button(window, text="End Message", width=50, command=self.stop)
            self.btn_stop["state"] = tkinter.DISABLED

            self.result_label = tkinter.Label(window, text="Result: ", font=('Calibri 15 bold'))
            self.result_label.pack(anchor=tkinter.CENTER, expand=True)

            self.btn_start.pack(anchor=tkinter.CENTER, expand=True)
            self.btn_stop.pack(anchor=tkinter.CENTER, expand=True)

            self.timestamp = 0
            self.__create_recognizer()

            # Close the recognizer when the user closes the window; previously
            # it was never shut down.
            self.window.protocol("WM_DELETE_WINDOW", self._on_close)

            # After it is called once, update() re-schedules itself every `delay` ms
            self.delay = 60
            self.update()
            self.window.mainloop()

        def start(self):
            """Enable recognition and swap which button is active."""
            self.btn_start["state"] = tkinter.DISABLED
            self.btn_stop["state"] = tkinter.NORMAL
            self.is_recognition_enabled = True

        def result_callback(self, result, output_image, timestamp_ms):
            """Store the top gesture of the first entry in ``result.gestures``.

            Only ``self.recognized_gesture`` is written here; ``update`` copies
            it into the label (presumably because this callback does not run
            on the Tk thread -- confirm).
            """
            first_gesture = "No gestures"
            # Guard both levels: no entries at all, or an empty first entry
            # (the latter used to raise IndexError).
            if result.gestures and result.gestures[0]:
                first_gesture = "Category: " + result.gestures[0][0].category_name
                print(f"First recognized gesture: {first_gesture}")
            self.recognized_gesture = first_gesture

        def __create_recognizer(self):  # recognizer should be created only once
            """Create the LIVE_STREAM gesture recognizer from ``self.model_path``."""
            BaseOptions = mp.tasks.BaseOptions
            GestureRecognizer = mp.tasks.vision.GestureRecognizer
            GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
            VisionRunningMode = mp.tasks.vision.RunningMode
            # Create a gesture recognizer instance with the live stream mode:
            options = GestureRecognizerOptions(
                base_options=BaseOptions(model_asset_path=self.model_path),
                running_mode=VisionRunningMode.LIVE_STREAM,
                result_callback=self.result_callback)
            self.recognizer = GestureRecognizer.create_from_options(options)

        def recognize(self, img):
            """Resize and mirror ``img``, then submit it for asynchronous recognition."""
            SIZE = 64   # the frame is resized to SIZE x SIZE before it goes to the model
            img = cv2.resize(img, (SIZE, SIZE))
            flip = cv2.flip(img, 1)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=flip)
            # The result arrives via result_callback; recognize_async returns None.
            self.recognizer.recognize_async(mp_image, self.timestamp)
            # The timestamp must *always* increase monotonically (otherwise an
            # exception is raised); this is required by LIVE_STREAM mode.
            self.timestamp += 1

        def stop(self):
            """Disable recognition, swap the buttons back and show "Stopped"."""
            self.is_recognition_enabled = False
            self.btn_stop["state"] = tkinter.DISABLED
            self.btn_start["state"] = tkinter.NORMAL
            self.result_label.config(text="Stopped")

        def _on_close(self):
            """Window-close handler: stop recognizing, close the recognizer, destroy the window."""
            self.is_recognition_enabled = False
            self.recognizer.close()
            self.window.destroy()

        def update(self):
            """Draw the latest frame, feed it to the recognizer if enabled, and re-schedule."""
            # Get a frame from the video source
            ret, frame = self.vid.get_frame()
            if ret:
                self.photo = PIL.ImageTk.PhotoImage(image=PIL.Image.fromarray(frame))
                self.canvas.create_image(0, 0, image=self.photo, anchor=tkinter.NW)

                if self.is_recognition_enabled:
                    self.recognize(frame)
                    # Nothing to show until the first callback has delivered a result.
                    if self.recognized_gesture is not None:
                        self.result_label.config(text=self.recognized_gesture)

            self.window.after(self.delay, self.update)
        
    
    class MyVideoCapture:
        """Wrapper around ``cv2.VideoCapture`` that returns frames in RGB order.

        Attributes:
            vid: The underlying ``cv2.VideoCapture`` object.
            width: Frame width as reported by ``CAP_PROP_FRAME_WIDTH``.
            height: Frame height as reported by ``CAP_PROP_FRAME_HEIGHT``.
        """

        def __init__(self, video_source=0):
            """Open ``video_source`` and read its frame dimensions.

            Raises:
                ValueError: If the video source cannot be opened.
            """
            # Open the video source
            self.vid = cv2.VideoCapture(video_source)
            if not self.vid.isOpened():
                raise ValueError("Unable to open video source", video_source)

            # Get video source width and height
            self.width = self.vid.get(cv2.CAP_PROP_FRAME_WIDTH)
            self.height = self.vid.get(cv2.CAP_PROP_FRAME_HEIGHT)

        def get_frame(self):
            """Read one frame.

            Returns:
                ``(True, frame)`` with the frame converted from BGR to RGB on
                success; ``(False, None)`` if the read fails or the capture is
                not open.
            """
            if self.vid.isOpened():
                ret, frame = self.vid.read()
                if ret:
                    # OpenCV delivers BGR; convert to RGB for PIL/Tk and MediaPipe.
                    return (ret, cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                return (ret, None)
            # `ret` was never assigned on this path in the original, so
            # returning it raised UnboundLocalError; report failure explicitly.
            return (False, None)

        # Release the video source when the object is destroyed
        def __del__(self):
            # `vid` is missing if __init__ failed before assigning it.
            vid = getattr(self, "vid", None)
            if vid is not None and vid.isOpened():
                vid.release()
    
        
    #Create a window and pass it to the Application object
    # Only launch the GUI when run as a script, so importing this module does
    # not open the webcam and a window as a side effect.
    if __name__ == "__main__":
        App(tkinter.Tk(), "ASL Interpreter")
    

    Please note:

    • GestureRecognizer should not be recreated at each frame, otherwise, it completely erases the benefit of the LIVE_STREAM mode.
    • Recognition should run from update() rather than inside the btn_start handler.
    • It is not possible to update the Label directly from the result_callback (probably some tkinter-related threading specifics), that is why I use self.recognized_gesture.

    I additionally took care of the buttons' (de)activation, just to make it a little more user-friendly.

    You can see a short demo here:

    demo

    The output is not very stable; however, that is an issue with gesture_recognizer, not with the script. Also, take a look at another answer, where I additionally handle potential race conditions using a Lock.