I'm making a sign language interpreter GUI with a MediaPipe model trained on an ASL dataset from Kaggle. This is my first time working with Tkinter. I'm trying to take the result produced by the print_result callback and display it in the result_label label in my Tkinter window. I created a result_string variable and a get_result function to update that variable and then update the label's text, but I'm getting an error:
WARNING: Logging before InitGoogleLogging() is written to STDERR
AttributeError: type object 'GestureRecognizerResult' has no attribute 'gestures'
I'm using a .task file from my MediaPipe model, imported into my workspace.
UPDATED START FUNCTION:
def start(self):
    # Get a frame from the video source
    #self.Signing = True
    while True:
        ret, frame = self.vid.get_frame()
        if ret:
            #cv2.imwrite("frame-" + time.strftime("%d-%m-%Y-%H-%M-%S") + ".jpg", cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
            SIZE = 64  # USING THE SNAPSHOT, RESIZING AND PUTTING THROUGH MODEL
            image = frame
            img = cv2.resize(image, (SIZE, SIZE))
            flip = cv2.flip(img, 1)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=flip)
            result_string = " "
            images = []
            results = []
            BaseOptions = mp.tasks.BaseOptions
            GestureRecognizer = mp.tasks.vision.GestureRecognizer
            GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
            GestureRecognizerResult = mp.tasks.vision.GestureRecognizerResult
            VisionRunningMode = mp.tasks.vision.RunningMode

            def print_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
                for gesture_category in result.gestures[0]:
                    category_name = gesture_category.category_name
                    return_string = category_name
                return result_string

            # Create a gesture recognizer instance with the live stream mode:
            options = GestureRecognizerOptions(
                base_options=BaseOptions(model_asset_path='C:\\Users\\HP\\Desktop\\asl_gui\\gesture_recognizer.task'),
                running_mode=VisionRunningMode.LIVE_STREAM,
                result_callback=print_result)
            with GestureRecognizer.create_from_options(options) as recognizer:
                # STEP 3: Recognize gestures in the input image.
                recognition_result = recognizer.recognize_async(mp_image, 100000)
            print(format(result_string))  # just to check
            if result_string:
                self.result_label.config(text=result_string)
            else:
                self.result_label.config(text="No gesture detected")
        time.sleep(2)
COMPLETE CODE WITH OLD START FUNCTION:
import tkinter
import cv2
import PIL.Image, PIL.ImageTk
import time
import matplotlib as plt
import cv2
import mediapipe as mp
from mediapipe.tasks.python import vision
from mediapipe.tasks.python import text
from mediapipe.tasks import python
from mediapipe.framework.formats import landmark_pb2

class App:
    def __init__(self, window, window_title, video_source=0):
        self.window = window
        self.window.title(window_title)
        frame = tkinter.Frame(master=window, bg="skyblue", padx=10)
        frame.pack()
        self.video_source = video_source
        #self.Signing = False
        # open video source (by default this will try to open the computer webcam)
        self.vid = MyVideoCapture(self.video_source)
        # Create a canvas that can fit the above video source size
        self.canvas = tkinter.Canvas(window, width=self.vid.width, height=self.vid.height)
        self.canvas.pack()
        # Button that lets the user take a snapshot
        self.btn_start = tkinter.Button(window, text="Start Signing", width=50, command=self.start, state=tkinter.NORMAL)
        self.btn_stop = tkinter.Button(window, text="End Message", width=50, command=self.stop)
        self.result_label = tkinter.Label(window, text="Result: ", font=('Calibri 15 bold'))
        self.result_label.pack(anchor=tkinter.CENTER, expand=True)
        self.btn_start.pack(anchor=tkinter.CENTER, expand=True)
        self.btn_stop.pack(anchor=tkinter.CENTER, expand=True)
        # After it is called once, the update method will be automatically called every delay milliseconds
        self.delay = 60
        self.update()
        self.window.mainloop()

    def start(self):
        # Get a frame from the video source
        #self.Signing = True
        while True:
            ret, frame = self.vid.get_frame()
            if ret:
                #cv2.imwrite("frame-" + time.strftime("%d-%m-%Y-%H-%M-%S") + ".jpg", cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
                SIZE = 64  # USING THE SNAPSHOT, RESIZING AND PUTTING THROUGH MODEL
                image = frame
                img = cv2.resize(image, (SIZE, SIZE))
                flip = cv2.flip(img, 1)
                mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=flip)
                result_string = " "
                images = []
                results = []
                BaseOptions = mp.tasks.BaseOptions
                GestureRecognizer = mp.tasks.vision.GestureRecognizer
                GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
                GestureRecognizerResult = mp.tasks.vision.GestureRecognizerResult
                VisionRunningMode = mp.tasks.vision.RunningMode

                def print_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
                    for gesture_category in result.gestures[0]:
                        category_name = gesture_category.category_name
                        return_string = category_name
                        print(format(category_name))

                def get_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
                    for gesture_category in result.gestures[0]:
                        category_name = gesture_category.category_name
                        result_string = category_name

                # Create a gesture recognizer instance with the live stream mode:
                options = GestureRecognizerOptions(
                    base_options=BaseOptions(model_asset_path='C:\\Users\\HP\\Desktop\\asl_gui\\gesture_recognizer.task'),
                    running_mode=VisionRunningMode.LIVE_STREAM, result_callback=print_result)
                with GestureRecognizer.create_from_options(options) as recognizer:
                    # STEP 3: Recognize gestures in the input image.
                    recognition_result = recognizer.recognize_async(mp_image, 100000)
                    get_result(GestureRecognizerResult, mp_image, 100000)
                if result_string:
                    category_name = result_string[0].category_name
                    self.result_label.config(text=category_name)
                else:
                    self.result_label.config(text="No gesture detected")
            time.sleep(2)

    def stop(self):
        self.btn_start['state'] = tkinter.DISABLED

    def update(self):
        # Get a frame from the video source
        ret, frame = self.vid.get_frame()
        if ret:
            self.photo = PIL.ImageTk.PhotoImage(image=PIL.Image.fromarray(frame))
            self.canvas.create_image(0, 0, image=self.photo, anchor=tkinter.NW)
        self.window.after(self.delay, self.update)

class MyVideoCapture:
    def __init__(self, video_source=0):
        # Open the video source
        self.vid = cv2.VideoCapture(video_source)
        if not self.vid.isOpened():
            raise ValueError("Unable to open video source", video_source)
        # Get video source width and height
        self.width = self.vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        self.height = self.vid.get(cv2.CAP_PROP_FRAME_HEIGHT)

    def get_frame(self):
        if self.vid.isOpened():
            ret, frame = self.vid.read()
            if ret:
                # Return a boolean success flag and the current frame converted to BGR
                return (ret, cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            else:
                return (ret, None)
        else:
            return (ret, None)

    # Release the video source when the object is destroyed
    def __del__(self):
        if self.vid.isOpened():
            self.vid.release()

# Create a window and pass it to the Application object
App(tkinter.Tk(), "ASL Interpreter")
Try out this modified script; make sure to change the path to gesture_recognizer.task:
import tkinter
import cv2
import PIL.Image, PIL.ImageTk
import mediapipe as mp

class App:
    def __init__(self, window, window_title, video_source=0):
        self.window = window
        self.window.title(window_title)
        frame = tkinter.Frame(master=window, bg="skyblue", padx=10)
        frame.pack()
        self.video_source = video_source
        self.is_recognition_enabled = False
        self.recognized_gesture = None
        # open video source (by default this will try to open the computer webcam)
        self.vid = MyVideoCapture(self.video_source)
        # Create a canvas that can fit the above video source size
        self.canvas = tkinter.Canvas(window, width=self.vid.width, height=self.vid.height)
        self.canvas.pack()
        # Buttons that let the user start and stop recognition
        self.btn_start = tkinter.Button(window, text="Start Signing", width=50, command=self.start, state=tkinter.NORMAL)
        self.btn_stop = tkinter.Button(window, text="End Message", width=50, command=self.stop)
        self.btn_stop["state"] = tkinter.DISABLED
        self.result_label = tkinter.Label(window, text="Result: ", font=('Calibri 15 bold'))
        self.result_label.pack(anchor=tkinter.CENTER, expand=True)
        self.btn_start.pack(anchor=tkinter.CENTER, expand=True)
        self.btn_stop.pack(anchor=tkinter.CENTER, expand=True)
        self.timestamp = 0
        self.__create_recognizer()
        # After it is called once, the update method will be automatically called every delay milliseconds
        self.delay = 60
        self.update()
        self.window.mainloop()

    def start(self):
        self.btn_start["state"] = tkinter.DISABLED
        self.btn_stop["state"] = tkinter.NORMAL
        self.is_recognition_enabled = True

    def result_callback(self, result, output_image, timestamp_ms):
        first_gesture = "No gestures"
        if len(result.gestures) > 0:
            first_gesture = "Category: " + result.gestures[0][0].category_name
        print(f"First recognized gesture: {first_gesture}")
        self.recognized_gesture = first_gesture

    def __create_recognizer(self):  # the recognizer should be created only once
        BaseOptions = mp.tasks.BaseOptions
        GestureRecognizer = mp.tasks.vision.GestureRecognizer
        GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
        VisionRunningMode = mp.tasks.vision.RunningMode
        # Create a gesture recognizer instance with the live stream mode:
        options = GestureRecognizerOptions(
            base_options=BaseOptions(model_asset_path='gesture_recognizer.task'),
            running_mode=VisionRunningMode.LIVE_STREAM,
            result_callback=self.result_callback)
        self.recognizer = GestureRecognizer.create_from_options(options)

    def recognize(self, img):
        SIZE = 64  # USING THE SNAPSHOT, RESIZING AND PUTTING THROUGH MODEL
        img = cv2.resize(img, (SIZE, SIZE))
        flip = cv2.flip(img, 1)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=flip)
        # STEP 3: Recognize gestures in the input image.
        self.recognizer.recognize_async(mp_image, self.timestamp)  # returns None
        # The timestamp must *always* be monotonically increasing (otherwise exception);
        # this is required by LIVE_STREAM mode.
        self.timestamp = self.timestamp + 1

    def stop(self):
        self.is_recognition_enabled = False
        self.btn_stop["state"] = tkinter.DISABLED
        self.btn_start["state"] = tkinter.NORMAL
        self.result_label.config(text="Stopped")

    def update(self):
        # Get a frame from the video source
        ret, frame = self.vid.get_frame()
        if ret:
            self.photo = PIL.ImageTk.PhotoImage(image=PIL.Image.fromarray(frame))
            self.canvas.create_image(0, 0, image=self.photo, anchor=tkinter.NW)
            if self.is_recognition_enabled:
                self.recognize(frame)
                self.result_label.config(text=self.recognized_gesture)
        self.window.after(self.delay, self.update)

class MyVideoCapture:
    def __init__(self, video_source=0):
        # Open the video source
        self.vid = cv2.VideoCapture(video_source)
        if not self.vid.isOpened():
            raise ValueError("Unable to open video source", video_source)
        # Get video source width and height
        self.width = self.vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        self.height = self.vid.get(cv2.CAP_PROP_FRAME_HEIGHT)

    def get_frame(self):
        if self.vid.isOpened():
            ret, frame = self.vid.read()
            if ret:
                # Return a boolean success flag and the current frame converted to RGB
                return (ret, cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            else:
                return (ret, None)
        return (False, None)

    # Release the video source when the object is destroyed
    def __del__(self):
        if self.vid.isOpened():
            self.vid.release()

# Create a window and pass it to the Application object
App(tkinter.Tk(), "ASL Interpreter")
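One detail in recognize() deserves emphasis: recognize_async takes the timestamp in milliseconds, and in LIVE_STREAM mode the value must be strictly increasing across calls. The frame counter above satisfies that. If you would rather use wall-clock time, a sketch like the following also works (assuming no two frames are submitted within the same millisecond; now_ms is a hypothetical helper, not part of the script above):

import time

_start = time.monotonic()

def now_ms():
    # Millisecond timestamp for recognize_async, measured from a monotonic
    # clock so system clock adjustments cannot make it go backwards.
    return int((time.monotonic() - _start) * 1000)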
Please note:
- GestureRecognizer should not be recreated at each frame; otherwise it completely erases the benefit of the LIVE_STREAM mode.
- Recognition should not run in a blocking while loop inside the btn_start handler; instead it is driven from the update() loop.
- The Label cannot be updated directly from the result_callback (probably some Tkinter-related threading specifics), which is why I hand the result over through self.recognized_gesture; see the queue-based sketch after this list for an alternative hand-off.
- I additionally took care of the buttons' (de)activation, just to make it a little more user-friendly.
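If polling a shared attribute feels fragile, a common thread-safe alternative is to pass results from MediaPipe's callback thread to the Tk main loop through a queue.Queue. The sketch below is illustrative only (GestureQueueDemo and poll_results are hypothetical names, not part of the script above):

import queue
import tkinter

class GestureQueueDemo:
    def __init__(self, window):
        self.window = window
        self.result_label = tkinter.Label(window, text="Result: ")
        self.result_label.pack()
        # Written by the callback thread, drained on the Tk main loop.
        self.results = queue.Queue()
        self.poll_results()

    def result_callback(self, result, output_image, timestamp_ms):
        # Runs on MediaPipe's worker thread: only enqueue, never touch widgets here.
        if len(result.gestures) > 0:
            self.results.put(result.gestures[0][0].category_name)

    def poll_results(self):
        # Runs on the Tk main loop: drain pending results and update the label safely.
        try:
            while True:
                self.result_label.config(text=self.results.get_nowait())
        except queue.Empty:
            pass
        self.window.after(60, self.poll_results)

The queue decouples the two threads completely: the callback never blocks on the GUI, and the GUI only ever touches widgets from its own event loop.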
You can see a short demo here.
The output is not very stable; however, that is an issue with the gesture_recognizer model, not the script. Also, take a look at another answer where I handle potential race conditions using a Lock; a minimal sketch of that pattern follows.
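The sketch below is an assumed reconstruction of that Lock-based pattern, not the other answer verbatim: a threading.Lock serializes access to the shared result so the callback thread and the update() loop never read and write it simultaneously (LockedResult is a hypothetical helper name).

import threading

class LockedResult:
    # Hypothetical helper: wraps the shared gesture string behind a Lock.
    def __init__(self):
        self._lock = threading.Lock()
        self._value = "No gestures"

    def set(self, value):
        # Called from MediaPipe's callback thread.
        with self._lock:
            self._value = value

    def get(self):
        # Called from the Tk update() loop.
        with self._lock:
            return self._value

In App, result_callback would call self.result.set(first_gesture) and update() would read self.result.get() before configuring the label.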