Tags: python, multithreading, asynchronous, async-await, ollama

How to immediately cancel an Asyncio Task that uses the Ollama Python library to generate an answer?


I'm using Ollama to generate answers from large language models (LLMs) with the Ollama Python library. I want to cancel response generation by clicking a stop button. The problem is that the task cancellation only works once the response has already started printing; if the task is still processing and has not yet produced any output, the cancellation has no effect and the response is printed anyway. To be specific, the call prompt_mistral("Testing") still executes and prints its response even after the button is clicked.

My code:

import ollama
import asyncio
import threading
from typing import Optional
import tkinter as tk

# Create the main window
root = tk.Tk()
root.title("Tkinter Button Example")

worker_loop: Optional[asyncio.AbstractEventLoop] = None
task_future: Optional[asyncio.Future] = None

async def get_answer_from_phi3():

    print("Trying")

    messages = [
        {"role": "system", "content": "Hello"}
    ]

    client = ollama.AsyncClient()

    stream = await client.chat(
        model='phi3',
        messages=messages,
        stream=True,
        options={"top_k": 1}
    )

    try:
        async for chunk in stream:
            # Print each streamed chunk as it arrives
            print(chunk['message']['content'], end='', flush=True)

    except asyncio.exceptions.CancelledError:
        print("Cancelled")

    except Exception as e:
        print(e)
        return "Sorry, an error occurred while processing your request."


async def prompt_mistral(query):
    messages = []
    messages.append({"role": "assistant", "content": "Write a song that celebrates the beauty, diversity, and importance of our planet, Earth. The song should evoke vivid imagery of the natural world, from lush forests and majestic mountains to serene oceans and vast deserts. It should capture the essence of Earth as a living, breathing entity that sustains all forms of life. Incorporate themes of harmony, unity, and interconnectedness, emphasizing how all elements of nature are intertwined and how humanity is an integral part of this complex web. The lyrics should reflect a sense of wonder and appreciation for the planet's resources and ecosystems, highlighting the delicate balance that sustains life. Include references to various landscapes, climates, and wildlife, painting a picture of Earth's diverse environments. The song should also touch on the responsibility we have to protect and preserve the planet for future generations, addressing issues like climate change, deforestation, pollution, and conservation efforts. Use poetic language and metaphors to convey the grandeur and fragility of Earth, and infuse the song with a hopeful and inspiring tone that encourages listeners to take action in safeguarding our shared home. The melody should be uplifting and emotionally resonant, complementing the powerful message of the lyrics"})
    generated_answer = ''
    try:
        client = ollama.AsyncClient()
        stream = await client.chat(
            model='mistral',
            messages=messages,
            stream=True,
            options={"top_k": 1}
        )

        async for chunk in stream:
            # Store generated answer
            generated_answer += chunk['message']['content']
            print(chunk['message']['content'])
            
    
    except asyncio.exceptions.CancelledError:
        print("Cancelled response")
        return

    except Exception as e:
        print(e)
        return "Sorry, an error occurred while processing your request."

def prompt_llama(message):

    async def prompt():

        messages = []
        messages.append({"role": "assistant", "content": message})
        try:
            client = ollama.AsyncClient()
            stream = await client.chat(
                model='llama2',
                messages=messages,
                stream=True,
                options={"top_k": 1}
            )

            generated_answer = ''
            
            async for chunk in stream:
                # Store generated answer
                generated_answer += chunk['message']['content']
                print(chunk['message']['content'])    
        
            if "help" in generated_answer:
                await prompt_mistral("Testing")
            else:
                print(generated_answer)

        except asyncio.exceptions.CancelledError:
            print("Cancelled")
            return

        except Exception as e:
            print(e)
            return "Sorry, an error occurred while processing your request."

    def mistral_worker_function():
        global worker_loop, task_future
        worker_loop = asyncio.new_event_loop()
        task_future = worker_loop.create_task(prompt())
        worker_loop.run_until_complete(task_future) 

    print("Starting thread")
    thread = threading.Thread(target=mistral_worker_function)
    thread.start()
    client = ollama.AsyncClient()

# Define the function to be called when the button is pressed
def on_button_click():
    global worker_loop, task_future
    # the loop and the future are not thread-safe,
    # so schedule the cancellation on the worker loop
    worker_loop.call_soon_threadsafe(
        lambda: task_future.cancel()
    )

    def phi3_worker_function():
        global worker_loop, task_future
        worker_loop = asyncio.new_event_loop()
        task_future = worker_loop.create_task(get_answer_from_phi3())
        worker_loop.run_until_complete(task_future)

    print("Starting thread")
    thread = threading.Thread(target=phi3_worker_function)
    thread.start()

# Create the button
button = tk.Button(root, text="Stop", command=on_button_click)

# Place the button on the window
button.pack(pady=20)

prompt_llama("Hi")

# Start the Tkinter event loop
root.mainloop()
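
For reference, below is a minimal, self-contained sketch of the cross-thread cancellation pattern the code above relies on: a worker thread runs its own event loop, and the main thread asks that loop to cancel the running task via call_soon_threadsafe. The long_running_job coroutine is a made-up stand-in for the streaming Ollama call so the example runs without a model; it only illustrates the pattern, it is not a fix for the problem described above.

import asyncio
import threading
import time
from typing import Optional

worker_loop: Optional[asyncio.AbstractEventLoop] = None
task_future: Optional[asyncio.Future] = None

async def long_running_job():
    # Stand-in for the streaming Ollama request
    try:
        for i in range(50):
            print(f"chunk {i}", flush=True)
            await asyncio.sleep(0.2)
    except asyncio.CancelledError:
        print("Cancelled")
        raise

def worker_function():
    global worker_loop, task_future
    worker_loop = asyncio.new_event_loop()
    task_future = worker_loop.create_task(long_running_job())
    try:
        worker_loop.run_until_complete(task_future)
    except asyncio.CancelledError:
        pass
    finally:
        worker_loop.close()

thread = threading.Thread(target=worker_function)
thread.start()

time.sleep(1)  # demo only: let a few chunks print before cancelling
worker_loop.call_soon_threadsafe(task_future.cancel)
thread.join()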

Solution

  • Update Ollama to the newest version, e.g. with curl https://ollama.ai/install.sh | sh on Linux.

    Updating automatically fixes this issue. The code works exactly as it is; you just have to update Ollama.
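
    If you want to confirm that the update actually took, one option (an assumption here, using Ollama's documented /api/version REST endpoint and the default local address http://localhost:11434) is to ask the running server for its version:

    import json
    import urllib.request

    # Query the local Ollama server for its version.
    # Adjust the host/port if your server is not on the default address.
    with urllib.request.urlopen("http://localhost:11434/api/version") as resp:
        info = json.load(resp)

    print("Ollama server version:", info["version"])

    It may also be worth keeping the ollama Python package itself up to date, for example with pip install --upgrade ollama.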