Search code examples
python asynchronous python-asyncio

Why is this code using an asyncio `async for` loop not faster than the synchronous code?


I'm learning about Python asyncio from a book named "Using Asyncio in Python" from O'Reilly.

In this book there is an example of an `async for` loop:

# Example 3-26. Easier with an async generator
import asyncio


# Mock Redis interface
class Redis:
    """Mock Redis client whose lookups are simulated with a one-second pause."""

    async def get(self, key):
        """Wait one second, then return the fixed string 'value'.

        ``key`` is accepted but ignored.
        """
        simulated_latency = 1
        await asyncio.sleep(simulated_latency)
        return 'value'


# Mock create_redis
# Real one: aioredis.create_redis
async def create_redis(socket):
    """Pause for one second, then return a new mock Redis client.

    ``socket`` is accepted but ignored.
    """
    await asyncio.sleep(1)
    client = Redis()
    return client


async def do_something_with(value):
    """Simulate one second of asynchronous work on ``value``; return None."""
    work_duration = 1
    await asyncio.sleep(work_duration)
    return None


# Our function is now declared with async def, making it a coroutine
# function, and since this function also contains the yield keyword, we refer
# to it as an asynchronous generator function.
async def one_at_a_time(redis, keys):
    """Asynchronous generator yielding the value stored under each key.

    Each lookup is awaited to completion before its value is yielded, so
    the lookups happen strictly one after another, in the order of ``keys``.
    """
    for key in keys:
        # Fetch the value and hand it straight to the consumer; the next
        # lookup does not start until the consumer asks for another item.
        yield await redis.get(key)


# The main() function is identical to the version in Example 3-25.
async def main():
    """Create the mock client, then fetch and process each key in turn."""
    address = ('localhost', 6379)
    client = await create_redis(address)
    regions = ['Americas', 'Africa', 'Europe', 'Asia']
    # Nothing overlaps here: every fetch and every processing step is
    # awaited before the next one begins.
    async for value in one_at_a_time(client, regions):
        await do_something_with(value)

# time is needed for the measurement below; only asyncio is imported at the
# top of this listing, so without this import time.time() raises NameError.
import time

# Measure the wall-clock duration of the whole asynchronous run.
start = time.time()
asyncio.run(main())
end = time.time()
print(end - start)

#print result is 9.012349128723145

But I don't think it actually runs asynchronously. Here is my synchronous code for comparison with the code above:

class Redis:
    """Blocking mock of a Redis client: each lookup sleeps for one second."""

    def get(self, key):
        """Block for one second, then return 'value'; ``key`` is ignored."""
        simulated_latency = 1
        time.sleep(simulated_latency)
        return 'value'

def create_redis(socket):
    """Block for one second, then return a new mock Redis client.

    ``socket`` is accepted but ignored.
    """
    time.sleep(1)
    client = Redis()
    return client


def do_something_with(value):
    """Simulate one second of blocking work on ``value``; return None."""
    work_duration = 1
    time.sleep(work_duration)
    return None


def one_at_a_time(redis, keys):
    """Generator yielding ``redis.get(key)`` for each key, lazily and in order."""
    for key in keys:
        # The lookup happens only when the consumer requests the next item.
        yield redis.get(key)


# Synchronous counterpart of the main() coroutine shown above.
def main():
    """Create the mock client, then fetch and process each key sequentially."""
    redis = create_redis(('localhost', 6379))
    keys = ['Americas', 'Africa', 'Europe', 'Asia']
    # Every call blocks, so each fetch and each processing step finishes
    # before the next one starts.
    for value in one_at_a_time(redis, keys):
        do_something_with(value)

# The synchronous listing has no imports of its own; time is needed both for
# the measurement here and for the time.sleep() calls in the functions above.
import time

# Measure the wall-clock duration of the whole synchronous run.
start = time.time()
main()
end = time.time()
print(end-start)

# print result 9.025717973709106

The running time is the same for both versions.

Is the first version really asynchronous code? I don't think it is, and I don't understand why the book calls it async code.


Solution

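  • Your code awaits each redis.get(k) and waits for it to finish before starting the next redis.get(k), so only one operation is ever in progress and it cannot run faster than the synchronous version.

    To get a speed-up you have to write slightly more complex code that starts many operations at once.

    The code below first creates all the coroutines (by calling redis.get(k) without await) and then passes them to gather(), which runs them all concurrently.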

    async def one_at_a_time(redis, keys):
        """Fetch every key concurrently, then yield the values in key order.

        Nothing is yielded until all of the lookups have finished.
        """
        # Calling redis.get() without await only builds the coroutine objects;
        # gather() is what actually runs them, all at the same time.
        lookups = [redis.get(key) for key in keys]
        fetched = await asyncio.gather(*lookups)

        for value in fetched:
            yield value
    

    Now it takes only 6.0 seconds.

    It can still be made faster, though: all the results now arrive in 1 second instead of 4, but time is still wasted running do_something_with() one call after another instead of running the calls at the same time.


    Full working code:

    import asyncio
    import time
    import random 
    
    
    class Redis:
        """Mock Redis client: logs each lookup and answers after one second."""

        async def get(self, key):
            """Print 'get', wait one second, then return a random int in 0..10.

            ``key`` is accepted but ignored.
            """
            print('get')
            await asyncio.sleep(1)
            value = random.randint(0, 10)
            return value
    
    
    async def create_redis(socket):
        """Pause for one second, then return a new mock Redis client.

        ``socket`` is accepted but ignored.
        """
        await asyncio.sleep(1)
        client = Redis()
        return client
    
    
    async def do_something_with(value):
        """Report ``value``, then simulate one second of asynchronous work."""
        print('do something with', value)
        processing_time = 1
        await asyncio.sleep(processing_time)
    
    
    async def one_at_a_time(redis, keys):
        """Fetch every key concurrently, print the results, then yield them.

        Values are yielded in the order of ``keys``; nothing is yielded
        until all of the lookups have finished.
        """
        # Calling redis.get() without await only builds the coroutine objects;
        # gather() is what actually runs them, all at the same time.
        lookups = [redis.get(key) for key in keys]
        results = await asyncio.gather(*lookups)

        print('results:', results)
        for value in results:
            yield value
    
    
    async def main():
        """Create the mock client, then process each fetched value in turn."""
        address = ('localhost', 6379)
        client = await create_redis(address)
        regions = ['Americas', 'Africa', 'Europe', 'Asia']
        # The processing steps are awaited individually, so they do not
        # overlap with one another.
        async for value in one_at_a_time(client, regions):
            await do_something_with(value)
    
    
    # Measure the wall-clock duration of the whole run.
    start = time.time()
    asyncio.run(main())
    end = time.time()
    elapsed = end - start
    print('time:', elapsed)
    
    #print result is 6.0089991092681885
    

    And here is code that uses the same gather() technique
    to run all the do_something_with() calls at the same time.

    This brings the total time down to 3.0 seconds:

    import asyncio
    import time
    import random 
    
    
    class Redis:
        """Mock Redis client: logs each lookup and answers after one second."""

        async def get(self, key):
            """Print 'get', wait one second, then return a random int in 0..10.

            ``key`` is accepted but ignored.
            """
            print('get')
            await asyncio.sleep(1)
            value = random.randint(0, 10)
            return value
    
    
    async def create_redis(socket):
        """Pause for one second, then return a new mock Redis client.

        ``socket`` is accepted but ignored.
        """
        await asyncio.sleep(1)
        client = Redis()
        return client
    
    
    async def do_something_with(value):
        """Report ``value``, then simulate one second of asynchronous work."""
        print('do something with', value)
        processing_time = 1
        await asyncio.sleep(processing_time)
    
    
    async def one_at_a_time(redis, keys):
        """Fetch every key concurrently, print the results, then yield them.

        Values are yielded in the order of ``keys``; nothing is yielded
        until all of the lookups have finished.
        """
        # Calling redis.get() without await only builds the coroutine objects;
        # gather() is what actually runs them, all at the same time.
        lookups = [redis.get(key) for key in keys]
        results = await asyncio.gather(*lookups)

        print('results:', results)
        for value in results:
            yield value
    
    
    async def main():
        """Fetch all values, then run every processing step concurrently."""
        client = await create_redis(('localhost', 6379))
        regions = ['Americas', 'Africa', 'Europe', 'Asia']

        # Calling do_something_with() without await only creates the
        # coroutines; gather() below runs them all at the same time.
        jobs = [
            do_something_with(value)
            async for value in one_at_a_time(client, regions)
        ]
        await asyncio.gather(*jobs)
    
    
    # Announce the run, then measure its wall-clock duration.
    print('start')
    start = time.time()
    asyncio.run(main())
    end = time.time()
    elapsed = end - start
    print('time:', elapsed)
    
    # this version prints 3.0061380863189697
    # the previous version (gather() only for redis.get) printed 6.0089991092681885
    # the original version printed 9.012349128723145
    

    A simpler example that shows the difference between:

    • await function() ...
    • x = function() ... await x
    • x = create_task(function()) ... await x
    import asyncio
    import time
    
    async def my_function(text):
        """Print four numbered progress lines for ``text``, 0.5 s after each."""
        stages = (
            'my_function 1',
            'my_function 2',
            'my_function 3',
            'my_function 4',
        )
        for stage in stages:
            print(text, stage)
            await asyncio.sleep(0.5)
        
    async def example1():
        """Await each my_function() call right away, then do the other work.

        Every step is awaited before the next one begins, so nothing overlaps.
        """
        # create, start, and wait for result (blocking)
        for label in ('A', 'B'):
            await my_function(label)

        for message in (
            'something else 1',
            'something else 2',
            'something else 3',
            'something else 4',
        ):
            await asyncio.sleep(1)
            print(message)
        await asyncio.sleep(1)
    
    async def example2():
        """Create both coroutines up front, but await them only at the end."""
        # create (not blocking): calling the function only builds coroutine
        # objects; nothing inside my_function runs yet
        first = my_function('A')
        second = my_function('B')

        for message in (
            'something else 1',
            'something else 2',
            'something else 3',
            'something else 4',
        ):
            await asyncio.sleep(1)
            print(message)
        await asyncio.sleep(1)

        # start, and wait for result (blocking): A runs to completion, then B
        await first
        await second
        
    async def example3():
        """Schedule both calls as tasks so they run during the other work."""
        # create and start (not blocking): create_task() schedules each
        # coroutine on the event loop, so it makes progress whenever this
        # function is waiting in a sleep
        task_a = asyncio.create_task(my_function('A'))
        task_b = asyncio.create_task(my_function('B'))

        for message in (
            'something else 1',
            'something else 2',
            'something else 3',
            'something else 4',
        ):
            await asyncio.sleep(1)
            print(message)
        await asyncio.sleep(1)

        # wait for result (blocking); asyncio.gather(task_a, task_b) would
        # wait for both in a single call instead
        await task_a
        await task_b
    
    async def main():
        """Run the three examples in order, printing how long each one took."""
        examples = (
            ('--- example 1 ---', example1),
            ('--- example 2 ---', example2),
            ('--- example 3 ---', example3),
        )
        for banner, example in examples:
            print(banner)
            start = time.time()
            await example()
            end = time.time()
            print('time:', (end - start))
        
        
    # start an event loop and run main() until it completes
    asyncio.run(main())