Search code examples
python, http, python-asyncio, imagehash

How can I use aiohttp to fetch an image and process it directly without saving it?


I need to calculate the hash of images that I download from a cdn server. When using synchronous libraries everything works out well. I directly pass the results of the request (res) to Image.open() and receive an object with an image as output (img).

import requests
import imagehash
from PIL import Image

# sync
def get_picture():
    """Download one image synchronously and print its perceptual hash.

    The raw response stream is handed straight to ``Image.open()``, so the
    image is never written to disk.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    url = 'https://ae01.alicdn.com/kf/Sec174725eb944b4693342178da975d52z.jpg'
    # With stream=True the connection stays checked out until the body is
    # consumed or the response is closed, so close it deterministically.
    # The timeout keeps a stalled server from blocking forever.
    with requests.get(url, stream=True, timeout=10) as res:
        # Fail on an error page instead of feeding it to the image decoder.
        res.raise_for_status()
        img = Image.open(res.raw)
        # Hash while the stream is still open: Image.open() reads lazily.
        image_hash = imagehash.phash(img, hash_size=6)
    print(f'hash: {image_hash}')

However, I need to check a lot of photos, so the requests have to be executed asynchronously. Here I ran into a problem: I could not pass the response object with the image directly to Image.open(). It only works if I first save the image to a file, and that option is not suitable for me (I don't need files on the server).

import imagehash
from PIL import Image
from aiohttp import ClientSession
from config.settings import BASE_DIR

# async
async def get_aio_picture():
    """Download the image to ``BASE_DIR/picture.jpg`` and print its perceptual hash.

    The response body is streamed to disk in fixed-size chunks; the saved
    file is then opened with Pillow and hashed with ``imagehash.phash``.

    Raises:
        aiohttp.ClientResponseError: if the server answers with a 4xx/5xx status.
    """
    url = 'https://ae01.alicdn.com/kf/Sec174725eb944b4693342178da975d52z.jpg'
    # Use one path for both writing and reading so the result does not depend
    # on the current working directory.
    picture_path = f'{BASE_DIR}/picture.jpg'
    async with ClientSession() as session:
        async with session.get(url=url) as response:
            # Do not save an HTTP error page as if it were the picture.
            response.raise_for_status()
            # NOTE(review): auto_decompress is documented as a ClientSession
            # argument; setting it on the response presumably has no effect.
            # Kept as in the original -- confirm before removing.
            response.auto_decompress = False
            with open(picture_path, 'wb') as fd:
                # Binary data has no meaningful line structure, so read
                # fixed-size chunks rather than iterating "lines".
                async for chunk in response.content.iter_chunked(64 * 1024):
                    fd.write(chunk)
    # The download is complete; the connection is no longer needed for hashing.
    with Image.open(picture_path) as img:
        image_hash = imagehash.phash(img, hash_size=6)
    print(f'hash: {image_hash}')


async def main():
    """Schedule get_aio_picture() as a task and wait for it to finish."""
    await asyncio.create_task(get_aio_picture())


def aio_hash():
    """Synchronous entry point: drive main() to completion with asyncio.run()."""
    entry_coroutine = main()
    asyncio.run(entry_coroutine)

If I change the function so that the downloaded data is passed directly to Image.open():

async def get_aio_picture():
    """Try to hash the image by passing each streamed piece to Image.open().

    This is the failing variant from the question: every item yielded by
    ``response.content`` is handed to ``Image.open()`` as-is, and the call
    ends in ``ValueError: embedded null byte`` (see the traceback below).
    """
    url = 'https://ae01.alicdn.com/kf/Sec174725eb944b4693342178da975d52z.jpg'
    async with ClientSession() as session:
        async with session.get(url=url) as response:
            response.auto_decompress = False
            async for line in response.content:
                # Per the traceback, Pillow passes this value on to
                # builtins.open() as a filename, which is where it fails.
                img = Image.open(line)
                hash = imagehash.phash(img, hash_size=6)
                print(f'hash: {hash}')

Then I get the error :(

...
File "/home/.../main/services/aio/help.py", line 37, in get_aio_picture
    img = Image.open(line)
File "/home/.../env/lib/python3.10/site-packages/PIL/Image.py", line 3247, in open
    fp = builtins.open(filename, "rb")
ValueError: embedded null byte

How can I change the code to avoid creating files on disk? Help me please!


Solution

  • You can await response.read() to get the whole body as bytes and wrap those bytes in an in-memory, file-like buffer with io.BytesIO. Image.open() accepts that buffer, so you can generate the hash from it without writing anything to disk.

    import asyncio
    import imagehash
    from PIL import Image
    from aiohttp import ClientSession
    import io
    
    # async
    async def get_aio_picture():
        """Fetch the image over HTTP and print its perceptual hash, without touching disk.

        The body is read fully into memory and wrapped in ``io.BytesIO`` so
        that ``Image.open()`` receives a file-like object.

        Raises:
            aiohttp.ClientResponseError: if the server answers with a 4xx/5xx status.
        """
        url = 'https://ae01.alicdn.com/kf/Sec174725eb944b4693342178da975d52z.jpg'
        async with ClientSession() as session:
            async with session.get(url=url) as response:
                # Fail on an error page instead of feeding it to the image decoder.
                response.raise_for_status()
                # NOTE(review): auto_decompress is documented as a ClientSession
                # argument; setting it on the response presumably has no effect.
                # Kept as in the original -- confirm before removing.
                response.auto_decompress = False
                payload = await response.read()
        # The bytes are already in memory, so hashing does not need the
        # connection to stay open.
        with Image.open(io.BytesIO(payload)) as img:
            image_hash = imagehash.phash(img, hash_size=6)
        print(f'hash: {image_hash}')
                    
    async def main():
        """Schedule get_aio_picture() as a task and wait for it to finish."""
        await asyncio.create_task(get_aio_picture())
    
    
    def aio_hash():
        """Synchronous entry point: drive main() to completion with asyncio.run()."""
        entry_coroutine = main()
        asyncio.run(entry_coroutine)
    
    # Run the example once; this is the call that produces the output shown below.
    aio_hash()
    

    Outputs:

    hash: fbc843946