Search code examples
pythonpython-asyncioaiohttp

Requests to YouTube API using asyncio and aiohttp, always freezing at task number 101


I'm trying to get details about YouTube videos based on their IDs. I have a list of 40k IDs, which I split into a list of lists containing 50 IDs each, since the YouTube API can handle 50 IDs per request.

My code just freezes at task number 101.

import asyncio
import time
import aiohttp
from aiohttp.client import ClientSession
import requests

# Parsed JSON payloads; get_data() appends one entry per response.
results = []
# Placeholder left by the author (not valid Python): the real value is the
# list of ID chunks from the link given at the end of the question.
chunks = [click the link for IDs]

# Wall-clock start, used for the elapsed-time print at the end of the script.
start = time.time()

def get_tasks(session):
    """Build one pending ``videos`` request per chunk of video IDs.

    Args:
        session: an open aiohttp ``ClientSession``; its ``get`` method is
            called once per entry of the module-level ``chunks``.

    Returns:
        A list of the un-awaited objects returned by ``session.get``, in
        the same order as ``chunks``.
    """
    url = "https://www.googleapis.com/youtube/v3/videos"
    tasks = []
    for chunk in chunks:
        params = {
            'key': 'yourapikey',
            'part': 'statistics,contentDetails,snippet',
            # The API documents ``id`` as one comma-separated string, so a
            # list of IDs is joined instead of being passed through as a list.
            'id': chunk if isinstance(chunk, str) else ','.join(chunk),
        }
        # NOTE(review): ssl=False turns off certificate verification for
        # these requests; kept as in the original, but worth revisiting.
        tasks.append(session.get(url=url, params=params, ssl=False))
        # Progress trace: number of requests queued so far.
        print(len(tasks))
    return tasks

async def get_data():
    """Fetch every chunk concurrently and append the parsed JSON to ``results``.

    Each request is awaited and its body read inside its own coroutine, so
    the connection goes back to the session's pool as soon as that response
    has been consumed. Gathering the bare responses first and reading them
    afterwards keeps every connection checked out; with aiohttp's default
    connector limit of 100 that leaves request 101 waiting indefinitely.

    Payloads are appended to the module-level ``results`` list in the same
    order as the requests returned by ``get_tasks``.
    """
    async def read_json(request):
        # ``async with`` awaits the pending request and releases the
        # connection when the block exits.
        async with request as response:
            return await response.json()

    async with aiohttp.ClientSession() as session:
        tasks = get_tasks(session)
        print("len ", len(tasks))
        payloads = await asyncio.gather(*(read_json(task) for task in tasks))
    results.extend(payloads)

# The selector policy is a Windows-only attribute of asyncio; guard the lookup
# so the script also starts on other platforms instead of raising
# AttributeError.
if hasattr(asyncio, "WindowsSelectorEventLoopPolicy"):
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

asyncio.run(get_data())
# Elapsed wall-clock seconds since ``start`` was taken.
print(time.time() - start)

I'm not sure what is happening here. How can I debug this problem? Link to the chunks lists:

https://justpaste.it/8gwq6


Solution

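  • I don't know for certain why you had errors; I just know that if you do it like below, you won't have them (as long as your params are correct). A likely explanation for a stall at exactly request 101 is that aiohttp's default connector allows at most 100 simultaneous connections, and a connection only returns to the pool once its response has been read or released. In essence, we are just taking the client example from the aiohttp docs, sticking the session part in a loop, and modifying the response handling for your needs.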

    import asyncio, aiohttp, ssl, certifi, requests, random, time
    
    # Desktop Chrome user-agent strings; one of them is picked at random below.
    CHROME = (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
    )

    # TLS context that verifies against certifi's CA bundle.
    SSL_CONTEXT = ssl.create_default_context(cafile=certifi.where())
    # Start from requests' default headers, then override the user agent.
    HEADERS = requests.utils.default_headers()
    HEADERS.update({'User-Agent': random.choice(CHROME)})
    URL = "https://www.googleapis.com/youtube/v3/videos"

    # Wall-clock start for the elapsed-time print at the end of the script.
    start = time.time()
    # Sample ID chunks; each inner list is sent as one request.
    chunks = [
        ["Ul3w34H-byY", "OJdiW61Q9bM"],
        ["R3En-aLGwhw", "nrwXOn1rITY"],
    ]
    # Query parameters shared by every request.
    params = {
        'key': 'APIKEY',
        'part': 'statistics,contentDetails,snippet',
    }
    
    async def get_data():
        """Request each chunk of video IDs in turn and collect the JSON bodies.

        Requests are issued one after another on a single session; a
        response whose status is not 200 is skipped.

        Returns:
            list: the parsed JSON payloads of the successful responses, in
            chunk order.
        """
        results = []
        async with aiohttp.ClientSession(headers=HEADERS) as session:
            for chunk in chunks:
                # Build a fresh dict per request rather than writing the IDs
                # into the shared module-level ``params``; the API documents
                # ``id`` as a single comma-separated string.
                ids = chunk if isinstance(chunk, str) else ','.join(chunk)
                query = {**params, 'id': ids}
                async with session.get(url=URL, params=query, ssl=SSL_CONTEXT) as resp:
                    if resp.status != 200:
                        continue
                    results.append(await resp.json())
        # Hand the collected payloads back; without this the list is
        # discarded when the coroutine finishes.
        return results
    
    # The selector policy is a Windows-only attribute of asyncio; guard the
    # lookup so the script also starts on other platforms instead of raising
    # AttributeError.
    if hasattr(asyncio, "WindowsSelectorEventLoopPolicy"):
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

    asyncio.run(get_data())
    # Elapsed wall-clock seconds since ``start`` was taken.
    print(time.time() - start)