I have been trying to figure out how I can use asyncio and aiohttp inside of a Class. If I just try running the script without the Class (just use the functions as is), everything works fine. As soon as I bring all the functions into a Class and try using the Class in Main.py the script locks up without any errors. Not exactly sure where to go from here, I am guessing I have to set up my Class differently for it to work. If anyone has any knowledge as to why this does not work, it would be greatly appreciated it if you shared what I am doing wrong. Thank you for your time.
import asyncio
from aiohttp import ClientSession
class Fetch:
def __init__(self, proxy=None):
self.proxy = proxy
self.headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
def set_headers(self, headers):
if not headers:
headers = self.headers
return headers
def set_proxy(self, proxy):
if proxy:
p = proxy
else:
p = self.proxy
return "http://{}".format(p)
async def get_fetch(self, session, url, headers=None, proxy=None, params=None, timeout=9):
array = []
while True:
try:
async with session.get(url, headers=self.set_headers(headers), proxy=self.set_proxy(proxy), params=params, timeout=timeout) as r:
print (r.status)
if r.status == 200:
obj = await r.read()
array.append(obj)
break
except:
pass
return array
async def get_bound(self, sem, session, url):
async with sem:
array = await self.get_fetch(session, url)
return array
async def get_run(self, urls, semaphores=400):
tasks = []
sem = asyncio.Semaphore(semaphores)
async with ClientSession() as session:
for url in urls:
task = asyncio.ensure_future(self.get_bound(sem, session, url))
tasks.append(task)
responses = await asyncio.gather(*tasks)
return responses
def get(self, urls):
loop = asyncio.get_event_loop()
future = asyncio.ensure_future(self.get_run(urls))
array = loop.run_until_complete(future)
loop.close()
return [ent for sublist in array for ent in sublist]
from Browser import Fetch
from bs4 import BeautifulSoup
proxy = 'xxx.xxx.xxx.xxx:xxxxx'
fetch = Fetch(proxy)
if __name__ == '__main__':
urls = ['http://ip4.me','http://ip4.me','http://ip4.me']
array = fetch.get(urls)
for obj in array:
soup = BeautifulSoup(obj, 'html.parser')
for ip in soup.select('tr + tr td font'):
print(ip.get_text())
Your indentation is wrong.
async with ClientSession() as session:
for url in urls:
task = asyncio.ensure_future(self.get_bound(sem, session, url))
tasks.append(task)
responses = await asyncio.gather(*tasks)
return responses
Bring the last two lines back within the with
block.
Your code looks similar to https://pawelmhm.github.io/asyncio/python/aiohttp/2016/04/22/asyncio-aiohttp.html. In this reference, the await responses
and related statements are well within the with
block, otherwise your code lets the ClientSession
instance go out of scope (and the underlying session be closed) before the http calls come back.
On a side note, please consider a standard indentation style for your code. A single space makes it really hard to spot these easy mistakes.