I have code for downloading huge CSV files that are stored in .gz archives.
import asyncio
import re
import zlib
import aiohttp
from aiohttp import ClientTimeout
from aiohttp.client_exceptions import InvalidURL
timeout = ClientTimeout(total=600)
async def download(link, session, sem):
    # The output name is the archive name without the ".gz" suffix
    out_file_path = link.split("/")[-1][:-3]
    try:
        async with sem, session.get(
                'http://111.11.111.111/test/' + link) as resp:
            # wbits=MAX_WBITS | 32 lets zlib auto-detect the gzip header
            d = zlib.decompressobj(zlib.MAX_WBITS | 32)
            with open(out_file_path, 'wb') as file:
                # iter_chunks() yields (bytes, end_of_http_chunk) pairs
                async for data, _ in resp.content.iter_chunks():
                    chunk = d.decompress(data)
                    file.write(chunk)
            return True
    except InvalidURL as invalid_url:
        ...
    except asyncio.TimeoutError as e:
        ...
async def main():
    links = ['test/1.csv.gz']
    sem = asyncio.Semaphore(10)
    async with aiohttp.ClientSession(
        auth=aiohttp.BasicAuth(
            'test',
            'test'),
        timeout=timeout
    ) as session:
        tasks = (download(
            link=link,
            session=session,
            sem=sem
        ) for link in links)
        results = await asyncio.gather(*tasks)
        return results

asyncio.run(main())
This code runs without errors, but every downloaded file ends up being only about 100 MB, even though the archives I download have a much larger content length.
How can I fix this and download the full data?
I resolved my problem in the following way:
async with downloading_queue, aiohttp.ClientSession(
    auth=aiohttp.BasicAuth(
        self.config['log'],
        self.config['pwd']),
    timeout=CLIENT_TIMEOUT
).get(url=url) as resp:
    file = BytesIO(await resp.content.read())
    with gzip.open(file, 'rt') as decompressed_file:
        with open(out_file_path, 'w') as outfile:
            shutil.copyfileobj(decompressed_file, outfile)
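For reference, here is a minimal, self-contained sketch of the same approach as a standalone script (the host, credentials, link list, and semaphore limit are placeholders carried over from the question, not real values). It buffers the whole compressed body with resp.content.read() and decompresses it with gzip.open, which transparently handles multi-member gzip archives, whereas a single zlib.decompressobj stops after the first member; that difference may be why the streaming version appeared to truncate large files:

import asyncio
import gzip
import shutil
from io import BytesIO

import aiohttp
from aiohttp import ClientTimeout

CLIENT_TIMEOUT = ClientTimeout(total=600)
BASE_URL = 'http://111.11.111.111/test/'  # placeholder host from the question


async def download(link, session, sem):
    out_file_path = link.split("/")[-1][:-3]  # drop the ".gz" suffix
    async with sem, session.get(BASE_URL + link) as resp:
        resp.raise_for_status()
        # Buffer the whole compressed body in memory, then decompress it.
        buffer = BytesIO(await resp.content.read())
        with gzip.open(buffer, 'rt') as decompressed_file:
            with open(out_file_path, 'w') as outfile:
                shutil.copyfileobj(decompressed_file, outfile)
    return out_file_path


async def main():
    links = ['test/1.csv.gz']  # placeholder link list
    sem = asyncio.Semaphore(10)  # limit the number of concurrent downloads
    async with aiohttp.ClientSession(
        auth=aiohttp.BasicAuth('test', 'test'),  # placeholder credentials
        timeout=CLIENT_TIMEOUT,
    ) as session:
        tasks = (download(link, session, sem) for link in links)
        return await asyncio.gather(*tasks)


asyncio.run(main())

Keep in mind that this buffers the entire compressed payload in memory, so it is only suitable as long as the archives fit in RAM.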