I having problem downloading multiple urls. My code still only download 1 url per session. Still need to finish the first one before downloading the next one.
I want to download like 3 urls at the same time.
Here's my code:
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0'
def download(path, video_url, bar: tqdm):
res = requests.get(video_url, headers, stream=True)
with open(path, 'wb') as f:
for b in res.iter_content(1024):
def get_length(video_url):
res = requests.get(video_url, headers, stream=True)
le = int(res.headers['Content-Length'])
return le
def download_all(urls: list, thread: int = cpu_count()):
total = len(urls)
count = 0
pool = ThreadPool(thread) # https://stackoverflow.com/a/56528204/14951175
for url in urls:
output_file = get_url_path(url)
count += 1
content_length = get_length(video_url=url)
with tqdm(total=content_length, unit='B', ncols=(150-1), desc=f'Downloading {count} of {total}', unit_divisor=1024, ascii=True, unit_scale=True) as bar:
pool.apply_async(download(output_file, url, bar))
urls = read_lines('urls.txt')
This line
pool.apply_async(download(output_file, url, bar))
must be
pool.apply_async(download, (output_file, url, bar))
Otherwise you call the download
method instead of passing it (and the args) to the ThreadPool.
Use starmap
to map the urls to func
where you perform the download (btw: you can safe the duplicate get-request). And add the position
To be honest, the bars don't works very smooth, but I don't really have experience with tqdm
or ThreadPool
. But in general the downloads seem to work.
def download_all(urls: list, thread: int = cpu_count()):
total = len(urls)
pool = ThreadPool(thread)
def func(count, url):
output_file = get_url_path(url)
req = requests.get(url, headers=headers, stream=True)
content_length = int(req.headers['Content-Length'])
with tqdm(total=content_length, unit='B', desc=f'Downloading {count + 1} of {total}',
unit_divisor=1024, ascii=True, unit_scale=True, position=count, file=sys.stdout) as bar:
with open(output_file, 'wb') as f:
for b in req.iter_content(1024):
pool.starmap(func, enumerate(urls))