I am having a problem downloading multiple URLs. My code still only downloads one URL per session — the first download has to finish before the next one starts.
I want to download, say, 3 URLs at the same time.
Here's my code:
# Browser-like User-Agent so servers don't reject the scripted requests.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0'
}
def download(path, video_url, bar: tqdm):
    """Stream video_url to the file at path, advancing bar by bytes written.

    Args:
        path: destination file path (opened in binary write mode).
        video_url: URL to GET with streaming enabled.
        bar: a tqdm progress bar, updated per 1024-byte chunk.
    """
    # BUG FIX: headers must be passed as a keyword argument. Passed
    # positionally it binds to requests.get's `params` parameter, so the
    # User-Agent header was never actually sent.
    res = requests.get(video_url, headers=headers, stream=True)
    with open(path, 'wb') as f:
        for b in res.iter_content(1024):
            f.write(b)
            bar.update(len(b))
def get_length(video_url):
    """Return the Content-Length (in bytes) reported by the server for video_url.

    Raises KeyError if the server does not send a Content-Length header.
    """
    # BUG FIX: headers passed as a keyword — positionally it would be taken
    # as the `params` argument and the headers would be silently dropped.
    # stream=True avoids downloading the body just to read the headers.
    res = requests.get(video_url, headers=headers, stream=True)
    return int(res.headers['Content-Length'])
def download_all(urls: list, thread: int = cpu_count()):
    """Download every URL in urls concurrently on a thread pool.

    Args:
        urls: list of URLs to download.
        thread: number of worker threads (defaults to the CPU count).
    """
    total = len(urls)
    pool = ThreadPool(thread)  # https://stackoverflow.com/a/56528204/14951175
    for count, url in enumerate(urls, start=1):
        output_file = get_url_path(url)
        content_length = get_length(video_url=url)
        # The bar must outlive this loop iteration: the async task updates it
        # later, so we must NOT wrap it in a `with` block that closes it here.
        bar = tqdm(total=content_length, unit='B', ncols=(150-1),
                   desc=f'Downloading {count} of {total}',
                   unit_divisor=1024, ascii=True, unit_scale=True)
        # BUG FIX: pass the callable and its arguments separately.
        # `pool.apply_async(download(output_file, url, bar))` CALLED download
        # synchronously (serializing the downloads) and submitted its None
        # result to the pool instead of scheduling the work.
        pool.apply_async(download, (output_file, url, bar))
    pool.close()
    pool.join()
# Entry point: read one URL per line from urls.txt and download them all.
# NOTE(review): read_lines is defined elsewhere in the project — presumably
# it strips newlines; verify, or empty lines would become bogus URLs.
urls = read_lines('urls.txt')
download_all(urls)
This line
pool.apply_async(download(output_file, url, bar))
must be
pool.apply_async(download, (output_file, url, bar))
Otherwise you call the download
method instead of passing it (and the args) to the ThreadPool.
Edit
Use starmap
to map the urls to func
where you perform the download (btw: you can save the duplicate GET request). And add the position
argument.
To be honest, the bars don't work very smoothly, but I don't really have experience with tqdm
or ThreadPool
. But in general the downloads seem to work.
def download_all(urls: list, thread: int = cpu_count()):
    """Download all urls concurrently on a thread pool, one tqdm bar per URL.

    Args:
        urls: list of URLs to download.
        thread: number of worker threads (defaults to the CPU count).
    """
    total = len(urls)
    pool = ThreadPool(thread)

    def func(count, url):
        # Worker: downloads one URL. `count` doubles as the bar's screen row.
        output_file = get_url_path(url)
        # A single streaming GET serves both the Content-Length probe and the
        # body download — no duplicate request as in the original code.
        req = requests.get(url, headers=headers, stream=True)
        content_length = int(req.headers['Content-Length'])
        # position=count pins each bar to its own terminal line so the
        # concurrent bars don't overwrite one another.
        with tqdm(total=content_length, unit='B', desc=f'Downloading {count + 1} of {total}',
                  unit_divisor=1024, ascii=True, unit_scale=True, position=count, file=sys.stdout) as bar:
            with open(output_file, 'wb') as f:
                for b in req.iter_content(1024):
                    f.write(b)
                    bar.update(len(b))

    # starmap unpacks each (index, url) pair from enumerate into func's
    # two parameters and blocks until all downloads finish.
    pool.starmap(func, enumerate(urls))
    pool.close()
    pool.join()