Search code examples
python loops file url size

Loop over a file and get the file size of each URL request - Python


I am trying to run a simple loop over a .txt file that contains a number of URLs, each of which is a download request for a file. Ultimately, I want to know the size of the file behind each of these URLs.

When I execute the code below, every size is reported as 0.00 MB, but if I run the same code with a URL written directly in the script, it returns the correct file size. I am not sure what is going on.

import requests

# Print the size reported for every URL listed (one per line) in textDB2021.txt.
with open("textDB2021.txt") as f:
   for url in f:
       # NOTE(review): iterating over a file yields each line with its trailing
       # newline still attached, so `url` is not the bare URL here. This is the
       # likely reason the result differs from a hard-coded URL -- confirm, and
       # strip the line (url.strip()) before requesting it.
       # pass URL as first argument; a HEAD request is sent and redirects are followed
       response = requests.head(url, allow_redirects=True)
       # falls back to -1 when the response carries no content-length header
       size = response.headers.get('content-length', -1)
       # size in megabytes (Python 2, 3): 1 << 20 is the number of bytes in 1 MB
       print('{:<40}: {:.2f} MB'.format('FILE SIZE', int(size) / float(1 << 20)))

Output:

FILE SIZE                               : 0.00 MB
FILE SIZE                               : 0.00 MB
FILE SIZE                               : 0.00 MB

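Solution

Note: each line read from the file still ends with a newline character, so the URL passed to the request is not identical to the hard-coded one. Strip it first (e.g. `url.strip()`) before making the request.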

  • from urllib.request import urlopen
    from urllib.error import URLError
    import math
    
    
    def convert_size(size_bytes: int) -> str:
        """
        Format a byte count as a human-readable string, e.g. '241.19 MB'.

        Units are binary multiples (1 KB = 1024 B) and the value is rounded to
        two decimals. Sizes beyond the largest known unit are expressed in 'YB'
        instead of failing.

        :param size_bytes: non-negative number of bytes
        :return: '0B' for zero, otherwise '<value> <unit>'
        :raises ValueError: if size_bytes is negative
        """
        if size_bytes == 0:
            return "0B"
        if size_bytes < 0:
            raise ValueError("size_bytes must be non-negative, got %s" % size_bytes)
        size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
        # Choose the unit by exact comparison rather than floor(log(x, 1024)):
        # a floating-point logarithm is not guaranteed to be exact at powers of
        # 1024, yields a negative index for values below 1, and runs past the
        # end of size_name for values of 1024**9 and above.
        i = 0
        while i < len(size_name) - 1 and size_bytes >= 1024 ** (i + 1):
            i += 1
        s = round(size_bytes / 1024 ** i, 2)
        return "%s %s" % (s, size_name[i])
    
    
    def get_file_size_by_url(url: str) -> str:
        """
        Return the size announced by the server for *url* as a readable string.

        The size is taken from the response's Content-Length header and
        formatted with convert_size. Any failure is reported on stdout and
        yields 'N/A' instead of raising.

        :param url: address to query; surrounding whitespace is ignored
        :return: formatted size such as '5.93 KB', or 'N/A' when the request
            fails or the header is missing or not a valid non-negative integer
        """
        # URLs read line by line from a text file still carry the trailing
        # newline; requesting them unstripped asks for a different resource.
        url = url.strip()
        try:
            # The context manager closes the connection once the header is read.
            with urlopen(url) as obj_info:
                raw_length = obj_info.getheader('Content-Length')
        except URLError as e:
            print('with file {} error: {}'.format(url, e))
            return 'N/A'

        # Not every response announces its length; int(None) would raise TypeError.
        if raw_length is None:
            print('with file {} error: {}'.format(url, 'no Content-Length header'))
            return 'N/A'
        try:
            size_int = int(raw_length)
        except ValueError:
            size_int = -1
        if size_int < 0:
            print('with file {} error: {}'.format(
                url, 'invalid Content-Length {!r}'.format(raw_length)))
            return 'N/A'
        return convert_size(size_bytes=size_int)
    
    
    def check_sizes(urls: list) -> None:
        """
        Print one 'file <name> size is <size>' line for each URL.

        :param urls: URLs to query, reported in the given order
        :return: None
        """
        for address in urls:
            # Whatever follows the last '/' is used as the display name.
            file_name = address.rsplit('/', 1)[-1]
            pretty_size = get_file_size_by_url(url=address)
            print('file {} size is {}'.format(file_name, pretty_size))
    
    
    def main() -> None:
        """Print the sizes of a fixed list of sample URLs via check_sizes."""
        sample_urls = []
        # Three image URLs plus one named 'NoSuchFile.png'.
        sample_urls.append('https://cdn.sstatic.net/Sites/stackoverflow/img/logo.png')
        sample_urls.append('https://upload.wikimedia.org/wikipedia/commons/8/8d/Google_logo_%282010-2013%29.svg')
        sample_urls.append('https://upload.wikimedia.org/wikipedia/commons/b/b3/Wikipedia-logo-v2-en.svg')
        sample_urls.append('https://cdn.sstatic.net/Sites/stackoverflow/img/NoSuchFile.png')
        check_sizes(urls=sample_urls)
    
    
    # Run the demo only when this file is executed as a script, not when imported.
    if __name__ == '__main__':
        main()
    

    Output:

    C:\Users\GiladEiniKbyLake\.conda\envs\wu\python.exe D:/workspace/2021wizzi_utils/temp/url.py
    file logo.png size is 5.93 KB
    file Google_logo_%282010-2013%29.svg size is 44.24 KB
    file Wikipedia-logo-v2-en.svg size is 202.96 KB
    with file https://cdn.sstatic.net/Sites/stackoverflow/img/NoSuchFile.png error: HTTP Error 404: Not Found
    file NoSuchFile.png size is N/A