Search code examples

Checking file checksum in Python

I have to write a Python program that performs the following tasks:

1- Download the Movielens datasets from the url ‘’
2- Download the Movielens checksum from the url ‘’
3- Check whether the checksum of the archive corresponds to the downloaded one
4- If the check succeeds, print the names of the files contained in the downloaded archive

This is what I wrote up to now:

   from zipfile import ZipFile 
    from urllib import request 
    import hashlib
    def md5(fname):
        """Return the hexadecimal MD5 digest of the file at ``fname``.

        The file is opened in binary mode and read in 4096-byte chunks, so
        the whole file is never held in memory at once.
        """
        hash_md5 = hashlib.md5()
        with open(fname, "rb") as f:
            # iter() with a sentinel keeps calling f.read(4096) until it
            # returns b"", which signals end of file.
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()
    # NOTE: the URL and path strings below are empty and must be filled in
    # before this script can run.
    url_datasets = ''
    datasets = ''
    url_checksum = ''
    # Local path for the downloaded checksum file; it has to be assigned
    # before it is passed to urlretrieve below.
    checksum = ''
    request.urlretrieve(url_datasets, datasets)
    request.urlretrieve(url_checksum, checksum)
    # Print the name of every member of the downloaded archive.
    with ZipFile(datasets, 'r') as zipObj:
        listOfiles = zipObj.namelist()
        for elem in listOfiles:
            print(elem)

So what I'm missing is a way to compare the checksum I computed with the one I downloaded. Maybe I could create a function "printFiles" that checks the checksum and, if it matches, prints the list of files.

Is there something else I can improve?


  • Your code isn't actually making any of the requests.

    from zipfile import ZipFile 
    import hashlib
    import requests
    def md5(fname):
        """Return the hexadecimal MD5 digest of the file at ``fname``.

        The file is opened in binary mode inside a ``with`` block so the
        handle is always closed, and it is hashed in fixed-size chunks so a
        large archive is never loaded into memory in one piece.
        """
        hash_md5 = hashlib.md5()
        with open(fname, 'rb') as f:
            # iter() with a sentinel keeps reading until read() returns b""
            # (end of file).
            for chunk in iter(lambda: f.read(65536), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()
    # NOTE: the URL and path strings below are empty and must be filled in
    # before this script can run.
    url_datasets = ''
    datasets = ''
    url_checksum = ''
    checksum = ''
    ds = requests.get(url_datasets, allow_redirects=True)
    cs = requests.get(url_checksum, allow_redirects=True)
    # Save the archive to disk; the with-block closes the file before it is
    # re-opened for hashing.
    with open(datasets, 'wb') as archive:
        archive.write(ds.content)
    ds_md5 = md5(datasets)
    # The expected digest is taken as the first whitespace-separated token
    # of the checksum response.
    cs_md5 = cs.content.decode('utf-8').split()[0]
    print(ds_md5)
    print(cs_md5)
    if ds_md5 == cs_md5:
        print("MATCH")
        # Only list the archive contents once the checksum has been verified.
        with ZipFile(datasets, 'r') as zipObj:
            listOfiles = zipObj.namelist()
            for elem in listOfiles:
                print(elem)
    else:
        print("Checksum fail")