Search code examples
python loops zip

zip files in a loop in Python


I have a data frame with student IDs, names, and other information. I have done a groupby on the student name (stu_NAME). I need to create a zip file for each student individually (named after the student) and then zip them all together. I can zip all the files, but I cannot zip each student's profile based on the student name. I just need a line in my loop after out_file.write(csv) to zip each CSV file.

# Split the combined frame into one group per student name.
groupby = df_concat.groupby('stu_NAME')

for n, g in groupby:
    # Serialise this student's rows; the boolean literal is `False`, not `false`.
    csv = g.to_csv(index=False)

    # One CSV per student, named after the student.
    with open('{}{}.csv'.format(path, n), 'w') as out_file:
        out_file.write(csv)


# Archive the contents of the 'path2' directory as 'path1.zip'.
shutil.make_archive('path1', 'zip', 'path2')

Solution

  • How about:

    import pandas as pd
    import zipfile
    
    # Create a zip file
    def create_zip(srcs, dst, filenames, op):
        """Write each source file into the zip archive at ``dst``.

        Args:
            srcs: Paths of the files to add to the archive.
            dst: Path of the zip archive to write.
            filenames: Name each file gets inside the archive, paired
                positionally with ``srcs``; pairing stops at the shorter
                of the two.
            op: Mode passed to ``zipfile.ZipFile`` (e.g. ``'w'`` or ``'a'``).
        """
        # The context manager closes the archive even if a write fails.
        with zipfile.ZipFile(dst, op, zipfile.ZIP_DEFLATED) as zf:
            for src, filename in zip(srcs, filenames):
                zf.write(src, filename)
    
    def main():
        """Write one CSV and one zip per student, plus two combined zips.

        Builds a small demo frame, groups it by ``stu_NAME`` and, in the
        current working directory, creates ``path_<name>.csv`` and
        ``path_<name>.zip`` for each student, then ``all_students_zip.zip``
        (the per-student zips) and ``all_students_csv.zip`` (the CSVs).
        """
        dct = {'stu_NAME': ['student_2', 'student_1'],
               'other_info': [2, 1]}

        df = pd.DataFrame(dct)

        # Group by the bare column label, not a one-element list: with a
        # list, pandas >= 2.0 yields 1-tuples as group keys, which would
        # turn the file names into "path_('student_1',).csv".
        groupby = df.groupby('stu_NAME')

        zip_all_zips = []
        zip_all_csvs = []

        for n, g in groupby:
            filename = '{}{}'.format('path_', n)
            filename_csv = filename + '.csv'
            filename_zip = filename + '.zip'
            # Let pandas write the file directly so that it, rather than a
            # text-mode file object, controls newline handling.
            g.to_csv(filename_csv, index=False)
            zip_all_zips.append(filename_zip)
            zip_all_csvs.append(filename_csv)
            # Create a zip file for each student
            create_zip([filename_csv], filename_zip, [filename_csv], 'w')

        # Create a zip file with all students (.zip of .zips)
        create_zip(zip_all_zips, 'all_students_zip.zip', zip_all_zips, 'w')

        # Create a zip file with all students (.zip of .csvs)
        create_zip(zip_all_csvs, 'all_students_csv.zip', zip_all_csvs, 'w')

    # Run the demo only when executed as a script, not when imported.
    if __name__ == '__main__':
        main()
    

    This yields the following files:

    all_students_csv.zip  
    path_student_1.csv  
    path_student_2.csv
    all_students_zip.zip  
    path_student_1.zip  
    path_student_2.zip
    

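    It creates (1) a .zip of each .csv, (2) a .zip with all the .csv files, and (3) a .zip with all the .zip files, so comment out whichever you don't need. If you want to delete the .csv files after creating the .zip files, you can add the following inside main (where zip_all_csvs is in scope), after the archives have been created: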

    import os

    # Delete each intermediate CSV once the zip archives have been created.
    for csv_path in zip_all_csvs:
        os.remove(csv_path)