Search code examples
pythonpandasglob

Renaming multiple csv files within a folder in Python


I have a folder with 50 .csv files. The .csv files are auto-generated and a results/ output from a process-based model (long and automatically named). For example, sandbox_username_vetch_scaleup_IA_1.csv; sandbox_username_vetch_scaleup_IA_2.csv, and it continues till sandbox_username_vetch_scaleup_IA_50.csv.

I am trying to shorten the file names in a way so that the files are names are IA_1, IA_2 ...up to IA_50 and subsequently the new .csv file name gets added as a column to the data frame. Here is what I have tried so far

# import necessary libraries
import pandas as pd
import os
import glob
import sys
from pathlib import Path
import re
data_p = "/Users/Username/Documents/HV_Scale/CWAD"
output_p = "/Users/Username/Documents/HV_Scale/CWAD"
retval = os.getcwd() 
print (retval) # see in which folder you are

os.chdir(data_p) # move to the folder with your data
os.getcwd()


filenames = sorted(glob.glob('*.csv'))
fnames = list(filenames) # get the names of all your files
#print(fnames)

#Loop over
for f in range(len(fnames)):
    print(f'fname: {fnames[f]}\n')
    pfile = pd.read_csv(fnames[f], delimiter=",") # read in file

    #extract filename
    filename = fnames[f]
    parts = filename.split(".") # giving you the number in file name and .csv
    only_id = parts[0].split("_") # if there is a bracket included 

    # get IA from your file
    filestate = pfile["IA"][0] # assuming this is on the first row
    filestate = str(filestate)

    # get new filename
    newfilename = only_id[0]+"-"+filestate+parts[1]

    # save your file (don't put a slash at the end of your directories on top)
    pfile.to_csv(output_p+"/"+newfilename, index = False, header = True)

Here is the code for adding the csv file name as a column

import glob
import os
import shutil
import sys
import pandas as pd

path = '/Users/Username/Documents/HV_Scale/IA_CWAD/short'                 
all_files = glob.glob(os.path.join(path, "*.csv")) 
names = [os.path.basename(x) for x in glob.glob(path+'\*.csv')] 

df = pd.DataFrame()
for file_ in all_files:
    file_df = pd.read_csv(file_,sep=';', parse_dates=[0], infer_datetime_format=True,header=None )
    file_df['file_name'] = file_
    df = df.append(file_df)  

#However, this adds the old csv file name and not the renamed one


Solution

  • In order to rename and move these files, all you need is:

    import glob
    import os
    import shutil
    import sys
    
    SOURCE = '<Your source directory>'
    TARGET = '<Your target directory>'
    
    for file in glob.glob(os.path.join(SOURCE, '*_IA_*.csv')):
        idx = file.index('_IA_')
        filename = file[idx+1:]
        target = os.path.join(TARGET, filename)
        if os.path.exists(target):
            print(f'Target file {target} already exists', file=sys.stderr)
        else:
            shutil.copy(file, target)
    

    As there's nothing in the OP's question that tries to handle modification of the CSV files, that is left as an exercise for the OP.

    Source and target directories should be different otherwise this can lead to ambiguous results