python python-3.x csv import concatenation

Reading multiple CSV files from different subfolders based on date range (in Python)

I have a Main Folder in which a new subfolder gets placed every day. In these subfolders, there are multiple CSV files. Of these multiple CSV files I need to concatenate one specific file from each subfolder based on a pre-defined date interval.

the subfolders are always named the date (etc. "20230227") and the files are always named "Some_File-/*.csv"

I'm assuming the answer would be something in line of this:

import pandas as pd
import glob
from pathlib import Path

Start_date = '20230101'
End_date = '20230227'

directory = "\Main-Folder-Path\subfolders....."

#Read CSV file
df_temp = []
for sub_folder in #all subfolders in the above date range: 
    for file in Path(directory).glob("Some_File-/*.csv"):
        df_temp.append(pd.read_csv(file))

#concatenate into a single data frame
df = pd.concat(df_temp)

Solution

I found the solution:

def read_csv_file(start_date, end_date, location):
    """Function to read csv file from a given location and return a dataframe"""
    parsed_start_date = datetime.datetime.strptime(start_date, "%Y%m%d").date()
    parsed_end_date = datetime.datetime.strptime(end_date, "%Y%m%d").date()
    final_dataframe = pd.DataFrame()

    # loop over sub-folders
    for folder_name in os.listdir(location):
        try:
            # try to parse the folder name as a date, if not possible, skip it
            parsed_date = datetime.datetime.strptime(folder_name, "%Y%m%d").date()
        except:
            continue

        # if the sub-folder fulfills the date criteria
        if parsed_start_date <= parsed_date <= parsed_end_date:
            print(f"processing folder {folder_name}")
            # go into the folder and load csv file starting with name 'front'
            for file_name in os.listdir(location + '/' + folder_name):
                # if the file name starts with 'front' and ends with '.csv'
                if file_name.startswith('CSV_FILE_NAME-') and file_name.endswith('.csv'):
                    df = pd.read_csv(location + '/' + folder_name + '/' + file_name, encoding= 'unicode_escape', low_memory=False)
                    # concat the dataframe into the final_dataframe
                    final_dataframe = pd.concat([final_dataframe, df])
                    break

    print("--- the job is done ---")
    return final_dataframe


# Define the start and end date
my_start_date = '20230101'
my_end_date = '20230301'

# Run the script
result = read_csv_file(my_start_date, my_end_date, '/MAIN/FOLDER/LOCATION')
# Save result pandas dataframe into csv file inside code folder
result.to_csv('CONCATENATED_FILE.csv', index=False)