Search code examples
pythoncsvxlsx

XSLX Conversion to CSV file whenever a new file is added into the folder


I try to convert a .xlsx file into .csv file whenever a new file is added into the Inputfolder and put the conversion .csv file in the OutputFolder.

import glob
import time
import os
import pandas as pd

#Get timestamp
timestr = time.strftime("%Y%m%d_%H%M%S")

#Input file path
input_filepath = 'C:/Documents/InputFile'
folderSize = 0


#Function to convert file
def format_csv(latest_file):
#Output file path
filenamepath = 'C:/Documents/OutputFile/' + timestr + '.csv'
read_Excelfile = pd.read_excel(latest_file)
read_Excelfile.to_csv(filenamepath, index=None, header=True)

while True:
  checkFolder = folderSize
  folderSize = 0

  #Check the size of the Input Folder
  for path, dirs, files in os.walk(input_filepath):
      for f in files:
          fp = os.path.join(path, f)
          folderSize += os.path.getsize(fp)
  print(folderSize)

  #Create new .csv file if the Input folder has new file added
  if(folderSize > checkFolder):
      list_of_files = glob.glob('C:/Documents/InputFile/*.xlsx')
      latest_file = max(list_of_files, key=os.path.getctime)
      format_csv(latest_file)
      print(latest_file)

  time.sleep(15)

Right now the program will only convert the first .xlsx file only. If I add a new .xlsx file into InputFolder, the file is not converted.


Solution

  • I already improvise my original code. So, whenever I put a new excel file into InputFolder, the program will convert the file to .csv format and insert the formatted file in OutputFolder

    import glob
    import time
    import os
    import pandas as pd
    from watchdog.observers import Observer
    from watchdog.events import FileSystemEventHandler
    
    #Function if new file is created in the folder
    def on_created(event):
        list_of_files = glob.glob('C:/Users/Documents/InputFolder/*.xlsx')
        latest_file = max(list_of_files, key=os.path.getctime)
        format_csv(latest_file)
    
    #Function to convert .xlsx to .csv
    def format_csv(latest_file):
    
      # Get timestamp
      timestr = time.strftime("%d%m%Y_%H%M%S")
    
      #Output file path
      filenamepath = 'C:/Users/Documents/OutputFolder/' + timestr + '.csv'
      read_Excelfile = pd.read_excel(latest_file)
      read_Excelfile.to_csv(filenamepath, index=None, header=True)
      print(filenamepath)
    
    if __name__ == "__main__":
      event_handler = FileSystemEventHandler()
    
      #Calling function for file insertion
      event_handler.on_created = on_created
    
      #Input Folder
      path = 'C:/Users/Documents/InputFolder'
    
      #Function to observe file
      observer = Observer()
      observer.schedule(event_handler, path, recursive=True)
      observer.start()
    
      try:
          #Check every one second
          while True:
              time.sleep(1)
      except KeyboardInterrupt:
          #Program stop if keyboard interupt
          observer.stop()
      observer.join()