Search code examples
python csv glob

Opening text files in different folders and writing each one to a CSV cell


I am trying to read text files from different folders and write each file's text into a single cell of a CSV file, together with its file name (*.txt).

import os
# Names of all entries found directly inside the dataset root directory.
folders = os.listdir("/Users/hilo/Documents/digitization/ReleasedDataset_mp3")
# Bare expression: echoes the list in an interactive session (REPL/notebook);
# it has no effect when the code runs as a script.
folders

# glob is used further down to match *.txt files, csv to write the output file.
import  glob, csv



Here I get the list of folder names, which look like this:

['Becton Dickinson_20170803',
 'CIGNA Corp._20170202',
 'The Bank of New York Mellon Corp._20170720',
 'JPMorgan Chase & Co._20170714']

Here I try to use a loop to open each *.txt file, extract all of its text, and write that text into one cell of the CSV file with the key (*):

# Intended to collect the *.txt files of every folder named in `folders`.
for i in folders:
    # NOTE: the "i" inside the string literal is a literal character, not the
    # loop variable, so the pattern points at a sub-directory actually named "i".
    # `files` is also rebound on every pass, so only the last result is kept.
    files=glob.glob("/Users/hilo/Documents/digitization/ReleasedDataset_mp3/i/*.txt")
# Create (or overwrite) writeData.csv and write (filename, text) rows into it.
with open('writeData.csv', mode='w') as new_file:
  writer = csv.writer(new_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
  for filename in files:

    # Read the whole content of the file into one string
    file = open(filename, 'rt')
    text = file.read()
    file.close()

    # NOTE: iterating over a str yields single characters, so this writes one
    # row per character of the file rather than one row per file.
    for text in text:
      writer.writerow((filename, text))

This keeps producing an empty CSV. Does anyone have a suggestion for how to fix the code?


Solution

  • I think this will work, based on the additional information you provided in the comments:

    import csv
    import glob
    import os
    from pprint import pprint, pp
    
    
    # Directory that contains one sub-folder per document set.
    # (The asker's real location would be
    #  "/Users/hilo/Documents/digitization/ReleasedDataset_mp3".)
    root_folder = "/Stack Overflow/_test_files_root"

    # Sub-folders to scan. In the asker's setup these would be names such as
    # 'Becton Dickinson_20170803', 'CIGNA Corp._20170202',
    # 'The Bank of New York Mellon Corp._20170720', 'JPMorgan Chase & Co._20170714'.
    folders = ['Subfolder1', 'Subfolder3']

    # Gather every *.txt path, folder by folder, into one flat list.
    filepaths = [
        path
        for subfolder in folders
        for path in glob.glob(os.path.join(root_folder, subfolder, "*.txt"))
    ]

    # Optional: on Windows, show forward slashes instead of backslashes.
    if os.name == 'nt':
        for index, path in enumerate(filepaths):
            filepaths[index] = path.replace('\\', '/')
    # Optional: list the files that are about to be processed.
    pprint(filepaths, width=128)

    # Write one CSV row per text file: (path, entire file content).
    with open('writeData.csv', mode='w', newline='') as csv_file:
        csv_writer = csv.writer(
            csv_file,
            delimiter=',',
            quotechar='"',
            quoting=csv.QUOTE_MINIMAL,
        )
        for path in filepaths:
            # Read the complete text of the file in one go.
            with open(path, 'rt') as text_file:
                contents = text_file.read()
            # The whole text goes into a single cell next to its file name.
            csv_writer.writerow([path, contents])

    print('-FINI-')
    

    Here's what the file created looks like in Excel:
    (I used text from various online news articles for testing.)

    screenshot of file in Excel