Search code examples
python csv split

Splitting one csv into multiple files


I have a CSV file of about 5000 rows. Using Python, I want to split it into five files.

I wrote some code for it, but it is not working:

import codecs
import csv
# Intended number of CSV rows per output file; also used to build output file names.
NO_OF_LINES_PER_FILE = 1000
def again(count_file_header, count):
    """Write one chunk of the input CSV to ``write_<count_file_header>.csv``.

    The first ``count`` rows of ``import_1458922827.csv`` are skipped and the
    following ``NO_OF_LINES_PER_FILE`` rows (fewer if the input ends sooner)
    are copied to the output file.

    Args:
        count_file_header: Value used as the suffix of the output file name;
            it is converted with ``str()``, so ints are accepted.
        count: Number of leading input rows to skip before copying.
    """
    out_name = 'write_' + str(count_file_header) + '.csv'
    # Python 3's csv module needs text-mode files opened with newline=''.
    # The output is opened with 'w' so that re-running the script does not
    # append duplicate rows to an existing chunk.
    with open('import_1458922827.csv', newline='') as csvfile, \
            open(out_name, 'w', newline='') as f3:
        candidate_info_reader = csv.reader(
            csvfile, delimiter=',', quoting=csv.QUOTE_ALL)
        writer = csv.writer(
            f3, delimiter=',', lineterminator='\n', quoting=csv.QUOTE_ALL)
        last_row = count + NO_OF_LINES_PER_FILE
        for index, row in enumerate(candidate_info_reader):
            if index < count:
                # Still inside the part that belongs to earlier chunks.
                continue
            if index >= last_row:
                # This chunk is complete; no need to read the rest.
                break
            writer.writerow(row)

def read_write():
    """Split ``import_1458922827.csv`` into files of NO_OF_LINES_PER_FILE rows.

    The input is read once. Every ``NO_OF_LINES_PER_FILE`` rows a new output
    file is started, named ``write_<N>.csv`` where ``N`` is a multiple of
    ``NO_OF_LINES_PER_FILE`` (``write_1000.csv``, ``write_2000.csv``, ... for
    the default of 1000). The last file holds the remaining rows. No output
    file is created when the input has no rows.
    """
    out_file = None
    writer = None
    # Python 3's csv module needs text-mode files opened with newline=''.
    with open('import_1458922827.csv', newline='') as csvfile:
        candidate_info_reader = csv.reader(
            csvfile, delimiter=',', quoting=csv.QUOTE_ALL)
        try:
            for index, row in enumerate(candidate_info_reader):
                if index % NO_OF_LINES_PER_FILE == 0:
                    # Chunk boundary: finish the previous file, start the next.
                    if out_file is not None:
                        out_file.close()
                    suffix = index + NO_OF_LINES_PER_FILE
                    out_file = open(
                        'write_' + str(suffix) + '.csv', 'w', newline='')
                    writer = csv.writer(
                        out_file, delimiter=',', lineterminator='\n',
                        quoting=csv.QUOTE_ALL)
                writer.writerow(row)
        finally:
            # Close the last chunk even if reading or writing failed.
            if out_file is not None:
                out_file.close()

# Run the split as soon as the script is executed.
read_write()

The above code creates many files with empty content.

How can I split one file into five CSV files?


Solution

  • I suggest not reinventing the wheel. There is an existing solution. Source here

    import os
    
    
    def split(filehandler, delimiter=',', row_limit=1000,
              output_name_template='output_%s.csv', output_path='.', keep_headers=True):
        """Split the CSV read from ``filehandler`` into numbered piece files.

        Args:
            filehandler: Open text file (or any iterable of lines) holding the
                CSV data to split.
            delimiter: Field delimiter used for both reading and writing.
            row_limit: Maximum number of data rows per piece (the header row
                is not counted).
            output_name_template: ``%``-format template for piece file names;
                it receives the 1-based piece number.
            output_path: Directory in which the pieces are created.
            keep_headers: When true, the first input row is treated as a
                header and repeated at the top of every piece.

        The first piece is always created, even when the input is empty.
        """
        import csv

        def open_piece(piece):
            # newline='' lets the csv module control line endings itself.
            path = os.path.join(output_path, output_name_template % piece)
            return open(path, 'w', newline='')

        reader = csv.reader(filehandler, delimiter=delimiter)
        current_piece = 1
        current_out = open_piece(current_piece)
        try:
            current_out_writer = csv.writer(current_out, delimiter=delimiter)
            headers = None
            if keep_headers:
                # next() works on Python 3 (reader.next() does not); the
                # default avoids StopIteration on an empty input.
                headers = next(reader, None)
                if headers is not None:
                    current_out_writer.writerow(headers)
            rows_in_piece = 0
            for row in reader:
                if rows_in_piece >= row_limit:
                    # Current piece is full: close it and start the next one.
                    current_out.close()
                    current_piece += 1
                    current_out = open_piece(current_piece)
                    current_out_writer = csv.writer(
                        current_out, delimiter=delimiter)
                    if headers is not None:
                        current_out_writer.writerow(headers)
                    rows_in_piece = 0
                current_out_writer.writerow(row)
                rows_in_piece += 1
        finally:
            # Flush and release the last piece even if an error occurred.
            current_out.close()
    

    Use it like:

    # Open the input in a with-block so it is closed once splitting is done;
    # newline='' is what the csv module expects for text-mode files.
    with open('/your/pat/input.csv', 'r', newline='') as source:
        split(source)