Search code examples
pythonexcelxlrd

Python/Excel - Slice extracted excel data - exclude rows maintain structure


So I'm attempting exclude the top three rows during a data extraction.

for col_num in xrange(sheet.ncols):
    col = sheet.col_values(col_num, start_rowx=3, end_rowx=None)
    writer.writerow(col) #this syntax also may be skewing my results as well

This for loop eliminates the top 3 rows put then turns the rows into columns.

Any advice on how to maintain the data structure but at the same time eliminate rows?

Full script below:

import glob
import os
import xlrd
import csv

ROOTDIR = r'C:\Users\username\Desktop\Mults'
wb_pattern = os.path.join(ROOTDIR, '*.xlsx')

workbooks = glob.glob(wb_pattern)

with open('merged.csv', 'wb') as outcsv:
writer = csv.writer(outcsv)
    for wb in workbooks:
        book_path = os.path.join(ROOTDIR, wb)
        book =  xlrd.open_workbook(book_path)
        sheet = book.sheet_by_index(0)
        for colx in xrange(sheet.ncols):
            col = sheet.col_values(colx, start_rowx=2, end_rowx=None)
            writer.writerow(col) #this syntax also may be skewing my results

Thank you!

Any help is much appreciated!


Solution

  • If you want row values, why are you pulling the columns to write as rows? Pull the row values and write those:

    import glob
    import os
    import xlrd
    import csv
    
    ROOTDIR = r'C:\Users\username\Desktop\Mults'
    wb_pattern = os.path.join(ROOTDIR, '*.xlsx')
    
    workbooks = glob.glob(wb_pattern)
    start_rownum = 3 # or wherever you want to start copying
    with open('merged.csv', 'wb') as outcsv:
        writer = csv.writer(outcsv)
        for wb in workbooks:
            book_path = os.path.join(ROOTDIR, wb)
            book =  xlrd.open_workbook(book_path)
            sheet = book.sheet_by_index(0)
            for rownum in xrange(start_rownum, sheet.numrows):
                row = sheet.row_values(rownum)
                writer.writerow(row)