So I'm attempting to exclude the top three rows during a data extraction.
# Write every sheet column, minus its first three cells, to the CSV.
# NOTE: each sheet column becomes one CSV row here, which may be what is
# skewing the results.
writer.writerows(
    sheet.col_values(col_idx, start_rowx=3, end_rowx=None)
    for col_idx in xrange(sheet.ncols)
)
This for loop eliminates the top 3 rows but then turns the rows into columns.
Any advice on how to maintain the data structure but at the same time eliminate rows?
Full script below:
import glob
import os
import xlrd
import csv
# Collect every .xlsx workbook directly under ROOTDIR.
ROOTDIR = r'C:\Users\username\Desktop\Mults'
wb_pattern = os.path.join(ROOTDIR, '*.xlsx')
workbooks = glob.glob(wb_pattern)

# Append the first sheet of each workbook to a single CSV file.
with open('merged.csv', 'wb') as outcsv:
    writer = csv.writer(outcsv)
    for wb in workbooks:
        workbook_file = os.path.join(ROOTDIR, wb)
        first_sheet = xlrd.open_workbook(workbook_file).sheet_by_index(0)
        # NOTE: each sheet column (from row index 2 onward) is written as
        # one CSV row, which may be what is skewing the results.
        writer.writerows(
            first_sheet.col_values(col_idx, start_rowx=2, end_rowx=None)
            for col_idx in xrange(first_sheet.ncols)
        )
Thank you!
Any help is much appreciated!
If you want row values, why are you pulling the columns to write as rows? Pull the row values and write those:
import glob
import os
import xlrd
import csv
# Merge the first sheet of every .xlsx workbook under ROOTDIR into
# merged.csv, skipping the leading rows of each sheet and keeping the
# original row/column layout.
ROOTDIR = r'C:\Users\username\Desktop\Mults'
wb_pattern = os.path.join(ROOTDIR, '*.xlsx')
workbooks = glob.glob(wb_pattern)

start_rownum = 3  # index of the first row to copy, i.e. skip the top three rows

with open('merged.csv', 'wb') as outcsv:
    writer = csv.writer(outcsv)
    for wb in workbooks:
        # glob already returns paths that include ROOTDIR, so the path is
        # used as-is rather than being joined with ROOTDIR a second time.
        book = xlrd.open_workbook(wb)
        sheet = book.sheet_by_index(0)
        # xlrd exposes the row count as ``nrows`` (there is no ``numrows``).
        for rownum in xrange(start_rownum, sheet.nrows):
            # Read by row so each sheet row becomes exactly one CSV row.
            writer.writerow(sheet.row_values(rownum))