I have a python program which reads excel documents. I need to allow only the first appearance of certain column combinations. Eg:
A | B
-------------
1. 200 | 201
2. 200 | 202
3. 200 | 201
4. 200 | 203
5. 201 | 201
6. 201 | 202
.............
I want remove/skip the third row where the duplication found and write it to a CSV file. Here is the function I've been trying so far. But it's not working.
def validateExcel(filename):
xls=xlrd.open_workbook(filename)
setcount = 0
column = 0
count = 0
# sheetcount = 0
for sheet in xls.sheets():
header=""
# sheetcount = sheetcount + 1
number_of_rows = sheet.nrows
number_of_columns = sheet.ncols
sheetname = sheet.name
mylist = []
for row in range (1, number_of_rows):
mylist = []
for col in range(0, 2):
mylist.append(sheet.cell_value(row, col))
print mylist
myset = set(mylist)
print myset
It is worked for me: In python 2.7
def validateExcel(filename):
xls=xlrd.open_workbook(filename)
setcount = 0
column = 0
count = 0
# sheetcount = 0
for sheet in xls.sheets():
header=""
# sheetcount = sheetcount + 1
number_of_rows = sheet.nrows
number_of_columns = sheet.ncols
sheetname = sheet.name
mylist = []
for row in range(1, number_of_rows):
mylist.append((sheet.cell_value(row, 0), sheet.cell_value(row, 1)))
myset = sorted(set(mylist), key=mylist.index)
return myset