Search code examples
pythoniterationxlrdxlwt

Python xlrd/xlwt: comparing 2 .xls files and aggregation of values over several iterations


Task:
I have two spreadsheets (A and B), see #...# for references in code.

First, I start with the first row in A. I get a specific value V in column 7 (#1#). I also look at column 15, which determines in which row of spreadsheet B I want to add V in the next step (#2#).

Second, I switch to B, and look for the right row which I defined in the previous step with the spreadsheet A (#3#). Then I add V to this row in a column 5 (#4#). After that, I start from the beginning with the next iteration in the second row of A and so on (#m=m+1#).

I do that for every row in spreadsheet A. That means, that every value V of every row in A is added to some cell in spreadsheet B, but not every row in B has to have a value from A in its column 5.

Solution/Problem:
I solved this task with Python 3.x using xlrd/xlwt. However, I am facing a problem, that my script only works for one iteration (meaning one row in A). If I run my script for more than one iteration automatically (Python executes the script for every row in A automatically), the previous value gets overwritten by the newer ones resulting in an not completed spreadsheet B.

However, if I run it manually (by changing the row of A by hand) it works. I think this is because, the script is executed manually and somehow saved the values so that they get not overwritten. Since, this is not to an option for my case (more than 1k rows in A...), I am looking for an automated solution.

Is there a way to overcome this issue?

import xlwt
import xlrd
from xlutils.copy import copy

grid_file = 'grid_aut_100km.xls'                    #spreadsheet B#
wind_file = 'wind_aut_sample.xls'                   #spreadsheet A#

gridbook = xlrd.open_workbook(grid_file)
gridsheet = gridbook.sheet_by_index(0)

windbook_rd = xlrd.open_workbook(wind_file)
windsheet_rd = windbook_rd.sheet_by_index(0)

gridbook_wt = copy(gridbook)
gridsheet_wt=gridbook_wt.get_sheet(0)

m=1

def setup():
    gridsheet_wt.write(0,5,"sum_capacity")
    gridbook_wt.save(grid_file)

def setsumszero():
    n=1
    sum_capacity = int(0)
    while n <= gridsheet.nrows-1:
        gridsheet_wt.write(n,5,sum_capacity)
        n=n+1
    gridbook_wt.save(grid_file)

def gridmatch(m,n):

    id_in_windsheet = windsheet_rd.cell_value(m,15)             #2#
    n=1
    id_in_gridsheet = gridsheet.cell_value(n,0)

    while True:
        if id_in_windsheet == id_in_gridsheet:                  #3#
            print(str(id_in_windsheet) + " = " + str(id_in_gridsheet))
            print("It's a Match in row " + str(n))
            break
        else:
            n=n+1
            id_in_gridsheet = gridsheet.cell_value(n,0)

    sum_capacity_old = gridsheet.cell_value(n,5)
    print("Old capacity is " + str(sum_capacity_old))

    additional_capacity = windsheet_rd.cell_value(m,7)          #1#
    print("Additional capacity is " + str(additional_capacity))

    sum_capacity_new = sum_capacity_old + additional_capacity   #4#
    print("Sum of new capacity is " + str(sum_capacity_new))

    gridsheet_wt.write(n,5,sum_capacity_new)
    print("New capacity is " + str(sum_capacity_new))

    gridbook_wt.save(grid_file)
    print("")
    print("")

setup()
setsumszero()  
m=1  #row in windbook
n=1  #row in gridbook

while m <= windsheet_rd.nrows-1:
    gridmatch(m,n)
    gridbook_wt.save(grid_file)
    m=m+1

Solution

  • I got a solution to solve my issue:

    I used a list to store the values. After iterating over n-rows I wrote down the final results using xlwt.

    Please find the details in the code attached.

    import xlwt
    import xlrd
    from xlutils.copy import copy
    
    ##make sure that no macros are used and files are not .xlsx!
    grid_file = 'grid_aut_100km.xls'
    wind_file = 'wind_aut_sample.xls'
    ##reading gridfile
    gridbook = xlrd.open_workbook(grid_file)
    gridsheet = gridbook.sheet_by_index(0)
    ##reading windfile
    windbook_rd = xlrd.open_workbook(wind_file)
    windsheet_rd = windbook_rd.sheet_by_index(0)
    ##writing gridfile
    gridbook_wt = copy(gridbook)
    gridsheet_wt=gridbook_wt.get_sheet(0)
    
    already_used_lists = []
    
    def setup():
        ##writes header
        gridsheet_wt.write(0,5,"sum_capacity")
        gridbook_wt.save(grid_file)
    
    def gridmatch(m,n):
        ##list initialisation
        capacity_list = []
        while n <= gridsheet.nrows-1:
            capacity_list= capacity_list + [0]
            n=n+1
            #print(capacity_list)
    
        print("List successfully initialised - Nr. of elements in list " + str(len(capacity_list)))
        print()
    
        while m <= windsheet_rd.nrows-1:
            print("m is " + str(m))
            id_in_windsheet = windsheet_rd.cell_value(m,15)
            print("to check: " + str(id_in_windsheet))
            n=1
            id_in_gridsheet = gridsheet.cell_value(n,0)
    
            while True:
                if id_in_windsheet == id_in_gridsheet:
                    print(str(id_in_windsheet) + " = " + str(id_in_gridsheet))
                    print("It's a Match in row " + str(n))
                    break
                else:
                    n=n+1
                    id_in_gridsheet = gridsheet.cell_value(n,0)
    
            print("Btw: m is " + str(m))
            print("Btw: n is " + str(n))
    
            additional_capacity = windsheet_rd.cell_value(m,7)
            print("Additional capacity is " + str(additional_capacity))
            capacity_list[n-1] = capacity_list[n-1] + additional_capacity
    
            print(capacity_list)
            print("")
            print("")
            m=m+1
    
        ##writing capacity to .xls file
        n=1
        while n <= gridsheet.nrows-1:
            total_capacity = capacity_list[n-1]
            gridsheet_wt.write(n,5,total_capacity)
            n=n+1
    
    setup()
    
    m=1  ##row in windbook
    n=1  ##row in gridbook
    
    gridmatch(m,n)
    
    gridbook_wt.save(grid_file)