Search code examples
pythonopenpyxl

How to read merged cells in python using openpyxl?


I'm trying to read data from the excel file that has merged_cells_range... but the output is not my goal. Pls help me out

import openpyxl
wb = openpyxl.load_workbook('book1.xlsx')
sheet = wb.get_sheet_by_name('info')
all_data=[]
print(sheet.merged_cells.ranges)
for row_index in range(1,sheet.max_row+1):
    row=[]
    for col_index in range(1,sheet.max_column+1):
        vals = sheet.cell(row_index,col_index).value
        if vals =='':
            for crange in sheet.merged_cells.ranges:
                rlo,rhi,clo,chi = crange
                if rlo<=row_index and row_index<rhi and clo<=col_index and col_index<chi:
                    vals = sheet.cell(rlo,clo).value
                    print(vals)
                    break
        row.append(vals)
    all_data.append(row)
print(all_data)
for row in all_data:
    sheet.append(row)
wb.save('bbbb.xlsx')

I desired to get output: [['06B', 'Daewoo BC 212', 80, 1373], ['06C', 'Daewoo BC 212', 80, 1020], ['06D', 'Transinco B60KL', 60, 1061], ['06D', 'Transinco B60KL', 60, 19], ['06E', 'Daewoo BC 212', 80, 1020], ['06E', 'Daewoo BC 212', 60, 1061], ['06E', 'Daewoo BC 212', 60, 19]] but results is:

[['06B', 'Daewoo BC 212', 80, 1373], ['06C', 'Daewoo BC 212', 80, 1020], ['06D', 'Transinco B60KL', 60, 1061], [None, None, 60, 19], ['06E', 'Daewoo BC 212', 80, 1020], [None, None, 60, 1061], [None, None, 60, 19]]

my input output desired


Solution

  • I amend my code and it's work.

    import openpyxl
    from openpyxl.utils import range_boundaries
    wb = openpyxl.load_workbook('book1.xlsx')
    sheet = wb.get_sheet_by_name('info')
    all_data=[]
    for row_index in range(1,sheet.max_row+1):
        row=[]
        for col_index in range(1,sheet.max_column+1):
            vals = sheet.cell(row_index,col_index).value
            if vals == None:
                for crange in sheet.merged_cells:
                    clo,rlo,chi,rhi = crange.bounds
                    top_value = sheet.cell(rlo,clo).value
                    if rlo<=row_index and row_index<=rhi and clo<=col_index and col_index<=chi:
                    vals = top_value
                        print(vals)
                        break
        row.append(vals)
    all_data.append(row)
    print(all_data)
    for row in all_data:
        sheet.append(row)
    wb.save('bbbb.xlsx')