Consider the DataFrame df
df = pd.DataFrame({'Name': ['Tesla','Tesla','Tesla','Toyota','Ford','Ford','Ford','BMW','BMW','BMW','Mercedes','Mercedes','Mercedes'],
'Type': ['Model X','Model X','Model X','Corolla','Bronco','Bronco','Mustang','3 Series','3 Series','7 Series','C-Class','C-Class','S-Class'],
'Year': [2015, 2015, 2015, 2017, 2018, 2018, 2020, 2015, 2015, 2017, 2018, 2018, 2020],
'Price': [85000, 90000, 95000, 20000, 35000, 35000, 45000, 40000, 40000, 65000, 50000, 50000, 75000],
'Color': ['White','White','White','Red','Blue','Blue','Yellow','Silver','Silver','Black','White','White','Black']
})
I am trying to merge cells in excel which has common values consecutively for a DataFrame df
columns using the below mergecells
function, however, when I open the excel file after merging it says the excel file has recovered some of the values.
def mergecells(df, columntomerge, sheetname, writer):
df1 = df.index.to_series().groupby(df[columntomerge]).agg(['first', 'last']).reset_index()
df1 = df1.sort_values("first").reset_index()
first_last_rows = df1.set_index('first')['last'].to_dict()
merge_ranges = {}
for key, value in first_last_rows.items():
if df.loc[key, columnname] in ["Alpha", "-"] or key == value:
continue
merge_ranges[df.loc[key, columnname]] = (
key+1, df.columns.get_loc(columnname), value+1, df.columns.get_loc(columnname))
wb = writer.book
ws = writer.sheets[sheetname]
mf = wb.add_format({'align': 'center', 'valign': 'vcenter'})
for name, merge_range in merge_ranges.items():
ws.merge_range(*merge_range, name, mf)
for col in df.columns:
mergecells(df,col,'Trial',writer)
But when I call the above merge function with the code above, I am getting the error as the below image
The Type
column, Name
column and Price
column are correctly merged, However the Year and color are completely wrong
The problem lies in the groupby. You have disjoint intervals while grouping by colors or year: White is found in rows [1, 2, 3]
but also [11, 12]
. You should consider consecutive values in a column. more_itertools.consecutive_groups
can help you with that:
from more_itertools import consecutive_groups
sheetname='Sheet1'
with pd.ExcelWriter("test.xlsx") as writer:
df.to_excel(writer, sheet_name=sheetname, index=False)
wb = writer.book
ws = writer.sheets[sheetname]
mf = wb.add_format({'align': 'center', 'valign': 'vcenter'})
for j, col in enumerate(df.columns):
ws.set_column(j, j, 12, mf)
for val in df[col].unique():
idx = df[(df[col]==val) & (df[col]==df[col].shift(1))].index # indices of the rows where the value is the same as the previous row
for seg in consecutive_groups(idx):
l = list(seg)
ws.merge_range(l[0], j, l[-1]+1, j, val, mf)