Search code examples
pythonpython-3.xpandasxlsxwriterxlrd

Grouping columns by unique alternating colors and alternating those colors based on IDs with XlsxWriter


The problem: I want to color code my data using XlsxWriter as shown below.

enter image description here

You can see that rows with the same ID stay the same color, and if ID changes, the row color alternates. In addition, I want columns that share F (F1 #pets, F1 #kids, F1 #hobbies) to follow the same color scheme, change that color scheme if F changes, and keep the alternating pattern that is based on ID.

So far, I was able to alternate the blue coloring (shown below).

enter image description here

To do this, I used the following code.

import pandas as pd
import xlsxwriter
import xlrd


def build_format(wb, color, bold=False, align='center', font_color='black'):
    """
    Set formatting in excel file to be centered, bold, and have grey borders

    Args:
        wb(:obj:`str`): The complete path to the directory containing files
        color: accepts color value to change cell color
        bold: Enables text to be bold if True; default is False
        align: Set alignment of cell text; default is centered
        sci_not: Enables scientific notation if True; default is False
        font_color: Sets font color; default is black

    Returns:
        Customized XlsxWriter formatting template, based on user-provided parameters
    """
    #: Format headers
    format_x = wb.add_format({'align': align,'bold': bold, 'bg_color':color, 'font_color':font_color})
    format_x.set_border()
    format_x.set_border_color('#BFBFBF')
    format_x.set_align('center')

    return format_x

def main():
    df = pd.DataFrame(
        {'ID': [3284, 2208, 2855, 1855, 4010, 1066, 1066, 2476, 2476],
            'col A': ['num1', 'num2', 'num3', 'num4', 'num5', 'num6', 'num7', 'num8', 'num9'],
            'F1 #pets': [48, 66, 77, 77, 72, 90, 90, 40, 40],
            'F1 #kids': [23, 45, 77, 77, 47, 90, 90, 40, 40],
            'F1 #hobbies': [1436, 850, 162, 162, 419, 176, 176, 576, 576],
            'F2 #pets': [51, 67, 75, 75, 68, 94, 94, 36, 36],
            'F2 #kids': [24, 45, 75, 75, 44, 94, 94, 36, 36],
            'F2 #hobbies': [1399, 833, 163, 163, 430, 184, 184, 481, 481]})

    print("df", df)
    writer = pd.ExcelWriter('input_df.xlsx', engine = 'xlsxwriter')
    df.to_excel(writer, sheet_name = 'sheet1', index=False, header=True)
    writer.save()
    workbook = xlsxwriter.Workbook('output_df.xlsx')
    finalsheet = workbook.add_worksheet('Hobbies Pets Kids')

    #: Open input_wb file containing peptides_df and proteins_df sheets
    input_wb = xlrd.open_workbook('input_df.xlsx')

    #: Save input_wb sheets
    sheet1 = input_wb.sheet_by_name('sheet1')
    group_id = None

    #: Colors
    dark_colors = {

        0: '#A3E0CD', #: Dark color 1
        1: '#8DDC8D', #: Dark color 2
        2: '#ACCD90', #: Dark color 3
        3: '#85CAA0', #: Dark color 4
        4: '#FDA695', #: Dark color 5
        5: '#EFB6BB', #: Dark color 6
        6: '#E6AEAD', #: Dark color 7
        7: '#B3ADF5', #: Dark color 8
        8: '#B59CDD', #: Dark color 9
        9: '#D390DD', #: Dark color 10
        10: '#A7CEEF'} #: Dark blue (header)

    light_colors = {

        0: '#E5F8F3', #: Light color 1 (should be for col 3, 4, 5)
        1: '#E3F5E8', #: Light color 2 (should be for col 6, 7, 8)
        2: '#E1F1D8', #: Light color 3
        3: '#EDF5EF', #: Light color 4
        4: '#FDE4DD', #: Light color 5
        5: '#FAE5E4', #: Light color 6
        6: '#F8ECEB', #: Light color 7
        7: '#E6E5F9', #: Light color 8
        8: '#EDE7F6', #: Light color 9
        9: '#EDE8F2', #: Light color 10
        10: '#DBE8F0'} #: Light blue (should be for cols 1 and 2 only)

    med_colors = {
        0: '#C7EDE0', #: Med color 1 (should be for col 3, 4, 5)
        1: '#C7ECC7', #: Med color 2 (should be for col 6, 7, 8)
        2: '#C6E0B4', #: Med color 3
        3: '#C0E8C8', #: Med color 4
        4: '#FFD4CD', #: Med color 5
        5: '#F7CDCF', #: Med color 6
        6: '#F2D7D5', #: Med color 7
        7: '#DAD6FA', #: Med color 8
        8: '#D1C4E9', #: Med color 9
        9: '#E1BEE7', #: Med color 10
        10: '#B9D3EE'} #: Med blue (should be for cols 1 and 2 only)

    color_dicts = [light_colors, med_colors]
    the_num = 0
    current_color_dict = color_dicts[the_num % 2]
    for row_num in range (sheet1.nrows):
        input_wb_row = sheet1.row_values(row_num) # Get first row

        for col_num, data in enumerate(input_wb_row):
            finalsheet.write(row_num, col_num, data)
            if row_num == 0: # Header row
                finalsheet.set_row(row_num, None, build_format(workbook, dark_colors[10]))

            elif row_num != 0:
                if col_num == 0 and group_id == None:
                    group_id = data

                elif col_num ==0 and group_id != None:
                    if data != group_id:
                        group_id = data

                        the_num = the_num + 1
                        current_color_dict = color_dicts[the_num % 2]
                finalsheet.set_row(row_num, None, build_format(workbook, current_color_dict[10]))

    # Close the workbook to ensure the script actions are implemented in the workbook.   
    workbook.close()

main()

Is it possible to implement the coloring for columns 3-5 and 6-8 using XlsxWriter or does anyone know a workaround?


Solution

  • Not the most effective way as it involves a lot of iterations (for loops) but it works for your case. Take in mind that if your file has a lot of rows and/or columns the program will become slow.

    Anyway, what i did was to define a function to format a single row exactly the way that you want it. This way the format will also stop at the last column of your df and it will not expand until the last column of the file (xlsxwriter's method set_row formats until the very end of the file).

    Then we iterate through every row and we compare the row's ID value with the ID value of the previous row. If the values are the same we keep formatting with the same color, if they are different we switch the colors.

    import pandas as pd
    
    # Create a test df
    df = pd.DataFrame({'ID': [3284, 2208, 2855, 2855, 4010, 1066, 1066, 2476, 2476],
                'col A': ['num1', 'num2', 'num3', 'num4', 'num5', 'num6', 'num7', 'num8', 'num9'],
                'F1 #pets': [48, 66, 77, 77, 72, 90, 90, 40, 40],
                'F1 #kids': [23, 45, 77, 77, 47, 90, 90, 40, 40],
                'F1 #hobbies': [1436, 850, 162, 162, 419, 176, 176, 576, 576],
                'F2 #pets': [51, 67, 75, 75, 68, 94, 94, 36, 36],
                'F2 #kids': [24, 45, 75, 75, 44, 94, 94, 36, 36],
                'F2 #hobbies': [1399, 833, 163, 163, 430, 184, 184, 481, 481]})
    
    df['ID'] = df['ID'].astype('str')
    
    writer = pd.ExcelWriter('test.xlsx', engine='xlsxwriter')
    df.to_excel(writer, sheet_name='Sheet1', index=False)
    workbook = writer.book
    worksheet = writer.sheets['Sheet1']
    
    # Define the formats
    format1 = workbook.add_format({'bg_color': '#E5F8F3', 'border_color': '#BFBFBF', 'border': 1}) # light green 1
    format2 = workbook.add_format({'bg_color': '#E3F5E8', 'border_color': '#BFBFBF', 'border': 1}) # light green 2
    format3 = workbook.add_format({'bg_color': '#C7EDE0', 'border_color': '#BFBFBF', 'border': 1}) # dark green 1
    format4 = workbook.add_format({'bg_color': '#C7ECC7', 'border_color': '#BFBFBF', 'border': 1}) # dark green 2
    format5 = workbook.add_format({'bg_color': '#B9D3EE', 'border_color': '#BFBFBF', 'border': 1}) # dark blue
    format6 = workbook.add_format({'bg_color': '#DBE8F0', 'border_color': '#BFBFBF', 'border': 1}) # light blue
    
    # Set the color for the starting row
    current_color = 'Dark'
    
    # Define a function to format a single row
    def format_row(row, color=current_color):
        if color == 'Dark':
            for column in range(0,2): # format the first 2 columns
                worksheet.write(row, column, df.iloc[row-1,column], format5)
            for column in range(2,5): # format columns 3,4,5
                worksheet.write(row, column, df.iloc[row-1,column], format3)
            for column in range(5,8): # format columns 6,7,8
                worksheet.write(row, column, df.iloc[row-1,column], format4)
        elif color == 'Light':
            for column in range(0,2): # format the first 2 columns
                worksheet.write(row, column, df.iloc[row-1,column], format6)
            for column in range(2,5): # format columns 3,4,5
                worksheet.write(row, column, df.iloc[row-1,column], format1)
            for column in range(5,8): # format columns 6,7,8
                worksheet.write(row, column, df.iloc[row-1,column], format2)
    
    # Format the 1st row
    format_row(1)
    
    # Start formatting from the 2nd row until the end of the df
    for row in range(2,df.shape[0]+1):
        # if the id of the row is the same as the id of the previous row
        if df.iloc[row-1,0] == df.iloc[row-2,0]:
            format_row(row, color=current_color)
        # if it's different than that of the previous row switch the colors
        else:
            if current_color == 'Dark':
                current_color = 'Light'
            elif current_color == 'Light':
                current_color = 'Dark'
            format_row(row, color=current_color)
    
    # Set the columns width
    worksheet.set_column('A:H', 11)
    
    writer.save()
    

    Output identical to your desired output: enter image description here