Search code examples
pythonpython-3.xopenpyxlpyexcel

Python 3.0: Apply Conditional Formatting to a Cell based off the calculated value of 2 other cells using openpyxl


I am trying to automate processing of a form with;
(1) two calculated fields in columns F and G, and
(2) one manually-entered field in column H.

  1. If both these values in a row are calculated to be >=30, I would like to highlight the corresponding cell in column A.

  2. Alternatively, if the value in column H is "Warranty PM", I want to highlight the corresponding cell in column A in yellow.

At this time, I have not started on function (2) as I want it to run after function (1), such that it is the priority function. I apologize if I am messing the naming convention up with Excel!

In case it may help, the following is an example of my data:

Case No Status Current Date Date Created Date Modified Days Since Creation Days Since Modification Type
3051 New [Today] 01-Nov-2024 01-Nov-2024 =DATEDIF(D2,C2,"d") =DATEDIF(E2,C2,"d") Warranty PM
3048 Service Scheduled [Today] 31-Oct-2024 01-Nov-2024 =DATEDIF(D3,C3,"d") =DATEDIF(E3,C3,"d") Hardware
...
2832 Service Scheduled [Today] 20-Aug-2024 27-Aug-2024 =DATEDIF(D16,C16,"d") =DATEDIF(E16,C16,"d") Customer Request (Move)

In my current code, I am able to get both the F and G columns to properly highlight based on their calculated values.
These should be light red if the cell is over 30.
This is completed using conditional formatting:

from openpyxl import load_workbook
from openpyxl.styles import PatternFill, NamedStyle
from openpyxl.formatting.rule import CellIsRule

def format_xlsx(xlsx_file):
    #Load the file and workbook
    file_path = xlsx_file
    workbook = load_workbook(file_path)
    sheet = workbook.active

    #Define the fills
    light_red_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")
    dark_red_fill = PatternFill(start_color="8B0000", end_color="8B0000", fill_type="solid")
    yellow_fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")

    #Create conditional format rules for F and G:
    sheet.conditional_formatting.add(
        "F2:F1048576",  # Apply to column F from row 2 onwards
        CellIsRule(operator="greaterThanOrEqual", formula=["30"], fill=light_red_fill))

    sheet.conditional_formatting.add(
        "G2:G1048576",  # Apply to column G from row 2 onwards
        CellIsRule(operator="greaterThanOrEqual", formula=["30"], fill=light_red_fill))
    
    workbook.save(file_path)
    return xlsx_file

However, if I try to include a function that will highlight cells in column A:

from openpyxl import load_workbook 
from openpyxl.styles import PatternFill, NamedStyle
from openpyxl.formatting.rule import CellIsRule
    
def format_xlsx(xlsx_file): #Load the file and workbook file_path = xlsx_file workbook = load_workbook(file_path) sheet = workbook.active
    #Define the fills
    light_red_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")
    dark_red_fill = PatternFill(start_color="8B0000", end_color="8B0000", fill_type="solid")
    yellow_fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")

    #Create conditional format rules for F and G:
    sheet.conditional_formatting.add(
        "F2:F1048576",  # Apply to column F from row 2 onwards
        CellIsRule(operator="greaterThanOrEqual", formula=["30"], fill=light_red_fill))

    sheet.conditional_formatting.add(
        "G2:G1048576",  # Apply to column G from row 2 onwards
        CellIsRule(operator="greaterThanOrEqual", formula=["30"], fill=light_red_fill))

    #Loop through each cell in columns F and G, ignoring the first row for headers.
    red = "AND($F2>=30, $G2>=30)"
    sheet.conditional_formatting.add(
        "A2:A1048576",  # Apply to column F from row 2 onwards
        CellIsRule(formula=[red], fill=dark_red_fill))

    workbook.save(file_path)
    return xlsx_file

I get the following error when opening the workbook in excel:

"We found a problem with some content in 'testfile_04-Nov-2024.xlsx'.
Do you want us to try to recover as much as we can? If you trust the source of this workbook, click Yes."

I am then presented with the option to view

Excel was able to open the file by repairing or removing the unreadable content.

and this error message in a word document:

Repair Result to testfile_04-Nov-2024.xlsx Errors were detected in file '/Users/llindgren/Downloads/testfile_04-Nov-2024.xlsx'
Removed Records: Conditional formatting from /xl/worksheets/sheet1.xml part

I also tried using a different function for all the above, but this resulted in no highlighted values:

import openpyxl
from openpyxl.styles import PatternFill

# Load the Excel file
file_path = "your_file.xlsx"
workbook = openpyxl.load_workbook(file_path)
sheet = workbook.active

# Define the fill styles for highlighting
light_red_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")  # Light red for columns F and G
dark_red_fill = PatternFill(start_color="8B0000", end_color="8B0000", fill_type="solid")   # Dark red for column A

# Loop through each row and check values in columns F and G
for row in range(2, sheet.max_row + 1):  # Starting from row 2 to skip headers if they exist
    cell_f = sheet[f"F{row}"]
    cell_g = sheet[f"G{row}"]
    
    # Check and highlight cells in F and G if their value >= 30
    if cell_f.value is not None and isinstance(cell_f.value, (int, float)):
        if cell_f.value >= 30:
            cell_f.fill = light_red_fill  # Highlight F
            print(f"Highlighted F{row} with light red.")  # Debug statement

    if cell_g.value is not None and isinstance(cell_g.value, (int, float)):
        if cell_g.value >= 30:
            cell_g.fill = light_red_fill  # Highlight G
            print(f"Highlighted G{row} with light red.")  # Debug statement

    # Check if both F and G are >= 30, then apply dark red fill to column A
    if (
        cell_f.value is not None and cell_g.value is not None and
        isinstance(cell_f.value, (int, float)) and isinstance(cell_g.value, (int, float)) and
        cell_f.value >= 30 and cell_g.value >= 30
    ):
        sheet[f"A{row}"].fill = dark_red_fill
        print(f"Highlighted A{row} with dark red.")  # Debug statement

# Save changes to the same file
workbook.save(file_path)
print("File saved successfully.")

Solution

  • PART 1
    To fix your first issue;
    If both these values in a row are calculated to be >=30, I would like to highlight the corresponding cell in column A
    You just need to add another CF rule for column A using 'FormulaRule'

    Using a Sheet that contains your example data which I'll just use to fill the first 3 rows so the data covers the range A1:H4, add another CF Rule that checks the value of the same row F and G columns. If both are greater or equal to 30 then highlight A.

    I have updated your first code sample with the additional rule.
    The rule uses this formula;
    Thanks to @Rachel for pointing out the formula can be simplified to...

    AND($F2>=30,$G2>=30)
    

    which basically looks at the value in Columns F and G and if both are greater or equal to 30 then return TRUE, otherwise return FALSE.

    Note: The extra import for the formula Rule 'FormulaRule '
    Doesn't seem likely that you're going to have a Sheet that uses all 1048576 rows but will stick with that max for the example

    from openpyxl import load_workbook
    from openpyxl.styles import PatternFill, NamedStyle
    from openpyxl.formatting.rule import CellIsRule, FormulaRule  # <--- Add import
    
    
    def format_xlsx(xlsx_file):
        # Load the file and workbook
        file_path = xlsx_file
        workbook = load_workbook(file_path)
        sheet = workbook.active
    
        # Define the fills
        light_red_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")
        dark_red_fill = PatternFill(start_color="8B0000", end_color="8B0000", fill_type="solid")
        yellow_fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
    
        # Create conditional format rules for F and G:
        ### Note the range used here. This covers both Columns F and G, dont need a second rule for Column G 
        sheet.conditional_formatting.add(
            "F2:G1048576",  # Apply to column F from row 2 onwards
            CellIsRule(operator="greaterThanOrEqual", formula=["30"], fill=light_red_fill))
    
        ### Dont need this ### 
        # sheet.conditional_formatting.add(
        #     "G2:G1048576",  # Apply to column G from row 2 onwards
        #     CellIsRule(operator="greaterThanOrEqual", formula=["30"], fill=light_red_fill))
    
        ### Additional Rule for Column A
        sheet.conditional_formatting.add(
            "A2:A1048576",  # Apply to column A from row 2 onwards
            FormulaRule(formula=["AND($F2>=30,$G2>=30)"], fill=light_red_fill))
    
        workbook.save(f"new_{file_path}")
        return xlsx_file
    
    
    format_xlsx('cf1.xlsx')
    

    Example Sheet
    I have manually changed the value in cell G3 to 32 for display
    In the example Sheet below;
    In Row 2 in F and G values are below 30 so no cell is highlighted
    In Row 3 in F is less than 30 and G is greater or equal to 30, so G is highlighted and A and F are not
    In Row 4 in F and G are greater or equal to 30 so F, G and A are highlighted.

    The rule on column F & G is your original, however, there is no need to apply to the ranges F2:F1048576 and then the same for G2:... Just set the range to F2:G1048576 on one rule.

    Example Sheet

    PART 2
    For your Second requirement;
    Alternatively, if the value in column H is "Warranty PM", I want to highlight the corresponding cell in column A in yellow.
    This is the same as in Part 1 with a different formula checking the value of Column H.

    I have modified your code below slightly to include the setting of priorities on the rules. Priority 1 obviously should be applied over any other rules for the same range by Excel and appear at the the top of the list of rules in CF.
    I have also added the stopIfTrue= field which is used to determine if the next priority rule for the same range is applied if a higher priority rule on the range was True. Don't think you want to apply it but have added it in set to default False just in case.
    I think the changes to the code are pretty clear on what and how so I wont bother with including additional comments on what each line does.
    I have arbitrarily applied priorities, I'll leave it to you to set as you need.

    from openpyxl import load_workbook
    from openpyxl.styles import PatternFill, NamedStyle
    from openpyxl.formatting.rule import CellIsRule, FormulaRule  # <--- Add import
    
    
    def format_xlsx(xlsx_file):
        # Load the file and workbook
        file_path = xlsx_file
        workbook = load_workbook(file_path)
        sheet = workbook.active
    
        # Define the fills
        light_red_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid", stopIfTrue=False)
        dark_red_fill = PatternFill(start_color="8B0000", end_color="8B0000", fill_type="solid", stopIfTrue=False)
        yellow_fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid", stopIfTrue=False)
    
        # Create conditional format rules for A, F and G:
        rule2 = CellIsRule(operator="greaterThanOrEqual", formula=["30"], fill=light_red_fill)
        rule3 = FormulaRule(formula=["AND($F2>=30,$G2>=30)"], fill=light_red_fill)
        rule1 = FormulaRule(formula=["=$H2=\"Warranty PM\""], fill=yellow_fill)
    
        # Add the rules to the Sheet
        sheet.conditional_formatting.add("F2:G1048576", rule2)
        sheet.conditional_formatting.add("A2:A1048576", rule3)
        sheet.conditional_formatting.add("A2:A1048576", rule1)
    
        # Set the rule's Priority
        rule2.priority = 2
        rule3.priority = 3
        rule1.priority = 1
    
        workbook.save(f"new_{file_path}")
        return xlsx_file
    
    format_xlsx('cf1.xlsx')