Search code examples
pandasdataframecolorstermcolor

Highlighting operation in pandas DataFrame by specific way defined


I have my script to highlight 2 max values for columns in my DataFrame as:

from termcolor import colored
import pandas as pd

data = {'Col1': [190, 88, 120, 170, 110, 150], 'Col2': [11, -6, 3, 16, -21, 8], 'Col3': [20, 25, 30, 35, 40, 45],'Col4': [-33, -8, 93, -18, 3, 23], 'Col5': [544, 19, 78, 270, 210, 369]}
df = pd.DataFrame(data)

def hightlight(series, excluded, spcl_col, spcl_val):
    if series.name == spcl_col:
        spcl_val = str(spcl_val)
        return series.astype(str).apply(lambda x: colored(x, None, 'on_green') if x == spcl_val else colored(x, 'white'))

    if series.name in excluded:
        return series.astype(str).apply(lambda x: colored(x, 'white'))
    colors = pd.Series([(None,'on_blue'), (None,'on_magenta')] + [('white',)]*(len(series)-2), index=series.sort_values(ascending=False).index).reindex(series.index)

    return [colored(str(x), *c) for x,c in zip(series, colors)]

col_to_exclude = ['Col3']
print(df.apply(hightlight, excluded=col_to_exclude, spcl_col='Col3', spcl_val=30)
.rename(columns=lambda x: colored(x, 'white', None)).to_string(index=False))

Now I want to add something in my above script so that all negative values are in red text and all positive values in green text for specific columns(let's say Col2 & Col4 here which can be changed) without using df.style method and text colour should not be changed for highlighted values already by above script.

Haw can I do it ??


Solution

  • I would recommend to reorganize your code to use subfunctions to format each series type. Having a modular code will make it easier to add extra functions. I also reworked a bit the logic to highlight the top two values:

    from termcolor import colored
    import pandas as pd
    import numpy as np
    
    data = {'Col1': [190, 88, 120, 170, 110, 150], 'Col2': [11, -6, 3, 16, -21, 8], 'Col3': [20, 25, 30, 35, 40, 45],'Col4': [-33, -8, 93, -18, 3, 23], 'Col5': [544, 19, 78, 270, 210, 369]}
    df = pd.DataFrame(data)
    
    def hightlight(series, excluded, spcl_col, spcl_val, extra_col=[]):
    
        def excl_formatter(series):
            return [colored(str(x), 'white') for x in series]
        
        def norm_formatter(series, extra=False):
            tmp = series.sort_values()
            if extra:
                tmp.iloc[:-2] = tmp.iloc[:-2].map(lambda x: colored(x,
                                                  *[('white',),
                                                    ('green',),
                                                    ('red',),
                                                   ][np.sign(x)])
                                                 )
            else:
                tmp.iloc[:-2] = tmp.iloc[:-2].map(lambda x: colored(str(x), 'white'))
            tmp.iloc[-2] = colored(tmp.iloc[-2], None, 'on_magenta')
            tmp.iloc[-1] = colored(tmp.iloc[-1], None, 'on_blue')
            return tmp
            
        def spcl_formatter(series):
            return [colored(x, None, 'on_green') if x == spcl_val
                    else colored(x, 'white')
                    for x in series]
        
        if series.name == spcl_col:
            return spcl_formatter(series)
        if series.name in excluded:
            return excl_formatter(series)
        return norm_formatter(series, extra=series.name in extra_col)
                                                            
    col_to_exclude = ['Col5']
    print()
    print(df.apply(hightlight, excluded=col_to_exclude, spcl_col='Col3',
                   spcl_val=30, extra_col=['Col2', 'Col4'])
            .rename(columns=lambda x: colored(x, 'white', None)).to_string(index=False)
         )
    
    
    

    Output:

    enter image description here