Search code examples
pythonpandaspandas-styles

Where to add multiindex level in pandas?


I have a multiindex dataframe, I provided minimal reproducable sample.

import pandas as pd
import numpy as np
import string

index = pd.MultiIndex.from_product(
    ([2020], [1, 2, 3, 4]), names=['year', 'q']
)

columns = pd.MultiIndex.from_product(
    (['Items1', 'Items2', 'Items3'], ['new', 'old']),
    names=['Items', 'type']
)

data = np.random.seed(123)
data = list(np.random.choice(list(string.ascii_lowercase), (4,6)))

Ldata = pd.DataFrame(data, index=index, columns=columns)
Ldata

And I want to mark certain values in dataframe, and I'm doing it like this. But I don't know where to put multiindex level=1. I tried but I got errors, where I can add level?

def highlight_cells(x):
    c = 'background-color: white'
    c1 = 'background-color: red'
    c2 = 'background-color: blue'
    c3 = 'background-color: green'
                
    k1 = Ldata['new'].str.contains("a", na=False) 
    k2 = Ldata['new'].str.contains("b", na=False)
    k3 = Ldata['new'].str.contains("c", na=False) 
    
    colordata = pd.DataFrame(c, index=x.index, columns=x.columns)
    colordata.loc[k1, 'new'] = c1
    colordata.loc[k2, 'new'] = c2
    colordata.loc[k3, 'new'] = c3

    return colordata

end = Ldata.style.apply(highlight_cells,axis=None)

end

Thanks in advance!


Solution

  • The most straightforward modification would be to use the subset argument of Styler.apply in conjunction with pd.IndexSlice. Then reduce from DataFrame applied styles to Series level applied styles for each column in the MultiIndex matching the subset:

    def highlight_cells(x):
        c = 'background-color: white'
        c1 = 'background-color: red'
        c2 = 'background-color: blue'
        c3 = 'background-color: green'
    
        k1 = x.str.contains("a", na=False)
        k2 = x.str.contains("b", na=False)
        k3 = x.str.contains("c", na=False)
        
        # Build Series for _column_ level Styles
        color_data = pd.Series(c, index=x.index)
        color_data[k1] = c1
        color_data[k2] = c2
        color_data[k3] = c3
        return color_data
    
    
    idx = pd.IndexSlice
    end = Ldata.style.apply(highlight_cells, subset=idx[:, idx[:, 'new']])
    
    end
    

    Naturally this can be done without the separate variables as well:

    def highlight_cells(x):
        color_data = pd.Series('background-color: white', index=x.index)
        color_data[x.str.contains("a", na=False)] = 'background-color: red'
        color_data[x.str.contains("b", na=False)] = 'background-color: blue'
        color_data[x.str.contains("c", na=False)] = 'background-color: green'
        return color_data
    
    
    idx = pd.IndexSlice
    end = Ldata.style.apply(highlight_cells, subset=idx[:, idx[:, 'new']])
    
    end
    

    Or without building an indexed structure with np.select:

    def highlight_cells(x):
        return np.select(
            [x.str.contains("a", na=False),
             x.str.contains("b", na=False),
             x.str.contains("c", na=False)],
            ['background-color: red',
             'background-color: blue',
             'background-color: green'],
            default='background-color: white'
        )
    
    
    idx = pd.IndexSlice
    end = Ldata.style.apply(highlight_cells, subset=idx[:, idx[:, 'new']])
    
    end
    

    All options produce:

    styled table