Search code examples
pythonpandaspandas-styles

Sorting styled dataframe return keyError in Pandas


I would like to groupby and sortindex upon styling a dataframe. However, the compiler return an error

KeyError: ('Other', 'B')

May I know what is the issue here?

The code to reproduce the above error:



import pandas as pd
import numpy as np
dict_map=dict(group_one=['D','GG','G'],group_two=['A','C','E','F'])
vv=np.random.randn(5, 4)
# ['foo', '*', 'bar','ff']
nn=np.array([['foo', '*', 'bar','ff'], ['foo', '*', 'bar','**'],
             ['foo', '*', 'bar','**'],['foo', '*', 'bar','ff'],
             ['foo', '*', '**','ff']])
arrays = [["bar", "bar", "baz", "baz"],
          ["one", "two", "one", "two"]]
tuples = list(zip(*arrays))

index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])
df = pd.DataFrame(nn, index=["A", "B", "C","D",'G'], columns=index)
df = df.rename_axis ( index=['my_ch'] ).reset_index()

d = {i:k for k,v in dict_map.items() for i in v}
out = df.assign(Group=df.xs("my_ch",axis=1).map(d).fillna('Other'))


def highlight_(s):
    return np.select(
        condlist=[s.str.contains('\*\*'), s.str.contains('\*')],
        choicelist=['background-color:green', 'background-color:purple'],
        default='')

df=out.style.apply(highlight_)


df.data=df.data.set_index(['Group', 'my_ch'])

df.data=df.data.sort_index(level=0)
df.to_excel('n1test.xlsx')

Please note that, in actual use case. sorting the index level 0 is required


Solution

  • This should work:

    import pandas as pd
    import numpy as np
    
    
    dict_map = dict(group_one=["D", "GG", "G"], 
                    group_two=["A", "C", "E", "F"])
    vv = np.random.randn(5, 4)
    nn = np.array(
        [
            ["foo", "*", "bar", "ff"],
            ["foo", "*", "bar", "**"],
            ["foo", "*", "bar", "**"],
            ["foo", "*", "bar", "ff"],
            ["foo", "*", "**", "ff"],
        ]
    )
    arrays = [["bar", "bar", "baz", "baz"], ["one", "two", "one", "two"]]
    tuples = list(zip(*arrays))
    
    index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])
    df = pd.DataFrame(nn, index=["A", "B", "C", "D", "G"], columns=index)
    df = df.rename_axis(index=["my_ch"]).reset_index()
    
    d = {i: k for k, v in dict_map.items() for i in v}
    out = df.assign(Group=df.xs("my_ch", axis=1).map(d).fillna("Other"))
    
    
    def highlight_(s):
        return np.select(
            condlist=[s.str.contains("\*\*"), s.str.contains("\*")],
            choicelist=["background-color:green", "background-color:purple"],
            default=None,
        )
    
    
    (
        out.sort_index(level=0)
        .set_index(["Group", "my_ch"])
        .style.apply(highlight_)
        .to_excel("n1test.xlsx")
    )
    

    The main difference is to sort first then set the index then apply the Styler and save it as an Excel file. All expressions are enclosed in the parenthesis instead of break lines.