I would like to groupby
and sortindex
upon styling a dataframe
. However, the compiler return an error
KeyError: ('Other', 'B')
May I know what is the issue here?
The code to reproduce the above error:
import pandas as pd
import numpy as np
dict_map=dict(group_one=['D','GG','G'],group_two=['A','C','E','F'])
vv=np.random.randn(5, 4)
# ['foo', '*', 'bar','ff']
nn=np.array([['foo', '*', 'bar','ff'], ['foo', '*', 'bar','**'],
['foo', '*', 'bar','**'],['foo', '*', 'bar','ff'],
['foo', '*', '**','ff']])
arrays = [["bar", "bar", "baz", "baz"],
["one", "two", "one", "two"]]
tuples = list(zip(*arrays))
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])
df = pd.DataFrame(nn, index=["A", "B", "C","D",'G'], columns=index)
df = df.rename_axis ( index=['my_ch'] ).reset_index()
d = {i:k for k,v in dict_map.items() for i in v}
out = df.assign(Group=df.xs("my_ch",axis=1).map(d).fillna('Other'))
def highlight_(s):
return np.select(
condlist=[s.str.contains('\*\*'), s.str.contains('\*')],
choicelist=['background-color:green', 'background-color:purple'],
default='')
df=out.style.apply(highlight_)
df.data=df.data.set_index(['Group', 'my_ch'])
df.data=df.data.sort_index(level=0)
df.to_excel('n1test.xlsx')
Please note that, in actual use case. sorting the index level 0
is required
This should work:
import pandas as pd
import numpy as np
dict_map = dict(group_one=["D", "GG", "G"],
group_two=["A", "C", "E", "F"])
vv = np.random.randn(5, 4)
nn = np.array(
[
["foo", "*", "bar", "ff"],
["foo", "*", "bar", "**"],
["foo", "*", "bar", "**"],
["foo", "*", "bar", "ff"],
["foo", "*", "**", "ff"],
]
)
arrays = [["bar", "bar", "baz", "baz"], ["one", "two", "one", "two"]]
tuples = list(zip(*arrays))
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])
df = pd.DataFrame(nn, index=["A", "B", "C", "D", "G"], columns=index)
df = df.rename_axis(index=["my_ch"]).reset_index()
d = {i: k for k, v in dict_map.items() for i in v}
out = df.assign(Group=df.xs("my_ch", axis=1).map(d).fillna("Other"))
def highlight_(s):
return np.select(
condlist=[s.str.contains("\*\*"), s.str.contains("\*")],
choicelist=["background-color:green", "background-color:purple"],
default=None,
)
(
out.sort_index(level=0)
.set_index(["Group", "my_ch"])
.style.apply(highlight_)
.to_excel("n1test.xlsx")
)
The main difference is to sort first then set the index then apply the Styler and save it as an Excel file. All expressions are enclosed in the parenthesis instead of break lines.