I want to select multiple columns from a MultiIndex with multiple independent indexers. For example
df = pd.DataFrame(
np.zeros((2,4)),
columns=pd.MultiIndex.from_product([('a','b'),(1,2)])
)
from this DataFrame
a b
1 2 1 2
0 0 0 0 0
1 0 0 0 0
I want to select all columns under 'a'
plus ('b', 1)
, like df[[('a', 1), ('a', 2), ('b', 1)]]
, but I don't want to have to explicitly specify all levels of the columns below 'a'
.
What does not work:
df[['a', ('b', 1)]]
: KeyError: "[('b', 1)] not in index"
df.loc[:, ['a', ('b', 1)]]
: KeyError: "[('b', 1)] not in index"
df[[('a', slice(None)), ('b', 1)]]
: TypeError: unhashable type: 'slice'
df.loc[:, [pd.IndexSlice['a', :], ('b', 1)]]
: TypeError: unhashable type: 'slice'
Another similar thing I would like to be able to do is: ('a', 1)
plus pd.IndexSlice[:, 2]
I ended up writing a helper function to do this. It takes an array of slicers, and uses each one independently, then gathers all the selected columns.
def mimsc(col_specs):
# usage: df.loc[msms(['A', ('B', 'X')])]
def slicer(df):
cols = []
dfc = df.columns.to_frame()
for cs in col_specs:
cols.append(dfc.loc[cs])
all_cols = pd.concat(cols, ignore_index=True)
return pd.MultiIndex.from_frame(all_cols)
return slicer
usage
df.loc[:, mimsc(['a', ('b', 1)])]
df.loc[:, mimsc([('b', 1), pd.IndexSlice[:, 2]])]
Here's a more generic version that also works with indexes
def mims(col_specs, axis=1):
def slicer(df):
cols = []
if axis==1:
dfc = df.columns.to_frame()
elif axis==0:
dfc = df.index.to_frame()
for cs in col_specs:
col = dfc.loc[cs, :]
if isinstance(col, pd.Series):
col = dfc.loc[[cs], :]
cols.append(col)
all_cols = pd.concat(cols, ignore_index=True)
return pd.MultiIndex.from_frame(all_cols)
return slicer
example
df.T.loc[mims(['a', ('b', 1)], axis=0), :]
df.T.loc[mims([('b', 1), pd.IndexSlice[:, 2]], axis=0), :]