Search code examples
python pandas dataframe multi-index

Add another level of headers to multiindex dataframe


I have the following dataframe:

# Survey results nested as country -> category -> metric -> value.
dic = {
    'US': {
        'Quality': {'points': "-2 n", 'difference': 'equal', 'stat': 'same'},
        'Prices': {'points': "-7 n", 'difference': 'negative', 'stat': 'below'},
        'Satisfaction': {'points': "3 n", 'difference': 'positive', 'stat': 'below'},
    },
    'UK': {
        'Quality': {'points': "3 n", 'difference': 'equal', 'stat': 'above'},
        'Prices': {'points': "-13 n", 'difference': 'negative', 'stat': 'below'},
        'Satisfaction': {'points': "2 n", 'difference': 'negative', 'stat': 'same'},
    },
}

# Pivot the nesting so that each (country, metric) pair becomes a column and
# each category becomes a row.
d1 = defaultdict(dict)
for country, categories in dic.items():
    for category, metrics in categories.items():
        for metric, value in metrics.items():
            d1[(country, metric)][category] = value

df = pd.DataFrame(d1)

# Name both column levels in one call, then prepend a constant "Mode" column
# whose top-level label is blank.
df.columns = df.columns.set_names(["Skateboard", "Q3"])
df.insert(loc=0, column=('', 'Mode'), value="Website")

Currently, it looks like this: [image]

How do I add another level of headers to the multiindex dataframe to make it look like the image below? [image]

Update:

# Survey results nested as country -> category -> metric -> value.
dic = {
    'US': {
        'Quality': {'points': "-2 n", 'difference': 'equal', 'stat': 'same'},
        'Prices': {'points': "-7 n", 'difference': 'negative', 'stat': 'below'},
        'Satisfaction': {'points': "3 n", 'difference': 'positive', 'stat': 'below'},
    },
    'UK': {
        'Quality': {'points': "3 n", 'difference': 'equal', 'stat': 'above'},
        'Prices': {'points': "-13 n", 'difference': 'negative', 'stat': 'below'},
        'Satisfaction': {'points': "2 n", 'difference': 'negative', 'stat': 'same'},
    },
}

# Pivot the nesting so that each (country, metric) pair becomes a column and
# each category becomes a row.
d1 = defaultdict(dict)
for country, categories in dic.items():
    for category, metrics in categories.items():
        for metric, value in metrics.items():
            d1[(country, metric)][category] = value

df = pd.DataFrame(d1)
df.columns = df.columns.set_names(["Skateboard", "Metric"])

# Split the frame into the 'points' columns and everything else, keeping both
# header levels on each half.
df1 = df.xs('points', axis=1, level=1, drop_level=False)
df2 = df.drop('points', axis=1, level=1)

# Tag the halves with a new outer label ('GM' for points, blank for the rest),
# move that label to the middle level, and sort the columns.
tagged = pd.concat([df1, df2], keys=['GM', ''], axis=1)
df3 = tagged.swaplevel(0, 1, axis=1).sort_index(axis=1)
df3.columns = df3.columns.set_names("Q3", level=1)

# Prepend a constant "Mode" column with blank labels on the two upper levels.
df3.insert(loc=0, column=('','', 'Mode'), value="Website")

df3

Now the data frame looks like this: [image]

How do I move the GM header so that it comes first within the column groups for both US and UK (see the second image for the desired final output)?


Solution

  • Example

    # Minimal example frame: the outer dict keys are (top, sub) tuples, which
    # pandas turns into two-level MultiIndex columns; the inner dict keys
    # 0..2 become the row index.
    data = {('A', 'a'): {0: 8, 1: 3, 2: 4},
            ('A', 'b'): {0: 5, 1: 7, 2: 8},
            ('A', 'c'): {0: 1, 1: 7, 2: 6},
            ('B', 'a'): {0: 7, 1: 1, 2: 0},
            ('B', 'b'): {0: 1, 1: 1, 2: 7},
            ('B', 'c'): {0: 7, 1: 7, 2: 4}}
    df = pd.DataFrame(data)
    

    df

        A           B
        a   b   c   a   b   c
    0   8   5   1   7   1   7
    1   3   7   7   1   1   7
    2   4   8   6   0   7   4
    

    Code

    Create a new column level: label the `a` columns with `c`, and label every column except `a` with `d`.



    The part of df containing only the `a` columns (df1)

    # Take the columns whose second-level label is 'a'; drop_level=False keeps
    # both header levels on the result instead of dropping the matched one.
    df1 = df.xs('a', axis=1, level=1, drop_level=False)
    

    output(df1):

        A   B
        a   a
    0   8   7
    1   3   1
    2   4   0
    



    The part of df containing every column except `a` (df2)

    # Complement of df1: remove the columns whose second-level label is 'a'.
    df2 = df.drop('a', axis=1, level=1)
    

    output(df2):

        A       B
        b   c   b   c
    0   5   1   1   7
    1   7   7   1   7
    2   8   6   7   4
    



    Concatenate df1 and df2, using `keys` to add the new level

    # keys adds a new outermost column level: 'c' above df1's columns and
    # 'd' above df2's columns.
    pd.concat([df1, df2], keys=['c', 'd'], axis=1)
    

    output:

        c       d
        A   B   A       B
        a   a   b   c   b   c
    0   8   7   5   1   1   7
    1   3   1   7   7   1   7
    2   4   0   8   6   7   4
    



    swaplevel and sort

    # Same concat as before, then swap the two outermost column levels so the
    # new 'c'/'d' level sits below 'A'/'B', and sort the columns so each
    # top-level group is contiguous again.
    (pd.concat([df1, df2], keys=['c', 'd'], axis=1)
     .swaplevel(0, 1, axis=1)
     .sort_index(axis=1))
    

    result:

        A           B
        c   d       c   d
        a   b   c   a   b   c
    0   8   5   1   7   1   7
    1   3   7   7   1   1   7
    2   4   8   6   0   7   4
    



    This is how we can add a level to the columns.



    Or use a simpler way:

    # Wrap the whole frame under a single blank label in a new outer level
    # named 'Q3', then swap that level into the middle position.
    df3 = pd.concat([df], keys=[''], names=['Q3'], axis=1).swaplevel(0, 1, axis=1)
    # Relabel the middle level to 'c' only for columns whose innermost label
    # is 'a'; all other column tuples are left unchanged.
    df3.columns = df3.columns.map(lambda x: (x[0], 'c', x[2]) if x[2] == 'a' else x)
    

    df3

        A           B
    Q3  c           c   
        a   b   c   a   b   c
    0   8   5   1   7   1   7
    1   3   7   7   1   1   7
    2   4   8   6   0   7   4