Search code examples
pythondataframegroup-bypivot

Sum of "sub-ID" values for each ID


I have this kind of dataframe:

   ID  ID2  Feature1  Feature2  Feature3  Feature4  Feature5
0   2    2         8         5         7         3         7
1   2    2         4         8         8         3         6
2   1    1         8         6         2         5         1
3   2    2         1         3         7         4         9
4   2    1         3         7         5         9         7

I would like to calculate sums of of rows for each ID and ID2 so, that output dataframe would have columns ID, and sum of ID2 values so the expected output should be

   ID   Sum_ID2_values1  Sum_ID2_values2  
0   1                22                0         
1   2                31               83         

I tried different pivot table and groubpy methods, but haven't got the expected output.


Solution

  • Try:

    out = (
        df.groupby("ID")
        .apply(lambda x: x.groupby("ID2").sum().iloc[:, 1:].sum(axis=1))
        .unstack("ID2")
        .fillna(0)
    )
    print(out)
    

    Prints:

    ID2     1     2
    ID             
    1    22.0   0.0
    2    31.0  83.0
    

    Or:

    out = (
        (
            df.groupby("ID")
            .apply(lambda x: x.groupby("ID2").sum().iloc[:, 1:].sum(axis=1))
            .unstack("ID2")
            .fillna(0)
        )
        .add_prefix("Sum_ID2_values")
        .reset_index()
        .rename_axis(columns=None)
    )
    print(out)
    

    Prints:

       ID  Sum_ID2_values1  Sum_ID2_values2
    0   1             22.0              0.0
    1   2             31.0             83.0
    

    Another version:

    print(
        df.set_index(["ID", "ID2"])
        .stack()
        .to_frame(name="vals")
        .pivot_table(index=["ID", "ID2"], aggfunc="sum")
        .unstack("ID2")
        .fillna(0)
        .droplevel(0, axis=1)
        .rename_axis(columns=None)
        .add_prefix("Sum_ID2_values")
        .reset_index()
    )