Search code examples
pythonpython-3.xpandasdataframecptbarplot

ranking a stacked Bar plot


all,

I have a dataframe which I have grouped and sorted and that looks like this

 brkrcy=data[data['upload_date']==Date].groupby(['CPTY','currency'], as_index=False).agg({"Gross Loan Amount": "sum"})
    brkrcy=brkrcy.sort_values(by=['CPTY', 'Gross Loan Amount'], ascending=[True, False])
    brkrcy = brkrcy.set_index('CPTY')

Double Ranking

       currency  Gross Loan Amount
CPTY                              
BARC        RUB       2.178780e+07
BARC        ZAR       7.779714e+07
BARC        JPY       1.227676e+09
BARC        EUR       3.301354e+09
BARC        GBP       5.002534e+09
BARC        USD       6.667446e+09
BMON        CAD       2.018614e+08
BMON        GBP       4.096820e+08
BMON        USD       6.510318e+08
BNP         CAD       2.349053e+08
BNP         JPY       1.523716e+09
BNP         GBP       3.234833e+09
BNP         USD       4.576760e+09
BNP         EUR       4.935927e+09
CALIP       EUR       1.832390e+07
CALIP       USD       1.448161e+09
CALIP       GBP       3.492144e+09
CANTR       USD       3.987880e+08
CIBC        CAD       6.851792e+08
CIBC        GBP       8.861776e+08
CITI        CZK       7.549203e+06

brkrcy.set_index('currency',append=True)['Gross Loan Amount'].unstack().plot(kind="bar",stacked=True,figsize=(10,8))
plt.ylabel('Gross Loan Amount in Billions')
plt.show()

enter image description here As you can see although it is double ranking, the stacked bar plot is not ranked in descending order. How can I change that please?


Solution

  • Assuming you mean descending bars on your stacked plot, consider adding a helper Total column that sums all currency fields of your plot dataframe by each CPTY. Use this new column to sort the data in descending order, then drop the helper column before plotting:

    plot_df = brkrcy.set_index('currency',append=True)['Gross Loan Amount'].unstack()
    plot_df['Total'] = plot_df.apply('sum', axis=1)          # HELPER COLUMN
    
    plot_df.sort_values('Total', ascending=False)\
           .drop(columns=['Total'])\
           .plot(kind="bar", stacked=True, figsize=(10,8))
    
    plt.ylabel('Gross Loan Amount in Billions')
    plt.show()
    

    To demonstrate with random data that hopefully replicates your actual data (seeded for reproducibility):

    Data

    import numpy as np
    import pandas as pd
    
    np.random.seed(42018)
    
    CPTY = ["BARC", "BMON", "CALIP", "BNP", "CIBC", "CANTR", "CITI"]
    currency = ["RUB", "ZAR", "JPY", "EUR", "GBP", "USD", "CAD"]
    
    data = pd.DataFrame({'CPTY': ["".join(np.random.choice(CPTY,1)) for _ in range(50)],
                         'currency': ["".join(np.random.choice(currency,1)) for _ in range(50)],
                         'Gross Loan Amount': abs(np.random.randn(50))*10000000
                        }, columns = ['CPTY','currency','Gross Loan Amount'])
    
    brkrcy = data.groupby(['CPTY','currency'], as_index=False).agg({"Gross Loan Amount": "sum"})\
                 .sort_values(by=['CPTY', 'Gross Loan Amount'], ascending=[True, False])\
                 .set_index('CPTY')
    print(brkrcy.head(10))
    #       currency  Gross Loan Amount
    # CPTY                             
    # BARC       JPY       3.854475e+07
    # BARC       RUB       9.201352e+06
    # BARC       USD       7.744341e+06
    # BMON       EUR       2.780286e+07
    # BMON       JPY       2.365747e+07
    # BMON       CAD       8.523440e+06
    # BNP        RUB       1.268484e+07
    # BNP        GBP       8.149266e+06
    # BNP        EUR       7.575220e+06
    # CALIP      USD       3.387214e+07
    

    Plot

    import matplotlib.pyplot as plt
    
    plot_df = brkrcy.set_index('currency',append=True)['Gross Loan Amount'].unstack()
    plot_df['Total'] = plot_df.apply('sum', axis=1)
    
    plot_df.sort_values('Total', ascending=False)\
           .drop(columns=['Total'])\
           .plot(kind="bar", stacked=True, figsize=(10,8))
    
    plt.ylabel('Gross Loan Amount in Billions')
    plt.show()
    

    Plot Output