Search code examples
python pandas matplotlib charts enumerate

Adding values onto unstacked bar chart - python


Here is my code for a stacked bar chart. I can add percentage values for the first segment (AA), but how can I add values for all 4 segments?

stacked bar chart with first segment value

# Load the raw shipment rows from the CSV file
df = pd.read_csv("123.csv")

# Sum 'Count' per (Country, ClassWeight) pair, then pivot the weight classes
# into columns; combinations that do not occur in the data are filled with 0.
# The chain is parenthesised so it can span several lines: without that, a
# line beginning with ['Count'] is parsed as a separate statement on a plain
# list and fails with AttributeError.
df1 = (
    df.groupby(['Country', 'ClassWeight'])['Count']
    .sum()
    .unstack('ClassWeight')
    .fillna(0)
)

# Order the rows by the 'total' column, then leave out the last column
# (presumably 'total' itself) so that it is not drawn as an extra segment
plot_data = df1.sort_values(['total']).iloc[:, :-1]

# Horizontal stacked bars coloured with seaborn's "Blues_d" palette
blues = ListedColormap(sns.color_palette("Blues_d"))
ax = plot_data.plot(
    kind='barh',
    stacked=True,
    width=0.8,
    figsize=(15, 10),
    colormap=blues,
)

# Axis labels, tick labels and the title all use the same font size
label_size = 15

ax.set_xlabel('No.of Shipments', fontsize=label_size)
ax.set_ylabel('Country', fontsize=label_size)

plt.xticks(fontsize=label_size)
plt.yticks(fontsize=label_size)

plt.title('Total Shipments by Country and Customer Class', fontsize=label_size)

# Work on a copy of the table sorted by ascending 'total'
df2 = df1.sort_values(['total'], ascending=True)

# Replace each class count by its percentage of the row's total
for weight_class in ('AA', 'A', 'B', 'C'):
    df2[weight_class] = 100 * df2[weight_class] / df2['total']

# Keep every column except the last one
df3 = df2.iloc[:, :-1]

# Can only enumerate on the AA column. How could we do all 4 columns?

# Write each row's 'AA' percentage as bold white text, offset by -1 in x
# (from the percentage value) and by -0.2 in y (from the row position)
for row, pct in enumerate(df3['AA']):
    ax.text(pct - 1, row - 0.2, "{0:.1f}%".format(pct),
            color='white', fontweight='bold', fontsize=15)

Example data:

ClassWeight            AA          A          B          C
Country                                                   
Romania         17.142857  32.268908  28.235294  22.352941
Finland         60.325203  13.495935  12.682927  13.495935

{'Country': {0: 'France', 1: 'Poland', 2: 'Lithuania', 3: 'United Kingdom', 4: 'Denmark'}, 'Count': {0: 233, 1: 232, 2: 286, 3: 236, 4: 223}, 'SumWeight': {0: 8072469.5, 1: 6689511.05, 2: 5158305.25, 3: 4675914.53, 4: 3536684.52}, 'AvgWeight': {0: 34645.79, 1: 28834.1, 2: 18036.03, 3: 19813.2, 4: 15859.57}, 'ClassWeight': {0: 'AA', 1: 'AA', 2: 'AA', 3: 'AA', 4: 'AA'}}


Solution

  • I've tried to rearrange your code a bit - you don't need to create new dataframes each time, and you only really need to sort by your totals once.

    import pandas as pd
    
    import matplotlib.pyplot as plt
    from matplotlib.colors import ListedColormap
    import seaborn as sns
    
    # Toy input: one country with one row per weight class plus a 'total' row
    # (854 is the sum of the four class counts)
    classes = ['AA', 'A', 'B', 'C', 'total']
    counts = [100, 232, 286, 236, 854]

    # dict(enumerate(...)) yields the same {row_index: value} mappings as
    # writing the index keys out by hand
    df = pd.DataFrame(
        {'Country': dict(enumerate(['France'] * len(classes))),
         'Count': dict(enumerate(counts)),
         'ClassWeight': dict(enumerate(classes))}
    )
    
    # Weight classes that are drawn as bar segments ('total' is not one of them)
    VALUE_COLS = ['AA', 'A', 'B', 'C']

    # One row per country and one column per weight class; combinations that
    # do not occur become 0. Sorting by 'total' (largest first) happens once,
    # here, so nothing downstream has to sort again.
    df1 = (
        df.groupby(['Country', 'ClassWeight'])['Count']
        .sum()
        .unstack('ClassWeight')
        .fillna(0)
        .sort_values(by='total', ascending=False)
    )

    # Add a '<class>_%' column holding each class's share of the row total
    for name in VALUE_COLS:
        df1['{}_%'.format(name)] = df1[name] * 100 / df1['total']
    
    # Horizontal stacked bars, one segment per weight class, coloured with
    # seaborn's "Blues_d" palette
    blues = ListedColormap(sns.color_palette("Blues_d"))
    ax = df1[VALUE_COLS].plot(
        kind='barh',
        stacked=True,
        width=0.8,
        figsize=(15, 10),
        colormap=blues,
    )

    # Set up labels and ticks; every text element shares one font size
    font_size = 15
    ax.set_xlabel('No.of Shipments', fontsize=font_size)
    ax.set_ylabel('Country', fontsize=font_size)
    plt.xticks(fontsize=font_size)
    plt.yticks(fontsize=font_size)
    plt.title('Total Shipments by Country and Customer Class', fontsize=font_size)
    
    # Add in text labels: one percentage per non-empty segment, right-aligned
    # just inside the segment's right-hand edge.
    # 'label_tot' is the running sum of the counts drawn so far, i.e. the x
    # position of the right-hand edge of the segment currently being labelled.
    df1['label_tot'] = 0
    for col in VALUE_COLS:
        df1['label_tot'] += df1[col]
        rows = zip(df1[col], df1[col + '_%'], df1['label_tot'])
        for i, (count, val, pos) in enumerate(rows):
            if count == 0:
                # A zero-width segment has no room for a label; drawing one
                # would overprint the label of the segment before it
                continue
            # va='center' puts the text on the bar's centre line instead of
            # resting its baseline there
            ax.text(pos - 1, i, "{0:.1f}%".format(val),
                    color='white', fontweight='bold', fontsize=15,
                    ha='right', va='center')
    

    With my slightly modified version of your input data, this gives something like:

    enter image description here