Here is my code for a stacked bar chart. I can add percentage values for the first segment (AA), but how can I add values for all 4 segments?
# Load the input data from CSV.
df = pd.read_csv("123.csv")

# Sum 'Count' per (Country, ClassWeight) pair, pivot the ClassWeight level
# into columns, and fill combinations that never occur with 0.
# The chain is wrapped in parentheses so it stays a single expression across
# lines; split without a continuation, the "['Count'].sum()..." part is parsed
# as a separate statement acting on a plain list.
df1 = (
    df.groupby(['Country', 'ClassWeight'])['Count']
    .sum()
    .unstack('ClassWeight')
    .fillna(0)
)
# Sort on the 'total' column, then drop the last column (iloc[:, :-1]) so the
# total is not plotted as an extra segment.
blues = ListedColormap(sns.color_palette("Blues_d"))
sorted_classes = df1.sort_values(['total']).iloc[:, :-1]

# Horizontal stacked bar chart: one bar per row, one segment per column.
ax = sorted_classes.plot(
    kind='barh',
    stacked=True,
    width=0.8,
    figsize=(15, 10),
    colormap=blues,
)

# Axis labels, tick label sizes and title.
ax.set_xlabel('No.of Shipments', fontsize=15)
ax.set_ylabel('Country', fontsize=15)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.title('Total Shipments by Country and Customer Class', fontsize=15)
# Work on a copy of df1 sorted ascending by 'total' and overwrite each class
# column with its percentage share of the row's total.
df2 = df1.sort_values(['total'], ascending=True)
for class_name in ('AA', 'A', 'B', 'C'):
    df2[class_name] = 100 * df2[class_name] / df2['total']

# Keep every column except the last one.
df3 = df2.iloc[:, :-1]
#Can only enumerate on the AA column. How could we do all 4 columns?
# One label per row of df3: the 'AA' percentage is used both as the label text
# and (minus 1) as the x coordinate; y is the row position shifted down by 0.2.
for row_pos, pct in enumerate(df3['AA']):
    label = "{0:.1f}%".format(pct)
    ax.text(pct - 1, row_pos - 0.2, label,
            color='white', fontweight='bold', fontsize=15)
Example data:
ClassWeight AA A B C
Country
Romania 17.142857 32.268908 28.235294 22.352941
Finland 60.325203 13.495935 12.682927 13.495935
{'Country': {0: 'France', 1: 'Poland', 2: 'Lithuania', 3: 'United Kingdom', 4: 'Denmark'}, 'Count': {0: 233, 1: 232, 2: 286, 3: 236, 4: 223}, 'SumWeight': {0: 8072469.5, 1: 6689511.05, 2: 5158305.25, 3: 4675914.53, 4: 3536684.52}, 'AvgWeight': {0: 34645.79, 1: 28834.1, 2: 18036.03, 3: 19813.2, 4: 15859.57}, 'ClassWeight': {0: 'AA', 1: 'AA', 2: 'AA', 3: 'AA', 4: 'AA'}}
I've tried to rearrange your code a bit - you don't need to create new dataframes each time, and you only really need to sort by your totals once.
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn as sns
# Sample input: five rows for a single country, one count per class plus a
# row whose class is 'total'.
df = pd.DataFrame({
    'Country': ['France'] * 5,
    'Count': [100, 232, 286, 236, 854],
    'ClassWeight': ['AA', 'A', 'B', 'C', 'total'],
})

# Track which value columns we want to plot
VALUE_COLS = ['AA', 'A', 'B', 'C']

# Sum 'Count' per (Country, ClassWeight), pivot ClassWeight into columns, fill
# missing combinations with 0, and sort once here (descending by 'total') so
# no later step has to sort again.
df1 = (
    df.groupby(['Country', 'ClassWeight'])['Count']
    .sum()
    .unstack('ClassWeight')
    .fillna(0)
    .sort_values(by='total', ascending=False)
)
# Add one '<class>_%' column per plotted class: the class count expressed as
# a percentage of the row's 'total'.
for class_name in VALUE_COLS:
    df1[class_name + '_%'] = df1[class_name].mul(100).div(df1['total'])
# Horizontal stacked bars: one bar per row of df1, one segment per entry of
# VALUE_COLS (in that order), coloured from seaborn's "Blues_d" palette.
bar_colors = ListedColormap(sns.color_palette("Blues_d"))
ax = df1[VALUE_COLS].plot(
    kind='barh',
    stacked=True,
    width=0.8,
    figsize=(15, 10),
    colormap=bar_colors,
)

# Set up labels and ticks
ax.set_xlabel('No.of Shipments', fontsize=15)
ax.set_ylabel('Country', fontsize=15)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.title('Total Shipments by Country and Customer Class', fontsize=15)
# Add in text labels: one percentage label per visible segment, right-aligned
# just inside the segment's right-hand edge.
# 'label_tot' accumulates the counts of the classes handled so far, i.e. the x
# coordinate at which the current class's segment ends in each stacked bar.
df1['label_tot'] = 0
for col in VALUE_COLS:
    df1['label_tot'] += df1[col]
    segments = df1[[col, col + '_%', 'label_tot']].itertuples(index=False, name=None)
    # enumerate() yields the row position, which is also the y coordinate of
    # that row's bar.
    for i, (count, val, pos) in enumerate(segments):
        if not count > 0:
            # Zero-width (or NaN) segment: its label would be drawn at the
            # same x as the previous segment's label and overlap it.
            continue
        ax.text(pos - 1, i, "{0:.1f}%".format(val),
                color='white', fontweight='bold', fontsize=15,
                ha='right', va='center')  # va='center' centres the text on the bar
With my slightly modified version of your input data, this gives something like: