I have a dataframe with five columns [CAT_LET, fid, input, climate change, terrestrial acidification].
In my real dataset there are 6 distinct CAT_LET values and 3871 fids.
The code makes a stacked bar graph scaled to 100% for each impact (with the contribution of each input for each fid) and should organize the bars by CAT_LET and fid.
Now I need to add the x labels to the graph where I can see each of the 6 CAT_LET values for groups of fid that vary in length. In my example I expect veg to appear in x for fids 1,3 and 5 and fruit for 2 and 4. Any idea how to add these ticks and labels?
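In my real dataset I do not want each of the 3871 fids to get its own category name on the x-axis; instead, each category name should appear only once, under the group of adjacent bars that share that category (bars of the same category should be next to each other).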
Thanks for the help.
import pylab as pb
from glob import glob
import pandas as pd
import numpy as np
# Example data: one row per (fid, input) pair with its category and two impact scores.
All_impacts_cat2 = pd.DataFrame({
    'CAT_LET': ['veg', 'veg', 'fruit', 'fruit', 'veg', 'veg', 'veg', 'fruit', 'veg', 'fruit', 'veg', 'fruit', 'fruit', 'veg', 'veg'],
    'fid': [5, 1, 4, 2, 3, 1, 5, 2, 3, 4, 1, 2, 4, 3, 5],
    'input': ['urea', 'urea', 'kgN', 'urea', 'urea', 'kgN', 'kgN', 'kgN', 'kgN', 'urea', 'manure', 'manure', 'manure', 'manure', 'manure'],
    'climate change': [1, 0, 15, 10, 1, 20, 1, 1, 10, 15, 4, 10, 15, 3, 2],
    'terrestrial acidification': [0, 1, 10, 10, 2, 1, 2, 10, 1, 15, 4, 20, 25, 3, 2],
})
impact_list = ['terrestrial acidification', 'climate change']
inputs = ['urea', 'kgN', 'manure']

# Only the first impact is plotted; widen the slice to plot the others too.
for impact in impact_list[:1]:
    # One array of impact values per input, ordered by category and then by fid.
    values_list = [
        All_impacts_cat2.loc[All_impacts_cat2['input'] == name]
        .sort_values(by=['CAT_LET', 'fid'])[impact]
        .values
        for name in inputs
    ]
    n_bars = len(values_list[0])
    labels = list(range(n_bars))
    width = 0.35

    # Per-bar totals used to rescale every stack to a height of 1.
    # A total of 0 is replaced by 1 so the division below stays defined.
    sum_total = np.asarray(values_list, dtype=float).sum(axis=0)
    sum_total[sum_total == 0] = 1

    fig, ax = pb.subplots(figsize=(11, 10))
    # Running height of each stack; the next input's segment starts here.
    sum_values = np.zeros(n_bars)
    for name, values in zip(inputs, values_list):
        data = values / sum_total
        ax.bar(labels, data, width, label=name, bottom=sum_values)
        sum_values = sum_values + data
    ax.set_ylabel(impact)
    ax.legend()
    pb.show()
As a quick fix, you could collect the unique CAT_LET values for each fid
and use them to label the x-positions of the bars.
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
# Example data: one row per (fid, input) pair with its category and two impact scores.
All_impacts_cat2 = pd.DataFrame({
    'CAT_LET': ['veg', 'veg', 'fruit', 'fruit', 'veg', 'veg', 'veg', 'fruit', 'veg', 'fruit', 'veg', 'fruit', 'fruit', 'veg', 'veg'],
    'fid': [5, 1, 4, 2, 3, 1, 5, 2, 3, 4, 1, 2, 4, 3, 5],
    'input': ['urea', 'urea', 'kgN', 'urea', 'urea', 'kgN', 'kgN', 'kgN', 'kgN', 'urea', 'manure', 'manure', 'manure', 'manure', 'manure'],
    'climate change': [1, 0, 15, 10, 1, 20, 1, 1, 10, 15, 4, 10, 15, 3, 2],
    'terrestrial acidification': [0, 1, 10, 10, 2, 1, 2, 10, 1, 15, 4, 20, 25, 3, 2],
})
impact_list = ['terrestrial acidification', 'climate change']
inputs = ['urea', 'kgN', 'manure']

# Sort each input's rows by category and then fid. This ordering defines the
# left-to-right order of the bars and does not depend on the impact plotted,
# so it is computed once, outside the impact loop.
sorted_by_input = [
    All_impacts_cat2.loc[All_impacts_cat2['input'] == name].sort_values(by=['CAT_LET', 'fid'])
    for name in inputs
]

# fid drawn at each bar position, taken from the first input.
# NOTE: the stacking below already assumes every input lists the same fids in
# the same sorted order; that is true for the sample data.
bar_fids = sorted_by_input[0]['fid'].to_numpy()

# Get all unique CAT_LET entries for each fid, joined into one label per fid
# (a fid with several categories is shown as e.g. "fruit, veg").
cat_groups = (
    All_impacts_cat2['CAT_LET']
    .groupby(All_impacts_cat2['fid'])
    .unique()
    .apply(lambda cats: ', '.join(map(str, cats)))
)
# groupby orders its result by fid, but the bars are ordered by (CAT_LET, fid).
# Reorder the labels to the bar order so that each label sits under its own bar.
tick_labels = cat_groups.reindex(bar_fids).tolist()

# Only the first impact is plotted; widen the slice to plot the others too.
for impact in impact_list[:1]:
    values_list = [frame[impact].values for frame in sorted_by_input]
    labels = list(range(len(values_list[0])))
    width = 0.35

    # Per-bar totals used to rescale every stack to a height of 1.
    # A total of 0 is replaced by 1 so the division below stays defined.
    sum_total = np.zeros(len(labels))
    for values in values_list:
        sum_total = sum_total + np.array(values)
    sum_total[sum_total == 0] = 1

    fig, ax = plt.subplots(figsize=(11, 10))
    # Running height of each stack; the next input's segment starts here.
    sum_values = np.zeros(len(labels))
    for name, values in zip(inputs, values_list):
        data = values / sum_total
        ax.bar(labels, data, width, label=name, bottom=sum_values)
        sum_values = sum_values + data

    # Relabel the x-ticks: one tick per bar, labelled with that bar's categories.
    ax.set_xticks(labels, tick_labels)
    ax.set_ylabel(impact)
    ax.legend()
    plt.show()
This approach allows for the possibility that a fid is mapped to more than one CAT_LET value, even though your sample dataframe implies that each fid has exactly one (to see the difference, change the first veg to fruit). If each fid has exactly one CAT_LET entry assigned, then you could simplify your approach.
Please also note that I have removed pylab from your code as its use is now discouraged by matplotlib.