I'm plotting some columns of my data frame in a grouped bar chart. My code currently plots the value counts, but I would like to plot the percentage of the values relative to the sum of the grouped values. For instance, the first column is about gender and it counts 530 females and 470 males. I would like to have the first group of my first chart divided by 530 and represented as a percentage and the second group divided by 470 and represented as a percentage.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Survey columns to plot, one figure per column.
# NOTE: `df` and `lista_titoli_graph` are not defined in this snippet; they are
# expected to exist already (the data frame and one chart title per column).
columns = [
    "1 : A quale sesso appartieni? ",
    "age range",
    "6 : Come è composto il tuo nucleo familiare? ",
    "7 : Unicamente ai fini statistici ti chiediamo di indicare in quale delle seguenti fasce rientra il reddito familiare mensile netto (per rispondere considera tutte le fonti di reddito tue o del tuo coniuge o delle persone con le quali vivi: stipendio, pe...",
    "8 : Quale tra le seguenti categorie meglio descrive la tua professione attuale? ",
]

# Set a custom color palette for the charts
colors = sns.color_palette("Set3")

# Create an individual grouped bar chart for each column
for i, column in enumerate(columns):
    plt.figure(figsize=(12, 6))
    ax = sns.countplot(x=column, hue="Sportivo/Non Sportivo", data=df, palette=colors)

    # Set the axis labels
    ax.set_xlabel(column)
    ax.set_ylabel("Count")

    # Set the title from the list (same position as the column)
    ax.set_title(lista_titoli_graph[i])

    # Set the legend
    ax.legend(title="Sportivo/Non Sportivo")

    # Rotate x-axis labels so long answers do not overlap
    plt.xticks(rotation=90)

    # Remove spines
    sns.despine()

    # Add value labels to the bars (without decimal points)
    for p in ax.patches:
        height = p.get_height()
        # A category/hue combination with no rows can be drawn as a bar with
        # NaN height; int(NaN) raises ValueError, so leave such bars unlabelled.
        if pd.isna(height):
            continue
        ax.annotate(
            f"{int(height)}",
            (p.get_x() + p.get_width() / 2., height),
            ha="center",
            va="center",
            xytext=(0, 5),
            textcoords="offset points",
        )

    # Show the plot
    plt.tight_layout()
    plt.show()
To adapt your code to show percentages of each column that you have, there are a few things you need to do...
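- The example below uses seaborn's `penguins` dataset. Note that the `species` column has been used as the hue.
- Get the total for each category with `mycounts=df[column].value_counts()`. You could also add up each occurrence yourself, but this is simpler. This will give you the Total by which you will need to divide each value.
- Format the annotation text as a percentage so that it is displayed with the `%` symbol.

This will give you the below plot (for penguins):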
The full code is below, including changes...
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math
## NEW - My data... note that I have sorted this by the HUE = species
df = sns.load_dataset('penguins')
df.sort_values('species', inplace=True)

# Define the columns to consider - UPDATED for penguins
columns = ['sex', 'island']

# Column that splits every x-axis group into coloured bars; also the legend title
hue_column = "species"

# Set a custom color palette for the charts
colors = sns.color_palette("Set3")

# Create an individual grouped bar chart for each column
for column in columns:
    # Total number of rows per category of this column: the denominator for
    # every bar that belongs to that category.
    mycounts = df[column].value_counts()

    # Pin the x-axis order explicitly so a bar's position can be mapped back to
    # its category label instead of guessing from the order of ax.patches.
    categories = list(df[column].dropna().unique())

    plt.figure(figsize=(12, 6))
    ax = sns.countplot(x=column, hue=hue_column, data=df, order=categories, palette=colors)

    # Set the axis labels (bar heights are still raw counts)
    ax.set_xlabel(column)
    ax.set_ylabel("Count")

    # Set the title
    ax.set_title(f'lista_titoli_graph : {column}')  ## UPDATED to show column name

    # Set the legend; titled after the column actually used as hue
    ax.legend(title=hue_column)

    # Rotate x-axis labels if necessary
    plt.xticks(rotation=90)

    # Remove spines
    sns.despine()

    # Annotate every bar with its share of the category total
    for p in ax.patches:
        height = p.get_height()
        # Skip bars that carry no data: NaN height (combination without rows)
        # or zero height (nothing to label; would only print "0.0%").
        if math.isnan(height) or height == 0:
            continue

        # Categories sit at integer positions 0..n-1 and the hue bars are
        # dodged around them, so rounding the bar centre gives the category.
        center = p.get_x() + p.get_width() / 2.
        category = categories[int(round(center))]

        # Label-based lookup of the total, then format the ratio as a percentage
        ax.annotate(
            f"{height / mycounts.loc[category]:.1%}",
            (center, height),
            ha="center",
            va="center",
            xytext=(0, 5),
            textcoords="offset points",
        )

    # Show the plot
    plt.tight_layout()
    plt.show()