My groupby challenge: I am trying to visualize, using a line graph, the food production of African countries (45 in my data) over 10 years. After using the groupby function and unstack, the plot comes out well but is not readable, and the colours used to distinguish the individual lines are poor. For his visualization, my lecturer used Wolfram.
How can I achieve this using Python or are there better alternatives to my approach?
Here is my code
# To make the legend readable we reduce the font size
from matplotlib.font_manager import FontProperties

fontP = FontProperties()
fontP.set_size('small')

fig, ax = plt.subplots(figsize=(20, 16))

# Sum per (Year, Country), then move Country into the columns so that each
# country is drawn as its own line with Year (the remaining index) on the x-axis.
df1.groupby(['Year', 'Country']).sum().unstack().plot(ax=ax)

ax.set_yscale("log")
ax.set_ylim(1, 300000)

# Year is the index of the unstacked frame and therefore the x-axis;
# the summed values are on the y-axis.
ax.set_xlabel('Year')
ax.set_ylabel('Total Value')
ax.set_title('Food Production in Africa over the Years')

# Place the legend outside the axes (to the right) so it does not cover the lines.
ax.legend(title='Countries', bbox_to_anchor=(1.05, 1), loc='upper left',
          prop=fontP)
Since your data was not available, I pseudo-created population trends for each country in Africa from the most recent data, and then used ax.text(x, y, country)
to label each line directly at its right-hand end instead of relying on a legend. Your data is less crowded than this, so I think the approach is applicable. This example is adapted from the official reference.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# The code below reads a 'Year' column and treats every other column as one
# country's series (assumes those columns are numeric -- confirm against the CSV).
df = pd.read_csv('./africa_t.csv', sep=',')

fig, ax = plt.subplots(1, 1, figsize=(12, 14))

# These are the colors that will be used in the plot
ax.set_prop_cycle(color=[
    '#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78', '#2ca02c', '#98df8a',
    '#d62728', '#ff9896', '#9467bd', '#c5b0d5', '#8c564b', '#c49c94',
    '#e377c2', '#f7b6d2', '#7f7f7f', '#c7c7c7', '#bcbd22', '#dbdb8d',
    '#17becf', '#9edae5'])

# Remove the plot frame lines; the dashed grid is enough to read values.
for side in ('top', 'bottom', 'right', 'left'):
    ax.spines[side].set_visible(False)

ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()

# Leave room on the right-hand side for the per-line country labels.
fig.subplots_adjust(left=.06, right=.75, bottom=.02, top=.94)

years = df['Year'].to_list()

# One tick per year present in the data (the previous range(2011, 2020, 10)
# produced a single tick at 2011).
ax.set_xticks(years)
ax.set_yticks(range(5000, 210000000, 5000000))
ax.xaxis.set_major_formatter('{x:.0f}')

ax.grid(True, which='major', axis='y', ls='--', lw=.5, c='k', alpha=.3)

# Hide the tick marks themselves but keep the tick labels.
ax.tick_params(axis='both', which='both', labelsize=14,
               bottom=False, top=False, labelbottom=True,
               left=False, right=False, labelleft=True)

# 'Year' is the x-axis, not a series: exclude it so it is not plotted
# against itself and does not receive a label.
countries = [column for column in df.columns if column != 'Year']

# Put each label just to the right of the last plotted year.
label_x = years[-1] + 0.5

for column in countries:
    values = df[column].to_list()
    line, = ax.plot(years, values, lw=2.5)
    # Label the line at its final value, in the line's own color, vertically
    # centered on that value.
    ax.text(label_x, values[-1], column, fontsize=14,
            color=line.get_color(), va='center')

plt.show()