I have a dataset containing data on COVID cases. The link is as follows:
I have three questions waiting to be answered:
I tried many times, but I couldn't come to a conclusion. I used pandas to create the DataFrame, but I couldn't draw those three graphics/charts. I can reach the results by filtering and grouping as desired, but I cannot turn them into graphics. Thanks for the help, guys. (If you want more details, I can share them with you.)
I am only answering because I found the task interesting, so here you go.
1:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
import seaborn.objects as so
# Download the Our World in Data COVID-19 CSV to a local file, then load it.
url = r'https://covid.ourworldindata.org/data/owid-covid-data.csv'

# Fetch first and open the output file afterwards, so a failed request does
# not leave an empty or truncated covid_data.csv behind.
# The timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=60)
# Fail loudly on an HTTP error instead of saving an error page as "CSV" data.
response.raise_for_status()
with open('covid_data.csv', 'wb') as f:
    f.write(response.content)

df = pd.read_csv('covid_data.csv')
df['date'] = pd.to_datetime(df['date'])

# Rows without a continent are dropped before grouping.
# NOTE(review): presumably these are aggregate rows (e.g. whole continents or
# "World") rather than countries -- verify against the dataset.
group_location_max = df.dropna(subset='continent').groupby(by=['location']).max()

# Ten locations with the highest per-million maxima, largest first.
top10_total_cases_per_milliion = (
    group_location_max['total_cases_per_million']
    .sort_values(ascending=False)
    .head(10)
)
top10_total_deaths_per_milliion = (
    group_location_max['total_deaths_per_million']
    .sort_values(ascending=False)
    .head(10)
)
def make_autopct(values):
    """Return an ``autopct`` callback that labels a pie wedge with its share and value.

    Args:
        values: The numbers the pie chart is drawn from; their sum is used
            to convert a wedge percentage back into an absolute value.

    Returns:
        A function taking a percentage ``pct`` and returning a string of the
        form ``'12.34% (56)'``: the percentage with two decimals followed by
        the corresponding value rounded to an integer.
    """
    # The total does not change between wedges, so compute it once here
    # instead of on every callback invocation.
    total = sum(values)

    def my_autopct(pct):
        val = int(round(pct * total / 100.0))
        return '{p:.2f}% ({v:d})'.format(p=pct, v=val)

    return my_autopct
# Pie charts of the top-10 locations by cases and by deaths per million.
vals1 = top10_total_cases_per_milliion.values
vals2 = top10_total_deaths_per_milliion.values

# One explode offset per wedge, sized from the data rather than hard-coded to
# 10, so the charts still render when fewer than ten locations are available.
ax1 = top10_total_cases_per_milliion.plot.pie(
    figsize=(10, 9),
    autopct=make_autopct(vals1),
    explode=[0.1] * len(vals1),
)
# Shift the y-axis label to the left of the axes (axes coordinates).
ax1.yaxis.set_label_coords(-0.15, 0.5)
# Applied to both figures so the two charts are laid out consistently.
plt.tight_layout()
plt.show()

ax2 = top10_total_deaths_per_milliion.plot.pie(
    figsize=(10, 9),
    autopct=make_autopct(vals2),
    explode=[0.1] * len(vals2),
)
ax2.yaxis.set_label_coords(-0.15, 0.5)
plt.tight_layout()
plt.show()
2:
# Stacked area charts of total cases and total deaths over time, per continent.


def _continent_area_plot(frame, value_column):
    """Build a stacked, half-transparent area plot of *value_column* by date,
    coloured by continent."""
    # NOTE(review): so.Agg() aggregates with the mean by default, so each band
    # is the per-date mean across a continent's rows, not their sum -- confirm
    # that this is the intended statistic.
    base = so.Plot(frame, x='date', y=value_column, color='continent')
    layered = base.add(so.Area(alpha=.5), so.Agg(), so.Stack())
    return layered.layout(size=(8, 4))


# Keep only the columns each chart needs and drop rows with any missing value.
total_cases_slice = df[['date', 'continent', 'total_cases']].dropna()
total_deaths_slice = df[['date', 'continent', 'total_deaths']].dropna()

s1 = _continent_area_plot(total_cases_slice, 'total_cases')
s2 = _continent_area_plot(total_deaths_slice, 'total_deaths')

# The plots are written to image files rather than shown interactively.
s1.save('s1.png', bbox_inches='tight')
s2.save('s2.png', bbox_inches='tight')
3:
# Line chart of total_cases per location for March 2020.
# Series.between is inclusive on both ends, so 1 March is part of the window;
# the previous strict "greater than 2020-03-01" bound silently dropped it.
in_march = df.date.between('2020-03-01', '2020-03-31')
total_cases_march = df[in_march & df.continent.notna()][['date', 'location', 'total_cases']]

s3 = sns.lineplot(data=total_cases_march, x='date', y='total_cases', hue='location')
# Anchor the legend well to the right of the axes so it does not cover the lines.
plt.legend(bbox_to_anchor=(2.04, 1), loc="upper right")
# Tilt the date labels so neighbouring ticks do not overlap.
for tick in s3.get_xticklabels():
    tick.set_rotation(45)
plt.show()