I have created this plot where I have "observed E. coli" on the the left side "y axis", "modelled E. coli" on the right side "y axis" and "dates" on the "x axis".
The code is this
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
source = "Sample_table.csv"
df = pd.read_csv(source, encoding = 'unicode_escape')
x = df['Date_1']
y1 = df['Obs_Ec']
y2 = df['Rain']
y3 = df['Mod_Ec']
# Plot Line1 (Left Y Axis)
fig, ax1 = plt.subplots(1,1,figsize=(10,6), dpi= 80)
# Plot Line2 (Right Y Axis)
ax2 = ax1.twinx() # instantiate a second axes that shares the same x-axis
ax2.plot(x, y2, color='tab:blue', linewidth=2.0)
# Plot Line2 (Right Y Axis)
ax3 = ax1.twinx() # instantiate a second axes that shares the same x-axis
ax3.scatter(x, y3)
# Control limits of the y Axis
a,b = 0,80000
c,d = 0,80000
e,f = 0,35
ax1.set_ylim(a,b)
ax3.set_ylim(c,d)
ax2.set_ylim(e,f)
# Decorations
# ax1 (left Y axis)
ax1.set_xlabel('Date', fontsize=20)
ax1.set_ylabel('E. coli - cfu ml-1', color='tab:red', fontsize=20)
ax1.tick_params(axis='y',rotation=0, labelcolor='tab:red')
ax1.grid(alpha=.0)
ax1.tick_params(axis='both', labelsize=14)
# Plot the scatter points
ax1.scatter(x, y1,
color="red", # Color of the dots
s=50, # Size of the dots
alpha=0.5, # Alpha of the dots
linewidths=0.5) # Size of edge around the dots
ax1.scatter(0**np.arange(5), 0**np.arange(5))
ax1.legend(['Observed E. coli'], loc='right',fontsize=14, bbox_to_anchor=(0.2, -0.20))
ax3.scatter(x, y3,
color="green", # Color of the dots
s=50, # Size of the dots
alpha=0.5, # Alpha of the dots
linewidths=0.5) # Size of edge around the dots
ax3.scatter(0**np.arange(5), 0**np.arange(5))
ax3.legend(['Modelled E. coli'], loc='right',fontsize=14, bbox_to_anchor=(0.48, -0.20))
# ax2 (right Y axis)
ax2.set_ylabel("Rainfall - mm", color='tab:blue', fontsize=20)
ax2.tick_params(axis='y', labelcolor='tab:blue')
ax2.tick_params(axis='both', labelsize=15)
ax2.set_xticks(np.arange(1, len(x), 4))
ax2.set_xticklabels(x[0::4], rotation=15, fontdict={'fontsize':10})
ax2.set_title("SP051 - without SR (validation 2018-2020)", fontsize=22)
ax2.legend(['rainfall'], loc='right',fontsize=14, bbox_to_anchor=(1.05, -0.20))
fig.tight_layout()
plt.show()
But this code is giving me this plot below:
I want to change three things in this plot:
An example of the plot I want to create is the one below, but instead of the lines I will be using a scatter plot as shown in the previous figure:
The data can be downloaded here: link for the data
data = {'Date_1': ['1/17/2018', '2/21/2018', '3/21/2018', '4/18/2018', '5/17/2018', '6/20/2018', '7/18/2018', '8/8/2018', '9/19/2018', '10/24/2018', '11/21/2018', '12/19/2018', '1/16/2019', '2/20/2019', '3/20/2019', '4/29/2019', '5/30/2019', '6/19/2019', '7/19/2019', '8/21/2019', '9/18/2019', '10/16/2019', '1/22/2020', '2/19/2020'],
'FLOW_OUTcms': [0.00273, 0.01566, 0.02071, 0.00511, 0.00777, 0.00581, 0.00599, 0.00309, 0.00204, 0.04024, 0.00456, 0.0376, 0.00359, 0.00301, 0.01515, 0.02796, 0.00443, 0.03602, 0.0071, 0.00255, 0.00159, 0.00319, 0.04443, 0.04542],
'Rain': [0.0, 30.4, 2.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 8.7, 0.0, 0.0, 0.1, 0.1, 0.0, 0.0, 0.1, 0.0, 1.1, 0.1, 33.3, 0.0],
'Mod_Ec': [10840, 212, 1953, 2616, 2715, 2869, 3050, 2741, 5479, 1049, 2066, 146, 6618, 7444, 992, 2374, 6602, 82, 5267, 3560, 4845, 1479, 58, 760],
'Obs_Ec': [2500, 69000, 13000, 3300, 1600, 2400, 2300, 1400, 1600, 1300, 10000, 20000, 2000, 2500, 2900, 1500, 280, 260, 64, 59, 450, 410, 3900, 870]}
df = pd.DataFrame(data)
pandas.DataFrame.plot
ax
ax
and assign it to ax2
ax2
customize the secondary axes.python 3.10
, pandas 1.5.0
, matplotlib 3.5.2
matplotlib 3.5.0
, ax.set_xticks
can be used to set the ticks and labels. Otherwise use ax.set_xticks(xticks)
followed by ax.set_xticklabels(xticklabels, ha='center')
, as per this answer.import pandas as pd
# starting with the sample dataframe, convert Date_1 to a datetime dtype
df.Date_1 = pd.to_datetime(df.Date_1)
# plot E coli data
ax = df.plot(x='Date_1', y=['Mod_Ec', 'Obs_Ec'], figsize=(12, 8), rot=0, color=['blue', 'red'])
# the xticklabels are empty strings until after the canvas is drawn
# needing this may also depend on the version of pandas and matplotlib
ax.get_figure().canvas.draw()
# center the xtick labels on the ticks
xticklabels = [t.get_text() for t in ax.get_xticklabels()]
xticks = ax.get_xticks()
ax.set_xticks(xticks, xticklabels, ha='center')
# cosmetics
# ax.set_xlim(df.Date_1.min(), df.Date_1.max())
ax.set_ylim(0, 70000)
ax.set_ylabel('E. coli')
ax.set_xlabel('Date')
ax.legend(['Observed E. coli', 'Modelled E. coli'], loc='upper left', ncol=2, bbox_to_anchor=(-.01, 1.09))
# create twinx for rain
ax2 = ax.twinx()
# filter the rain column to only show points greater than 0
df_filtered = df[df.Rain.gt(0)]
# plot data with on twinx with secondary y as a scatter plot
df_filtered.plot(kind='scatter', x='Date_1', y='Rain', marker='d', ax=ax2, color='deepskyblue', secondary_y=True, legend=False)
# add vlines to the scatter points
ax2.vlines(x=df_filtered.Date_1, ymin=0, ymax=df_filtered.Rain, color='deepskyblue')
# cosmetics
ax2.set_ylim(0, 60)
ax2.invert_yaxis() # reverse the secondary y axis so it starts at the top
ax2.set_ylabel('Rain (mm)')
ax2.legend(['Rainfall'], loc='upper right', ncol=1, bbox_to_anchor=(1.01, 1.09))