Search code examples
python-3.x matplotlib plot label line-plot

How to fix the overlapping lines in matplotlib plot


Some of my markers and lines overlap, which makes it hard to see what the results for each year are. I have already read the previous posts on the same problem (questions 55710343 and 40766909). However, they do not help me, because they assume that parameters like alpha and linestyle can be set individually for each line in separate plt.plot calls, whereas I use a single df.plot call that draws several columns at once.

Anyway, here is my code that you can fully reproduce. My actual data has 8 lines and 10 years.

#Downloading data 
import numpy as np
import pandas as pd
from IPython.display import display, HTML

def install_module(module):
    """Install ``module`` with conda, then ask the notebook front end to restart.

    This only works inside an IPython/Jupyter notebook: the first line is an
    IPython shell escape (not plain Python), and the restart is performed by
    JavaScript that is injected into the notebook page.

    Parameters
    ----------
    module
        Name of the package handed to ``conda install``.
    """
    # IPython shell escape; "$module" is expanded by IPython to the Python
    # variable ``module``. ``-y`` answers conda's confirmation prompt.
    ! conda install "$module" -y
    # JavaScript run by the notebook front end: restart the kernel, select
    # cell 1 and execute the selected cell.
    # NOTE(review): presumably cell 1 is the cell holding this script - confirm.
    js_cmd = ['IPython.notebook.kernel.restart();',
              'IPython.notebook.select(1);',
              'IPython.notebook.execute_cell();'
              ]
    js = "<script>{0}</script>".format(' '.join(js_cmd))
    # Displaying the <script> element is what makes the browser run it.
    display(HTML(js))

# Every season lives at the same URL pattern; only the year changes.
SVPL_URL = 'http://www.svpl.org.uk/index.php/{0}-league-tables?year={0}-league-tables'


def load_league_table(year, names, keep):
    """Download one season's league table and return the columns in ``keep``.

    Parameters
    ----------
    year
        Season to fetch, e.g. ``2019``; substituted into ``SVPL_URL``.
    names
        Column labels for the first table on the page, in page order. The
        list must have exactly as many entries as the table has columns.
    keep
        Subset of ``names`` to retain in the returned frame.

    Returns
    -------
    pandas.DataFrame
        The first table found on the page, relabelled and reduced to ``keep``.

    Raises
    ------
    ImportError
        Re-raised after ``install_module`` has been triggered, when
        ``read_html`` reports a missing parser dependency. Execution must not
        continue here: no table was downloaded, and carrying on would either
        fail on an undefined name or reuse a previous season's data.
    """
    try:
        df_list = pd.read_html(SVPL_URL.format(year))
    except ImportError as e:
        print(e)
        # Install the necessary module for read_html. The first word of the
        # error message is taken to be the name of the missing module.
        module = str(e).split()[0]
        install_module(module)
        raise
    print('Number of Data Frames {}'.format(len(df_list)))
    table = df_list[0].iloc[0:]
    # NOTE: assigning a list wrapped in another list gives the frame a
    # one-level MultiIndex for its columns, so every column label is a
    # 1-tuple such as ('Points',). Plot labels derived from these columns
    # are therefore rendered with round brackets.
    table.columns = [names]
    return table[keep]


# Column layouts as published on the site; they differ between seasons.
names = ['Rank', 'Teams', 'Played', 'Points', 'Won', 'Lost', 'GF', 'GA', 'GD', 'PF', 'PA', 'PD', 'CurrentForm']
names1 = ['Rank', 'Teams', 'Played', 'Points', 'Won', 'Lost', 'Drawn', 'GD', 'PF', 'PA', 'PD', 'CurrentForm']
# 2014, 2015 and 2017 share one layout.
names2 = ['Rank', 'Teams', 'Played', 'Points', 'Won', 'Lost', 'GD', 'PF', 'PA', 'PD', 'CurrentForm']
# 2016 has no PF/PA/PD columns.
names3 = ['Rank', 'Teams', 'Played', 'Won', 'Lost', 'Points', 'GD', 'CurrentForm']

df19 = load_league_table(2019, names, ['Rank', 'Teams', 'Points', 'GD', 'PD'])
df18 = load_league_table(2018, names1, ['Rank', 'Teams', 'Points', 'GD', 'PD'])
df17 = load_league_table(2017, names2, ['Rank', 'Teams', 'Points', 'GD', 'PD'])
df16 = load_league_table(2016, names3, ['Rank', 'Teams', 'Points', 'GD'])
df15 = load_league_table(2015, names2, ['Rank', 'Teams', 'Points', 'GD', 'PD'])
df14 = load_league_table(2014, names2, ['Rank', 'Teams', 'Points', 'GD', 'PD'])

# Filtering dataframes
# For each team: pick the team's row out of every season table, combine the
# rows, add a zero-point placeholder row for each season the team is missing
# from, and keep a single column of points named after the team.
#
# NOTE(review): merge(how='outer') sorts the join keys lexicographically, so
# the combined rows are not guaranteed to be in season order, yet the rows are
# labelled 2015..2019 purely by position further down. Verify the order against
# the source tables.

# Rows of the final index, by position, mapped to the season they stand for.
year_labels = {0: '2015', 1: '2016', 2: '2017', 3: '2018', 4: '2019'}

AlveleyRoyals = (
    df15[df15.values == 'Alveley Royals']
    .merge(df16[df16.values == 'Alveley Royals'], how='outer')
    .merge(df17[df17.values == 'Alveley Royals'], how='outer')
    .merge(df18[df18.values == 'Alveley Royals'], how='outer')
    .merge(df19[df19.values == 'Alveley Royals'], how='outer')
)
AlveleyRoyals = AlveleyRoyals[['Teams', 'Points', 'GD']]
AlveleyRoyals.index.name = 'Year'
# np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0.
row = ['Alveley Royals', 0, np.nan]
# Index -1 sorts before every existing row, so the placeholder becomes the
# first (2015) entry after sort_index().
AlveleyRoyals.loc[-1] = row
AlveleyRoyals = AlveleyRoyals.sort_index().reset_index(drop=True)
# The column label now matches the team name (it was misspelled 'Aveley Royals').
AlveleyRoyals = AlveleyRoyals.rename(index=year_labels, columns={'Points': 'Alveley Royals'})
AlveleyRoyals = AlveleyRoyals[['Alveley Royals']]

TopPub = (
    df15[df15.values == 'Top Pub']
    .merge(df16[df16.values == 'Top Pub'], how='outer')
    .merge(df17[df17.values == 'Top Pub'], how='outer')
    .merge(df18[df18.values == 'Top Pub'], how='outer')
    .merge(df19[df19.values == 'Top Pub'], how='outer')
)
TopPub = TopPub[['Teams', 'Points', 'GD']]
TopPub.index.name = 'Year'
row = ['Top Pub', 0, np.nan]
# Index 0.5 sorts between rows 0 and 1, so the placeholder becomes the second
# (2016) entry after sort_index().
TopPub.loc[0.5] = row
TopPub = TopPub.sort_index().reset_index(drop=True)
TopPub = TopPub.rename(index=year_labels, columns={'Points': 'Top Pub'})
TopPub = TopPub[['Top Pub']]

# NOTE(review): this chain starts from df14, whereas the other two teams start
# from df15 and the result is labelled 2015..2019 - confirm df14 is intended.
AlveleyOaks = (
    df14[df14.values == 'Alveley Oaks']
    .merge(df16[df16.values == 'Alveley Oaks'], how='outer')
    .merge(df17[df17.values == 'Alveley Oaks'], how='outer')
    .merge(df18[df18.values == 'Alveley Oaks'], how='outer')
    .merge(df19[df19.values == 'Alveley Oaks'], how='outer')
)
AlveleyOaks = AlveleyOaks[['Teams', 'Points', 'GD']]
AlveleyOaks.index.name = 'Year'
row = ['Alveley Oaks', 0, np.nan]
# Three negative indices sort before every existing row, giving three leading
# placeholder rows.
AlveleyOaks.loc[-0.2] = row #2015
AlveleyOaks.loc[-0.3] = row #2016
AlveleyOaks.loc[-0.4] = row #2017
AlveleyOaks = AlveleyOaks.sort_index().reset_index(drop=True)
AlveleyOaks = AlveleyOaks.rename(index=year_labels, columns={'Points': 'Alveley Oaks'})
AlveleyOaks = AlveleyOaks[['Alveley Oaks']]

# Merging dataframes
# Join the three single-column frames side by side on their season index.
merged = (
    AlveleyRoyals
    .merge(TopPub, left_index=True, right_index=True)
    .merge(AlveleyOaks, left_index=True, right_index=True)
)
merged

# Plotting lines
import matplotlib.pyplot as plt
from matplotlib.ticker import FixedLocator, FormatStrFormatter
#%matplotlib notebook
# The bundled seaborn styles were renamed with a 'seaborn-v0_8-' prefix in
# Matplotlib 3.6 and the old names were later removed. Use the new name when
# this Matplotlib provides it, otherwise fall back to the old one.
poster_style = 'seaborn-v0_8-poster'
if poster_style not in plt.style.available:
    poster_style = 'seaborn-poster'
plt.style.use(poster_style)

fig, ax = plt.subplots(figsize=(8, 6))

# One colour per column of `merged`, in column order.
my_colors = ['steelblue', 'darkviolet', 'orangered']

# All columns are drawn by one call, so marker/alpha/linestyle given here
# apply to every line alike.
ax = merged.plot(marker='o', figsize=(9, 6), legend=None, color=my_colors, ax=ax)

# Instead of a legend, write each team's name next to the last point of its line.
for line, name in zip(ax.lines, merged.columns):
    y = line.get_ydata()[-1]
    # With the y-axis transform, x=1 is the right edge of the axes while y is
    # in data units; the text is then nudged 6 points to the right.
    # NOTE: if merged.columns is a MultiIndex, `name` is a tuple and is
    # rendered with round brackets.
    ax.annotate(name, xy=(1, y), xytext=(6, 0), color=line.get_color(),
                xycoords=ax.get_yaxis_transform(), textcoords="offset points",
                size=16, va="center")

# Hide the x-axis label.
ax.xaxis.get_label().set_visible(False)
ax.set_ylabel('Points won', color='darkblue', fontdict={'fontsize': 15, 'fontweight': 'medium'})

# The lines are plotted against positions 0..4; label those positions with the years.
years = [2015, 2016, 2017, 2018, 2019]
plt.xticks(range(len(years)), years)
plt.margins(x=0.012, y=0.018)

ax.get_xaxis().tick_bottom()
#ax.get_yaxis().tick_left()

# Keep the tick labels but hide the tick marks on both axes.
ax.tick_params(axis='y', which='major', labelsize=12, bottom=False, top=False, labelbottom=True,
               left=False, right=False, labelright=False)
ax.tick_params(axis='x', which='major', labelsize=14, bottom=False, top=False, labelbottom=True,
               left=False, right=False, labelright=False)

# Show y tick values without decimals; the tick position that FuncFormatter
# passes as a second argument is ignored by the format string.
ax.yaxis.set_major_formatter(plt.FuncFormatter('{:.0f}'.format))
# Light dashed horizontal grid lines only.
ax.grid(True, 'major', 'y', ls='--', lw=.7, c='darkgray', alpha=.5)

plt.show()

this is the chart that you should get

I am not sure why round brackets appear around the labels; if someone can tell me how to fix this, I would be extra grateful.


Solution

  • You can use the alpha keyword in pandas plot the same way as in plt.plot.

    In order to apply the y-offset-transformation you'll need to plot the dataframe column by column and shift each plot differently. In this case you'll first need to set the y-axis limits, otherwise pandas can't figure it out correctly.

    The round brackets come from a multiindex. It can be fixed by converting it to a regular index.

    import matplotlib.transforms as mtrans
    
    # Replace the column MultiIndex with a plain index of its first level, so
    # labels are plain strings instead of tuples (this removes the brackets).
    merged.columns = merged.columns.get_level_values(0)
    # Fix the y-limits up front: the columns are plotted one at a time with a
    # shifted transform, and the limits are not worked out correctly otherwise.
    ax.set_ylim(merged.min().min(), merged.max().max())
    n_lines = len(merged.columns)
    for i, col in enumerate(merged):
        # Spread the lines 3 points apart, centred on the true position.
        # For three lines this gives -3, 0 and +3 points.
        shift = 3 * (i - (n_lines - 1) / 2)
        # Cycle through the colours so that having more lines than colours
        # does not raise IndexError.
        merged.plot(y=col, marker='o', figsize=(9,6), legend=None,
                    color=my_colors[i % len(my_colors)], ax=ax, alpha=.5,
                    transform=mtrans.offset_copy(ax.transData, fig=fig, x=0.0, y=shift, units='points'))
    

    enter image description here