Search code examples
python pandas matplotlib scatter-plot

How to place arrows between multiple scatter points


The code below produces this graph. I wonder if there is a way to turn the lines between value1 and value2 into arrows pointing from value1 to value2, i.e. from blue to green (in this case none of the green points is lower than its blue point).

enter image description here

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
 
# Build the sample data: 20 groups labelled 'A'..'T', each with two values,
# where value2 is value1 plus a random offset in [0, 0.25)
value1 = np.random.uniform(size=20)
value2 = value1 + np.random.uniform(size=20) / 4
df = pd.DataFrame({
    'group': [chr(code) for code in range(65, 85)],
    'value1': value1,
    'value2': value2,
})

# Sort the rows by the first value; they are drawn at y = 1..n in that order
ordered_df = df.sort_values(by='value1')
my_range = range(1, len(df) + 1)
first_values = ordered_df['value1']
second_values = ordered_df['value2']

# One grey horizontal segment per group joins its two values (plt.hlines),
# with a scatter marker at each end
plt.hlines(y=my_range, xmin=first_values, xmax=second_values, color='grey', alpha=0.4)
plt.scatter(first_values, my_range, color='skyblue', alpha=1, label='value1')
plt.scatter(second_values, my_range, color='green', alpha=0.4, label='value2')
plt.legend()

# Label each y position with its group, then add the title and axis names
plt.yticks(my_range, ordered_df['group'])
plt.title("Comparison of the value 1 and the value 2", loc='left')
plt.xlabel('Value of the variables')
plt.ylabel('Group')

# Show the graph
plt.show()

Solution

    • The best option for multiple arrows is matplotlib.pyplot.quiver, because it accepts arrays (or dataframe columns) of locations and directions, unlike matplotlib.pyplot.arrow, which draws only a single arrow per call.
      • Since the y-axis positions are defined by 'group', which are letters, every arrow must be horizontal: use V = np.zeros(len(ordered_df)) or V = ordered_df.index - ordered_df.index for the y-component of the .quiver direction vectors.
    • Plot the dataframe directly with pandas.DataFrame.plot and kind='scatter'.
    • Tested in python 3.8.12, pandas 1.3.3, matplotlib 3.4.3
    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    
    # Create a dataframe (the seed makes the random sample reproducible)
    np.random.seed(354)
    value1 = np.random.uniform(size=20)
    value2 = value1 + np.random.uniform(size=20) / 4
    df = pd.DataFrame({
        'group': [chr(code) for code in range(65, 85)],
        'value1': value1,
        'value2': value2,
    })

    # Sort by the first value, then reset the index so the index values
    # correspond to the y-axis tick locations
    ordered_df = df.sort_values(by='value1').reset_index(drop=True)

    # Draw both value columns against the group labels on the same Axes
    ax = ordered_df.plot(
        kind='scatter', x='value1', y='group',
        color='skyblue', alpha=1, figsize=(8, 6), label='value1',
    )
    ordered_df.plot(
        kind='scatter', x='value2', y='group',
        color='green', alpha=1, ax=ax, label='value2',
        xlabel='Value of the variables', ylabel='Group',
    )

    # Draw one arrow per row, starting at value1 and reaching value2
    U = ordered_df['value2'] - ordered_df['value1']  # x component: the gap between the two values
    V = np.zeros(len(ordered_df))  # y component: 0 for every row, so each arrow is horizontal
    ax.quiver(
        ordered_df['value1'], ordered_df['group'], U, V,
        width=0.003, color='gray', scale_units='x', scale=1,
    )

    # Left-aligned title
    ax.set_title("Comparison of the value 1 and the value 2", loc='left')

    # Show the graph
    plt.show()
    

    enter image description here