Search code examples
python, pandas, matplotlib, python-ggplot

Matplotlib: add a session (duration) indicator and represent y-axis values as stars


I have a simple series of user events, where there is a login and a logout, and a few activities are performed in between. I tried various types and kinds of graphs but had no luck.

I want to show the time between login and logout in some color so that it represents activity. And on the Y axis, the activities should be represented as stars.

%matplotlib  inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from collections import namedtuple


Event = namedtuple('Event', ('event_time', 'event_type'))

# Sample data as (timestamp, event_type) pairs.
# event_type = 1 -> Login,  2 -> watch , 3 -> buy , 4 -> like , 5 -> Logout
_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"
_RAW_EVENTS = (
    ('2018-04-20 10:00:00', 1),
    ('2018-04-20 12:00:00', 2),
    ('2018-04-20 14:00:00', 3),
    ('2018-04-20 16:00:00', 4),
    ('2018-04-20 19:00:00', 5),
    ('2018-04-21 07:00:00', 1),
    ('2018-04-21 10:00:00', 2),
    ('2018-04-21 14:00:00', 5),
)

# Parse every timestamp string into a datetime and wrap the pair in an Event.
user_events = [
    Event(event_time=datetime.strptime(stamp, _TIMESTAMP_FORMAT), event_type=code)
    for stamp, code in _RAW_EVENTS
]

# One row per event, indexed by the time at which it happened.
df = pd.DataFrame(user_events, columns=['event_time', 'event_type']).set_index('event_time')
# Baseline attempt: pandas' default plot of event_type against the event_time index.
event_series = df['event_type']
event_series.plot()

I'm trying to get something along these lines. Any advice is appreciated.

enter image description here

Complete working solution based on ALollz's solution

import pandas as pd
# Third-party imports (mlines and mpatches are used for the legend)
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import matplotlib.patches as mpatches

from datetime import datetime
from collections import namedtuple

Event = namedtuple('Event', ('event_time', 'event_type'))


def _event_at(stamp, event_type):
    """Build an Event from a 'YYYY-MM-DD HH:MM:SS' string and an event type code."""
    return Event(event_time=datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S"),
                 event_type=event_type)


# event_type = 1 -> Login,  2 -> watch , 3 -> buy , 4 -> like , 5 -> Logout
user_events = [
    _event_at('2018-04-20 10:00:00', 1),
    _event_at('2018-04-20 12:00:00', 2),
    _event_at('2018-04-20 14:00:00', 3),
    _event_at('2018-04-20 16:00:00', 4),
    _event_at('2018-04-20 19:00:00', 5),
    _event_at('2018-04-21 07:00:00', 1),
    _event_at('2018-04-21 10:00:00', 2),
    _event_at('2018-04-21 14:00:00', 5),
]

# Tabulate the events and use the event time as the index.
df = pd.DataFrame(user_events, columns=['event_time', 'event_type'])
df = df.set_index('event_time')

# Draw the user events as coloured stars on a timeline whose background is
# shaded by session state (inactive vs. active, i.e. between login and logout).
# Expects `df` to be indexed by event time with an integer `event_type` column.
# The `_ =` assignments only keep a notebook from echoing the return values.

# Set a margin for the time on the plot (padding on both sides of the data)
tmargin = pd.Timedelta(hours=2)
x_start = df.index.min() - tmargin
x_end = df.index.max() + tmargin

# Marker colour and legend text for each event type code
cdict = {1: 'green', 2: 'maroon', 3: 'yellow', 4: 'cyan', 5: 'red'}
ldict = {1: '1 - Login', 2: '2 - Watch', 3: '3 - Buy', 4: '4 - Like', 5: '5 - Logout'}

# Session shading. Each style is defined once and reused for both the spans
# and the legend patches, so the legend swatches match what is actually drawn.
inactive_style = dict(color='pink', alpha=0.5)
active_style = dict(color='black', alpha=0.2)

# Set up the figure and the plotting region
_ = plt.figure(figsize=(12,8))
_ = plt.xlabel(df.index.name)
_ = plt.xlim(x_start, x_end)

# Plot the points with scatter, using the colors defined for each event type.
# zorder=2 requests that the stars are drawn above the shaded spans added below.
_ = plt.scatter(df.index, df.event_type, marker='*', zorder=2, s=100,
        c=[cdict[value] for value in df.event_type.values])
_ = plt.xticks(rotation=90)

# Make the default background pink: everything not covered by a session is inactive
_ = plt.axvspan(x_start, x_end, **inactive_style)

# Shade the active regions. zip pairs the i-th login (type 1) with the i-th
# logout (type 5); a login without a matching logout is silently left unshaded.
login_times = df[df.event_type==1].index
logout_times = df[df.event_type==5].index
for login_time, logout_time in zip(login_times, logout_times):
    # Opaque white first, so the pink background does not tint the session colour
    _ = plt.axvspan(login_time, logout_time, color='white', alpha=1, lw=0)
    _ = plt.axvspan(login_time, logout_time, lw=0, **active_style)

# Set a margin for the event type
_ = plt.margins(.02)

# Create the legend for the points: one star handle per event type present in df
labels = [
    mlines.Line2D([], [], color=cdict[item], marker='*',
                  markersize=15, label=ldict[item], lw=0)
    for item in df.event_type.unique()
]

# Add the session information to the legend
labels.append(mpatches.Patch(label='Inactive Session', **inactive_style))
labels.append(mpatches.Patch(label='Active Session', **active_style))

_ = plt.legend(bbox_to_anchor=(1,1), handles=labels, fontsize=20)

# Display the plot
_ = plt.show()

Solution

  • I find for complicated things like this you always have to fall back to matplotlib. Here's my commented code so you can see what each part does.

    import pandas as pd
    # Used for the legend
    import matplotlib.lines as mlines
    import matplotlib.patches as mpatches
    
    # Pad the time axis by two hours on either side of the data.
    tmargin = pd.Timedelta(hours=2)
    x_lo = df.index.min() - tmargin
    x_hi = df.index.max() + tmargin

    # Create the figure, name the x axis after the index and fix the x range.
    _ = plt.figure(figsize=(12, 8))
    _ = plt.xlabel(df.index.name)
    _ = plt.xlim(x_lo, x_hi)

    # One star per event; the colour is looked up from the event type.
    cdict = {1: 'green', 2: 'maroon', 3: 'yellow', 4: 'cyan', 5: 'red'}
    ldict = {1: '1 - Login', 2: '2 - Watch', 3: '3 - Buy', 4: '4 - Like', 5: '5 - Logout'}
    star_colors = [cdict[kind] for kind in df.event_type.values]
    _ = plt.scatter(df.index, df.event_type, marker='*', zorder=2, s=400, c=star_colors)
    _ = plt.xticks(rotation=30)

    # Paint the whole visible range red: anything not covered below is inactive.
    _ = plt.axvspan(x_lo, x_hi, color='r', alpha=0.5)

    # Cover each login -> logout interval with opaque white, then tint it blue.
    login_times = df[df.event_type == 1].index
    logout_times = df[df.event_type == 5].index
    for session_start, session_end in zip(login_times, logout_times):
        _ = plt.axvspan(session_start, session_end, color='white', alpha=1, lw=0)
        _ = plt.axvspan(session_start, session_end, color='b', alpha=0.2, lw=0)

    # Leave a small margin around the event types.
    _ = plt.margins(.02)

    # Legend handles: a star for every event type that occurs in df ...
    labels = [
        mlines.Line2D([], [], color=cdict[item], marker='*',
                      markersize=15, label=ldict[item], lw=0)
        for item in df.event_type.unique()
    ]
    # ... followed by one patch per session state.
    labels += [
        mpatches.Patch(color='red', alpha=0.5, label='Inactive Session'),
        mpatches.Patch(color='blue', alpha=0.2, label='Active Session'),
    ]

    _ = plt.legend(bbox_to_anchor=(1, 1), handles=labels, fontsize=20)

    # Show the finished figure.
    _ = plt.show()
    

    Final Plot

    If you want to add some dashed lines before each star, this will do it. (It will be fine as long as your df doesn't have too many rows.)

    # Draw one dashed horizontal line per event, running from the event's
    # timestamp (the index) to timestamp + event_duration, in that event
    # type's colour. Columns are read by name rather than by unpacking each
    # row positionally, so extra columns or a different column order in df
    # do not break the loop.
    # NOTE(review): assumes df has an `event_duration` column that can be added
    # to the index (e.g. Timedelta values) - confirm against your data.
    for index, event_type, event_duration in zip(df.index, df.event_type, df.event_duration):
        _ = plt.hlines(y=event_type, xmin=index, xmax=index+event_duration, linestyle='--',
                      color=cdict[event_type], lw=3)
    

    And since you want the stars to be at the end of the period, you should probably change a few other things in the code to include the duration:

    # Place each star at event time + duration instead of at the event time.
    star_times = df.index + df.event_duration.values
    star_colors = [cdict[value] for value in df.event_type.values]
    _ = plt.scatter(star_times, df.event_type, marker='*', zorder=2, s=100, c=star_colors)