I have a simple series of user events, where there is a login and a logout, and a few activities are performed in between. I tried various types and kinds of graphs but had no luck.
I want to shade the time between login and logout in some color so that it represents activity. And on the Y axis, each activity should be represented as a star.
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from collections import namedtuple
# Lightweight record for one user event: when it happened and which kind it was.
Event = namedtuple('Event', ('event_time', 'event_type'))

# Codes used for event_type: 1 = Login, 2 = watch, 3 = buy, 4 = like, 5 = Logout
_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"
_raw_events = (
    ('2018-04-20 10:00:00', 1),
    ('2018-04-20 12:00:00', 2),
    ('2018-04-20 14:00:00', 3),
    ('2018-04-20 16:00:00', 4),
    ('2018-04-20 19:00:00', 5),
    ('2018-04-21 07:00:00', 1),
    ('2018-04-21 10:00:00', 2),
    ('2018-04-21 14:00:00', 5),
)

# Two sessions' worth of sample events, in chronological order.
user_events = [
    Event(event_time=datetime.strptime(stamp, _TIMESTAMP_FORMAT), event_type=code)
    for stamp, code in _raw_events
]

# One row per event, indexed by the event's timestamp.
df = pd.DataFrame(user_events, columns=['event_time', 'event_type']).set_index('event_time')
# Attempt so far: plot the event_type column against the event_time index
# using the default pandas Series plot.
df['event_type'].plot()
I'm trying to get something along these lines. Any advice is appreciated.
Complete working solution based on ALollz's solution:
import pandas as pd
# Used for the legend
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import matplotlib.patches as mpatches
from datetime import datetime
from collections import namedtuple
# A single user event: its timestamp plus a numeric event-type code.
Event = namedtuple('Event', ('event_time', 'event_type'))


def _parse(stamp):
    """Convert a 'YYYY-MM-DD HH:MM:SS' string into a datetime."""
    return datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S")


# event_type codes: 1 -> Login, 2 -> watch, 3 -> buy, 4 -> like, 5 -> Logout
user_events = [
    # first session
    Event(_parse('2018-04-20 10:00:00'), 1),
    Event(_parse('2018-04-20 12:00:00'), 2),
    Event(_parse('2018-04-20 14:00:00'), 3),
    Event(_parse('2018-04-20 16:00:00'), 4),
    Event(_parse('2018-04-20 19:00:00'), 5),
    # second session
    Event(_parse('2018-04-21 07:00:00'), 1),
    Event(_parse('2018-04-21 10:00:00'), 2),
    Event(_parse('2018-04-21 14:00:00'), 5),
]

# Tabulate the events and use the timestamp as the row index.
df = pd.DataFrame(user_events, columns=['event_time', 'event_type']).set_index('event_time')
# Draw the event timeline: one star per event (y = event type, x = event time)
# on top of a background that is shaded differently while the user is logged in.

# Set a margin for the time on the plot
tmargin = pd.Timedelta(hours=2)

# Set up the figure and the plotting region
_ = plt.figure(figsize=(12, 8))
_ = plt.xlabel(df.index.name)
_ = plt.xlim(df.index.min() - tmargin, df.index.max() + tmargin)

# Plot the points with scatter, using the colors you can define for each event type
cdict = {1: 'green', 2: 'maroon', 3: 'yellow', 4: 'cyan', 5: 'red'}
ldict = {1: '1 - Login', 2: '2 - Watch', 3: '3 - Buy', 4: '4 - Like', 5: '5 - Logout'}
# zorder=2 keeps the stars above the shaded spans drawn below.
_ = plt.scatter(df.index, df.event_type, marker='*', zorder=2, s=100,
                c=[cdict[value] for value in df.event_type.values])
_ = plt.xticks(rotation=90)

# Make the default background pink for inactive regions
_ = plt.axvspan(df.index.min() - tmargin, df.index.max() + tmargin, color='pink', alpha=0.5)

# Make the active regions grey. Logins (1) and logouts (5) are paired by position,
# so this relies on every login having a matching logout, in order.
for login_time, logout_time in zip(df[df.event_type == 1].index, df[df.event_type == 5].index):
    # An opaque white span first hides the pink background ...
    _ = plt.axvspan(login_time, logout_time, color='white', alpha=1, lw=0)
    # ... then a translucent black span on top gives the grey "active" shade.
    _ = plt.axvspan(login_time, logout_time, color='black', alpha=0.2, lw=0)

# Set a margin for the event type
_ = plt.margins(.02)

# Create the legend for the points: one star handle per event type present in the data
labels = [
    mlines.Line2D([], [], color=cdict[item], marker='*',
                  markersize=15, label=ldict[item], lw=0)
    for item in df.event_type.unique()
]

# Add the session information to the legend, using the same color/alpha as the
# spans drawn above so the legend swatches match the plot.
labels.append(mpatches.Patch(color='pink', alpha=0.5, label='Inactive Session'))
labels.append(mpatches.Patch(color='black', alpha=0.2, label='Active Session'))
_ = plt.legend(bbox_to_anchor=(1, 1), handles=labels, fontsize=20)

# Display the plot
_ = plt.show()
I find that for complicated things like this you always have to fall back to matplotlib.
Here's my commented code so you can see what each part does.
import pandas as pd
# Used for the legend
import matplotlib.lines as mlines
import matplotlib.patches as mpatches
# Plot each event as a star (x = event time, y = event type) over a background
# that is red while logged out and blue while logged in.
# NOTE: relies on `df` (indexed by event_time, with an event_type column) and
# `plt` (matplotlib.pyplot) already being defined, as in the question's code.

# Set a margin for the time on the plot
tmargin = pd.Timedelta(hours=2)

# Set up the figure and the plotting region
_ = plt.figure(figsize=(12, 8))
_ = plt.xlabel(df.index.name)
_ = plt.xlim(df.index.min() - tmargin, df.index.max() + tmargin)

# Plot the points with scatter, using the colors you can define for each event type
cdict = {1: 'green', 2: 'maroon', 3: 'yellow', 4: 'cyan', 5: 'red'}
ldict = {1: '1 - Login', 2: '2 - Watch', 3: '3 - Buy', 4: '4 - Like', 5: '5 - Logout'}
# zorder=2 keeps the stars on top of the background spans.
_ = plt.scatter(df.index, df.event_type, marker='*', zorder=2, s=400,
                c=[cdict[value] for value in df.event_type.values])
_ = plt.xticks(rotation=30)

# make default background red for inactive regions
_ = plt.axvspan(df.index.min() - tmargin, df.index.max() + tmargin, color='r', alpha=0.5)

# make the active regions blue
# Login (1) and logout (5) timestamps are paired by position, so each login is
# expected to have a matching logout.
logins = df[df.event_type == 1].index
logouts = df[df.event_type == 5].index
for start, end in zip(logins, logouts):
    # Cover the red background with opaque white before tinting it blue.
    _ = plt.axvspan(start, end, color='white', alpha=1, lw=0)
    _ = plt.axvspan(start, end, color='b', alpha=0.2, lw=0)

# Set a margin for the event type
_ = plt.margins(.02)

# Create the legend for the points (one star entry per event type in the data)
labels = [
    mlines.Line2D([], [], color=cdict[item], marker='*',
                  markersize=15, label=ldict[item], lw=0)
    for item in df.event_type.unique()
]

# Add the session information to the legend
labels.append(mpatches.Patch(color='red', alpha=0.5, label='Inactive Session'))
labels.append(mpatches.Patch(color='blue', alpha=0.2, label='Active Session'))
_ = plt.legend(bbox_to_anchor=(1, 1), handles=labels, fontsize=20)

# Display the plot
_ = plt.show()
If you want to add some dashed lines before each star, this will do it. (It will be fine as long as your df
doesn't have too many rows.)
# Draw one dashed horizontal line per event, from its start time to start + duration,
# at the height of its event type and in that type's color.
# NOTE(review): assumes df has exactly two columns, event_type and event_duration
# (a timedelta), in that order -- event_duration is not created in the code above.
for index, (event_type, event_duration) in df.iterrows():
    _ = plt.hlines(y=event_type, xmin=index, xmax=index + event_duration,
                   linestyle='--', color=cdict[event_type], lw=3)
And since you want the stars to be at the end of the period, you should probably change a few other things in the code to include the duration, for example the scatter call:
# Place each star at the end of its event (start time + duration) instead of the start.
star_times = df.index + df.event_duration.values
star_colors = [cdict[value] for value in df.event_type.values]
_ = plt.scatter(
    star_times,
    df.event_type,
    marker='*',
    zorder=2,
    s=100,
    c=star_colors,
)