Search code examples
python, matplotlib

Matplotlib weird behaviour when plotting a timeline with plt.barh


I have a DataFrame of events. I want to plot these events to show the timeline for a day.

event_start     event_end   event_type
0   2024-09-12 21:59:00     2024-09-13 05:09:00     Stop
1   2024-09-13 01:26:00     2024-09-13 02:00:00     Farming
2   2024-09-13 02:00:00     2024-09-13 02:35:00     Active
3   2024-09-13 02:35:00     2024-09-13 06:11:00     Farming
4   2024-09-13 02:35:00     2024-09-13 06:11:00     Farming
5   2024-09-13 06:11:00     2024-09-13 07:27:00     Active
6   2024-09-13 07:27:00     2024-09-13 07:47:00     Stop
7   2024-09-13 07:47:00     2024-09-13 08:00:00     Active
8   2024-09-13 08:00:00     2024-09-13 08:06:00     Stop
9   2024-09-13 08:06:00     2024-09-13 08:07:00     Active
10  2024-09-13 08:07:00     2024-09-13 08:16:00     Stop
11  2024-09-13 08:16:00     2024-09-13 08:18:00     Active
12  2024-09-13 08:18:00     2024-09-13 08:45:00     Stop
13  2024-09-13 08:45:00     2024-09-13 09:00:00     Active
14  2024-09-13 09:00:00     2024-09-13 09:31:00     Stop
15  2024-09-13 09:31:00     2024-09-13 10:24:00     Active
16  2024-09-13 10:24:00     2024-09-13 10:32:00     Stop
17  2024-09-13 10:32:00     2024-09-13 10:37:00     Active
18  2024-09-13 10:37:00     2024-09-13 16:29:00     Stop
19  2024-09-13 16:29:00     2024-09-13 16:34:00     Active

I tried plotting with matplotlib.pyplot.barh, but the last event (Active, starting at 16:29 and ending at 16:34) is plotted as a block from 16:29 to roughly 18:30. I can't understand why. This is the plot_timeline function that I'm using.

def plot_timeline(events):
    """Draw every event as a coloured horizontal bar on a one-row timeline.

    Parameters
    ----------
    events : pandas.DataFrame
        Must provide the datetime columns ``event_start`` and ``event_end``
        and the column ``event_type``. Event types missing from the colour
        map are drawn in ``'lightblue'``.

    The x-axis is limited to 04:00-23:00 of the day on which the last row
    starts. Each row is also printed to stdout before it is drawn.
    """
    df_events = events

    # Loop-invariant, so it is built once instead of on every iteration.
    color_map = {
        'Active': 'lightblue',
        'Idling': 'red',
        'Stop': 'black',
        'Farming': 'green'
    }
    seconds_per_day = 24 * 3600

    plt.figure(figsize=(12, 6))

    for _, row in df_events.iterrows():
        print(f"start : {row['event_start']}, end : {row['event_end']}, type : {row['event_type']}")
        duration = row['event_end'] - row['event_start']
        # Matplotlib measures a date axis in days, so a numeric width has to
        # be expressed in days as well. Passing hours made every bar 24x too
        # long (a 5-minute event was drawn about 2 hours wide).
        plt.barh(y=0, width=duration.total_seconds() / seconds_per_day,
                 left=row['event_start'],
                 color=color_map.get(row['event_type'], 'lightblue'))

    # The visible window is anchored on the date of the last event's start.
    last_day = df_events['event_start'].dt.date.iloc[-1]
    plt.xlim(pd.to_datetime(f"{last_day} 04:00"),
             pd.to_datetime(f"{last_day} 23:00"))

    # One tick per hour, labelled as HH:MM.
    x_axis = plt.gca().xaxis
    x_axis.set_major_locator(mdates.HourLocator(interval=1))
    x_axis.set_major_formatter(mdates.DateFormatter('%H:%M'))

    plt.title('Timeline of Events')
    plt.xlabel('Time')
    plt.yticks([])  # all bars share y=0, so the y-axis carries no information
    plt.grid(axis='x', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.show()
plot_timeline(events)

Incorrect Timeline

(EDIT) : print output

start : 2024-09-12 21:59:00, end : 2024-09-13 05:09:00, type : Stop
start : 2024-09-13 01:26:00, end : 2024-09-13 02:00:00, type : Farming
start : 2024-09-13 02:00:00, end : 2024-09-13 02:35:00, type : Active
start : 2024-09-13 02:35:00, end : 2024-09-13 06:11:00, type : Farming
start : 2024-09-13 02:35:00, end : 2024-09-13 06:11:00, type : Farming
start : 2024-09-13 06:11:00, end : 2024-09-13 07:27:00, type : Active
start : 2024-09-13 07:27:00, end : 2024-09-13 07:47:00, type : Stop
start : 2024-09-13 07:47:00, end : 2024-09-13 08:00:00, type : Active
start : 2024-09-13 08:00:00, end : 2024-09-13 08:06:00, type : Stop
start : 2024-09-13 08:06:00, end : 2024-09-13 08:07:00, type : Active
start : 2024-09-13 08:07:00, end : 2024-09-13 08:16:00, type : Stop
start : 2024-09-13 08:16:00, end : 2024-09-13 08:18:00, type : Active
start : 2024-09-13 08:18:00, end : 2024-09-13 08:45:00, type : Stop
start : 2024-09-13 08:45:00, end : 2024-09-13 09:00:00, type : Active
start : 2024-09-13 09:00:00, end : 2024-09-13 09:31:00, type : Stop
start : 2024-09-13 09:31:00, end : 2024-09-13 10:24:00, type : Active
start : 2024-09-13 10:24:00, end : 2024-09-13 10:32:00, type : Stop
start : 2024-09-13 10:32:00, end : 2024-09-13 10:37:00, type : Active
start : 2024-09-13 10:37:00, end : 2024-09-13 16:29:00, type : Stop
start : 2024-09-13 16:29:00, end : 2024-09-13 16:34:00, type : Active

Solution

  • picture

    Using Rectangles turns out to be easier: the width of a Rectangle can be given directly as a timedelta (d2-d1), so no manual unit conversion is needed.

    The first part of the code only builds a sort-of data frame from the sample data and is unimportant; the function put_r is a bit more interesting.

    Note also that adding rectangles to a plot doesn't change the axis limits, but you already use plt.xlim()...

    Finally, I plotted a few scatter points, placed just outside the visible x-range, so that a legend can be drawn.

    import matplotlib.pyplot as plt
    import matplotlib.dates as md
    from matplotlib.patches import Rectangle
    from datetime import datetime, timedelta
    
    # Raw event table, one event per line:
    # index, start day, start time, end day, end time, event type.
    # The literal opens with exactly three quotes; a fourth one would become
    # part of the first row's index field.
    data = '''\
    0   2024-09-12 21:59:00     2024-09-13 05:09:00     Stop
    1   2024-09-13 01:26:00     2024-09-13 02:00:00     Farming
    2   2024-09-13 02:00:00     2024-09-13 02:35:00     Active
    3   2024-09-13 02:35:00     2024-09-13 06:11:00     Farming
    4   2024-09-13 02:35:00     2024-09-13 06:11:00     Farming
    5   2024-09-13 06:11:00     2024-09-13 07:27:00     Active
    6   2024-09-13 07:27:00     2024-09-13 07:47:00     Stop
    7   2024-09-13 07:47:00     2024-09-13 08:00:00     Active
    8   2024-09-13 08:00:00     2024-09-13 08:06:00     Stop
    9   2024-09-13 08:06:00     2024-09-13 08:07:00     Active
    10  2024-09-13 08:07:00     2024-09-13 08:16:00     Stop
    11  2024-09-13 08:16:00     2024-09-13 08:18:00     Active
    12  2024-09-13 08:18:00     2024-09-13 08:45:00     Stop
    13  2024-09-13 08:45:00     2024-09-13 09:00:00     Active
    14  2024-09-13 09:00:00     2024-09-13 09:31:00     Stop
    15  2024-09-13 09:31:00     2024-09-13 10:24:00     Active
    16  2024-09-13 10:24:00     2024-09-13 10:32:00     Stop
    17  2024-09-13 10:32:00     2024-09-13 10:37:00     Active
    18  2024-09-13 10:37:00     2024-09-13 16:29:00     Stop
    19  2024-09-13 16:29:00     2024-09-13 16:34:00     Active'''
    # Split every line on whitespace and transpose the rows into columns,
    # giving a dict that maps each column name to a tuple of strings.
    data = dict(zip(
        'index start_day start_time end_day end_time type'.split(),
        zip(*[line.split() for line in data.split('\n')])
    ))
    
    def _as_datetimes(days, times):
        """Join matching day/time strings and parse each pair as an ISO datetime."""
        return [datetime.fromisoformat(f'{day} {time}')
                for day, time in zip(days, times)]
    
    data['start'] = _as_datetimes(data['start_day'], data['start_time'])
    data['end'] = _as_datetimes(data['end_day'], data['end_time'])
    
    # Fill colour for each event type.
    color = {'Active':  'lightblue',
             'Idling':  'red',
             'Stop':    'lightgrey',
             'Farming': 'green'}
    
    def put_r(d1, d2, color):
        """Add a unit-height rectangle spanning d1..d2 to the current axes.

        d1 and d2 are datetimes; the width is passed as their difference
        (a timedelta), so no manual conversion to axis units is done here.
        """
        plt.gca().add_patch(
            Rectangle((d1, 0), d2-d1, 1,
                      linewidth=0.5, edgecolor='k',
                      facecolor=color, alpha=0.8))
    
    fig, ax = plt.subplots(figsize=(11,4), layout='constrained', edgecolor='k')
    # Adding patches does not update the axis limits, so they are set explicitly.
    plt.xlim(datetime(2024, 9, 13, 4), datetime(2024, 9, 13, 23))
    for d1, d2, t in zip(data['start'], data['end'], data['type']):
        print(d1, d2, t.rjust(8), color[t])
        put_r(d1, d2, color[t])
    
    # One scatter point per event type, placed one minute before the left
    # x-limit, purely so that the legend gets an entry for each colour.
    for t, c in color.items():
        ax.scatter(datetime(2024, 9, 13, 4)-timedelta(minutes=1), 0, color=c, label=t)
    
    plt.title('Timeline of Events')
    
    plt.xlabel('Time')
    # One tick per hour, labelled as HH:MM.
    ax.xaxis.set_major_locator(md.HourLocator(interval=1))
    ax.xaxis.set_major_formatter(md.DateFormatter('%H:%M'))
    
    ax.set_yticks([])
    ax.set_ylim(0, 1)  # matches the rectangles, which span y = 0..1
    plt.legend()
    plt.show()