Search code examples
pythongraphdashboard

How to graph some events with time on x-axis with plotly express?


I'm trying to build a dashboard using plotly dash and I have data that looks like this :

data

Here is the data as text:

# Sample event log: six rows, all for the same SECTOR / NAME / POINT_NAME.
# Each of the six event-flag columns holds one non-zero code (1-6) in the
# row where that event occurs and 0 everywhere else.
data = {
    "SECTOR": ["KHN"] * 6,
    "NAME": ["ELSILATE"] * 6,
    "TIME": ["4:00", "4:25", "4:45", "5:03", "6:00", "7:00"],
    "POINT_NAME": ["ZERAEIN"] * 6,
    "MESSAGE": [
        "Change Status",
        "Operator Control",
        "Return to Normal",
        "Operator Control",
        "Return to Normal",
        "Return to Normal",
    ],
    "VALUE": ["OPEN", "CLOSE", "NORMAL", "OPEN", "NORMAL", "CLOSE"],
    "ch_open": [1, 0, 0, 0, 0, 0],
    "ch_close": [0, 2, 0, 0, 0, 0],
    "normal_open": [0, 0, 3, 0, 0, 0],
    "command_open": [0, 0, 0, 4, 0, 0],
    "command_close": [0, 0, 0, 0, 5, 0],
    "normal_close": [0, 0, 0, 0, 0, 6],
}

df_cb = pd.DataFrame(data)

I used pandas to assign a number to every event. I want to plot time versus the event (open, close, normal, control, etc.) for every sector, name, and point_name.

I managed to get it like this

output

using code I found on the internet, but I can't think of a way to show time on the x-axis.

Here is the code:

import matplotlib.pyplot as plt
import numpy as np

#for a specific line cb :


# Keep only the rows whose POINT_NAME contains 'ZERAEIN'.
df_ex = df_cb.loc[df_cb['POINT_NAME'].str.contains('ZERAEIN')]

# Pull each event-flag column out of the filtered frame as a plain list.
(ch_open, ch_close, normal_open,
 normal_close, command_open, command_close) = (
    list(df_ex[column])
    for column in ("ch_open", "ch_close", "normal_open",
                   "normal_close", "command_open", "command_close")
)

# One inner list per event type; the plotting loop walks them in this order.
data = [ch_open, ch_close, normal_open, normal_close, command_open, command_close]

# Single axes with the y-axis hidden and a 1:1 aspect ratio, so every
# unit-wide, unit-high event cell is drawn as a square.
fig, ax = plt.subplots()
ax.yaxis.set_visible(False)
ax.set_aspect(1)

def avg(a, b):
    """Return the arithmetic mean of *a* and *b* as a float."""
    total = a + b
    return total / 2.0

# Event code -> (fill colour, label drawn in the middle of the cell).
event_styles = {
    1: ('yellow', "A"),
    2: ('red', "B"),
    3: ('orange', "C"),
    4: ('brown', "D"),
    5: ('green', "E"),
    6: ('black', "F"),
}

# Every event list is drawn into the same 0..1 band; only the position
# inside the list (x) decides where a cell lands horizontally.
for row in data:
    for x, col in enumerate(row):
        style = event_styles.get(col)
        if style is None:
            # Code 0 (or any code without a style) draws nothing.
            continue
        color, label = style
        plt.fill_between([x, x + 1], [0, 0], y2=[1, 1], color=color)
        plt.text(avg(x, x + 1), avg(0, 1), label,
                 horizontalalignment='center',
                 verticalalignment='center')

# Limits are given as (1, 0), i.e. the y-axis is inverted.
plt.ylim(1, 0)
plt.show()

It would be nice to have it like this, with time shown on the x-axis:

output


Solution

  • I convert TIME to datetime

    df_ex['TIME'] = pd.to_datetime(df_ex['TIME'])
    

    Later I use shift(-1) to get the time from the next row into the current row as TIME_END.

    df_ex['TIME_END'] = df_ex['TIME'].shift(-1)
    

    I also need to put some value in the last 'TIME_END' instead of NaT.

    # last_index is the index label of the final row (df_ex.index[-1] in the
    # full code below); the last event gets an end time 25 minutes after its start.
    df_ex.loc[last_index, 'TIME_END'] = df_ex.loc[last_index, 'TIME'] + dt.timedelta(minutes=25)
    

    This way I have start and end in one row and I can use them to draw rectangles.

    # Draw one filled rectangle per row: TIME..TIME_END on x, 0..1 on y.
    for index, row in df_ex.iterrows():
    
        x = [row['TIME'], row['TIME_END']]
        y1 = [0, 0]
        y2 = [1, 1]
            
        # `color` is chosen per row from row['VALUE'] in the full code below.
        ax.fill_between(x, y1, y2=y2, color=color)
    

    I also use if/else to set different color for different VALUE.


    Full working code:

    import pandas as pd
    import matplotlib.pyplot as plt
    import datetime as dt
    
    # Sample event log: six rows, all for the same SECTOR / NAME / POINT_NAME.
    data = {
        "SECTOR": ["KHN"] * 6,
        "NAME": ["ELSILATE"] * 6,
        "TIME": ["4:00", "4:25", "4:45", "5:03", "6:00", "7:00"],
        "POINT_NAME": ["ZERAEIN"] * 6,
        "MESSAGE": [
            "Change Status",
            "Operator Control",
            "Return to Normal",
            "Operator Control",
            "Return to Normal",
            "Return to Normal",
        ],
        "VALUE": ["OPEN", "CLOSE", "NORMAL", "OPEN", "NORMAL", "CLOSE"],
    }
    df_cb = pd.DataFrame(data)

    # Work on an independent copy of the rows whose POINT_NAME contains
    # 'ZERAEIN', so the column assignments below do not touch df_cb.
    mask = df_cb['POINT_NAME'].str.contains('ZERAEIN')
    df_ex = df_cb.loc[mask].copy()

    # Parse the time strings into datetimes.
    df_ex['TIME'] = pd.to_datetime(df_ex['TIME'])

    # An event ends where the next one starts: pull the next row's TIME up.
    df_ex['TIME_END'] = df_ex['TIME'].shift(periods=-1)

    # The shift leaves NaT in the final row, so give the last event an end
    # 25 minutes after its start. Assigning through .loc on the frame avoids
    # the set-value-on-copy warning that chained `.iloc[-1] = ...` raised.
    last_index = df_ex.index[-1]
    df_ex.loc[last_index, 'TIME_END'] = df_ex.at[last_index, 'TIME'] + dt.timedelta(minutes=25)
    
    # --- plot ---
    
    fig, ax = plt.subplots(1, figsize=(16,3))

    # Fill colour per VALUE; anything not listed falls back to yellow.
    value_colors = {'OPEN': 'green', 'CLOSE': 'red'}

    # One rectangle per event: TIME..TIME_END on x, the full 0..1 band on y,
    # with the VALUE text centred inside it.
    for index, row in df_ex.iterrows():
        start, end = row['TIME'], row['TIME_END']
        color = value_colors.get(row['VALUE'], 'yellow')

        ax.fill_between([start, end], [0, 0], y2=[1, 1], color=color)

        # Midpoint computed as start + half the duration, which works for
        # datetime values (two timestamps cannot be added directly).
        middle = start + (end - start)/2
        ax.text(middle, 0.5, row['VALUE'], horizontalalignment='center', verticalalignment='center')

    plt.show()
    

    enter image description here

    On the X-axis it displays the time together with the date/day (because TIME can fall on different days). Showing only the time would require changing the xticks to set different labels - but I skip this problem here.


    If you use different values y1 y2 for different VALUE then you can get

    enter image description here

    # Variant of the plotting loop: each VALUE gets its own colour and its
    # own horizontal band (y .. y+1), so the rectangles sit in separate lanes.
    # Expects `df_ex` (with TIME / TIME_END / VALUE columns) and `ax` from the
    # full code above.
    for index, row in df_ex.iterrows():
        #print(index, row)

        # Colour and lane per VALUE; anything else is yellow in lane 0.
        if row['VALUE'] == 'OPEN':
            color = 'green'
            y = 1
        elif row['VALUE'] == 'CLOSE':
            color = 'red'
            y = 2
        else:
            color = 'yellow'
            y = 0

        x = [row['TIME'], row['TIME_END']]
        y1 = [y, y]
        y2 = [y+1, y+1]

        ax.fill_between(x, y1, y2=y2, color=color)

        # Centre of the rectangle, used to place the VALUE label.
        center_x = x[0] + (x[1] - x[0])/2
        center_y = (y2[0] + y1[0]) / 2
        #print(center_x, center_y)

        ax.text(center_x, center_y, row['VALUE'], horizontalalignment='center', verticalalignment='center')
    

    BTW:

    Meanwhile, I realized that this type of chart is called a Gantt chart. Searching Google for that term, I found some interesting results using barh or broken_barh, but those examples needed to convert time to a number of days or seconds and do some additional calculations.

    See these articles - but they may require you to log in to the portal.

    Gantt charts with Python’s Matplotlib | by Thiago Carvalho | Towards Data Science

    enter image description here

    Full code: https://gist.github.com/Thiagobc23/ad0f228dd8a6b1c9a9e148f17de5b4b0

    Create an Advanced Gantt Chart in Python | by Abhijith Chandradas | Geek Culture | Medium