Search code examples
pythonplotly-dash

How to show size of dataset in Dash?


I have the dataset shown below. The user can choose a time interval (in minutes) with a slider. I would like to display the size of the dataset at the top of the web page. When the user changes the time interval, the displayed size should update dynamically, together with the number of data points that have been removed.

import random

import numpy as np
import pandas as pd
import plotly.express as px
from dash import Dash, Input, Output, dash_table, dcc, html, no_update
from plotly.subplots import make_subplots

# Synthetic demo data: one row every 20 minutes from 2022-11-01 00:00 to 2022-11-06 23:00.
df = pd.DataFrame({'DATE_TIME': pd.date_range('2022-11-01', '2022-11-06 23:00:00', freq='20min')})
# Size the random columns from the frame itself instead of a hard-coded row
# count, so changing the date range cannot cause a length mismatch.
df['ID'] = [random.randrange(1, 3) for _ in range(len(df))]
df['VALUE1'] = [random.uniform(110, 160) for _ in range(len(df))]
df['VALUE2'] = [random.uniform(50, 80) for _ in range(len(df))]
df['INSPECTION'] = df['DATE_TIME'].dt.day

# MODE is 'A' on day 1, 'B' on days 2-3 and 'C' on every other day.
df['MODE'] = np.select([df['INSPECTION'] == 1, df['INSPECTION'].isin([2, 3])], ['A', 'B'], 'C')
# Time of day as an 'HH:MM:SS' string.
df['TIME'] = df['DATE_TIME'].dt.time.astype('str')
# Gap to the previous row, in minutes, as a plain float column. Dividing by a
# one-minute Timedelta replaces astype('timedelta64[m]'), which recent pandas
# releases reject. The first row has no predecessor, so its gap is set to 0.
df['TIMEINTERVAL'] = (df['DATE_TIME'].diff() / pd.Timedelta(minutes=1)).fillna(0)



def to_day_period(s):
    """Label each time-of-day value in *s* as 'Nighttime' or 'Daytime'.

    *s* is converted with ``pd.to_timedelta`` and cut into left-closed bins:
    00:00-06:00 and 18:00-24:00 are 'Nighttime', 06:00-18:00 is 'Daytime'.
    Returns whatever ``pd.cut`` produces for that input.
    """
    # (left edge, label) of every bin; the closing edge is appended below.
    periods = [
        ('0', 'Nighttime'),
        ('06:00:00', 'Daytime'),
        ('13:00:00', 'Daytime'),
        ('18:00:00', 'Nighttime'),
        ('23:00:00', 'Nighttime'),
    ]
    edges = [pd.Timedelta(start) for start, _ in periods]
    edges.append(pd.Timedelta('24:00:00'))
    names = [label for _, label in periods]

    offsets = pd.to_timedelta(s)
    # ordered=False is required because the labels repeat.
    return pd.cut(offsets, bins=edges, labels=names, right=False, ordered=False)


# Tag every row with its day period; the callback filters on this column.
df['TIME_OF_DAY'] = to_day_period(df['TIME'])
app = Dash(__name__)


def _interval_slider(slider_id):
    """Build a 0-600 range slider (step 10, labelled tick every 50) with the full range selected."""
    return dcc.RangeSlider(
        id=slider_id,
        min=0, max=600, step=10,
        marks={tick: str(tick) for tick in range(0, 601, 50)},
        value=[0, 600],
    )


# Page structure: title, ID dropdown, one slider per day period, the graph,
# and an (initially empty) output container.
app.layout = html.Div([
    html.H4('Interactive Scatter Plot'),
    html.P("Select ID:"),
    dcc.Dropdown(df.ID.unique(), id='pandas-dropdown-1'),  # for choosing ID
    html.P("Filter by time interval during nighttime (18:00-6:00):"),
    _interval_slider('range-slider-night'),
    html.P("Filter by time interval during daytime (6:00-18:00):"),
    _interval_slider('range-slider-morning'),
    dcc.Graph(id="scatter-plot", style={'width': '130vh', 'height': '80vh'}),
    html.Div(id='dd-output-container'),
])


@app.callback(
    Output("scatter-plot", "figure"),
    Input("pandas-dropdown-1", "value"),
    Input("range-slider-night", "value"),
    Input("range-slider-morning", "value"),
    prevent_initial_call=True)
def update_lineplot(value, slider_range_night, slider_range_morning):
    """Redraw the faceted line plot for the selected ID and interval ranges.

    Args:
        value: ID chosen in the dropdown.
        slider_range_night: ``[low, high]`` TIMEINTERVAL bounds applied to
            'Nighttime' rows (low inclusive, high exclusive).
        slider_range_morning: ``[low, high]`` TIMEINTERVAL bounds applied to
            'Daytime' rows (low inclusive, high exclusive).

    Returns:
        The new figure, or ``no_update`` when no row matches the filters.
    """
    low_night, high_night = slider_range_night
    low_morning, high_morning = slider_range_morning
    # The period names must match the labels produced by to_day_period
    # ('Nighttime' / 'Daytime'); any other spelling selects nothing.
    df_night = df.query(
        "(ID == @value) & (TIME_OF_DAY == 'Nighttime') & "
        "(TIMEINTERVAL >= @low_night) & (TIMEINTERVAL < @high_night)"
    ).copy()
    df_morning = df.query(
        "(ID == @value) & (TIME_OF_DAY == 'Daytime') & "
        "(TIMEINTERVAL >= @low_morning) & (TIMEINTERVAL < @high_morning)"
    ).copy()
    df1 = pd.concat([df_night, df_morning], axis=0).sort_values(['DATE_TIME'])

    # Nothing selected: leave the current figure as it is.
    if df1.empty:
        return no_update

    # One facet per inspection day, two facets per row.
    fig = px.line(df1, x="DATE_TIME", y=["VALUE1", "VALUE2"],
                  facet_col='INSPECTION',
                  facet_col_wrap=2,
                  symbol='MODE', hover_data=['TIMEINTERVAL'],
                  facet_row_spacing=0.1,
                  facet_col_spacing=0.09)

    # Give each facet its own x-axis range and keep its tick labels visible.
    fig.update_xaxes(matches=None, showticklabels=True)

    return fig

# Start the development server only when this file is executed directly, so
# importing the module does not launch it. `app.run` is the entry point the
# current Dash documentation uses in place of the older `run_server`.
if __name__ == "__main__":
    app.run(debug=True, use_reloader=False)




I know that I can do such a task in Flask, as shown in this link. But how can I adapt that solution to Dash?


Solution

  • Please try it out and tell me whether this is what you are trying to do:

    import random
    import pandas as pd
    import numpy as np
    import plotly.express as px
    from plotly.subplots import make_subplots
    from dash import Dash, html, dcc, Input, Output,dash_table, no_update
    
    # Synthetic demo data: one row every 20 minutes from 2022-11-01 00:00 to 2022-11-06 23:00.
    df = pd.DataFrame({'DATE_TIME': pd.date_range('2022-11-01', '2022-11-06 23:00:00', freq='20min')})
    # Derive the number of random samples from the frame rather than repeating
    # a literal row count that must be kept in sync with the date range.
    df['ID'] = [random.randrange(1, 3) for _ in range(len(df))]
    df['VALUE1'] = [random.uniform(110, 160) for _ in range(len(df))]
    df['VALUE2'] = [random.uniform(50, 80) for _ in range(len(df))]
    df['INSPECTION'] = df['DATE_TIME'].dt.day

    # MODE is 'A' on day 1, 'B' on days 2-3 and 'C' on every other day.
    df['MODE'] = np.select([df['INSPECTION'] == 1, df['INSPECTION'].isin([2, 3])], ['A', 'B'], 'C')
    # Time of day as an 'HH:MM:SS' string.
    df['TIME'] = df['DATE_TIME'].dt.time.astype('str')
    # Minutes elapsed since the previous row, stored as floats. Division by a
    # one-minute Timedelta is used because astype('timedelta64[m]') is rejected
    # by recent pandas releases. The first row has no predecessor, hence 0.
    df['TIMEINTERVAL'] = (df['DATE_TIME'].diff() / pd.Timedelta(minutes=1)).fillna(0)
    
    
    
    def to_day_period(s):
        """Classify time-of-day values as 'Nighttime' or 'Daytime'.

        The input is passed through ``pd.to_timedelta`` and cut at 00:00,
        06:00, 13:00, 18:00, 23:00 and 24:00 using left-closed bins; the bins
        before 06:00 and from 18:00 onwards are 'Nighttime', the rest 'Daytime'.
        """
        boundaries = ('0', '06:00:00', '13:00:00', '18:00:00', '23:00:00', '24:00:00')
        period_names = ['Nighttime', 'Daytime', 'Daytime', 'Nighttime', 'Nighttime']

        cut_points = [pd.Timedelta(mark) for mark in boundaries]
        elapsed = pd.to_timedelta(s)

        # Repeated labels are only accepted with ordered=False.
        return pd.cut(
            elapsed,
            bins=cut_points,
            labels=period_names,
            right=False,
            ordered=False,
        )
    
    
    # Tag every row with its day period; the callback filters on this column.
    df['TIME_OF_DAY'] = to_day_period(df['TIME'])
    app = Dash(__name__)


    def _interval_slider(slider_id):
        """Build a 0-600 range slider (step 10, labelled tick every 50) with the full range selected."""
        return dcc.RangeSlider(
            id=slider_id,
            min=0, max=600, step=10,
            marks={tick: str(tick) for tick in range(0, 601, 50)},
            value=[0, 600],
        )


    # The 'size' Div starts with the full dataset size and is overwritten by
    # the callback; 'pre-wrap' keeps the newlines of that text visible.
    _size_banner = html.Div(
        'Size of dataset = '+str(len(df)),
        id='size',
        style={'whiteSpace': 'pre-wrap'},
    )

    app.layout = html.Div([
        html.H4('Interactive Scatter Plot'),
        _size_banner,
        html.P("Select ID:"),
        dcc.Dropdown(df.ID.unique(), id='pandas-dropdown-1'),  # for choosing ID
        html.P("Filter by time interval during nighttime (18:00-6:00):"),
        _interval_slider('range-slider-night'),
        html.P("Filter by time interval during daytime (6:00-18:00):"),
        _interval_slider('range-slider-morning'),
        dcc.Graph(id="scatter-plot", style={'width': '130vh', 'height': '80vh'}),
        html.Div(id='dd-output-container'),
    ])
    
    
    @app.callback(
        Output("scatter-plot", "figure"),
        Output("size", "children"),
        Input("pandas-dropdown-1", "value"),
        Input("range-slider-night", "value"),
        Input("range-slider-morning", "value"),
        prevent_initial_call=True)
    def update_lineplot(value, slider_range_night, slider_range_morning):
        """Redraw the plot and refresh the dataset-size summary.

        Args:
            value: ID chosen in the dropdown.
            slider_range_night: ``[low, high]`` TIMEINTERVAL bounds applied to
                'Nighttime' rows (low inclusive, high exclusive).
            slider_range_morning: ``[low, high]`` TIMEINTERVAL bounds applied to
                'Daytime' rows (low inclusive, high exclusive).

        Returns:
            A ``(figure, text)`` pair for the two outputs. When no row matches,
            the figure slot is ``no_update`` but the text is still returned, so
            the displayed counts always reflect the current filters.
        """
        low_night, high_night = slider_range_night
        low_morning, high_morning = slider_range_morning
        df_night = df.query("(ID == @value) & (TIME_OF_DAY == 'Nighttime') & (TIMEINTERVAL >= @low_night) & (TIMEINTERVAL < @high_night)").copy()
        df_morning = df.query("(ID == @value) & (TIME_OF_DAY == 'Daytime') & (TIMEINTERVAL >= @low_morning) & (TIMEINTERVAL < @high_morning)").copy()
        df1 = pd.concat([df_night, df_morning], axis=0).sort_values(['DATE_TIME'])

        # Total rows, rows kept by the filters, rows dropped, and the size of
        # each per-period subset.
        text = f"The size of dataset = {len(df)} \n Selected points = {len(df1)}, unselected points = {len(df) - len(df1)}, \n df_night = {len(df_night)}, df_morning = {len(df_morning)}"

        # Empty selection: keep the current figure, but still publish the
        # counts; otherwise the summary would go stale exactly when every
        # point has been filtered out.
        if df1.empty:
            return no_update, text

        # One facet per inspection day, two facets per row.
        fig = px.line(df1, x="DATE_TIME", y=["VALUE1", "VALUE2"],
                      facet_col='INSPECTION',
                      facet_col_wrap=2,
                      symbol='MODE', hover_data=['TIMEINTERVAL'],
                      facet_row_spacing=0.1,
                      facet_col_spacing=0.09)

        # Give each facet its own x-axis range and keep its tick labels visible.
        fig.update_xaxes(matches=None, showticklabels=True)

        return fig, text
        
    
    # Start the development server only when this file is executed directly, so
    # importing the module does not launch it. `app.run` is the entry point the
    # current Dash documentation uses in place of the older `run_server`.
    if __name__ == "__main__":
        app.run(debug=True, use_reloader=False)
    

    Output:

    enter image description here

    Here is how I calculate the sizes:

    # Reports len(df), len(df1), their difference, and the lengths of df_night and df_morning.
    text = f"The size of dataset = {len(df)} \n Selected points = {len(df1)}, unselected points = {len(df) - len(df1)}, \n df_night = {len(df_night)}, df_morning = {len(df_morning)}"