Search code examples
pythonpandasfunctionparameter-passingplotly-dash

Pass Pandas Dataframe between functions using Upload component in Plotly Dash


I am working with an Excel workbook in plotly dash and I need to access the dataframe it returns so I can use it as an input to another function, I'm following this tutorial - https://dash.plotly.com/dash-core-components/upload

I've tried a couple of approaches, per this solution here - Is it possible to upload a csv file in Dash and also store it as a pandas DataFrame?

but neither are working. When I set df as a global variable, which i also know is not good practice, I'm getting an error in the app that it is not defined NameError: name 'df' is not defined

I've also tried to pass the df variable between the functions but am unclear on how to access it when the inputs to the function parse_contents are all coming from the dash html component.

Here is my current code, you should be able to execute it with any excel workbook.

import base64
import datetime
import io

import dash
from dash.dependencies import Input, Output, State
from dash import dcc, html, dash_table

import pandas as pd

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

app.layout = html.Div([
    dcc.Upload(
        id='upload-data',
        children=html.Div([
            'Drag and Drop or ',
            html.A('Select Files')
        ]),
        style={
            'width': '100%',
            'height': '60px',
            'lineHeight': '60px',
            'borderWidth': '1px',
            'borderStyle': 'dashed',
            'borderRadius': '5px',
            'textAlign': 'center',
            'margin': '10px'
        },
        # Allow multiple files to be uploaded
        multiple=True
    ),
    html.Div(id='output-data-upload'),
])

def parse_contents(contents, filename, date):
    content_type, content_string = contents.split(',')

    global df #define data frame as global

    decoded = base64.b64decode(content_string)
    try:
        if 'csv' in filename:
            # Assume that the user uploaded a CSV file
            df = pd.read_csv(
                io.StringIO(decoded.decode('utf-8')))
        elif 'xls' in filename:
            # Assume that the user uploaded an excel file
            print(io.BytesIO(decoded))
            workbook_xl = pd.ExcelFile(io.BytesIO(decoded))
            df = pd.read_excel(workbook_xl, sheet_name=0)
            # print(df)
    except Exception as e:
        print(e)
        return html.Div([
            'There was an error processing this file.'
        ])

    return html.Div([
        html.H5(filename),
        html.H6(datetime.datetime.fromtimestamp(date)),

        dash_table.DataTable(
            df.to_dict('records'),
            [{'name': i, 'id': i} for i in df.columns]
        ),

        html.Hr(),  # horizontal line

        # For debugging, display the raw contents provided by the web browser
        html.Div('Raw Content'),
        html.Pre(contents[0:200] + '...', style={
            'whiteSpace': 'pre-wrap',
            'wordBreak': 'break-all'
        })
    ]), df

@app.callback(Output('output-data-upload', 'children'),
              Input('upload-data', 'contents'),
              State('upload-data', 'filename'),
              State('upload-data', 'last_modified'))

def update_output(list_of_contents, list_of_names, list_of_dates):
    print(df)
    if list_of_contents is not None:
        children = [
            parse_contents(c, n, d) for c, n, d in
            zip(list_of_contents, list_of_names, list_of_dates)]
        return children

if __name__ == '__main__':
    app.run_server(debug=True)

Solution

  • After tireless googling and reading message board posts I realized this is basically impossible, at least the way I was thinking about it initially.

    Although it may be technically possible to pass a dataframe between the functions is is very difficult and highly discourages in this framework as it needs to interact heavily with javascript on the front end.

    The best way to achieve the result of passing a dataframe in Dash is using the dcc.Store component which stores the data as JSON in the browser - https://dash.plotly.com/dash-core-components/store

    You can then return the data in a html.Div and pass it to a call back later in the code. To achieve this I made a few changes to my code. First adding a dcc.Store component in the app layout:

    app.layout = html.Div([ # this code section taken from Dash docs https://dash.plotly.com/dash-core-components/upload
        dcc.Store(id='stored-data', storage_type='session'),
        dcc.Upload(
            id='upload-data',
            children=html.Div([
                'Drag and Drop or ',
                html.A('Select Files')
            ]),
            style={
                'width': '100%',
                'height': '60px',
                'lineHeight': '60px',
                'borderWidth': '1px',
                'borderStyle': 'dashed',
                'borderRadius': '5px',
                'textAlign': 'center',
                'margin': '10px'
            },
            # Allow multiple files to be uploaded
            multiple=True
        ),
        html.Div(id='output-div'),
        html.Div(id='output-datatable'),
    ])
    

    Next you should return the stored-data in a html.Div from the parse_contents() fuction:

        return html.Div([
            html.H5(filename),
            html.H6(datetime.datetime.fromtimestamp(date)),
            
            dash_table.DataTable(
                data=df.to_dict('records'),
                columns=[{'name': i, 'id': i} for i in df.columns],
                page_size=15
            ),
            dcc.Store(id='stored-data', data=df.to_dict('records')),
            
            html.Hr(),  # horizontal line
    
            # For debugging, display the raw contents provided by the web browser
            html.Div('Raw Content'),
            html.Pre(contents[0:200] + '...', style={
                'whiteSpace': 'pre-wrap',
                'wordBreak': 'break-all'
            })
        ])
    

    Finally, write a new callback that will pass the new data as a JSON object and convert it to a dataframe there:

    @app.callback(Output('output-div', 'children'),
                  Input('stored-data','data'))
    
    def make_graphs(data):
        # bar_fig = px.bar(data, x=data['variable'], y=data['value'])
        print(data)
        df_agg = pd.DataFrame(data)
        print(df_agg)
        return #dcc.Graph(figure=bar_fig)
    

    Full working code here -

    import base64
    import datetime
    import io
    
    import dash
    from dash.dependencies import Input, Output, State
    import dash_core_components as dcc
    import dash_html_components as html
    import dash_table
    import plotly.express as px
    
    import pandas as pd
    from read_workbook import *
    
    suppress_callback_exceptions=True
    
    external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
    
    app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
    
    app.layout = html.Div([ # this code section taken from Dash docs https://dash.plotly.com/dash-core-components/upload
        dcc.Store(id='stored-data', storage_type='session'),
        dcc.Upload(
            id='upload-data',
            children=html.Div([
                'Drag and Drop or ',
                html.A('Select Files')
            ]),
            style={
                'width': '100%',
                'height': '60px',
                'lineHeight': '60px',
                'borderWidth': '1px',
                'borderStyle': 'dashed',
                'borderRadius': '5px',
                'textAlign': 'center',
                'margin': '10px'
            },
            # Allow multiple files to be uploaded
            multiple=True
        ),
        html.Div(id='output-div'),
        html.Div(id='output-datatable'),
    ])
    
    def parse_contents(contents, filename, date):
        content_type, content_string = contents.split(',')
        
        decoded = base64.b64decode(content_string)
        try:
            workbook_xl = pd.ExcelFile(io.BytesIO(decoded))
    
            #aggregates all months data into a single data frame
            def get_all_months(workbook_xl):
                months = ['July', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'June']
                xl_file = pd.ExcelFile('data/DP Beh Data 2022-23.xlsm')
                months_data = []
                for month in months:
                    months_data.append(get_month_dataframe(xl_file, month))
                return pd.concat(months_data)
            
            df = get_all_months(workbook_xl)
        
        except Exception as e:
            print(e)
            return html.Div([
                'There was an error processing this file.'
            ])
    
        return html.Div([
            html.H5(filename),
            html.H6(datetime.datetime.fromtimestamp(date)),
            
            dash_table.DataTable(
                data=df.to_dict('records'),
                columns=[{'name': i, 'id': i} for i in df.columns],
                page_size=15
            ),
            dcc.Store(id='stored-data', data=df.to_dict('records')),
            
            html.Hr(),  # horizontal line
    
            # For debugging, display the raw contents provided by the web browser
            html.Div('Raw Content'),
            html.Pre(contents[0:200] + '...', style={
                'whiteSpace': 'pre-wrap',
                'wordBreak': 'break-all'
            })
        ])
    
    
    @app.callback(Output('output-datatable', 'children'),
                  Input('upload-data', 'contents'),
                  State('upload-data', 'filename'),
                  State('upload-data', 'last_modified'))
    
    def update_output(list_of_contents, list_of_names, list_of_dates):
        
        if list_of_contents is not None:
            children = [
                parse_contents(c, n, d) for c, n, d in
                zip(list_of_contents, list_of_names, list_of_dates)]
            return children
    
    
    @app.callback(Output('output-div', 'children'),
                  Input('stored-data','data'))
    
    def make_graphs(data):
        # bar_fig = px.bar(data, x=data['variable'], y=data['value'])
        print(data)
        df_agg = pd.DataFrame(data)
        print(df_agg)
        return #dcc.Graph(figure=bar_fig)
        
    if __name__ == '__main__':
        app.run_server(debug=True)
    

    I hope this helps some other poor soul with this issue as I know from my search for the answer that a lot of folks have ran into this.