I am working with an Excel workbook in plotly dash and I need to access the dataframe it returns so I can use it as an input to another function, I'm following this tutorial - https://dash.plotly.com/dash-core-components/upload
I've tried a couple of approaches, per this solution here - Is it possible to upload a csv file in Dash and also store it as a pandas DataFrame?
but neither are working. When I set df
as a global variable, which i also know is not good practice, I'm getting an error in the app that it is not defined NameError: name 'df' is not defined
I've also tried to pass the df
variable between the functions but am unclear on how to access it when the inputs to the function parse_contents
are all coming from the dash html component.
Here is my current code, you should be able to execute it with any excel workbook.
import base64
import datetime
import io
import dash
from dash.dependencies import Input, Output, State
from dash import dcc, html, dash_table
import pandas as pd
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
app.layout = html.Div([
dcc.Upload(
id='upload-data',
children=html.Div([
'Drag and Drop or ',
html.A('Select Files')
]),
style={
'width': '100%',
'height': '60px',
'lineHeight': '60px',
'borderWidth': '1px',
'borderStyle': 'dashed',
'borderRadius': '5px',
'textAlign': 'center',
'margin': '10px'
},
# Allow multiple files to be uploaded
multiple=True
),
html.Div(id='output-data-upload'),
])
def parse_contents(contents, filename, date):
content_type, content_string = contents.split(',')
global df #define data frame as global
decoded = base64.b64decode(content_string)
try:
if 'csv' in filename:
# Assume that the user uploaded a CSV file
df = pd.read_csv(
io.StringIO(decoded.decode('utf-8')))
elif 'xls' in filename:
# Assume that the user uploaded an excel file
print(io.BytesIO(decoded))
workbook_xl = pd.ExcelFile(io.BytesIO(decoded))
df = pd.read_excel(workbook_xl, sheet_name=0)
# print(df)
except Exception as e:
print(e)
return html.Div([
'There was an error processing this file.'
])
return html.Div([
html.H5(filename),
html.H6(datetime.datetime.fromtimestamp(date)),
dash_table.DataTable(
df.to_dict('records'),
[{'name': i, 'id': i} for i in df.columns]
),
html.Hr(), # horizontal line
# For debugging, display the raw contents provided by the web browser
html.Div('Raw Content'),
html.Pre(contents[0:200] + '...', style={
'whiteSpace': 'pre-wrap',
'wordBreak': 'break-all'
})
]), df
@app.callback(Output('output-data-upload', 'children'),
Input('upload-data', 'contents'),
State('upload-data', 'filename'),
State('upload-data', 'last_modified'))
def update_output(list_of_contents, list_of_names, list_of_dates):
print(df)
if list_of_contents is not None:
children = [
parse_contents(c, n, d) for c, n, d in
zip(list_of_contents, list_of_names, list_of_dates)]
return children
if __name__ == '__main__':
app.run_server(debug=True)
After tireless googling and reading message board posts I realized this is basically impossible, at least the way I was thinking about it initially.
Although it may be technically possible to pass a dataframe between the functions is is very difficult and highly discourages in this framework as it needs to interact heavily with javascript on the front end.
The best way to achieve the result of passing a dataframe in Dash is using the dcc.Store
component which stores the data as JSON in the browser - https://dash.plotly.com/dash-core-components/store
You can then return
the data in a html.Div
and pass it to a call back later in the code. To achieve this I made a few changes to my code. First adding a dcc.Store
component in the app layout:
app.layout = html.Div([ # this code section taken from Dash docs https://dash.plotly.com/dash-core-components/upload
dcc.Store(id='stored-data', storage_type='session'),
dcc.Upload(
id='upload-data',
children=html.Div([
'Drag and Drop or ',
html.A('Select Files')
]),
style={
'width': '100%',
'height': '60px',
'lineHeight': '60px',
'borderWidth': '1px',
'borderStyle': 'dashed',
'borderRadius': '5px',
'textAlign': 'center',
'margin': '10px'
},
# Allow multiple files to be uploaded
multiple=True
),
html.Div(id='output-div'),
html.Div(id='output-datatable'),
])
Next you should return
the stored-data
in a html.Div
from the parse_contents()
fuction:
return html.Div([
html.H5(filename),
html.H6(datetime.datetime.fromtimestamp(date)),
dash_table.DataTable(
data=df.to_dict('records'),
columns=[{'name': i, 'id': i} for i in df.columns],
page_size=15
),
dcc.Store(id='stored-data', data=df.to_dict('records')),
html.Hr(), # horizontal line
# For debugging, display the raw contents provided by the web browser
html.Div('Raw Content'),
html.Pre(contents[0:200] + '...', style={
'whiteSpace': 'pre-wrap',
'wordBreak': 'break-all'
})
])
Finally, write a new callback that will pass the new data as a JSON object and convert it to a dataframe there:
@app.callback(Output('output-div', 'children'),
Input('stored-data','data'))
def make_graphs(data):
# bar_fig = px.bar(data, x=data['variable'], y=data['value'])
print(data)
df_agg = pd.DataFrame(data)
print(df_agg)
return #dcc.Graph(figure=bar_fig)
Full working code here -
import base64
import datetime
import io
import dash
from dash.dependencies import Input, Output, State
import dash_core_components as dcc
import dash_html_components as html
import dash_table
import plotly.express as px
import pandas as pd
from read_workbook import *
suppress_callback_exceptions=True
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
app.layout = html.Div([ # this code section taken from Dash docs https://dash.plotly.com/dash-core-components/upload
dcc.Store(id='stored-data', storage_type='session'),
dcc.Upload(
id='upload-data',
children=html.Div([
'Drag and Drop or ',
html.A('Select Files')
]),
style={
'width': '100%',
'height': '60px',
'lineHeight': '60px',
'borderWidth': '1px',
'borderStyle': 'dashed',
'borderRadius': '5px',
'textAlign': 'center',
'margin': '10px'
},
# Allow multiple files to be uploaded
multiple=True
),
html.Div(id='output-div'),
html.Div(id='output-datatable'),
])
def parse_contents(contents, filename, date):
content_type, content_string = contents.split(',')
decoded = base64.b64decode(content_string)
try:
workbook_xl = pd.ExcelFile(io.BytesIO(decoded))
#aggregates all months data into a single data frame
def get_all_months(workbook_xl):
months = ['July', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'June']
xl_file = pd.ExcelFile('data/DP Beh Data 2022-23.xlsm')
months_data = []
for month in months:
months_data.append(get_month_dataframe(xl_file, month))
return pd.concat(months_data)
df = get_all_months(workbook_xl)
except Exception as e:
print(e)
return html.Div([
'There was an error processing this file.'
])
return html.Div([
html.H5(filename),
html.H6(datetime.datetime.fromtimestamp(date)),
dash_table.DataTable(
data=df.to_dict('records'),
columns=[{'name': i, 'id': i} for i in df.columns],
page_size=15
),
dcc.Store(id='stored-data', data=df.to_dict('records')),
html.Hr(), # horizontal line
# For debugging, display the raw contents provided by the web browser
html.Div('Raw Content'),
html.Pre(contents[0:200] + '...', style={
'whiteSpace': 'pre-wrap',
'wordBreak': 'break-all'
})
])
@app.callback(Output('output-datatable', 'children'),
Input('upload-data', 'contents'),
State('upload-data', 'filename'),
State('upload-data', 'last_modified'))
def update_output(list_of_contents, list_of_names, list_of_dates):
if list_of_contents is not None:
children = [
parse_contents(c, n, d) for c, n, d in
zip(list_of_contents, list_of_names, list_of_dates)]
return children
@app.callback(Output('output-div', 'children'),
Input('stored-data','data'))
def make_graphs(data):
# bar_fig = px.bar(data, x=data['variable'], y=data['value'])
print(data)
df_agg = pd.DataFrame(data)
print(df_agg)
return #dcc.Graph(figure=bar_fig)
if __name__ == '__main__':
app.run_server(debug=True)
I hope this helps some other poor soul with this issue as I know from my search for the answer that a lot of folks have ran into this.