I have a column called Pizza Shops in my data that holds a count per state, ranging from 10k to over a million (the numbers are made up). For some reason, every bubble appears to be the right size, but they are all drawn in the same color (red).
My code
import plotly.graph_objects as go
import pandas as pd
import os
# Bubble map of pizza-shop counts per US state: one Scattergeo trace is added
# per bucket in `limits`, coloured with the matching entry of `colors`.
xl_path = "path to XLSX file"
df = pd.read_excel(open(xl_path, 'rb'), sheet_name='Data')
df.head()
# Divisor applied to the shop count to obtain the marker size.
scale = 5000
# (lower, upper) bounds of each bucket; colors[i] is used for limits[i].
limits = [(0,15000),(15000,50000),(50000,100000),(100000,500000),(500000,2000000)]
colors = ["red","orange","yellow","green","blue"]
# Hover text: state name plus its shop count, separated by an HTML line break.
df['Text'] = df['State'] + '<br>Number of Pizza Shops ' + (df['Pizza Shops']).astype(str)
fig = go.Figure()
# NOTE(review): the indentation was lost in this paste; presumably everything
# from `lim = ...` through the `fig.add_trace(...)` call is the loop body.
for i in range(len(limits)):
lim = limits[i]
# NOTE(review): a plain slice `df[a:b]` selects rows by position, not by the
# value in the 'Pizza Shops' column.
df_sub = df[lim[0]:lim[1]]
# NOTE(review): `locations` below is taken from the full `df`, while `text`
# and `size` are taken from `df_sub`.
fig.add_trace(go.Scattergeo(
locationmode = 'USA-states',
locations=df['State Code'],
text = df_sub['Text'],
marker = dict(
size = df_sub['Pizza Shops']/scale,
color = colors[i],
line_color='rgb(40,40,40)',
line_width=0.5,
sizemode = 'area'
),
name = '{0} - {1}'.format(lim[0],lim[1])))
fig.update_layout(
title_text = '2019 US Number of Pizza Shops<br>(Click legend to toggle traces)',
showlegend = True,
geo = dict(
scope = 'usa',
landcolor = 'rgb(217, 217, 217)',
)
)
fig.show()
Sample data:
| State | State Code | Pizza Shops |
----------------------------------------
Texas TX 13256
California CA 500235
Idaho ID 4000
.... .... .... and so on
The problem is that with `df_sub = df[lim[0]:lim[1]]` you are subsetting your data frame based on the row positions (indices), not on the number of shops. If your data frame has fewer than 15,000 rows, then all the data points will fall in the first bucket and will be coloured red.
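If you want to subset your data frame based on the number of shops, replace `df_sub = df[lim[0]:lim[1]]` with `df_sub = df[(df["Pizza Shops"] >= lim[0]) & (df["Pizza Shops"] < lim[1])]`. The `locations` argument should likewise be taken from `df_sub` rather than `df`, so that the locations, hover text and marker sizes of each trace refer to the same rows. A complete example: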
import plotly.graph_objects as go
import pandas as pd
# Self-contained example: draw one Scattergeo trace per shop-count bucket so
# that every bucket gets its own colour and its own legend entry.
df = pd.DataFrame({
    "State": ["Texas", "California", "Idaho", "Alabama", "Arizona", "Georgia", "Washington"],
    "State Code": ["TX", "CA", "ID", "AL", "AZ", "GA", "WA"],
    "Pizza Shops": [12500, 25000, 75000, 250000, 1000000, 15000, 100000],
})

# Hover text: state name plus its shop count, separated by an HTML line break.
df["Text"] = df["State"] + "<br>Number of Pizza Shops " + df["Pizza Shops"].astype(str)

# Divisor applied to the shop count to obtain the marker size.
scale = 2000

# Buckets on the number of shops; the filter below includes the lower bound
# and excludes the upper bound, so a value such as 15000 lands in exactly one
# bucket. `colors` is paired with `limits` position by position.
limits = [(0, 15000), (15000, 50000), (50000, 100000), (100000, 500000), (500000, 2000000)]
colors = ["red", "orange", "yellow", "green", "blue"]

fig = go.Figure()

for lim, color in zip(limits, colors):
    # Filter on the column *value*; a plain slice df[a:b] would select rows
    # by position instead.
    df_sub = df[(df["Pizza Shops"] >= lim[0]) & (df["Pizza Shops"] < lim[1])]

    # locations, text and size all come from the same subset so that they
    # stay aligned row for row.
    fig.add_trace(go.Scattergeo(
        locationmode="USA-states",
        locations=df_sub["State Code"],
        text=df_sub["Text"],
        marker=dict(
            size=df_sub["Pizza Shops"] / scale,
            color=color,
            line_color="rgb(40,40,40)",
            line_width=0.5,
            sizemode="area",
        ),
        name=f"{lim[0]} - {lim[1]}",
    ))

fig.update_layout(
    title_text="2019 US Number of Pizza Shops<br>(Click legend to toggle traces)",
    showlegend=True,
    geo=dict(scope="usa", landcolor="rgb(217, 217, 217)"),
)

fig.show()