Search code examples
python, csv, dictionary, jupyter-notebook, bokeh

Customize Bokeh Unemployment Example: Replacing Percentage Value


starter code: https://docs.bokeh.org/en/latest/docs/gallery/texas.html

I am trying to replace the unemployment percentage with a different percentage that I have in a csv file. The csv columns are county name and concentration.

I am using the same call method for the county data as in the example; I am just pulling in different data for the percentage value.

I have tried turning the csv into a dictionary to then look up the county name value and return the corresponding concentration using the same format as the starter code. I've tried inner joining, outer joining, appending. What am I missing here?

from bokeh.io import show
from bokeh.models import LogColorMapper
from bokeh.palettes import Viridis6 as palette
from bokeh.plotting import figure

from bokeh.sampledata.us_counties import data as counties

import pandas as pd
import csv

#with open('resources/concentration.csv', mode='r') as infile:
    #reader = csv.reader(infile)
        #with open('concentration_new.csv', mode='w') as outfile:
            #writer = csv.writer(outfile)
            #mydict = {rows[0]:rows[1] for rows in reader}

#d_1_2= dict(list(counties.items()) + list(mydict.items()))

pharmacy_concentration = []
with open('resources/unemployment.csv', mode = 'r') as infile:
    reader = csv.reader(infile, delimiter = ',', quotechar = ' ') # remove 
last attribute if you dont have '"' in your csv file
    for row in reader:
        name, concentration = row 
        pharmacy_concentration[name] = concentration

counties = {
    code: county for code, county in counties.items() if county["state"] == 
"tx"
}

palette.reverse()

county_xs = [county["lons"] for county in counties.values()]
county_ys = [county["lats"] for county in counties.values()]

county_names = [county['name'] for county in counties.values()]

#this is the line I am trying to have pull the corresponding value for the correct county
#county_rates = [d_1_2['concentration'] for county in counties.values()]
color_mapper = LogColorMapper(palette=palette)

data=dict(
    x=county_xs,
    y=county_ys,
    name=county_names,
    #rate=county_rates,
   )

   TOOLS = "pan,wheel_zoom,reset,hover,save"

   p = figure(
        title="Texas Pharmacy Concentration", tools=TOOLS,
        x_axis_location=None, y_axis_location=None,
        tooltips=[
            ("Name", "@name"), ("Pharmacy Concentration", "@rate%"), 
            (" (Long, Lat)", "($x, $y)")])
            p.grid.grid_line_color = None
            p.hover.point_policy = "follow_mouse"
            p.patches('x', 'y', source=data,
            fill_color={'field': 'rate', 'transform': color_mapper},
      fill_alpha=0.7, line_color="white", line_width=0.5)

show(p)

enter image description here


Solution

  • It is hard to speculate without knowing the exact structure of your CSV file. Assuming there are just two columns in your CSV file, county_name and concentration (with no empty column before or between them), the following code may work for you:

    import csv

    from bokeh.models import LogColorMapper
    from bokeh.palettes import Viridis256 as palette
    from bokeh.plotting import figure, show
    from bokeh.sampledata.us_counties import data as counties

    # Build a {county name: concentration} lookup from a two-column CSV
    # (county_name, concentration).
    pharmacy_concentration = {}
    with open('resources/concentration.csv', mode='r', newline='') as infile:
        for row in csv.reader(infile):
            try:
                # Add a "dummy" target before "county_name" if the CSV file
                # has an empty leading column.
                county_name, concentration = row
                pharmacy_concentration[county_name.strip()] = float(concentration)
            except ValueError as error:
                # A header row, a blank line, or a row with the wrong number of
                # columns ends up here: report it and carry on with the rest.
                print(error, row)

    # Keep only the Texas counties from the Bokeh sample data.
    counties = {
        code: county for code, county in counties.items() if county["state"] == "tx"
    }
    county_xs = [county["lons"] for county in counties.values()]
    county_ys = [county["lats"] for county in counties.values()]
    county_names = [county["name"] for county in counties.values()]

    # Exactly one rate per county, in the same order as the columns above.
    # A county that is missing from the CSV gets NaN instead of being dropped,
    # so every column keeps the same length and each rate stays attached to the
    # right polygon; the color mapper paints NaN values with its `nan_color`.
    missing_rate = float("nan")
    county_pharmacy_concentration_rates = [
        pharmacy_concentration.get(name, missing_rate) for name in county_names
    ]

    # Reverse into a new tuple rather than calling palette.reverse(): this works
    # whether the palette is a list or a tuple and leaves the imported object
    # untouched.
    palette = tuple(reversed(palette))
    color_mapper = LogColorMapper(palette=palette)

    data = dict(
        x=county_xs,
        y=county_ys,
        name=county_names,
        rate=county_pharmacy_concentration_rates,
    )

    p = figure(
        title="Texas Pharmacy Concentration, 2009",
        tools="pan,wheel_zoom,reset,hover,save",
        tooltips=[
            ("Name", "@name"),
            ("Pharmacy Concentration", "@rate%"),
            ("(Long, Lat)", "($x, $y)"),
        ],
        x_axis_location=None,
        y_axis_location=None,
    )
    p.grid.grid_line_color = None
    p.hover.point_policy = "follow_mouse"
    p.patches(
        'x', 'y', source=data,
        fill_color={'field': 'rate', 'transform': color_mapper},
        fill_alpha=0.7, line_color="white", line_width=0.5,
    )

    show(p)
    

    The result: enter image description here