Search code examples
python pandas csv bokeh concatenation

Concat a dataframe's value to a dictionary's matching value


Tangent related to this post: Customize Bokeh Unemployment Example: Replacing Percentage Value

Starter code: https://docs.bokeh.org/en/latest/docs/gallery/texas.html

from bokeh.io import show
from bokeh.models import LogColorMapper
from bokeh.palettes import Viridis6 as palette
from bokeh.plotting import figure

from bokeh.sampledata.us_counties import data as counties
# Narrow the sample data down to the entries whose "state" field is "tx".
counties = {
    key: entry
    for key, entry in counties.items()
    if entry["state"] == "tx"
}

csv data:

enter image description here

I have a dictionary of county names:
{(48, 1): {'name': 'Anderson',
  'detailed name': 'Anderson County, Texas',
  'state': 'tx'},
 (48, 3): {'name': 'Andrews',
  'detailed name': 'Andrews County, Texas',
  'state': 'tx'}}

and a dataframe created from a csv file of percentage values:
 {'Anderson': 21.0,
 'Andrews': 28.0,
 'Angelina': 31.0,
 'Aransas': 24.0,
 'Archer': 11.0,
 'Armstrong': 53.0,
 'Atascosa': 27.0,
 'Austin': 30.0,
 'Bailey': 42.0,
 'Bandera': 0.0}

I am trying to merge the dataframe's percentage value on the county name in the dictionary.

from bokeh.models import LogColorMapper
from bokeh.palettes import Viridis6 as palette
from bokeh.plotting import figure, show
from bokeh.sampledata.us_counties import data as counties
import csv
import pandas as pd

# Build a {county name: percentage} lookup from the CSV file.
pharmacy_concentration = {}
with open('resources/unemployment.csv', mode = 'r') as infile:
    # Skip the first line of the file (presumably a header row).
    next(infile)
    reader = csv.reader(infile, delimiter = ',', quotechar = '"')
    for row in reader:
        # Each row is unpacked into exactly two fields; the second one is
        # converted to float and stored under the first.
        # NOTE(review): the assignment below is indented one level deeper than
        # the unpacking line, as pasted.
        name, concentration = row 
            pharmacy_concentration[name] = float(concentration)

# Keep only the entries of the bokeh sample data whose "state" is "tx".
counties = { code: county for code, county in counties.items() if county["state"] == "tx" }
# NOTE(review): at this point `concentration` still holds the last value read
# from the CSV (a string), while the keys of `pharmacy_concentration` are
# county names, so this lookup would raise a KeyError showing that percentage
# value before pd.concat is ever called.
# NOTE(review): `on=` and `how=` look like merge/join parameters rather than
# pd.concat parameters - confirm against the pandas documentation.
counties = pd.concat(pharmacy_concentration[concentration], on='name', 
how='left', keys='concentration')

counties

I receive a KeyError that shows the percentage value, and I can't figure out why.

Expected output:

 counties
 {(48, 1): {'name': 'Anderson',
 'detailed name': 'Anderson County, Texas',
 'state': 'tx', 'concentration': 21}}

Solution

  • Credit to @Tony

    from bokeh.models import LogColorMapper
    from bokeh.palettes import Viridis256 as palette
    from bokeh.plotting import figure, show
    from bokeh.sampledata.us_counties import data as counties
    import csv
    
    # Build a {county name: rate} lookup from the two-column CSV file.
    pharmacy_concentration = {}
    # newline='' lets the csv module do its own line-ending handling, so quoted
    # fields that contain line breaks are parsed correctly.
    with open('resources/unemployment.csv', mode='r', newline='') as infile:
        for row in csv.reader(infile):
            try:
                county_name, concentration = row
                pharmacy_concentration[county_name] = float(concentration)
            except ValueError as error:
                # Best effort: report rows that do not have exactly two fields
                # or whose second field is not numeric (a header line, for
                # example), then carry on with the next row.
                print(error, row)

    # Keep only the entries of the bokeh sample data whose "state" is "tx".
    counties = {
        code: county
        for code, county in counties.items()
        if county["state"] == "tx"
    }
    county_xs = [county["lons"] for county in counties.values()]
    county_ys = [county["lats"] for county in counties.values()]
    county_names = [county['name'] for county in counties.values()]
    # This is the step that was missing: look up each county's rate by name.
    # NOTE(review): counties whose name is absent from the CSV are skipped, so
    # this list can be shorter than county_xs / county_ys / county_names -
    # confirm the lengths match before plotting them together.
    county_pharmacy_concentration_rates = [
        pharmacy_concentration[county['name']]
        for county in counties.values()
        if county['name'] in pharmacy_concentration
    ]