Search code examples
pythonweb-scraping

KeyError: 'response' — how do I solve this error while scraping data?


I'm trying to scrape data from Foursquare.
The result of this code:

# Query values: centre coordinates, radius and result limit.
lat, lng = 60.172667, 24.932009
radius, LIMIT = 500, 5

# Request headers: ask for JSON and supply the API key.
headers = {"Accept": "application/json", "Authorization": "APIKEY"}

# Fill the nearby-places endpoint template with the query values.
url = "https://api.foursquare.com/v3/places/nearby?ll={},{}&radius={}&limit={}".format(
    lat, lng, radius, LIMIT
)

# Issue the GET request; `results` is the raw response object.
results = requests.request(method="GET", url=url, headers=headers)

is something like:

{'results': [{'fsq_id': '4adcdb23f964a520dc6021e3',
   'categories': [{'id': 10004,
     'name': 'Art Gallery',
     'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/arts_entertainment/artgallery_',
      'suffix': '.png'}}],
   'chains': [],
   'distance': 81,
   'geocodes': {'main': {'latitude': 60.172127213487165,
     'longitude': 24.93101423081712}},
   'location': {'address': 'Nervanderinkatu 3',

Then I tried to build a function:

    def getNearbyVenues(names, latitudes, longitudes, radius=500):
        """Query the Foursquare places search endpoint for each location.

        Iterates over ``names``, ``latitudes`` and ``longitudes`` in lockstep,
        requests the places around each coordinate pair and collects one
        tuple per venue, which is then turned into a pandas DataFrame.

        Args:
            names: Labels for the locations (stored in the 'Neighborhood' column).
            latitudes: Latitude of each location.
            longitudes: Longitude of each location.
            radius: Value substituted into the ``radius`` query parameter.

        Returns:
            The ``nearby_venues`` DataFrame built inside the loop.

        Note:
            ``LIMIT`` is not a parameter; it is read from the enclosing scope.
        """
        headers = {
            "Accept": "application/json",
            "Authorization": "f#######Z3CJo="
        }

        URL = "https://api.foursquare.com/v3/places/search?ll={},{}&radius={}&limit={}"

        venues_list = []
        for name, lat, lng in zip(names, latitudes, longitudes):
            print(name)

            url = URL.format(lat, lng, radius, LIMIT)
            # NOTE(review): this chained lookup expects a
            # 'response' -> 'groups' -> 'items' layout. The sample v3 response
            # shown in this document has a top-level 'results' key instead,
            # which is consistent with the reported KeyError: 'response'.
            results = requests.request("GET", url, headers=headers).json()["response"]['groups'][0]['items']
            # NOTE(review): the 'venue' / 'lat' / 'lng' keys below presumably
            # do not match the v3 result entries either (the sample shows
            # 'geocodes' and 'categories' directly on each result) -- confirm
            # against the actual payload.
            venues_list.append([(
                name,
                lat,
                lng,
                v['venue']['name'],
                v['venue']['location']['lat'],
                v['venue']['location']['lng'],
                v['venue']['categories'][0]['name']) for v in results])


            # Still inside the for-loop: the DataFrame is rebuilt on every
            # iteration from all venues collected so far, so it is never
            # assigned when the inputs are empty.
            nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
            nearby_venues.columns = ['Neighborhood',
                                     'Neighborhood Latitude',
                                     'Neighborhood Longitude',
                                     'Venue',
                                     'Venue Latitude',
                                     'Venue Longitude',
                                     'Venue Category']
        return(nearby_venues)

and I'm getting KeyError: 'response'. When I tried removing ["response"]['groups'][0]['items'], I started getting a "string indices must be integers" error instead.


Solution

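  • Here is one way to achieve this. I think the main confusion here was how to parse the JSON: as the sample output in the question shows, the v3 response has a top-level 'results' list, so there is no 'response' / 'groups' / 'items' nesting to index into.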

    import pandas as pd
    import requests
    import json
    # define the function
    def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=5):
        """Fetch places near each location and return them as one DataFrame.

        For every (name, latitude, longitude) triple the Foursquare v3
        "nearby places" endpoint is queried, and each entry of the response's
        top-level ``'results'`` list becomes one row.

        Args:
            names: Labels for the locations (stored in the 'Neighborhood' column).
            latitudes: Latitude of each location.
            longitudes: Longitude of each location.
            radius: Value sent as the ``radius`` query parameter.
            limit: Value sent as the ``limit`` query parameter.

        Returns:
            A pandas DataFrame with one row per returned place; empty when no
            locations are given or no places are returned.

        Raises:
            requests.HTTPError: If the API answers with an error status
                (for example an invalid API key).
        """
        URL = "https://api.foursquare.com/v3/places/nearby?ll={},{}&radius={}&limit={}"
        headers = {
            "Accept": "application/json",
            "Authorization": "your API key here"
        }

        rows = []

        for name, lat, lng in zip(names, latitudes, longitudes):
            # Use the `limit` argument rather than a module-level LIMIT, so the
            # function works on its own and honours what the caller passes in.
            url = URL.format(lat, lng, radius, limit)
            response = requests.request("GET", url, headers=headers)
            # Fail with a clear HTTP error instead of a confusing
            # KeyError: 'results' when the request itself was rejected.
            response.raise_for_status()
            payload = response.json()

            for place in payload['results']:
                rows.append({
                    'Neighborhood': name,
                    'Neighborhood Latitude': lat,
                    'Neighborhood Longitude': lng,
                    'Name': place['name'],
                    'Venue Latitude': place['geocodes']['main']['latitude'],
                    'Venue Longitude': place['geocodes']['main']['longitude'],
                    # A place may come back without a locality; store None
                    # for it rather than aborting the whole run.
                    'Locality': place['location'].get('locality'),
                    'Category_Names': [category['name'] for category in place['categories']],
                })
        return pd.DataFrame(rows)
    

    Then call the function like this:

    # Example input: one labelled location with its coordinates.
    names = ['test_locations']
    latitudes = [60.172667]
    longitudes = [24.932009]
    radius = 500
    LIMIT = 5
    # Pass the radius and result limit explicitly so the values set above are
    # the ones the request actually uses.
    df_result = getNearbyVenues(names, latitudes, longitudes, radius=radius, limit=LIMIT)
    
    

    enter image description here