Search code examples
pythonpandasgeopy

Geopy error: GeocoderServiceError: HTTP Error 500: Internal Server Error using pandas apply function with str concat


Working function (see code Python Pandas 'apply' returns series; can't convert to dataframe) has stopped working. Only difference is I'm passing it a string concatenation.

# Get geocode, return LAT and LON
def locate(x):
    geolocator = Nominatim()
    print("'" + x + "'")
    location = geolocator.geocode(x)  # Get geocode
    print(location)
    lat = location.latitude
    lon = location.longitude
    try:
        #Get geocode
        location = geolocator.geocode(x, timeout=8, exactly_one=True)
        lat = location.latitude
        lon = location.longitude
    except:
        #didn't work for some reason that I really don't care about
        lat = np.nan
        lon = np.nan
        print(lat,lon)
    return pd.Series([lat,  lon])

This works

In[4] locate('MOSCOW   123098 RUSSIA')
'MOSCOW   123098 RUSSIA'
Москва, Центральный административный округ, Москва, ЦФО, Россия
Out[4]:
0    55.751633
1    37.618704
dtype: float64

But this does not:

df_addr[['LAT','LON']] =  df_addr['COUNTRY'].apply(locate(df_addr['CITY'] + ' ' + \
                                                          df_addr['PROVINCE'] + ' ' + \
                                                          df_addr['STATE'] + ' ' + \
                                                          df_addr['ZIP_CODE'] + ' ' + \
                                                          df_addr['COUNTRY'])) # Geocode it!

I see the function echoing the correct input strings:

0                 'INNSBRUCK    AUSTRIA'
1           'BERN   CH-3001 SWITZERLAND'
2                 'INNSBRUCK    AUSTRIA'
3               'MOSCOW   123098 RUSSIA'
4               'MOSCOW   123098 RUSSIA'
5              'FREDERICK  MD 21702 USA'

Removing the try/except I get the following fugly exception info

.
.
99    'GLASGOW LANARK  G20 9NB SCOTLAND'
dtype: object
---------------------------------------------------------------------------
HTTPError                                 Traceback (most recent call last)
C:\Users\gn\Anaconda3\lib\site-packages\geopy\geocoders\base.py in _call_geocoder(self, url, timeout, raw, requester, deserializer, **kwargs)
    131         try:
--> 132             page = requester(url, timeout=(timeout or self.timeout), **kwargs)
    133         except Exception as error: # pylint: disable=W0703

C:\Users\gn\Anaconda3\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault)
    152         opener = _opener
--> 153     return opener.open(url, data, timeout)
    154 

C:\Users\gn\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
    460             meth = getattr(processor, meth_name)
--> 461             response = meth(req, response)
    462 

C:\Users\gn\Anaconda3\lib\urllib\request.py in http_response(self, request, response)
    570             response = self.parent.error(
--> 571                 'http', request, response, code, msg, hdrs)
    572 

C:\Users\gn\Anaconda3\lib\urllib\request.py in error(self, proto, *args)
    498             args = (dict, 'default', 'http_error_default') + orig_args
--> 499             return self._call_chain(*args)
    500 

C:\Users\gn\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
    432             func = getattr(handler, meth_name)
--> 433             result = func(*args)
    434             if result is not None:

C:\Users\gn\Anaconda3\lib\urllib\request.py in http_error_default(self, req, fp, code, msg, hdrs)
    578     def http_error_default(self, req, fp, code, msg, hdrs):
--> 579         raise HTTPError(req.full_url, code, msg, hdrs, fp)
    580 

HTTPError: HTTP Error 500: Internal Server Error

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
C:\Users\gn\Anaconda3\lib\site-packages\geopy\geocoders\base.py in _call_geocoder(self, url, timeout, raw, requester, deserializer, **kwargs)
    146                 try:
--> 147                     raise ERROR_CODE_MAP[code](message)
    148                 except KeyError:

KeyError: 500

During handling of the above exception, another exception occurred:

GeocoderServiceError                      Traceback (most recent call last)
<ipython-input-6-7412c2e27dd8> in <module>()
----> 1 df_addr[['LAT','LON']] =  df_addr['COUNTRY'].apply(locate(df_addr['CITY'] + ' ' +                                                           df_addr['PROVINCE'] + ' ' +                                                           df_addr['STATE'] + ' ' +                                                           df_addr['ZIP_CODE'] + ' ' +                                                           df_addr['COUNTRY'])) # Geocode it!
      2 df_addr.head()

<ipython-input-3-d957ac2e2e2e> in locate(x)
      3     geolocator = Nominatim()
      4     print("'" + x + "'")
----> 5     location = geolocator.geocode(x,timeout=20)  # Get geocode
      6     print(location)
      7     lat = location.latitude

C:\Users\gn\Anaconda3\lib\site-packages\geopy\geocoders\osm.py in geocode(self, query, exactly_one, timeout, addressdetails, language, geometry)
    190         logger.debug("%s.geocode: %s", self.__class__.__name__, url)
    191         return self._parse_json(
--> 192             self._call_geocoder(url, timeout=timeout), exactly_one
    193         )
    194 

C:\Users\gn\Anaconda3\lib\site-packages\geopy\geocoders\base.py in _call_geocoder(self, url, timeout, raw, requester, deserializer, **kwargs)
    147                     raise ERROR_CODE_MAP[code](message)
    148                 except KeyError:
--> 149                     raise GeocoderServiceError(message)
    150             elif isinstance(error, URLError):
    151                 if "timed out" in message:

GeocoderServiceError: HTTP Error 500: Internal Server Error

Am in over my head. Updated all libraries, but no change in the problem.

Thanks in advance


Solution

  • What you're doing is a little perverse to be honest, you're calling apply on a series and then trying to construct a str from lots of columns, this is the wrong way to go about this, you can call apply on the df and pass axis=1 so that the row is passed and either access each column in a lambda func and pass them to locate or in locate extract each column value, or just create a series from the concatenation of all the columns and call apply on this:

    df_addr[['LAT','LON']] = (df_addr['CITY'] + ' ' + df_addr['PROVINCE'] + ' ' + df_addr['STATE'] + ' ' + df_addr['ZIP_CODE'] + ' ' + df_addr['COUNTRY']).apply(locate)
    

    The above should work I believe.