I have a pandas dataframe that contains latitude and longitude and am trying to create a function to take the coordinates and determine what state each location is in. I am using geopy to obtain location information from the coordinates.
Sample data:
If I wanted just one location, the code would be as follows:
# Reverse-geocode one hard-coded coordinate and read the state from the result.
latitude, longitude = '38.0525', '-85.9103'
geolocator = Nominatim(user_agent='geoapiExercises')
# The query is a single "lat,lon" string.
location = geolocator.reverse(','.join((latitude, longitude)))
address = location.raw['address']
# Falls back to an empty string when the address has no 'state' entry.
state = address.get('state', '')
However, I have around 10,000 coordinates that I need to find the state for. I have tried my hand at iterating over the dataframe using a for loop, but I'll admit I'm not the best at it. My current function raises a ValueError.
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
# Load the CSV and keep only the rows whose country_name is
# 'United States of America'.
df = pd.read_csv('filepath.csv')
us_df = df.loc[df['country_name'] == 'United States of America']
# Intended to reverse-geocode the coordinates in df and store the resulting
# state name in a 'state' column. (Indentation was lost in this paste.)
def state(df):
# NOTE(review): iterating a DataFrame directly yields its column labels, not
# its rows, so `row` is a column name here; it is never used below.
for row in df:
# get lat & long
# NOTE(review): df['latitude'] / df['longitude'] are whole columns, so str(...)
# produces the printed form of the entire Series rather than one coordinate.
latitude = str(df['latitude'])
longitude = str(df['longitude'])
# Build the geocoder client and reverse-geocode the "lat,lon" string.
geolocator = Nominatim(user_agent='geoapiExercises')
location = geolocator.reverse(latitude + ',' + longitude)
# Pull the 'state' entry out of the raw address, defaulting to ''.
address = location.raw['address']
state = address.get('state', '')
# NOTE(review): this assigns one scalar to the entire 'state' column, not to
# a single row.
df['state'] = state
# Run the function on the US-only subset.
state(us_df)
ValueError Traceback (most recent call last)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/site-packages/geopy/geocoders/nominatim.py:350, in Nominatim.reverse(self, query, exactly_one, timeout, language, addressdetails, zoom, namedetails)
349 try:
--> 350 lat, lon = self._coerce_point_to_string(query).split(',')
351 except ValueError:
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/site-packages/geopy/geocoders/base.py:300, in Geocoder._coerce_point_to_string(self, point, output_format)
299 if not isinstance(point, Point):
--> 300 point = Point(point)
302 # Altitude is silently dropped.
303 #
304 # Geocoding services (almost?) always consider only lat and lon
(...)
307 # though, because PoIs are assumed to span the whole
308 # altitude axis (i.e. not just the 0km plane).
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/site-packages/geopy/point.py:166, in Point.__new__(cls, latitude, longitude, altitude)
165 elif isinstance(arg, str):
--> 166 return cls.from_string(arg)
167 else:
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/site-packages/geopy/point.py:457, in Point.from_string(cls, string)
456 else:
--> 457 raise ValueError(
458 "Failed to create Point instance from string: unknown format."
459 )
ValueError: Failed to create Point instance from string: unknown format.
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Cell In[15], line 17
14 state = address.get('state', '')
15 df['state'] = state
---> 17 city_state(us_df)
Cell In[15], line 12, in city_state(df)
9 longitude = str(df['longitude'])
11 geolocator = Nominatim(user_agent='geoapiExercises')
---> 12 location = geolocator.reverse(latitude + ',' + longitude)
13 address = location.raw['address']
14 state = address.get('state', '')
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/site-packages/geopy/geocoders/nominatim.py:352, in Nominatim.reverse(self, query, exactly_one, timeout, language, addressdetails, zoom, namedetails)
350 lat, lon = self._coerce_point_to_string(query).split(',')
351 except ValueError:
--> 352 raise ValueError("Must be a coordinate pair or Point")
353 params = {
354 'lat': lat,
355 'lon': lon,
356 'format': 'json',
357 }
358 if language:
ValueError: Must be a coordinate pair or Point
It says the value must be a coordinate, so I must not be iterating correctly. Any help is appreciated.
I am using Python 3.10
Edit: When using the iterrows() solution below, a TimeoutError is returned:
TimeoutError Traceback (most recent call last)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/urllib/request.py:1348, in AbstractHTTPHandler.do_open(self, http_class, req, **http_conn_args)
1347 try:
-> 1348 h.request(req.get_method(), req.selector, req.data, headers,
1349 encode_chunked=req.has_header('Transfer-encoding'))
1350 except OSError as err: # timeout error
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/http/client.py:1282, in HTTPConnection.request(self, method, url, body, headers, encode_chunked)
1281 """Send a complete request to the server."""
-> 1282 self._send_request(method, url, body, headers, encode_chunked)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/http/client.py:1328, in HTTPConnection._send_request(self, method, url, body, headers, encode_chunked)
1327 body = _encode(body, 'body')
-> 1328 self.endheaders(body, encode_chunked=encode_chunked)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/http/client.py:1277, in HTTPConnection.endheaders(self, message_body, encode_chunked)
1276 raise CannotSendHeader()
-> 1277 self._send_output(message_body, encode_chunked=encode_chunked)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/http/client.py:1037, in HTTPConnection._send_output(self, message_body, encode_chunked)
1036 del self._buffer[:]
-> 1037 self.send(msg)
1039 if message_body is not None:
1040
1041 # create a consistent interface to message_body
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/http/client.py:975, in HTTPConnection.send(self, data)
974 if self.auto_open:
--> 975 self.connect()
976 else:
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/http/client.py:1454, in HTTPSConnection.connect(self)
1452 server_hostname = self.host
-> 1454 self.sock = self._context.wrap_socket(self.sock,
1455 server_hostname=server_hostname)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/ssl.py:513, in SSLContext.wrap_socket(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, session)
507 def wrap_socket(self, sock, server_side=False,
508 do_handshake_on_connect=True,
509 suppress_ragged_eofs=True,
510 server_hostname=None, session=None):
511 # SSLSocket class handles server_hostname encoding before it calls
512 # ctx._wrap_socket()
--> 513 return self.sslsocket_class._create(
514 sock=sock,
515 server_side=server_side,
516 do_handshake_on_connect=do_handshake_on_connect,
517 suppress_ragged_eofs=suppress_ragged_eofs,
518 server_hostname=server_hostname,
519 context=self,
520 session=session
521 )
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/ssl.py:1071, in SSLSocket._create(cls, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, context, session)
1070 raise ValueError("do_handshake_on_connect should not be specified for non-blocking sockets")
-> 1071 self.do_handshake()
1072 except (OSError, ValueError):
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/ssl.py:1342, in SSLSocket.do_handshake(self, block)
1341 self.settimeout(None)
-> 1342 self._sslobj.do_handshake()
1343 finally:
TimeoutError: _ssl.c:980: The handshake operation timed out
During handling of the above exception, another exception occurred:
URLError Traceback (most recent call last)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/site-packages/geopy/adapters.py:297, in URLLibAdapter.get_text(self, url, timeout, headers)
296 try:
--> 297 page = self.urlopen(req, timeout=timeout)
298 except Exception as error:
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/urllib/request.py:519, in OpenerDirector.open(self, fullurl, data, timeout)
518 sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method())
--> 519 response = self._open(req, data)
521 # post-process response
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/urllib/request.py:536, in OpenerDirector._open(self, req, data)
535 protocol = req.type
--> 536 result = self._call_chain(self.handle_open, protocol, protocol +
537 '_open', req)
538 if result:
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/urllib/request.py:496, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args)
495 func = getattr(handler, meth_name)
--> 496 result = func(*args)
497 if result is not None:
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/urllib/request.py:1391, in HTTPSHandler.https_open(self, req)
1390 def https_open(self, req):
-> 1391 return self.do_open(http.client.HTTPSConnection, req,
1392 context=self._context, check_hostname=self._check_hostname)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/urllib/request.py:1351, in AbstractHTTPHandler.do_open(self, http_class, req, **http_conn_args)
1350 except OSError as err: # timeout error
-> 1351 raise URLError(err)
1352 r = h.getresponse()
URLError: <urlopen error _ssl.c:980: The handshake operation timed out>
During handling of the above exception, another exception occurred:
GeocoderTimedOut Traceback (most recent call last)
Cell In[16], line 17
14 state = address.get('state', '')
15 df.at[index, 'state'] = state
---> 17 city_state(us_df)
Cell In[16], line 12, in city_state(df)
9 longitude = str(row['longitude'])
11 geolocator = Nominatim(user_agent='geoapiExercises')
---> 12 location = geolocator.reverse(latitude + ',' + longitude)
13 address = location.raw['address']
14 state = address.get('state', '')
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/site-packages/geopy/geocoders/nominatim.py:372, in Nominatim.reverse(self, query, exactly_one, timeout, language, addressdetails, zoom, namedetails)
370 logger.debug("%s.reverse: %s", self.__class__.__name__, url)
371 callback = partial(self._parse_json, exactly_one=exactly_one)
--> 372 return self._call_geocoder(url, callback, timeout=timeout)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/site-packages/geopy/geocoders/base.py:368, in Geocoder._call_geocoder(self, url, callback, timeout, is_json, headers)
366 try:
367 if is_json:
--> 368 result = self.adapter.get_json(url, timeout=timeout, headers=req_headers)
369 else:
370 result = self.adapter.get_text(url, timeout=timeout, headers=req_headers)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/site-packages/geopy/adapters.py:286, in URLLibAdapter.get_json(self, url, timeout, headers)
285 def get_json(self, url, *, timeout, headers):
--> 286 text = self.get_text(url, timeout=timeout, headers=headers)
287 try:
288 return json.loads(text)
File ~/opt/anaconda3/envs/Python3.10/lib/python3.10/site-packages/geopy/adapters.py:315, in URLLibAdapter.get_text(self, url, timeout, headers)
313 elif isinstance(error, URLError):
314 if "timed out" in message:
--> 315 raise GeocoderTimedOut("Service timed out")
316 elif "unreachable" in message:
317 raise GeocoderUnavailable("Service not available")
GeocoderTimedOut: Service timed out
Not having a sample of your data makes it hard to troubleshoot, but I think the issue is with the way you are accessing the values of the latitude and longitude in the dataframe.
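You are using for row in df, which iterates over the column labels of the dataframe, not over its rows.
On top of that, df['latitude'] inside the loop is the entire column, so str(df['latitude']) is the printed form of the whole Series rather than a single latitude value (and likewise for longitude), which is why geopy rejects it as a coordinate pair.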
So, to get the values of the latitude and longitude for each row, you should use the iterrows()
method on the dataframe.
So, I think you should try this instead (but as I stated in my comment, data would be useful)
def state(df):
    """Fill a 'state' column by reverse-geocoding each row's coordinates.

    For every row, the 'latitude' and 'longitude' values are sent to
    Nominatim and the 'state' entry of the returned address is written to
    ``df.at[index, 'state']``.  The dataframe is modified in place and
    nothing is returned.  Rows for which the service finds nothing, or whose
    address has no 'state' entry, get an empty string.

    Exceptions raised by the geocoder (e.g. a timeout) are not caught here.
    """
    # The client does not depend on the row, so build it once.
    geolocator = Nominatim(user_agent='geoapiExercises')
    for index, row in df.iterrows():
        # get lat & long
        latitude = str(row['latitude'])
        longitude = str(row['longitude'])
        location = geolocator.reverse(latitude + ',' + longitude)
        # reverse() returns None when no result is found for the coordinates.
        address = {} if location is None else location.raw.get('address', {})
        df.at[index, 'state'] = address.get('state', '')
If it times out use this:
def state(df, timeout=10):
    """Fill a 'state' column by reverse-geocoding each row, with a timeout.

    For every row, the 'latitude' and 'longitude' values are sent to
    Nominatim and the 'state' entry of the returned address is written to
    ``df.at[index, 'state']``.  The dataframe is modified in place and
    nothing is returned.  Rows for which the service finds nothing, or whose
    address has no 'state' entry, get an empty string.

    Args:
        df: DataFrame with 'latitude' and 'longitude' columns.
        timeout: Seconds to wait for each geocoder response (default 10).

    Raises:
        geopy.exc.GeocoderServiceError: if a request still fails after the
            rate limiter's retries (this includes timeouts).
    """
    # Local import keeps this snippet self-contained.
    from geopy.extra.rate_limiter import RateLimiter

    # The client does not depend on the row, so build it once.
    geolocator = Nominatim(user_agent='geoapiExercises', timeout=timeout)
    # The public Nominatim server allows at most one request per second;
    # throttle the calls and retry transient failures instead of hammering it.
    # swallow_exceptions=False keeps errors visible to the caller.
    reverse = RateLimiter(
        geolocator.reverse,
        min_delay_seconds=1,
        swallow_exceptions=False,
    )
    for index, row in df.iterrows():
        # get lat & long
        latitude = str(row['latitude'])
        longitude = str(row['longitude'])
        location = reverse(latitude + ',' + longitude)
        # reverse() returns None when no result is found for the coordinates.
        address = {} if location is None else location.raw.get('address', {})
        df.at[index, 'state'] = address.get('state', '')
where you can adjust the timeout value (it is given in seconds; geopy's default is 1 second).