Search code examples
python-3.x pandas geopandas geopy

calculate distance from address using geopandas


I have the Python 3.7 code below. I'm using it to calculate the distance in miles between a point that I specify, the target_address, and a couple of points that I have in a pandas dataframe. The code uses the latitude and longitude of each street address to create a shapely Point and then calculates the distance in miles. I'm getting the error message below when I run the code, and I can't understand why. The shapely Points all look pretty similar. I've provided sample data below. Can anyone tell me what the issue is?

Sample data:

print(gdf)


   store                                       address   latitude   longitude  \
0      1  5101 Business Center Dr, Fairfield, CA 94534  38.216613 -122.144712   
1      2           601 Jackson St, Fairfield, CA 94533  38.248419 -122.044867   

                      geometry  
0  POINT (-122.14471 38.21661)  
1  POINT (-122.04487 38.24842)  

Code:

import geopandas as gpd
from geopy.distance import geodesic
from geopy.geocoders import Nominatim
import pandas as pd
import numpy as np



def calculate_distance(point1, point2):
    """Return the geodesic distance between point1 and point2, in miles.

    This is the version from the question that raises the TypeError shown
    in the traceback further down.
    """
    # NOTE(review): per the traceback, geopy wraps each positional argument in
    # its own Point(...); the shapely Point passed in by the caller is rejected
    # there ("Failed to create Point instance from <shapely ... Point ...>").
    # NOTE(review): 'photon' is passed as a third positional argument, so it is
    # handled like another point. It is the geocoder provider name used with
    # gpd.tools.geocode elsewhere in this script and presumably does not
    # belong in this call - confirm.
    return geodesic(point1, point2,'photon').miles



# Address to measure from, and the cut-off used by the filter at the end.
target_address = '1113 Capistrano Court Fairfield, CA 94534'
max_distance = 5  # compared against calculate_distance() results, which are in miles

# Convert the target address to a point, geocoding with the 'photon' provider.
# NOTE(review): the result is passed on unchanged below; the accepted fix
# pulls a single geometry out of it with .at[0, 'geometry'], which suggests
# geocode() returns a table rather than one point - confirm.
target_point = gpd.tools.geocode(target_address,'photon')



# Filter the GeoDataFrame based on the distance:
# compute every row's distance to the target, then keep the rows within
# max_distance. The .apply(...) line is the one the traceback points at.

gdf['distance'] = gdf['geometry'].apply(lambda x: calculate_distance(x, target_point))
filtered_df = gdf[gdf['distance'] <= max_distance]

Error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
~/anaconda3/envs/gpthackathon/lib/python3.7/site-packages/geopy/point.py in __new__(cls, latitude, longitude, altitude)
    168                 try:
--> 169                     seq = iter(arg)
    170                 except TypeError:

TypeError: 'Point' object is not iterable

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
/var/folders/wd/yc91yn2s6r39blpvnppxwqy80000gp/T/ipykernel_88527/819773319.py in <module>
      1 #Filter the GeoDataFrame based on the distance:
      2 
----> 3 gdf['distance'] = gdf['geometry'].apply(lambda x: calculate_distance(x, target_point))
      4 filtered_df = gdf[gdf['distance'] <= max_distance]

~/anaconda3/envs/gpthackathon/lib/python3.7/site-packages/geopandas/geoseries.py in apply(self, func, convert_dtype, args, **kwargs)
    572     @inherit_doc(pd.Series)
    573     def apply(self, func, convert_dtype=True, args=(), **kwargs):
--> 574         result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs)
    575         if isinstance(result, GeoSeries):
    576             if self.crs is not None:

~/anaconda3/envs/gpthackathon/lib/python3.7/site-packages/pandas/core/series.py in apply(self, func, convert_dtype, args, **kwargs)
   4355         dtype: float64
   4356         """
-> 4357         return SeriesApply(self, func, convert_dtype, args, kwargs).apply()
   4358 
   4359     def _reduce(

~/anaconda3/envs/gpthackathon/lib/python3.7/site-packages/pandas/core/apply.py in apply(self)
   1041             return self.apply_str()
   1042 
-> 1043         return self.apply_standard()
   1044 
   1045     def agg(self):

~/anaconda3/envs/gpthackathon/lib/python3.7/site-packages/pandas/core/apply.py in apply_standard(self)
   1099                     values,
   1100                     f,  # type: ignore[arg-type]
-> 1101                     convert=self.convert_dtype,
   1102                 )
   1103 

~/anaconda3/envs/gpthackathon/lib/python3.7/site-packages/pandas/_libs/lib.pyx in pandas._libs.lib.map_infer()

/var/folders/wd/yc91yn2s6r39blpvnppxwqy80000gp/T/ipykernel_88527/819773319.py in <lambda>(x)
      1 #Filter the GeoDataFrame based on the distance:
      2 
----> 3 gdf['distance'] = gdf['geometry'].apply(lambda x: calculate_distance(x, target_point))
      4 filtered_df = gdf[gdf['distance'] <= max_distance]

/var/folders/wd/yc91yn2s6r39blpvnppxwqy80000gp/T/ipykernel_88527/442241440.py in calculate_distance(point1, point2)
      2 
      3 def calculate_distance(point1, point2):
----> 4     return geodesic(point1, point2,'photon').miles

~/anaconda3/envs/gpthackathon/lib/python3.7/site-packages/geopy/distance.py in __init__(self, *args, **kwargs)
    538         self.set_ellipsoid(kwargs.pop('ellipsoid', 'WGS-84'))
    539         major, minor, f = self.ELLIPSOID
--> 540         super().__init__(*args, **kwargs)
    541 
    542     def set_ellipsoid(self, ellipsoid):

~/anaconda3/envs/gpthackathon/lib/python3.7/site-packages/geopy/distance.py in __init__(self, *args, **kwargs)
    274         elif len(args) > 1:
    275             for a, b in util.pairwise(args):
--> 276                 kilometers += self.measure(a, b)
    277 
    278         kilometers += units.kilometers(**kwargs)

~/anaconda3/envs/gpthackathon/lib/python3.7/site-packages/geopy/distance.py in measure(self, a, b)
    554 
    555     def measure(self, a, b):
--> 556         a, b = Point(a), Point(b)
    557         _ensure_same_altitude(a, b)
    558         lat1, lon1 = a.latitude, a.longitude

~/anaconda3/envs/gpthackathon/lib/python3.7/site-packages/geopy/point.py in __new__(cls, latitude, longitude, altitude)
    170                 except TypeError:
    171                     raise TypeError(
--> 172                         "Failed to create Point instance from %r." % (arg,)
    173                     )
    174                 else:

TypeError: Failed to create Point instance from <shapely.geometry.point.Point object at 0x7f940800d690>.

Solution

  • The TypeError is legitimate, since geodesic expects tuples of floats in (latitude, longitude) order, not shapely Point objects.

    If ellipsoid is a string, it is looked up in the ELLIPSOIDS dictionary to obtain the major and minor semiaxes and the flattening. Otherwise, it should be a tuple with those values.

    There are also some other issues you need to fix, so you can try this:

    def calculate_distance(point1, point2):
        """Return the geodesic distance between two points, in miles.

        Both arguments are handed to geopy's ``geodesic`` unchanged; the
        caller in this answer supplies ``(latitude, longitude)`` tuples.
        """
        separation = geodesic(point1, point2)
        return separation.miles
    
    # Geocode the address, then keep only the geometry stored at row 0 of the
    # result so target_point is a single point object.
    geocoded = gpd.tools.geocode(target_address, 'photon')
    target_point = geocoded.at[0, 'geometry']

    # The sample data shows x holding longitude and y holding latitude, so
    # (y, x) gives the (latitude, longitude) order passed to the distance helper.
    target_latlon = (target_point.y, target_point.x)

    distances = gdf['geometry'].apply(
        lambda geom: calculate_distance((geom.y, geom.x), target_latlon)
    )
    gdf['distance'] = distances

    # Keep only the rows whose distance (in miles) is within the cut-off.
    within_range = gdf['distance'] <= max_distance
    filtered_df = gdf[within_range]
    

    Output :

    print(filtered_df)
    
       store                                       address  latitude   longitude                     geometry  distance
    0      1  5101 Business Center Dr, Fairfield, CA 94534 38.216613 -122.144712  POINT (-122.14471 38.21661)  2.373388