Search code examples
pythonpandasgeopandaskeyerrorshapely

MultiPoint(df['geometry']) raises a KeyError on a filtered dataframe even though the key exists (KeyError: 13, geopandas)


data source: https://catalog.data.gov/dataset/nyc-transit-subway-entrance-and-exit-data

I looked for a similar problem but couldn't find an answer, and the error message doesn't help much. I'm kind of frustrated at this point. Thanks for the help. I'm calculating the closest distance from a given point to a set of points.

    # Load the subway entrance/exit CSV into a plain pandas DataFrame.
    df_subway = pd.read_csv('/content/drive/MyDrive/Despliegue_de_modelos/NYC_Transit_Subway_Entrance_And_Exit_Data.csv')

# One shapely Point per row, built as (longitude, latitude).
# NOTE(review): Point is only imported further down in this snippet; presumably an
# earlier notebook cell had already imported it.
geometry = [Point(xy) for xy in zip(df_subway['Station Longitude'], df_subway['Station Latitude'])]

# Coordinate reference system of the source coordinates.
crs = {'init': 'EPSG:4326'}

# Create the GeoDataFrame and reproject it to EPSG:5234.
gdf_subway_entrance_geometry = gpd.GeoDataFrame(df_subway, crs=crs, geometry=geometry).to_crs('EPSG:5234')
gdf_subway_entrance_geometry

# Keep only the rows whose Entry column is 'YES'. The result keeps the original
# index labels of the rows that survive the filter.
df_yes_entry = gdf_subway_entrance_geometry[gdf_subway_entrance_geometry.Entry=='YES']
df_yes_entry

from shapely.geometry import Point, MultiPoint
from shapely.ops import nearest_points
pts = MultiPoint(df_yes_entry['geometry']) # fails here with KeyError: 13 (see the traceback below)
# NOTE(review): gpdPoint is not defined anywhere in this snippet; presumably it comes from another cell.
pt = Point(gpdPoint.x, gpdPoint.y)
#[o.wkt for o in nearest_points(pt, pts)]
# Print each geometry returned by nearest_points for the query point and the point set.
for o in nearest_points(pt, pts):
  print(o)

The problem is that the same code works if I use gdf_subway_entrance_geometry instead of df_yes_entry, but I need to apply some filters!

This is the error:
---------------------------------------------------------------------------

KeyError                                  Traceback (most recent call last)

/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2897             try:
-> 2898                 return self._engine.get_loc(casted_key)
   2899             except KeyError as err:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()

KeyError: 13


    The above exception was the direct cause of the following exception:
    
    KeyError                                  Traceback (most recent call last)
    
    7 frames
    
    <ipython-input-480-b6733def46e0> in <module>()
          1 from shapely.geometry import Point, MultiPoint
          2 from shapely.ops import nearest_points
    ----> 3 pts = MultiPoint(df_yes_entry['geometry'])
          4 pt = Point(gpdPoint.x, gpdPoint.y)
          5 #[o.wkt for o in nearest_points(pt, pts)]
    
    /usr/local/lib/python3.7/dist-packages/shapely/geometry/multipoint.py in __init__(self, points)
         56             pass
         57         else:
    ---> 58             self._geom, self._ndim = geos_multipoint_from_py(points)
         59 
         60     def shape_factory(self, *args):
    
    /usr/local/lib/python3.7/dist-packages/shapely/geometry/multipoint.py in geos_multipoint_from_py(ob)
        169     # add to coordinate sequence
        170     for i in range(m):
    --> 171         coords = ob[i]
        172         geom, ndims = point.geos_point_from_py(coords)
        173 
    
    /usr/local/lib/python3.7/dist-packages/geopandas/geoseries.py in __getitem__(self, key)
        606 
        607     def __getitem__(self, key):
    --> 608         return self._wrapped_pandas_method("__getitem__", key)
        609 
        610     @doc(pd.Series)
    
    /usr/local/lib/python3.7/dist-packages/geopandas/geoseries.py in _wrapped_pandas_method(self, mtd, *args, **kwargs)
        599     def _wrapped_pandas_method(self, mtd, *args, **kwargs):
        600         """Wrap a generic pandas method to ensure it returns a GeoSeries"""
    --> 601         val = getattr(super(), mtd)(*args, **kwargs)
        602         if type(val) == Series:
        603             val.__class__ = GeoSeries
    
    /usr/local/lib/python3.7/dist-packages/pandas/core/series.py in __getitem__(self, key)
        880 
        881         elif key_is_scalar:
    --> 882             return self._get_value(key)
        883 
        884         if is_hashable(key):
    
    /usr/local/lib/python3.7/dist-packages/pandas/core/series.py in _get_value(self, label, takeable)
        988 
        989         # Similar to Index.get_value, but we do not fall back to positional
    --> 990         loc = self.index.get_loc(label)
        991         return self.index._get_values_for_loc(self, loc, label)
        992 
    
    /usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
       2898                 return self._engine.get_loc(casted_key)
       2899             except KeyError as err:
    -> 2900                 raise KeyError(key) from err
       2901 
       2902         if tolerance is not None:
    
    KeyError: 13

I am working in Colab; these are my packages:

Package                       Version
----------------------------- --------------
absl-py                       0.12.0
alabaster                     0.7.12
albumentations                0.1.12
altair                        4.1.0
appdirs                       1.4.4
argcomplete                   1.12.3
argon2-cffi                   21.1.0
arviz                         0.11.4
astor                         0.8.1
astropy                       4.3.1
astunparse                    1.6.3
atari-py                      0.2.9
atomicwrites                  1.4.0
attrs                         21.2.0
audioread                     2.1.9
autograd                      1.3
Babel                         2.9.1
backcall                      0.2.0
beautifulsoup4                4.6.3
bleach                        4.1.0
blis                          0.4.1
bokeh                         2.4.0
Bottleneck                    1.3.2
branca                        0.4.2
bs4                           0.0.1
CacheControl                  0.12.6
cached-property               1.5.2
cachetools                    4.2.4
Cartopy                       0.19.0.post1
catalogue                     1.0.0
certifi                       2021.5.30
cffi                          1.14.6
cftime                        1.5.1
chardet                       3.0.4
charset-normalizer            2.0.6
clang                         5.0
click                         7.1.2
click-plugins                 1.1.1
cligj                         0.7.2
cloudpickle                   1.3.0
cmake                         3.12.0
cmdstanpy                     0.9.5
colorcet                      2.0.6
colorlover                    0.3.0
community                     1.0.0b1
contextlib2                   0.5.5
convertdate                   2.3.2
coverage                      3.7.1
coveralls                     0.5
crcmod                        1.7
cufflinks                     0.17.3
cvxopt                        1.2.7
cvxpy                         1.0.31
cycler                        0.10.0
cymem                         2.0.5
Cython                        0.29.24
daft                          0.0.4
dask                          2.12.0
datascience                   0.10.6
debugpy                       1.0.0
decorator                     4.4.2
defusedxml                    0.7.1
descartes                     1.1.0
dill                          0.3.4
distributed                   1.25.3
dlib                          19.18.0
dm-tree                       0.1.6
docopt                        0.6.2
docutils                      0.17.1
dopamine-rl                   1.0.5
earthengine-api               0.1.284
easydict                      1.9
ecos                          2.0.7.post1
editdistance                  0.5.3
en-core-web-sm                2.2.5
entrypoints                   0.3
ephem                         4.1
et-xmlfile                    1.1.0
fa2                           0.3.5
fastai                        1.0.61
fastdtw                       0.3.4
fastprogress                  1.0.0
fastrlock                     0.6
fbprophet                     0.7.1
feather-format                0.4.1
filelock                      3.3.0
Fiona                         1.8.20
firebase-admin                4.4.0
fix-yahoo-finance             0.0.22
Flask                         1.1.4
flatbuffers                   1.12
folium                        0.8.3
future                        0.16.0
gast                          0.4.0
GDAL                          2.2.2
gdown                         3.6.4
gensim                        3.6.0
geographiclib                 1.52
geopandas                     0.10.1
geopy                         1.17.0
geoviews                      1.9.2
gin-config                    0.4.0
glob2                         0.7
google                        2.0.3
google-api-core               1.26.3
google-api-python-client      1.12.8
google-auth                   1.35.0
google-auth-httplib2          0.0.4
google-auth-oauthlib          0.4.6
google-cloud-bigquery         1.21.0
google-cloud-bigquery-storage 1.1.0
google-cloud-core             1.0.3
google-cloud-datastore        1.8.0
google-cloud-firestore        1.7.0
google-cloud-language         1.2.0
google-cloud-storage          1.18.1
google-cloud-translate        1.5.0
google-colab                  1.0.0
google-pasta                  0.2.0
google-resumable-media        0.4.1
googleapis-common-protos      1.53.0
googledrivedownloader         0.4
graphviz                      0.10.1
greenlet                      1.1.2
grpcio                        1.41.0
gspread                       3.0.1
gspread-dataframe             3.0.8
gym                           0.17.3
h5py                          3.1.0
HeapDict                      1.0.1
hijri-converter               2.2.2
holidays                      0.10.5.2
holoviews                     1.14.6
html5lib                      1.0.1
htmlmin                       0.1.12
httpimport                    0.5.18
httplib2                      0.17.4
httplib2shim                  0.0.3
humanize                      0.5.1
hyperopt                      0.1.2
ideep4py                      2.0.0.post3
idna                          2.10
ImageHash                     4.2.1
imageio                       2.4.1
imagesize                     1.2.0
imbalanced-learn              0.4.3
imblearn                      0.0
imgaug                        0.2.9
importlib-metadata            4.8.1
importlib-resources           5.2.2
imutils                       0.5.4
inflect                       2.1.0
iniconfig                     1.1.1
intel-openmp                  2021.4.0
intervaltree                  2.1.0
ipykernel                     4.10.1
ipython                       5.5.0
ipython-genutils              0.2.0
ipython-sql                   0.3.9
ipywidgets                    7.6.5
itsdangerous                  1.1.0
jax                           0.2.21
jaxlib                        0.1.71+cuda111
jdcal                         1.4.1
jedi                          0.18.0
jieba                         0.42.1
Jinja2                        2.11.3
joblib                        1.0.1
jpeg4py                       0.1.4
jsonschema                    2.6.0
jupyter                       1.0.0
jupyter-client                5.3.5
jupyter-console               5.2.0
jupyter-core                  4.8.1
jupyterlab-pygments           0.1.2
jupyterlab-widgets            1.0.2
kaggle                        1.5.12
kapre                         0.3.5
keras                         2.6.0
Keras-Preprocessing           1.1.2
keras-vis                     0.4.1
kiwisolver                    1.3.2
korean-lunar-calendar         0.2.1
librosa                       0.8.1
lightgbm                      2.2.3
llvmlite                      0.34.0
lmdb                          0.99
LunarCalendar                 0.0.9
lxml                          4.2.6
Markdown                      3.3.4
MarkupSafe                    2.0.1
matplotlib                    3.2.2
matplotlib-inline             0.1.3
matplotlib-venn               0.11.6
missingno                     0.5.0
mistune                       0.8.4
mizani                        0.6.0
mkl                           2019.0
mlxtend                       0.14.0
more-itertools                8.10.0
moviepy                       0.2.3.5
mpmath                        1.2.1
msgpack                       1.0.2
multimethod                   1.6
multiprocess                  0.70.12.2
multitasking                  0.0.9
munch                         2.5.0
murmurhash                    1.0.5
music21                       5.5.0
natsort                       5.5.0
nbclient                      0.5.4
nbconvert                     5.6.1
nbformat                      5.1.3
nest-asyncio                  1.5.1
netCDF4                       1.5.7
networkx                      2.6.3
nibabel                       3.0.2
nltk                          3.2.5
notebook                      5.3.1
numba                         0.51.2
numexpr                       2.7.3
numpy                         1.19.5
nvidia-ml-py3                 7.352.0
oauth2client                  4.1.3
oauthlib                      3.1.1
okgrade                       0.4.3
opencv-contrib-python         4.1.2.30
opencv-python                 4.1.2.30
openpyxl                      2.5.9
opt-einsum                    3.3.0
osqp                          0.6.2.post0
packaging                     21.0
palettable                    3.3.0
pandas                        1.1.5
pandas-datareader             0.9.0
pandas-gbq                    0.13.3
pandas-profiling              3.1.0
pandocfilters                 1.5.0
panel                         0.12.4
param                         1.11.1
parso                         0.8.2
pathlib                       1.0.1
patsy                         0.5.2
pep517                        0.11.0
pexpect                       4.8.0
phik                          0.12.0
pickleshare                   0.7.5
Pillow                        7.1.2
pip                           21.1.3
pip-tools                     6.2.0
plac                          1.1.3
plotly                        4.4.1
plotnine                      0.6.0
pluggy                        0.7.1
pooch                         1.5.1
portpicker                    1.3.9
prefetch-generator            1.0.1
preshed                       3.0.5
prettytable                   2.2.1
progressbar2                  3.38.0
prometheus-client             0.11.0
promise                       2.3
prompt-toolkit                1.0.18
protobuf                      3.17.3
psutil                        5.4.8
psycopg2                      2.7.6.1
ptyprocess                    0.7.0
py                            1.10.0
pyarrow                       3.0.0
pyasn1                        0.4.8
pyasn1-modules                0.2.8
pycocotools                   2.0.2
pycparser                     2.20
pyct                          0.4.8
pydantic                      1.8.2
pydata-google-auth            1.2.0
pydot                         1.3.0
pydot-ng                      2.0.0
pydotplus                     2.0.2
PyDrive                       1.3.1
pyemd                         0.5.1
pyerfa                        2.0.0
pyglet                        1.5.0
Pygments                      2.6.1
pygobject                     3.26.1
pymc3                         3.11.4
PyMeeus                       0.5.11
pymongo                       3.12.0
pymystem3                     0.2.0
PyOpenGL                      3.1.5
pyparsing                     2.4.7
pyproj                        3.2.1
pyrsistent                    0.18.0
pyshp                         2.1.3
pysndfile                     1.3.8
PySocks                       1.7.1
pystan                        2.19.1.1
pytest                        3.6.4
python-apt                    0.0.0
python-chess                  0.23.11
python-dateutil               2.8.2
python-louvain                0.15
python-slugify                5.0.2
python-utils                  2.5.6
pytz                          2018.9
pyviz-comms                   2.1.0
PyWavelets                    1.1.1
PyYAML                        5.4.1
pyzmq                         22.3.0
qdldl                         0.1.5.post0
qtconsole                     5.1.1
QtPy                          1.11.2
regex                         2019.12.20
requests                      2.26.0
requests-oauthlib             1.3.0
resampy                       0.2.2
retrying                      1.3.3
rpy2                          3.4.5
rsa                           4.7.2
scikit-image                  0.16.2
scikit-learn                  0.22.2.post1
scipy                         1.7.1
screen-resolution-extra       0.0.0
scs                           2.1.4
seaborn                       0.11.2
semver                        2.13.0
Send2Trash                    1.8.0
setuptools                    57.4.0
setuptools-git                1.2
Shapely                       1.7.1
simplegeneric                 0.8.1
six                           1.15.0
sklearn                       0.0
sklearn-pandas                1.8.0
smart-open                    5.2.1
snowballstemmer               2.1.0
sortedcontainers              2.4.0
SoundFile                     0.10.3.post1
spacy                         2.2.4
Sphinx                        1.8.5
sphinxcontrib-serializinghtml 1.1.5
sphinxcontrib-websupport      1.2.4
SQLAlchemy                    1.4.25
sqlparse                      0.4.2
srsly                         1.0.5
statsmodels                   0.10.2
sympy                         1.7.1
tables                        3.4.4
tabulate                      0.8.9
tangled-up-in-unicode         0.1.0
tblib                         1.7.0
tensorboard                   2.6.0
tensorboard-data-server       0.6.1
tensorboard-plugin-wit        1.8.0
tensorflow                    2.6.0
tensorflow-datasets           4.0.1
tensorflow-estimator          2.6.0
tensorflow-gcs-config         2.6.0
tensorflow-hub                0.12.0
tensorflow-metadata           1.2.0
tensorflow-probability        0.14.1
termcolor                     1.1.0
terminado                     0.12.1
testpath                      0.5.0
text-unidecode                1.3
textblob                      0.15.3
Theano-PyMC                   1.1.2
thinc                         7.4.0
tifffile                      2021.8.30
toml                          0.10.2
tomli                         1.2.1
toolz                         0.11.1
torch                         1.9.0+cu111
torchsummary                  1.5.1
torchtext                     0.10.0
torchvision                   0.10.0+cu111
tornado                       5.1.1
tqdm                          4.62.3
traitlets                     5.1.0
tweepy                        3.10.0
typeguard                     2.7.1
typing-extensions             3.10.0.2
tzlocal                       1.5.1
uritemplate                   3.0.1
urllib3                       1.24.3
vega-datasets                 0.9.0
visions                       0.7.4
wasabi                        0.8.2
wcwidth                       0.2.5
webencodings                  0.5.1
Werkzeug                      1.0.1
wheel                         0.37.0
widgetsnbextension            3.5.1
wordcloud                     1.5.0
wrapt                         1.12.1
xarray                        0.18.2
xgboost                       0.90
xkit                          0.0.0
xlrd                          1.1.0
xlwt                          1.3.0
yellowbrick                   0.9.1
zict                          2.0.0
zipp                          3.6.0

Solution

  • geopandas 0.10.1

    • I noticed that your data is also on Kaggle, so the code starts by downloading it from there.
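    • There really is only one issue: in Shapely 1.7 the shapely.geometry.MultiPoint() constructor reads its input with ob[i] for i in range(len(ob)) (see the traceback), and on a pandas/geopandas series an integer key is looked up by index label, not by position. After filtering, the index labels are no longer consecutive (here label 13 was filtered out), hence KeyError: 13. Pass it the underlying array (.values) instead, which is indexed by position, and it works.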
    • Full code below; I have randomly selected a point to serve as gpdPoint.
    # Data set: https://www.kaggle.com/new-york-state/nys-nyc-transit-subway-entrance-and-exit-data
    #
    # Download the data set with the Kaggle CLI, then read every CSV in the
    # downloaded archive into `dfs` (member file name -> DataFrame).
    import sys
    import urllib.parse  # explicit: a bare `import urllib` does not guarantee the `parse` submodule is loaded
    from pathlib import Path
    from zipfile import ZipFile

    import kaggle.cli
    import pandas as pd
    import requests  # not used below; kept from the original snippet

    url = "https://www.kaggle.com/new-york-state/nys-nyc-transit-subway-entrance-and-exit-data"

    # "<owner>/<dataset>" part of the URL, parsed once and reused below.
    dataset_slug = urllib.parse.urlparse(url).path[1:]
    # The code below expects the download to land in the working directory as "<dataset>.zip".
    archive_name = f"{dataset_slug.split('/')[-1]}.zip"

    # The CLI entry point is driven through sys.argv, so emulate
    # `kaggle datasets download <owner>/<dataset>` and restore argv afterwards.
    saved_argv = sys.argv
    sys.argv = [saved_argv[0], "datasets", "download", dataset_slug]
    try:
        kaggle.cli.main()
    finally:
        sys.argv = saved_argv

    # Context managers close the archive and each member handle once it has been read.
    dfs = {}
    with ZipFile(archive_name) as zfile:
        for member in zfile.infolist():
            if Path(member.filename).suffix == ".csv":
                with zfile.open(member) as csv_file:
                    dfs[member.filename] = pd.read_csv(csv_file)

    df_subway = dfs['nyc-transit-subway-entrance-and-exit-data.csv']
    
    from shapely.geometry import Point, MultiPoint
    from shapely.ops import nearest_points
    import geopandas as gpd

    # One shapely Point per row, built as (longitude, latitude).
    geometry = [Point(xy) for xy in zip(df_subway['Station Longitude'], df_subway['Station Latitude'])]

    # Coordinate reference system of the source coordinates. It is passed as an
    # authority string because the {'init': ...} dict form is deprecated.
    crs = 'EPSG:4326'

    # Create the GeoDataFrame and reproject it.
    # NOTE(review): EPSG:5234 is kept from the question; confirm it is a suitable
    # projected CRS for New York City data before trusting the distances.
    gdf_subway_entrance_geometry = gpd.GeoDataFrame(df_subway, crs=crs, geometry=geometry).to_crs('EPSG:5234')

    # Keep only the rows whose Entry column is 'YES'. The filtered frame keeps the
    # original index labels, so they are no longer a consecutive 0..n-1 range.
    df_yes_entry = gdf_subway_entrance_geometry[gdf_subway_entrance_geometry.Entry == 'YES']

    # Randomly select a point to serve as the query point.
    gpdPoint = gdf_subway_entrance_geometry.sample(1).geometry.iloc[0]

    # MultiPoint reads its input as ob[0], ob[1], ...; on a (Geo)Series that is a
    # label lookup and raises KeyError for a filtered-out label. `.values` hands it
    # the underlying array, which is indexed by position.
    pts = MultiPoint(df_yes_entry['geometry'].values)
    pt = Point(gpdPoint.x, gpdPoint.y)

    # Print each geometry returned by nearest_points for the query point and the point set.
    for o in nearest_points(pt, pts):
        print(o)