data source: https://catalog.data.gov/dataset/nyc-transit-subway-entrance-and-exit-data
I looked for a similar problem but could not find an answer, and the error message does not help much. I'm getting frustrated at this point. I'm trying to find the closest point (smallest distance) to a given point. Thanks for the help.
# Reproduction of the failing notebook code: load the subway entrance CSV,
# turn each row into a projected point, keep only rows with Entry == 'YES',
# and look up the nearest of those points to a reference point.
# NOTE(review): `pd` and `gpd` are not imported anywhere in this snippet, and
# `Point` is used before the shapely import further down - presumably all of
# them were imported in an earlier notebook cell.
df_subway = pd.read_csv('/content/drive/MyDrive/Despliegue_de_modelos/NYC_Transit_Subway_Entrance_And_Exit_Data.csv')
# One shapely Point per row, built as (longitude, latitude).
geometry = [Point(xy) for xy in zip(df_subway['Station Longitude'], df_subway['Station Latitude'])]
# Coordinate reference system declared for the raw longitude/latitude columns.
crs = {'init': 'EPSG:4326'}
# Create the GeoDataFrame in EPSG:4326, then reproject it to EPSG:5234.
gdf_subway_entrance_geometry = gpd.GeoDataFrame(df_subway, crs=crs, geometry=geometry).to_crs('EPSG:5234')
# Bare expression: only displays the frame when it ends a notebook cell.
gdf_subway_entrance_geometry
# Boolean-mask filter: the result keeps the original row labels of the rows
# that matched.
df_yes_entry = gdf_subway_entrance_geometry[gdf_subway_entrance_geometry.Entry=='YES']
df_yes_entry
from shapely.geometry import Point, MultiPoint
from shapely.ops import nearest_points
# The filtered geometry column is passed to MultiPoint as a GeoSeries; the
# traceback below shows the KeyError being raised from inside this constructor.
pts = MultiPoint(df_yes_entry['geometry']) #it fails in this line
# NOTE(review): `gpdPoint` is not defined in this snippet - presumably set in
# an earlier notebook cell.
pt = Point(gpdPoint.x, gpdPoint.y)
#[o.wkt for o in nearest_points(pt, pts)]
# Print each geometry returned by nearest_points(pt, pts).
for o in nearest_points(pt, pts):
# NOTE(review): presumably the loop body; its indentation appears to have been
# lost when the snippet was pasted.
print(o)
The odd part is that the same code works when I use the unfiltered gdf_subway_entrance_geometry instead of df_yes_entry, but I need to apply some filters first!
This is the error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2897 try:
-> 2898 return self._engine.get_loc(casted_key)
2899 except KeyError as err:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 13
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
7 frames
<ipython-input-480-b6733def46e0> in <module>()
1 from shapely.geometry import Point, MultiPoint
2 from shapely.ops import nearest_points
----> 3 pts = MultiPoint(df_yes_entry['geometry'])
4 pt = Point(gpdPoint.x, gpdPoint.y)
5 #[o.wkt for o in nearest_points(pt, pts)]
/usr/local/lib/python3.7/dist-packages/shapely/geometry/multipoint.py in __init__(self, points)
56 pass
57 else:
---> 58 self._geom, self._ndim = geos_multipoint_from_py(points)
59
60 def shape_factory(self, *args):
/usr/local/lib/python3.7/dist-packages/shapely/geometry/multipoint.py in geos_multipoint_from_py(ob)
169 # add to coordinate sequence
170 for i in range(m):
--> 171 coords = ob[i]
172 geom, ndims = point.geos_point_from_py(coords)
173
/usr/local/lib/python3.7/dist-packages/geopandas/geoseries.py in __getitem__(self, key)
606
607 def __getitem__(self, key):
--> 608 return self._wrapped_pandas_method("__getitem__", key)
609
610 @doc(pd.Series)
/usr/local/lib/python3.7/dist-packages/geopandas/geoseries.py in _wrapped_pandas_method(self, mtd, *args, **kwargs)
599 def _wrapped_pandas_method(self, mtd, *args, **kwargs):
600 """Wrap a generic pandas method to ensure it returns a GeoSeries"""
--> 601 val = getattr(super(), mtd)(*args, **kwargs)
602 if type(val) == Series:
603 val.__class__ = GeoSeries
/usr/local/lib/python3.7/dist-packages/pandas/core/series.py in __getitem__(self, key)
880
881 elif key_is_scalar:
--> 882 return self._get_value(key)
883
884 if is_hashable(key):
/usr/local/lib/python3.7/dist-packages/pandas/core/series.py in _get_value(self, label, takeable)
988
989 # Similar to Index.get_value, but we do not fall back to positional
--> 990 loc = self.index.get_loc(label)
991 return self.index._get_values_for_loc(self, loc, label)
992
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2898 return self._engine.get_loc(casted_key)
2899 except KeyError as err:
-> 2900 raise KeyError(key) from err
2901
2902 if tolerance is not None:
KeyError: 13
I am working in Colab; these are my installed packages:
Package Version
----------------------------- --------------
absl-py 0.12.0
alabaster 0.7.12
albumentations 0.1.12
altair 4.1.0
appdirs 1.4.4
argcomplete 1.12.3
argon2-cffi 21.1.0
arviz 0.11.4
astor 0.8.1
astropy 4.3.1
astunparse 1.6.3
atari-py 0.2.9
atomicwrites 1.4.0
attrs 21.2.0
audioread 2.1.9
autograd 1.3
Babel 2.9.1
backcall 0.2.0
beautifulsoup4 4.6.3
bleach 4.1.0
blis 0.4.1
bokeh 2.4.0
Bottleneck 1.3.2
branca 0.4.2
bs4 0.0.1
CacheControl 0.12.6
cached-property 1.5.2
cachetools 4.2.4
Cartopy 0.19.0.post1
catalogue 1.0.0
certifi 2021.5.30
cffi 1.14.6
cftime 1.5.1
chardet 3.0.4
charset-normalizer 2.0.6
clang 5.0
click 7.1.2
click-plugins 1.1.1
cligj 0.7.2
cloudpickle 1.3.0
cmake 3.12.0
cmdstanpy 0.9.5
colorcet 2.0.6
colorlover 0.3.0
community 1.0.0b1
contextlib2 0.5.5
convertdate 2.3.2
coverage 3.7.1
coveralls 0.5
crcmod 1.7
cufflinks 0.17.3
cvxopt 1.2.7
cvxpy 1.0.31
cycler 0.10.0
cymem 2.0.5
Cython 0.29.24
daft 0.0.4
dask 2.12.0
datascience 0.10.6
debugpy 1.0.0
decorator 4.4.2
defusedxml 0.7.1
descartes 1.1.0
dill 0.3.4
distributed 1.25.3
dlib 19.18.0
dm-tree 0.1.6
docopt 0.6.2
docutils 0.17.1
dopamine-rl 1.0.5
earthengine-api 0.1.284
easydict 1.9
ecos 2.0.7.post1
editdistance 0.5.3
en-core-web-sm 2.2.5
entrypoints 0.3
ephem 4.1
et-xmlfile 1.1.0
fa2 0.3.5
fastai 1.0.61
fastdtw 0.3.4
fastprogress 1.0.0
fastrlock 0.6
fbprophet 0.7.1
feather-format 0.4.1
filelock 3.3.0
Fiona 1.8.20
firebase-admin 4.4.0
fix-yahoo-finance 0.0.22
Flask 1.1.4
flatbuffers 1.12
folium 0.8.3
future 0.16.0
gast 0.4.0
GDAL 2.2.2
gdown 3.6.4
gensim 3.6.0
geographiclib 1.52
geopandas 0.10.1
geopy 1.17.0
geoviews 1.9.2
gin-config 0.4.0
glob2 0.7
google 2.0.3
google-api-core 1.26.3
google-api-python-client 1.12.8
google-auth 1.35.0
google-auth-httplib2 0.0.4
google-auth-oauthlib 0.4.6
google-cloud-bigquery 1.21.0
google-cloud-bigquery-storage 1.1.0
google-cloud-core 1.0.3
google-cloud-datastore 1.8.0
google-cloud-firestore 1.7.0
google-cloud-language 1.2.0
google-cloud-storage 1.18.1
google-cloud-translate 1.5.0
google-colab 1.0.0
google-pasta 0.2.0
google-resumable-media 0.4.1
googleapis-common-protos 1.53.0
googledrivedownloader 0.4
graphviz 0.10.1
greenlet 1.1.2
grpcio 1.41.0
gspread 3.0.1
gspread-dataframe 3.0.8
gym 0.17.3
h5py 3.1.0
HeapDict 1.0.1
hijri-converter 2.2.2
holidays 0.10.5.2
holoviews 1.14.6
html5lib 1.0.1
htmlmin 0.1.12
httpimport 0.5.18
httplib2 0.17.4
httplib2shim 0.0.3
humanize 0.5.1
hyperopt 0.1.2
ideep4py 2.0.0.post3
idna 2.10
ImageHash 4.2.1
imageio 2.4.1
imagesize 1.2.0
imbalanced-learn 0.4.3
imblearn 0.0
imgaug 0.2.9
importlib-metadata 4.8.1
importlib-resources 5.2.2
imutils 0.5.4
inflect 2.1.0
iniconfig 1.1.1
intel-openmp 2021.4.0
intervaltree 2.1.0
ipykernel 4.10.1
ipython 5.5.0
ipython-genutils 0.2.0
ipython-sql 0.3.9
ipywidgets 7.6.5
itsdangerous 1.1.0
jax 0.2.21
jaxlib 0.1.71+cuda111
jdcal 1.4.1
jedi 0.18.0
jieba 0.42.1
Jinja2 2.11.3
joblib 1.0.1
jpeg4py 0.1.4
jsonschema 2.6.0
jupyter 1.0.0
jupyter-client 5.3.5
jupyter-console 5.2.0
jupyter-core 4.8.1
jupyterlab-pygments 0.1.2
jupyterlab-widgets 1.0.2
kaggle 1.5.12
kapre 0.3.5
keras 2.6.0
Keras-Preprocessing 1.1.2
keras-vis 0.4.1
kiwisolver 1.3.2
korean-lunar-calendar 0.2.1
librosa 0.8.1
lightgbm 2.2.3
llvmlite 0.34.0
lmdb 0.99
LunarCalendar 0.0.9
lxml 4.2.6
Markdown 3.3.4
MarkupSafe 2.0.1
matplotlib 3.2.2
matplotlib-inline 0.1.3
matplotlib-venn 0.11.6
missingno 0.5.0
mistune 0.8.4
mizani 0.6.0
mkl 2019.0
mlxtend 0.14.0
more-itertools 8.10.0
moviepy 0.2.3.5
mpmath 1.2.1
msgpack 1.0.2
multimethod 1.6
multiprocess 0.70.12.2
multitasking 0.0.9
munch 2.5.0
murmurhash 1.0.5
music21 5.5.0
natsort 5.5.0
nbclient 0.5.4
nbconvert 5.6.1
nbformat 5.1.3
nest-asyncio 1.5.1
netCDF4 1.5.7
networkx 2.6.3
nibabel 3.0.2
nltk 3.2.5
notebook 5.3.1
numba 0.51.2
numexpr 2.7.3
numpy 1.19.5
nvidia-ml-py3 7.352.0
oauth2client 4.1.3
oauthlib 3.1.1
okgrade 0.4.3
opencv-contrib-python 4.1.2.30
opencv-python 4.1.2.30
openpyxl 2.5.9
opt-einsum 3.3.0
osqp 0.6.2.post0
packaging 21.0
palettable 3.3.0
pandas 1.1.5
pandas-datareader 0.9.0
pandas-gbq 0.13.3
pandas-profiling 3.1.0
pandocfilters 1.5.0
panel 0.12.4
param 1.11.1
parso 0.8.2
pathlib 1.0.1
patsy 0.5.2
pep517 0.11.0
pexpect 4.8.0
phik 0.12.0
pickleshare 0.7.5
Pillow 7.1.2
pip 21.1.3
pip-tools 6.2.0
plac 1.1.3
plotly 4.4.1
plotnine 0.6.0
pluggy 0.7.1
pooch 1.5.1
portpicker 1.3.9
prefetch-generator 1.0.1
preshed 3.0.5
prettytable 2.2.1
progressbar2 3.38.0
prometheus-client 0.11.0
promise 2.3
prompt-toolkit 1.0.18
protobuf 3.17.3
psutil 5.4.8
psycopg2 2.7.6.1
ptyprocess 0.7.0
py 1.10.0
pyarrow 3.0.0
pyasn1 0.4.8
pyasn1-modules 0.2.8
pycocotools 2.0.2
pycparser 2.20
pyct 0.4.8
pydantic 1.8.2
pydata-google-auth 1.2.0
pydot 1.3.0
pydot-ng 2.0.0
pydotplus 2.0.2
PyDrive 1.3.1
pyemd 0.5.1
pyerfa 2.0.0
pyglet 1.5.0
Pygments 2.6.1
pygobject 3.26.1
pymc3 3.11.4
PyMeeus 0.5.11
pymongo 3.12.0
pymystem3 0.2.0
PyOpenGL 3.1.5
pyparsing 2.4.7
pyproj 3.2.1
pyrsistent 0.18.0
pyshp 2.1.3
pysndfile 1.3.8
PySocks 1.7.1
pystan 2.19.1.1
pytest 3.6.4
python-apt 0.0.0
python-chess 0.23.11
python-dateutil 2.8.2
python-louvain 0.15
python-slugify 5.0.2
python-utils 2.5.6
pytz 2018.9
pyviz-comms 2.1.0
PyWavelets 1.1.1
PyYAML 5.4.1
pyzmq 22.3.0
qdldl 0.1.5.post0
qtconsole 5.1.1
QtPy 1.11.2
regex 2019.12.20
requests 2.26.0
requests-oauthlib 1.3.0
resampy 0.2.2
retrying 1.3.3
rpy2 3.4.5
rsa 4.7.2
scikit-image 0.16.2
scikit-learn 0.22.2.post1
scipy 1.7.1
screen-resolution-extra 0.0.0
scs 2.1.4
seaborn 0.11.2
semver 2.13.0
Send2Trash 1.8.0
setuptools 57.4.0
setuptools-git 1.2
Shapely 1.7.1
simplegeneric 0.8.1
six 1.15.0
sklearn 0.0
sklearn-pandas 1.8.0
smart-open 5.2.1
snowballstemmer 2.1.0
sortedcontainers 2.4.0
SoundFile 0.10.3.post1
spacy 2.2.4
Sphinx 1.8.5
sphinxcontrib-serializinghtml 1.1.5
sphinxcontrib-websupport 1.2.4
SQLAlchemy 1.4.25
sqlparse 0.4.2
srsly 1.0.5
statsmodels 0.10.2
sympy 1.7.1
tables 3.4.4
tabulate 0.8.9
tangled-up-in-unicode 0.1.0
tblib 1.7.0
tensorboard 2.6.0
tensorboard-data-server 0.6.1
tensorboard-plugin-wit 1.8.0
tensorflow 2.6.0
tensorflow-datasets 4.0.1
tensorflow-estimator 2.6.0
tensorflow-gcs-config 2.6.0
tensorflow-hub 0.12.0
tensorflow-metadata 1.2.0
tensorflow-probability 0.14.1
termcolor 1.1.0
terminado 0.12.1
testpath 0.5.0
text-unidecode 1.3
textblob 0.15.3
Theano-PyMC 1.1.2
thinc 7.4.0
tifffile 2021.8.30
toml 0.10.2
tomli 1.2.1
toolz 0.11.1
torch 1.9.0+cu111
torchsummary 1.5.1
torchtext 0.10.0
torchvision 0.10.0+cu111
tornado 5.1.1
tqdm 4.62.3
traitlets 5.1.0
tweepy 3.10.0
typeguard 2.7.1
typing-extensions 3.10.0.2
tzlocal 1.5.1
uritemplate 3.0.1
urllib3 1.24.3
vega-datasets 0.9.0
visions 0.7.4
wasabi 0.8.2
wcwidth 0.2.5
webencodings 0.5.1
Werkzeug 1.0.1
wheel 0.37.0
widgetsnbextension 3.5.1
wordcloud 1.5.0
wrapt 1.12.1
xarray 0.18.2
xgboost 0.90
xkit 0.0.0
xlrd 1.1.0
xlwt 1.3.0
yellowbrick 0.9.1
zict 2.0.0
zipp 3.6.0
geopandas 0.10.1
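The shapely.geometry.MultiPoint() constructor does not work with a filtered series: as the traceback shows, it reads its input by position (`ob[i]` for `i in range(m)`), while a filtered series keeps the original row labels, so the first missing label raises the KeyError. Pass it the underlying array (`.values`) instead and it works. Note that gpdPoint is not defined in the question, so the code below picks a random point for it.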
# https://www.kaggle.com/new-york-state/nys-nyc-transit-subway-entrance-and-exit-data
import sys
import urllib.parse  # import the submodule explicitly; `import urllib` alone does not guarantee it
from pathlib import Path
from zipfile import ZipFile

import kaggle.cli
import pandas as pd
import requests

# Download the dataset archive with the Kaggle CLI and load every CSV it
# contains into a dict of DataFrames keyed by archive member name.
url = "https://www.kaggle.com/new-york-state/nys-nyc-transit-subway-entrance-and-exit-data"
# URL path without the leading slash, passed to the CLI as the dataset
# identifier ("<owner>/<dataset>").
dataset_slug = urllib.parse.urlparse(url).path[1:]
# kaggle.cli.main() takes its arguments from sys.argv, so emulate the command
# line `kaggle datasets download <owner>/<dataset>`.
sys.argv = [sys.argv[0], "datasets", "download", dataset_slug]
kaggle.cli.main()

# The archive is expected in the working directory, named after the last
# component of the dataset path.
archive_name = f'{dataset_slug.split("/")[-1]}.zip'
dfs = {}
# Context managers close the archive and each member handle once the CSVs have
# been parsed, instead of leaving them open for the rest of the session.
with ZipFile(archive_name) as zfile:
    for member in zfile.infolist():
        if Path(member.filename).suffix == ".csv":
            with zfile.open(member) as csv_file:
                dfs[member.filename] = pd.read_csv(csv_file)

df_subway = dfs['nyc-transit-subway-entrance-and-exit-data.csv']
from shapely.geometry import Point, MultiPoint
from shapely.ops import nearest_points
import geopandas as gpd

# Build one shapely Point per row as (longitude, latitude).
geometry = [Point(xy) for xy in zip(df_subway['Station Longitude'], df_subway['Station Latitude'])]
# Coordinate reference system of the raw longitude/latitude columns. The
# authority string is passed directly because the {'init': ...} dict form is
# deprecated.
crs = 'EPSG:4326'
# Create the GeoDataFrame and reproject it.
# NOTE(review): EPSG:5234 does not look like a New York-area projection -
# confirm the intended CRS (EPSG:2263 is a common choice for NYC).
gdf_subway_entrance_geometry = gpd.GeoDataFrame(df_subway, crs=crs, geometry=geometry).to_crs('EPSG:5234')
gdf_subway_entrance_geometry  # notebook display only

# Keep only the rows flagged as entrances. The filtered frame keeps the
# original row labels of the rows that matched.
df_yes_entry = gdf_subway_entrance_geometry[gdf_subway_entrance_geometry.Entry == 'YES']
df_yes_entry  # notebook display only

# Randomly select a reference point from the unfiltered frame.
gpdPoint = gdf_subway_entrance_geometry.sample(1).geometry.iloc[0]

# MultiPoint reads its input by position (ob[0], ob[1], ...). Integer indexing
# on the filtered series is a label lookup and raises KeyError on the first
# missing label, so hand it the underlying array, which is indexed by position.
pts = MultiPoint(df_yes_entry['geometry'].values)
pt = Point(gpdPoint.x, gpdPoint.y)

# nearest_points returns the closest pair of points, in the same order as its
# arguments; print both.
for o in nearest_points(pt, pts):
    print(o)