I'm having a go at setting up my first Postgres database, but am getting quite confused about why I'm having issues with enabling Postgis in one schema and then using it in another.
I'm using Google Cloud SQL with Postgres v12. I'm connecting via the Cloud SQL proxy, and have connected to the server via both pgAdmin4 and a Jupyter Notebook (SQLAlchemy).
My goal is to write a shapefile full of points into a table on the database via geopandas.
Based on this thread I'm trying to install postgis into a separate schema, while creating a couple more different schemas where my data will actually go. I'm opening the shapefile in geopandas, and then using .to_postgis() to try and save the shapefile contents into a new table in one of my data schemas. This always fails.
However, if I try to write the geodataframe into the extensions schema, it works just fine. This makes me think that there's something wrong with the search_path, but no idea what. I'm setting the search path as suggested here. I've tried a range ways to setup the search_path, but I think there's something lacking in my fundamental understanding of what it does and how it works.
Here's the workflow:
SQL:
CREATE SCHEMA extensions;
CREATE SCHEMA geodata;
ALTER DATABASE mydb SET search_path = geodata,extensions,public;
CREATE EXTENSION postgis SCHEMA extensions;
And then in the jupyter notebook:
# imports
from sqlalchemy import create_engine
import geopandas as gpd
# connect to database
dbschema = 'geodata' # note, if I change this to 'extensions' it works
engine = create_engine('postgresql+psycopg2://postgres:<password>@localhost:5432/mydb', connect_args={'options': '-csearch_path={}'.format(dbschema)})
# Open shapefile
gdf = gpd.read_file(r"C:\path\to\shapefile.shp")
# Attempt to write shapefile to postgis
gdf.to_postgis(name = 'geotable', con = engine)
And this is the error I get:
UndefinedObject Traceback (most recent call last)
~\Anaconda3\envs\rasterio\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1275 if not evt_handled:
-> 1276 self.dialect.do_execute(
1277 cursor, statement, parameters, context
~\Anaconda3\envs\rasterio\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
592 def do_execute(self, cursor, statement, parameters, context=None):
--> 593 cursor.execute(statement, parameters)
594
UndefinedObject: type "geometry" does not exist
LINE 7: geometry geometry(POINT,28354)
^
The above exception was the direct cause of the following exception:
ProgrammingError Traceback (most recent call last)
<ipython-input-6-a0dc69a1128e> in <module>
1 from geoalchemy2 import Geometry
----> 2 gdf.to_postgis(name = 'vines', con = engine, if_exists = 'replace')
~\Anaconda3\envs\rasterio\lib\site-packages\geopandas\geodataframe.py in to_postgis(self, name, con, schema, if_exists, index, index_label, chunksize, dtype)
1095 >>> gdf.to_postgis("my_table", engine)
1096 """
-> 1097 geopandas.io.sql._write_postgis(
1098 self, name, con, schema, if_exists, index, index_label, chunksize, dtype
1099 )
~\Anaconda3\envs\rasterio\lib\site-packages\geopandas\io\sql.py in _write_postgis(gdf, name, con, schema, if_exists, index, index_label, chunksize, dtype)
390 with con.begin() as connection:
391
--> 392 gdf.to_sql(
393 name,
394 connection,
~\Anaconda3\envs\rasterio\lib\site-packages\pandas\core\generic.py in to_sql(self, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)
2651 from pandas.io import sql
2652
-> 2653 sql.to_sql(
2654 self,
2655 name,
~\Anaconda3\envs\rasterio\lib\site-packages\pandas\io\sql.py in to_sql(frame, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)
510 )
511
--> 512 pandas_sql.to_sql(
513 frame,
514 name,
~\Anaconda3\envs\rasterio\lib\site-packages\pandas\io\sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype, method)
1314 dtype=dtype,
1315 )
-> 1316 table.create()
1317 table.insert(chunksize, method=method)
1318 if not name.isdigit() and not name.islower():
~\Anaconda3\envs\rasterio\lib\site-packages\pandas\io\sql.py in create(self)
653 raise ValueError(f"'{self.if_exists}' is not valid for if_exists")
654 else:
--> 655 self._execute_create()
656
657 def _execute_insert(self, conn, keys, data_iter):
~\Anaconda3\envs\rasterio\lib\site-packages\pandas\io\sql.py in _execute_create(self)
639 # Inserting table into database, add to MetaData object
640 self.table = self.table.tometadata(self.pd_sql.meta)
--> 641 self.table.create()
642
643 def create(self):
~\Anaconda3\envs\rasterio\lib\site-packages\sqlalchemy\sql\schema.py in create(self, bind, checkfirst)
925 if bind is None:
926 bind = _bind_or_error(self)
--> 927 bind._run_visitor(ddl.SchemaGenerator, self, checkfirst=checkfirst)
928
929 def drop(self, bind=None, checkfirst=False):
~\Anaconda3\envs\rasterio\lib\site-packages\sqlalchemy\engine\base.py in _run_visitor(self, visitorcallable, element, **kwargs)
1654
1655 def _run_visitor(self, visitorcallable, element, **kwargs):
-> 1656 visitorcallable(self.dialect, self, **kwargs).traverse_single(element)
1657
1658
~\Anaconda3\envs\rasterio\lib\site-packages\sqlalchemy\sql\visitors.py in traverse_single(self, obj, **kw)
143 meth = getattr(v, "visit_%s" % obj.__visit_name__, None)
144 if meth:
--> 145 return meth(obj, **kw)
146
147 def iterate(self, obj):
~\Anaconda3\envs\rasterio\lib\site-packages\sqlalchemy\sql\ddl.py in visit_table(self, table, create_ok, include_foreign_key_constraints, _is_metadata_operation)
825 include_foreign_key_constraints = None
826
--> 827 self.connection.execute(
828 # fmt: off
829 CreateTable(
~\Anaconda3\envs\rasterio\lib\site-packages\sqlalchemy\engine\base.py in execute(self, object_, *multiparams, **params)
1009 )
1010 else:
-> 1011 return meth(self, multiparams, params)
1012
1013 def _execute_function(self, func, multiparams, params):
~\Anaconda3\envs\rasterio\lib\site-packages\sqlalchemy\sql\ddl.py in _execute_on_connection(self, connection, multiparams, params)
70
71 def _execute_on_connection(self, connection, multiparams, params):
---> 72 return connection._execute_ddl(self, multiparams, params)
73
74 def execute(self, bind=None, target=None):
~\Anaconda3\envs\rasterio\lib\site-packages\sqlalchemy\engine\base.py in _execute_ddl(self, ddl, multiparams, params)
1066 else None,
1067 )
-> 1068 ret = self._execute_context(
1069 dialect,
1070 dialect.execution_ctx_cls._init_ddl,
~\Anaconda3\envs\rasterio\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1314
1315 except BaseException as e:
-> 1316 self._handle_dbapi_exception(
1317 e, statement, parameters, cursor, context
1318 )
~\Anaconda3\envs\rasterio\lib\site-packages\sqlalchemy\engine\base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1508 util.raise_(newraise, with_traceback=exc_info[2], from_=e)
1509 elif should_wrap:
-> 1510 util.raise_(
1511 sqlalchemy_exception, with_traceback=exc_info[2], from_=e
1512 )
~\Anaconda3\envs\rasterio\lib\site-packages\sqlalchemy\util\compat.py in raise_(***failed resolving arguments***)
180
181 try:
--> 182 raise exception
183 finally:
184 # credit to
~\Anaconda3\envs\rasterio\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1274 break
1275 if not evt_handled:
-> 1276 self.dialect.do_execute(
1277 cursor, statement, parameters, context
1278 )
~\Anaconda3\envs\rasterio\lib\site-packages\sqlalchemy\engine\default.py in do_execute(self, cursor, statement, parameters, context)
591
592 def do_execute(self, cursor, statement, parameters, context=None):
--> 593 cursor.execute(statement, parameters)
594
595 def do_execute_no_params(self, cursor, statement, context=None):
ProgrammingError: (psycopg2.errors.UndefinedObject) type "geometry" does not exist
LINE 7: geometry geometry(POINT,28354)
^
[SQL:
CREATE TABLE geotable (
id BIGINT,
geometry geometry(POINT,28354)
)
]
(Background on this error at: http://sqlalche.me/e/13/f405)
The problem is that you are explicitly setting search_path
when you connect to PostgreSQL. That overrides the value set with ALTER DATABASE
.
You have two options:
Set the search_path
correctly:
dbschema = 'geodata,extensions,public'
Leave the default setting by not setting the options
parameter in create_engine
.
Since you have taken the pain to set up a good default value, I'd go with the second option.
I personally use the public
schema for extensions (after revoking the CREATE
privilege from PUBLIC
), but you might opt for dropping the schema altogether.