Search code examples
pythonpostgresqlpostgis

Geoparsepy inserting new database gives me an error


I would like to use more detailed db for geoparsing. But I am facing with some problems.

enter image description here

when I run the script:

import soton_corenlppy
import geoparsepy
import logging
import nltk
nltk.download('stopwords')

logger = logging.getLogger("geoparsepy")
logging.basicConfig(level=logging.INFO, format="%(message)s")
logger.info('Logging started')

geospatial_config = geoparsepy.geo_parse_lib.get_geoparse_config(
    lang_codes=['it', 'en'],
    logger=logger
)

location_ids = {}
focus_areas = ['italia_posti']

for focus_area in focus_areas:
    location_ids[focus_area + '_admin'] = [-1, -1]
    location_ids[focus_area + '_poly'] = [-1, -1]
    location_ids[focus_area + '_line'] = [-1, -1]
    location_ids[focus_area + '_point'] = [-1, -1]


# Create a connection with the database
database_handler = soton_corenlppy.PostgresqlHandler.PostgresqlHandler(
    user='user',
    passw='password',
    hostname='hostname',
    port=5432,
    database='db'
)

# Load a set of previously preprocessed locations from database
cached_locations = geoparsepy.geo_preprocess_lib.cache_preprocessed_locations(
    database_handle=database_handler,
    location_ids=location_ids,
    schema='public',
    geospatial_config=geospatial_config
)
logger.info(f"Loaded {len(cached_locations)} position")

# Close connection with the database
database_handler.close()


# Compile an inverted index from a list of arbirary data where one column is a phrase string
indexed_locations = geoparsepy.geo_parse_lib.calc_inverted_index(
    list_data=cached_locations,
    dict_geospatial_config=geospatial_config
)
logger.info(f"Indexed {len(indexed_locations.keys())} phrases")

# Create an index of osmid to row indexes in the cached_locations
osmid_lookup = geoparsepy.geo_parse_lib.calc_osmid_lookup(cached_locations=cached_locations)


listText = [
    u'Io sono di Sciacca, in provincia di agrigento',
    u'Vengo dalla provincia di Agrigento, in Sicilia',
    u'Mi sdraio sul prato del mio vicino',
    u'Pavia e Ravenna sono belle città',
    u'Voglio andare a new york',
    u'Mi trovo a San Giuliano Terme',
    u'Io sono di Sciacca, in provincia di Agrigento',
    u'Martina vive a Nuoro ma vorrebbe andare ad Agrigento',
    u'Agrigento è la provincia che contiene il comune di Sciacca',
    u'Vicino san giuliano terme c\'è un comune che si chiama Asciano',
    u'La città di Sciacca si trova in provincia di Agrigento',
    u'Mi trovo a Sciacca'
]

listTokenSets = []
for text in listText:
    # Tokenize a text entry into unigram tokens text will be cleaned and tokenize
    listToken = soton_corenlppy.common_parse_lib.unigram_tokenize_text(
        text=text,
        dict_common_config=geospatial_config
    )
    listTokenSets.append(listToken)


# Geoparse token sets using a set of cached locations
listMatchSet = geoparsepy.geo_parse_lib.geoparse_token_set(
    token_set=listTokenSets,
    dict_inverted_index=indexed_locations,
    dict_geospatial_config=geospatial_config
)

# Print the matched location
for i in range(len(listMatchSet)):
    logger.info(f"\nText: {listText[i]}")
    listMatch = listMatchSet[i]
    for tupleMatch in listMatch:
        logger.info(str(tupleMatch))

I get the following error:

<ipython-input-17-c67b3eda95f5> in <module>
     34 
     35 # Load a set of previously preprocessed locations from database
---> 36 cached_locations = geoparsepy.geo_preprocess_lib.cache_preprocessed_locations(
     37     database_handle=database_handler,
     38     location_ids=location_ids,

c:\users\katarina\appdata\local\programs\python\python38\lib\site-packages\geoparsepy\geo_preprocess_lib.py in cache_preprocessed_locations(database_handle, location_ids, schema, geospatial_config, timeout_statement, timeout_overall, spatial_filter)
   1647 
   1648         # note: SQL returns UTF8 encoded <str> objects. to get <unicode> use unicode( strText, 'utf8' )
-> 1649         listRows = database_handle.execute_sql_query_batch( listSQL, timeout_statement, timeout_overall )
   1650 
   1651         listResult = []

c:\users\katarina\appdata\local\programs\python\python38\lib\site-packages\soton_corenlppy\PostgresqlHandler.py in execute_sql_query_batch(self, query_list, timeout_statement, timeout_overall)
    346 
    347                 # failure
--> 348                 raise Exception( 'SQL query failed (timeout retrying) : ' + strLastError + ' : ' + tupleStatement[0] )
    349 
    350         def execute_sql_statement( self, statement_list, timeout_statement = 60, timeout_overall = 180 ) :

Exception: SQL query failed (timeout retrying) : ['42883'] UndefinedFunction("function st_astext(geometry[]) does not exist\nLINE 1: ...osti_line_',loc_id),name,osm_id_set,admin_regions,ST_AsText(...\n                                                             ^\nHINT:  No function matches the given name and argument types. You might need to add explicit type casts.\n") : SELECT concat('italia_posti_line_',loc_id),name,osm_id_set,admin_regions,ST_AsText(geom),hstore_to_matrix(tags) FROM public.italia_posti_line

I made a empty line and polygon tables, as they are present in the original documentation. I have two columns more in my point table and admin_regions is empty column, can it be a problem? ​


Solution

  • The database is complaining that you're calling the function ST_AsText using an array of geometries ST_AsText(geometry[]) and it should be a single geometry ST_AsText(geometry)

    Example:

    SELECT ST_AsText('0101000000000000000000F03F0000000000000040');
    
     st_astext  
    ------------
     POINT(1 2)