I'm trying to run a script that converts addresses (around 1,000) into geographic coordinates, but I get an OVER_QUERY_LIMIT response after reaching the 50th address in my input list.
To avoid the query limit I already added a time.sleep call inside the loop, but the API still reports that I'm over the limit.
Can anyone help? (FYI, I'm running it on my laptop)
import pandas as pd
import requests
import logging
import time
# ---------------------------------------------------------------------------
# Logging: everything from DEBUG upwards is written to the console.
# ---------------------------------------------------------------------------
logger = logging.getLogger("root")
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
logger.addHandler(ch)
logger.setLevel(logging.DEBUG)

# ---------------------------------------------------------------------------
# Run configuration.
# ---------------------------------------------------------------------------
API_KEY = my_key  # using my API key
# Multiplied by 60 before sleeping when the API reports OVER_QUERY_LIMIT,
# i.e. the value is expressed in minutes.
BACKOFF_TIME = 5
output_filename = "result.csv"
input_filename = "input.csv"
address_column_name = "Address"
# Forwarded to the geocoding call as ``return_full_response``.
RETURN_FULL_RESULTS = False

# ---------------------------------------------------------------------------
# Input: load the CSV and pull the address column out as a plain list.
# ---------------------------------------------------------------------------
data = pd.read_csv(input_filename, encoding="utf8")

if not (address_column_name in data.columns):
    raise ValueError("Missing Address column in input data")

addresses = data[address_column_name].tolist()
def get_google_results(address, api_key=my_key, return_full_response=False):
    """Geocode a single address with the Google Geocoding API.

    Args:
        address: Free-form address to look up. It is sent as a query
            parameter, so it is URL-encoded by ``requests``.
        api_key: API key sent as the ``key`` parameter; omitted when ``None``.
        return_full_response: When truthy, the decoded JSON payload is
            stored in the returned dict under ``"response"``.

    Returns:
        dict: ``formatted_address``, ``latitude``, ``longitude``,
        ``accuracy``, ``google_place_id``, ``type`` and ``postcode`` taken
        from the first result (all ``None`` when there are no results), plus
        ``input_string``, ``number_of_results`` and ``status``.

    Raises:
        requests.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    # Let requests build the query string: interpolating the raw address into
    # the URL breaks on spaces, '&', '#', '+' and non-ASCII characters.
    params = {"address": address}
    if api_key is not None:
        params["key"] = api_key

    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(
        "https://maps.googleapis.com/maps/api/geocode/json",
        params=params,
        timeout=30,
    )
    payload = response.json()

    # Tolerate a payload without "results" so the status can still be reported.
    matches = payload.get('results') or []

    if not matches:
        output = {
            "formatted_address": None,
            "latitude": None,
            "longitude": None,
            "accuracy": None,
            "google_place_id": None,
            "type": None,
            "postcode": None,
        }
    else:
        answer = matches[0]
        # Fall back to empty containers so a partial result yields None
        # fields instead of raising AttributeError/TypeError.
        geometry = answer.get('geometry') or {}
        location = geometry.get('location') or {}
        components = answer.get('address_components') or []
        output = {
            "formatted_address": answer.get('formatted_address'),
            "latitude": location.get('lat'),
            "longitude": location.get('lng'),
            "accuracy": geometry.get('location_type'),
            "google_place_id": answer.get("place_id"),
            "type": ",".join(answer.get('types') or []),
            "postcode": ",".join(
                component.get('long_name', '')
                for component in components
                if 'postal_code' in (component.get('types') or [])
            ),
        }

    # Bookkeeping fields that are present whether or not a match was found.
    output['input_string'] = address
    output['number_of_results'] = len(matches)
    output['status'] = payload.get('status')
    if return_full_response:
        output['response'] = payload
    return output
# Geocode every address in turn, retrying after a long pause whenever the API
# reports OVER_QUERY_LIMIT, and periodically dumping partial results to disk.
results = []
for address in addresses:
    geocoded = False
    while not geocoded:
        try:
            geocode_result = get_google_results(
                address, API_KEY, return_full_response=RETURN_FULL_RESULTS
            )
        except Exception as e:
            logger.exception(e)
            logger.error("Major error with {}".format(address))
            logger.error("Skipping!")
            # Leave the retry loop right away: there is no result for this
            # address, so the status checks below must not run (they would
            # read an unbound or stale ``geocode_result``).
            break

        # Fixed pause after every successful request.
        time.sleep(5)

        status = geocode_result['status']
        if status == 'OVER_QUERY_LIMIT':
            logger.info("Hit Query Limit! Backing off for a bit.")
            time.sleep(BACKOFF_TIME * 60)  # BACKOFF_TIME is in minutes
            continue  # retry the same address

        if status != 'OK':
            logger.warning("Error geocoding {}: {}".format(address, status))
        logger.debug("Geocoded: {}: {}".format(address, status))
        results.append(geocode_result)
        geocoded = True

    if not geocoded:
        # Address was skipped; nothing new to report or back up.
        continue

    # Progress report every 100 stored results.
    if len(results) % 100 == 0:
        logger.info("Completed {} of {} address".format(len(results), len(addresses)))

    # Save a backup every 50 stored results so a crash does not lose the run.
    if len(results) % 50 == 0:
        pd.DataFrame(results).to_csv("{}_bak".format(output_filename), encoding='utf8')

logger.info("Finished geocoding all addresses")
pd.DataFrame(results).to_csv(output_filename, encoding='utf8')
The Geocoding API has a queries per second (QPS) limit. You cannot send more than 50 QPS.
This limit is documented at
https://developers.google.com/maps/documentation/geocoding/usage-and-billing#other-usage-limits
While you are no longer limited to a maximum number of requests per day (QPD), the following usage limits are still in place for the Geocoding API:
- 50 requests per second (QPS), calculated as the sum of client-side and server-side queries.
In order to solve your issue I would suggest using the Python client library for Google Maps API Web Services:
https://github.com/googlemaps/google-maps-services-python
This library controls QPS internally, so your requests will be queued properly.
I hope this helps!