I'm sure someone's answered something similar before, but those posts seem to have been lost amongst more popular questions about global variables and loops.
What I'm trying to do is write a loop to step through the Google Analytics API, pulling out data one day at a time. I'm sure there are already some packages that do this but it's somewhat of a learning experience for me.
Where I'm getting stuck is that my loop seems to be progressing the correct number of days; however, the actual request being sent to the GA Reporting API (V4) isn't updating with the global variable, even though a simple `print()` seems to indicate that the variable does update.
Here's my code:
# Reporting API V4
from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
# Import JSON
import json
# Open and parse the JSON config file. The context manager closes the file
# handle as soon as parsing is finished instead of leaving it open for the
# lifetime of the process.
with open("config.json", "r") as jsonConfig:
    configRead = json.load(jsonConfig)

# Define global vars
SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
# Path of the key file handed to ServiceAccountCredentials.from_json_keyfile_name.
KEY_FILE_LOCATION = configRead['keyFileLocation']
VIEW_ID = 12345678 # made up for this exercise
# Day offsets interpolated into the request as '<n>daysAgo'.
startDate = 732
endDate = 732
# Define report #1
# NOTE: the f-strings below are evaluated once, at the moment this literal is
# built, so the date range keeps the values startDate/endDate had right here.
request1 = {
    'reportRequests': [
        {
            'viewId': VIEW_ID,
            'dateRanges': [
                {
                    'startDate': f'{startDate}daysAgo',
                    'endDate': f'{endDate}daysAgo',
                },
            ],
            'metrics': [{'expression': 'ga:sessions'}],
            'dimensions': [{'name': 'ga:date'}],
            'pageSize': 1,
        },
    ],
}
def initialize_analyticsreporting():
    """Create an authorized Analytics Reporting API V4 service object.

    Credentials are loaded from the key file at KEY_FILE_LOCATION for the
    scopes listed in SCOPES.

    Returns:
        The service object produced by build('analyticsreporting', 'v4', ...).
    """
    service_credentials = ServiceAccountCredentials.from_json_keyfile_name(
        KEY_FILE_LOCATION,
        SCOPES,
    )
    # Build the service object and hand it straight back to the caller.
    return build('analyticsreporting', 'v4', credentials=service_credentials)
def get_report(analytics):
    """Query the Analytics Reporting API V4 with the module-level request1.

    NOTE: request1 is sent exactly as it was built; nothing in this function
    rebuilds it, so later changes to startDate/endDate are not reflected in
    the request body.

    Args:
        analytics: An authorized Analytics Reporting API V4 service object.

    Returns:
        The Analytics Reporting API V4 response.
    """
    # Only reading the module-level name, so no `global` declaration is needed.
    batch_request = analytics.reports().batchGet(body=request1)
    return batch_request.execute()
def print_response(response):
    """Parse and print an Analytics Reporting API V4 response.

    For every row of every report, prints one line per dimension
    ("<header>:  <value>") followed by one line per metric value
    ("<metric name>: <value>"). Missing keys at any level are treated as
    empty, so a partial or empty response prints nothing instead of raising.

    Args:
        response: An Analytics Reporting API V4 response (a dict).
    """
    for report in response.get('reports', []):
        columnHeader = report.get('columnHeader', {})
        dimensionHeaders = columnHeader.get('dimensions', [])
        metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])

        for row in report.get('data', {}).get('rows', []):
            dimensions = row.get('dimensions', [])
            dateRangeValues = row.get('metrics', [])

            for header, dimension in zip(dimensionHeaders, dimensions):
                print(header + ': ', dimension)

            for values in dateRangeValues:
                # Default to an empty list: without it a metrics entry lacking
                # 'values' yields None and zip() raises TypeError.
                for metricHeader, value in zip(metricHeaders, values.get('values', [])):
                    print(metricHeader.get('name') + ':', value)
def main():
    """Fetch and print one report per loop pass, then print a closing line.

    Each pass calls get_report, prints the current '<startDate>daysAgo'
    label and the parsed response, then moves both module-level day offsets
    one day closer to today. The loop stops once startDate drops below 725.
    """
    # Both names are reassigned below, so they must be declared global.
    global startDate, endDate

    service = initialize_analyticsreporting()
    while startDate >= 725:
        report = get_report(service)
        print(f'{startDate}daysAgo')
        print_response(report)
        startDate, endDate = startDate - 1, endDate - 1
    print("Done now, cheers")


if __name__ == '__main__':
    main()
In this specific example, the goal is to get `startDate` and `endDate` to update within `request1` on each iteration of the loop.
Output as it currently stands:
732daysAgo
ga:date: 20181123
ga:sessions: 2887
731daysAgo
ga:date: 20181123
ga:sessions: 2887
730daysAgo
ga:date: 20181123
ga:sessions: 2887
729daysAgo
ga:date: 20181123
ga:sessions: 2887
728daysAgo
ga:date: 20181123
ga:sessions: 2887
727daysAgo
ga:date: 20181123
ga:sessions: 2887
726daysAgo
ga:date: 20181123
ga:sessions: 2887
725daysAgo
ga:date: 20181123
ga:sessions: 2887
Done now, cheers
Any idea what might be the issue?
The issue is that you want `request1` to be dynamically generated depending on the values of `startDate` and `endDate`, but you currently only define `request1` once at the beginning of your program. The f-strings inside it are evaluated at that moment and stored as plain strings, so its contents will never change when either of those variables changes later.
To make `request1` dynamic, you can create a function that builds and returns the request instead:
startDate = 732
endDate = 732
VIEW_ID = 12345678


def get_request1(start_days_ago=None, end_days_ago=None):
    """Build a fresh Reporting API V4 request body for one date range.

    The body is rebuilt on every call, so it always reflects the values in
    effect at call time rather than those at module import.

    Args:
        start_days_ago: Day offset for the range start. Defaults to the
            current value of the module-level startDate.
        end_days_ago: Day offset for the range end. Defaults to the current
            value of the module-level endDate.

    Returns:
        A new dict with a single report request for ga:sessions by ga:date.
    """
    # Reading module globals needs no `global` declaration; looking them up
    # here, at call time, is what keeps the request in sync with them.
    if start_days_ago is None:
        start_days_ago = startDate
    if end_days_ago is None:
        end_days_ago = endDate

    return {
        'reportRequests': [
            {
                'viewId': VIEW_ID,
                'dateRanges': [
                    {
                        'startDate': f'{start_days_ago}daysAgo',
                        'endDate': f'{end_days_ago}daysAgo',
                    },
                ],
                'metrics': [{'expression': 'ga:sessions'}],
                'dimensions': [{'name': 'ga:date'}],
                'pageSize': 1,
            },
        ],
    }
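Thus, every time you call `get_request1()`, the returned value will contain the current values of `startDate` and `endDate`. In `get_report`, pass `body=get_request1()` instead of `body=request1` so that each request is built fresh.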