I currently run this Python script daily in a Windows VM.
import os
import pandas as pd
import itertools
from pandas_gbq import to_gbq
property_id = "xxxxxx"
starting_date = "yesterday"
ending_date = "yesterday"
try:
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "auth.json"
from google.analytics.data_v1beta import BetaAnalyticsDataClient
from google.analytics.data_v1beta.types import (
DateRange,
Dimension,
Metric,
RunReportRequest,
)
client = BetaAnalyticsDataClient()
request_api = RunReportRequest(
property=f"properties/{property_id}",
dimensions=[
Dimension(name="date"),
Dimension(name="googleAdsCampaignName")
],
metrics=[
Metric(name="sessions"),
Metric(name="advertiserAdCost"),
Metric(name="totalUsers"),
Metric(name="advertiserAdClicks"),
Metric(name="advertiserAdImpressions"),
Metric(name="conversions"),
Metric(name="totalRevenue")
],
date_ranges=[DateRange(start_date=starting_date, end_date=ending_date)],
)
response = client.run_report(request_api)
def query_data(api_response):
dimension_headers = [header.name for header in api_response.dimension_headers]
metric_headers = [header.name for header in api_response.metric_headers]
dimensions = []
metrics = []
for i in range(len(dimension_headers)):
dimensions.append([row.dimension_values[i].value for row in api_response.rows])
dimensions
for i in range(len(metric_headers)):
metrics.append([row.metric_values[i].value for row in api_response.rows])
headers = dimension_headers, metric_headers
headers = list(itertools.chain.from_iterable(headers))
data = dimensions, metrics
data = list(itertools.chain.from_iterable(data))
df = pd.DataFrame(data)
df = df.transpose()
df.columns = headers
return df
campanas_df = pd.DataFrame(query_data(response))
campanas_df['advertiserAdCost'] = pd.to_numeric(campanas_df['advertiserAdCost'])
campanas_df['advertiserAdClicks'] = pd.to_numeric(campanas_df['advertiserAdClicks'])
campanas_df['conversions'] = pd.to_numeric(campanas_df['conversions'])
campanas_df['totalUsers'] = pd.to_numeric(campanas_df['totalUsers'])
campanas_df['advertiserAdImpressions'] = pd.to_numeric(campanas_df['advertiserAdImpressions'])
campanas_df['totalRevenue'] = pd.to_numeric(campanas_df['totalRevenue'])
campanas_df['sessions'] = pd.to_numeric(campanas_df['sessions'])
campanas_df['date'] = pd.to_datetime(campanas_df['date'], format='%Y%m%d')
campanas_df = campanas_df.sort_values(by='date')
campanas_df['date'] = campanas_df['date'].dt.strftime('%Y-%m-%d')
dataset_id = 'xxxxxx'
table_name = 'xxxxxx'
from google.cloud import bigquery
from google.oauth2 import service_account
service_account_key_path = "auth.json"
credentials = service_account.Credentials.from_service_account_file(
service_account_key_path,
scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
# Create a BigQuery client with the authenticated credentials
client = bigquery.Client(credentials=credentials, project=credentials.project_id)
to_gbq(campanas_df, destination_table=f"{dataset_id}.{table_name}", project_id='mcc-ga4', if_exists='replace', credentials= credentials)
print("Funciono bien")
except Exception as e:
print("Fallo la carga")
print(e)
The script is really simple: it just pulls the data from GA4 (Google Analytics 4) and then loads it into a BigQuery table.
My client asked if I can run this script in GCP using Cloud Functions. From what I've read, I know it is possible, but I'm pretty new to GCP and cloud topics.
Can someone help, or point me to a really basic tutorial for this?
I really appreciate it!
If you have a Linux container, you can use Cloud Run Jobs. It's perfectly designed for this kind of script.
Cloud Functions and Cloud Run services are designed to handle HTTP requests, and therefore you must deploy a web server to listen for and process HTTP requests. That's not your use case.
ADC stands for Application Default Credentials: an internal mechanism implemented in all the Google Cloud client libraries to automatically detect credentials according to the runtime environment. Typically, on Google Cloud, every service has a default service account, and you can customize it (for many services). With Cloud Run Jobs, you have a runtime service account, and it will be used by default by the libraries.
Therefore, remove the line that points to the auth.json file, and you will see that it works automatically, without a secret file to manage on your end.