In Google BigQuery linked to GA4, I am trying to count the number of views and "Exits" via query
I wrote the following code to count the views by page path, however still struggling to count the "Exits", where where “Exits” counts the number of times that the last event recorded for a session occurred on a particular screen, in other words sessions that ended on a page path
SELECT
event_params.value.string_value AS page_path,
COUNT(*) AS page_views
FROM
`MY_ga4_dataset.events_*`,
UNNEST(event_params) AS event_params
WHERE
_table_suffix BETWEEN '20230207' AND '20230207'
AND event_name = 'page_view'
AND event_params.key = 'page_location'
GROUP BY
page_path
ORDER BY
page_views DESC
Could you propose the solution?
This is my Schema:
fullname mode type description
event_date NULLABLE STRING
event_timestamp NULLABLE INTEGER
event_name NULLABLE STRING
event_params REPEATED RECORD
event_previous_timestamp NULLABLE INTEGER
event_value_in_usd NULLABLE FLOAT
event_bundle_sequence_id NULLABLE INTEGER
event_server_timestamp_offset NULLABLE INTEGER
user_id NULLABLE STRING
user_pseudo_id NULLABLE STRING
privacy_info NULLABLE RECORD
user_properties REPEATED RECORD
user_first_touch_timestamp NULLABLE INTEGER
user_ltv NULLABLE RECORD
device NULLABLE RECORD
geo NULLABLE RECORD
app_info NULLABLE RECORD
traffic_source NULLABLE RECORD
stream_id NULLABLE STRING
platform NULLABLE STRING
event_dimensions NULLABLE RECORD
ecommerce NULLABLE RECORD
items REPEATED RECORD
event_params.key NULLABLE STRING
event_params.value NULLABLE RECORD
event_params.value.string_value NULLABLE STRING
event_params.value.int_value NULLABLE INTEGER
event_params.value.float_value NULLABLE FLOAT
event_params.value.double_value NULLABLE FLOAT
privacy_info.analytics_storage NULLABLE STRING
privacy_info.ads_storage NULLABLE STRING
privacy_info.uses_transient_token NULLABLE STRING
user_properties.key NULLABLE STRING
user_properties.value NULLABLE RECORD
user_properties.value.string_value NULLABLE STRING
user_properties.value.int_value NULLABLE INTEGER
user_properties.value.float_value NULLABLE FLOAT
user_properties.value.double_value NULLABLE FLOAT
user_properties.value.set_timestamp_micros NULLABLE INTEGER
user_ltv.revenue NULLABLE FLOAT
user_ltv.currency NULLABLE STRING
device.category NULLABLE STRING
device.mobile_brand_name NULLABLE STRING
device.mobile_model_name NULLABLE STRING
device.mobile_marketing_name NULLABLE STRING
device.mobile_os_hardware_model NULLABLE STRING
device.operating_system NULLABLE STRING
device.operating_system_version NULLABLE STRING
device.vendor_id NULLABLE STRING
device.advertising_id NULLABLE STRING
device.language NULLABLE STRING
device.is_limited_ad_tracking NULLABLE STRING
device.time_zone_offset_seconds NULLABLE INTEGER
device.browser NULLABLE STRING
device.browser_version NULLABLE STRING
device.web_info NULLABLE RECORD
device.web_info.browser NULLABLE STRING
device.web_info.browser_version NULLABLE STRING
device.web_info.hostname NULLABLE STRING
geo.continent NULLABLE STRING
geo.country NULLABLE STRING
geo.region NULLABLE STRING
geo.city NULLABLE STRING
geo.sub_continent NULLABLE STRING
geo.metro NULLABLE STRING
app_info.id NULLABLE STRING
app_info.version NULLABLE STRING
app_info.install_store NULLABLE STRING
app_info.firebase_app_id NULLABLE STRING
app_info.install_source NULLABLE STRING
traffic_source.name NULLABLE STRING
traffic_source.medium NULLABLE STRING
traffic_source.source NULLABLE STRING
event_dimensions.hostname NULLABLE STRING
ecommerce.total_item_quantity NULLABLE INTEGER
ecommerce.purchase_revenue_in_usd NULLABLE FLOAT
ecommerce.purchase_revenue NULLABLE FLOAT
ecommerce.refund_value_in_usd NULLABLE FLOAT
ecommerce.refund_value NULLABLE FLOAT
ecommerce.shipping_value_in_usd NULLABLE FLOAT
ecommerce.shipping_value NULLABLE FLOAT
ecommerce.tax_value_in_usd NULLABLE FLOAT
ecommerce.tax_value NULLABLE FLOAT
ecommerce.unique_items NULLABLE INTEGER
ecommerce.transaction_id NULLABLE STRING
items.item_id NULLABLE STRING
items.item_name NULLABLE STRING
items.item_brand NULLABLE STRING
items.item_variant NULLABLE STRING
items.item_category NULLABLE STRING
items.item_category2 NULLABLE STRING
items.item_category3 NULLABLE STRING
items.item_category4 NULLABLE STRING
items.item_category5 NULLABLE STRING
items.price_in_usd NULLABLE FLOAT
items.price NULLABLE FLOAT
items.quantity NULLABLE INTEGER
items.item_revenue_in_usd NULLABLE FLOAT
items.item_revenue NULLABLE FLOAT
items.item_refund_in_usd NULLABLE FLOAT
items.item_refund NULLABLE FLOAT
items.coupon NULLABLE STRING
items.affiliation NULLABLE STRING
items.location_id NULLABLE STRING
items.item_list_id NULLABLE STRING
items.item_list_name NULLABLE STRING
items.item_list_index NULLABLE STRING
items.promotion_id NULLABLE STRING
items.promotion_name NULLABLE STRING
items.creative_name NULLABLE STRING
items.creative_slot NULLABLE STRING
BigQuery does not count exits so it has to guess based on last event, which is a different methodology to GA4 which means the number of exits will always be slightly different
Here is a code to show the previous and next page visited showing entrances and exits
with events as (
select
user_pseudo_id,
--get seesion ID
concat(
user_pseudo_id,
(
select
value.int_value
from
unnest(event_params)
where
key = 'ga_session_id'
)
) as unique_session_id,
(case when(
select
value.string_value
from
unnest(event_params)
where
key = 'session_engaged' AND event_name = 'page_view'
) ="1" then 1 else 0 end) as engaged_sessions_id,
event_name,
event_timestamp,
regexp_replace(
regexp_replace(
(
select
p.value.string_value
from
unnest(event_params) as p
where
p.key = 'page_location'
),
r'^https?://[^/]+',
''
),
r'[\?].*',
''
) as page_path,
event_date as date
FROM
`bigquery-3.analytics_2.events_*`
where
_table_suffix between '20221129'and '20221130')
select
page_path,
date,
event_timestamp,
-- look for the previous page_path
if(
event_name = 'page_view',
coalesce(
last_value(
if(event_name = 'page_view', page_path, null) ignore nulls
) over(
partition by unique_session_id
order by
event_timestamp asc rows between unbounded preceding
and 1 preceding
),
'(entrance)'
),
null
) as previous_page,
-- look for the next page_path
if(
event_name = 'page_view',
coalesce(
first_value(
if(event_name = 'page_view', page_path, null) ignore nulls
) over(
partition by unique_session_id
order by
event_timestamp asc rows between 1 following
and unbounded following
),
'(exit)'
),
null
) as next_page
from
events
where
event_name = 'page_view'