Search code examples
sqlgoogle-bigquerygoogle-analyticsgoogle-analytics-4

How to count page Views and Exits in Google Big Query (linked to GA4)?


In Google BigQuery linked to GA4, I am trying to count the number of views and "Exits" via query

enter image description here

I wrote the following code to count the views by page path, however still struggling to count the "Exits", where where “Exits” counts the number of times that the last event recorded for a session occurred on a particular screen, in other words sessions that ended on a page path

  SELECT
  event_params.value.string_value AS page_path,
  COUNT(*) AS page_views
FROM
  `MY_ga4_dataset.events_*`,
  UNNEST(event_params) AS event_params
WHERE
  _table_suffix BETWEEN '20230207' AND '20230207'
  AND event_name = 'page_view'
  AND event_params.key = 'page_location'
GROUP BY
  page_path
ORDER BY
  page_views DESC

Could you propose the solution?

This is my Schema:

fullname    mode    type    description
event_date  NULLABLE    STRING  
event_timestamp NULLABLE    INTEGER 
event_name  NULLABLE    STRING  
event_params    REPEATED    RECORD  
event_previous_timestamp    NULLABLE    INTEGER 
event_value_in_usd  NULLABLE    FLOAT   
event_bundle_sequence_id    NULLABLE    INTEGER 
event_server_timestamp_offset   NULLABLE    INTEGER 
user_id NULLABLE    STRING  
user_pseudo_id  NULLABLE    STRING  
privacy_info    NULLABLE    RECORD  
user_properties REPEATED    RECORD  
user_first_touch_timestamp  NULLABLE    INTEGER 
user_ltv    NULLABLE    RECORD  
device  NULLABLE    RECORD  
geo NULLABLE    RECORD  
app_info    NULLABLE    RECORD  
traffic_source  NULLABLE    RECORD  
stream_id   NULLABLE    STRING  
platform    NULLABLE    STRING  
event_dimensions    NULLABLE    RECORD  
ecommerce   NULLABLE    RECORD  
items   REPEATED    RECORD  
event_params.key    NULLABLE    STRING  
event_params.value  NULLABLE    RECORD  
event_params.value.string_value NULLABLE    STRING  
event_params.value.int_value    NULLABLE    INTEGER 
event_params.value.float_value  NULLABLE    FLOAT   
event_params.value.double_value NULLABLE    FLOAT   
privacy_info.analytics_storage  NULLABLE    STRING  
privacy_info.ads_storage    NULLABLE    STRING  
privacy_info.uses_transient_token   NULLABLE    STRING  
user_properties.key NULLABLE    STRING  
user_properties.value   NULLABLE    RECORD  
user_properties.value.string_value  NULLABLE    STRING  
user_properties.value.int_value NULLABLE    INTEGER 
user_properties.value.float_value   NULLABLE    FLOAT   
user_properties.value.double_value  NULLABLE    FLOAT   
user_properties.value.set_timestamp_micros  NULLABLE    INTEGER 
user_ltv.revenue    NULLABLE    FLOAT   
user_ltv.currency   NULLABLE    STRING  
device.category NULLABLE    STRING  
device.mobile_brand_name    NULLABLE    STRING  
device.mobile_model_name    NULLABLE    STRING  
device.mobile_marketing_name    NULLABLE    STRING  
device.mobile_os_hardware_model NULLABLE    STRING  
device.operating_system NULLABLE    STRING  
device.operating_system_version NULLABLE    STRING  
device.vendor_id    NULLABLE    STRING  
device.advertising_id   NULLABLE    STRING  
device.language NULLABLE    STRING  
device.is_limited_ad_tracking   NULLABLE    STRING  
device.time_zone_offset_seconds NULLABLE    INTEGER 
device.browser  NULLABLE    STRING  
device.browser_version  NULLABLE    STRING  
device.web_info NULLABLE    RECORD  
device.web_info.browser NULLABLE    STRING  
device.web_info.browser_version NULLABLE    STRING  
device.web_info.hostname    NULLABLE    STRING  
geo.continent   NULLABLE    STRING  
geo.country NULLABLE    STRING  
geo.region  NULLABLE    STRING  
geo.city    NULLABLE    STRING  
geo.sub_continent   NULLABLE    STRING  
geo.metro   NULLABLE    STRING  
app_info.id NULLABLE    STRING  
app_info.version    NULLABLE    STRING  
app_info.install_store  NULLABLE    STRING  
app_info.firebase_app_id    NULLABLE    STRING  
app_info.install_source NULLABLE    STRING  
traffic_source.name NULLABLE    STRING  
traffic_source.medium   NULLABLE    STRING  
traffic_source.source   NULLABLE    STRING  
event_dimensions.hostname   NULLABLE    STRING  
ecommerce.total_item_quantity   NULLABLE    INTEGER 
ecommerce.purchase_revenue_in_usd   NULLABLE    FLOAT   
ecommerce.purchase_revenue  NULLABLE    FLOAT   
ecommerce.refund_value_in_usd   NULLABLE    FLOAT   
ecommerce.refund_value  NULLABLE    FLOAT   
ecommerce.shipping_value_in_usd NULLABLE    FLOAT   
ecommerce.shipping_value    NULLABLE    FLOAT   
ecommerce.tax_value_in_usd  NULLABLE    FLOAT   
ecommerce.tax_value NULLABLE    FLOAT   
ecommerce.unique_items  NULLABLE    INTEGER 
ecommerce.transaction_id    NULLABLE    STRING  
items.item_id   NULLABLE    STRING  
items.item_name NULLABLE    STRING  
items.item_brand    NULLABLE    STRING  
items.item_variant  NULLABLE    STRING  
items.item_category NULLABLE    STRING  
items.item_category2    NULLABLE    STRING  
items.item_category3    NULLABLE    STRING  
items.item_category4    NULLABLE    STRING  
items.item_category5    NULLABLE    STRING  
items.price_in_usd  NULLABLE    FLOAT   
items.price NULLABLE    FLOAT   
items.quantity  NULLABLE    INTEGER 
items.item_revenue_in_usd   NULLABLE    FLOAT   
items.item_revenue  NULLABLE    FLOAT   
items.item_refund_in_usd    NULLABLE    FLOAT   
items.item_refund   NULLABLE    FLOAT   
items.coupon    NULLABLE    STRING  
items.affiliation   NULLABLE    STRING  
items.location_id   NULLABLE    STRING  
items.item_list_id  NULLABLE    STRING  
items.item_list_name    NULLABLE    STRING  
items.item_list_index   NULLABLE    STRING  
items.promotion_id  NULLABLE    STRING  
items.promotion_name    NULLABLE    STRING  
items.creative_name NULLABLE    STRING  
items.creative_slot NULLABLE    STRING  

Solution

  • BigQuery does not count exits so it has to guess based on last event, which is a different methodology to GA4 which means the number of exits will always be slightly different

    Here is a code to show the previous and next page visited showing entrances and exits

        with events as (
          select
         user_pseudo_id,
    --get seesion ID
         concat(
            user_pseudo_id,
             (
            select
              value.int_value
            from
              unnest(event_params)
            where
              key = 'ga_session_id'
          )
        ) as unique_session_id,
    
          (case when(
            select
              value.string_value
            from
              unnest(event_params)
            where
              key = 'session_engaged' AND event_name = 'page_view'
          ) ="1" then 1 else 0 end) as engaged_sessions_id,
        event_name,
        event_timestamp,
        regexp_replace(
      regexp_replace(
        (
          select
            p.value.string_value
          from
            unnest(event_params) as p
          where
            p.key = 'page_location'
        ),
        r'^https?://[^/]+',
        ''
      ),
      r'[\?].*',
      ''
    ) as page_path,
    event_date as date
    
    FROM 
    `bigquery-3.analytics_2.events_*`
    
    where
     _table_suffix between '20221129'and '20221130')
    
    select
    
      page_path,
      date,
      event_timestamp,
    
      -- look for the previous page_path
      if(
        event_name = 'page_view',
        coalesce(
          last_value(
            if(event_name = 'page_view', page_path, null) ignore nulls
          ) over(
            partition by unique_session_id
            order by
              event_timestamp asc rows between unbounded preceding
              and 1 preceding
          ),
          '(entrance)'
        ),
        null
      ) as previous_page,
      -- look for the next page_path
      if(
        event_name = 'page_view',
        coalesce(
          first_value(
            if(event_name = 'page_view', page_path, null) ignore nulls
          ) over(
            partition by unique_session_id
            order by
              event_timestamp asc rows between 1 following
              and unbounded following
          ),
          '(exit)'
        ),
        null
      ) as next_page
    from
      events
      where
     event_name = 'page_view'