I keep recreating queries from client to client, case to case. Let’s see if I can just use this blog as a future resource.
Query using SAFE_OFFSET
SAFE_OFFSET is great. I can’t remember the purpose of this query in particular (just posting here before deleting from my files), but I do remember SAFE_OFFSET.
with v as (
SELECT
--app_info.id
extract(date FROM timestamp_micros(event_timestamp) AT TIME ZONE "Japan" ) as date
, platform
, event_name
,if (ARRAY(SELECT value.string_value FROM UNNEST(event_params) WHERE key="promotion_name") [SAFE_OFFSET(0)] = "news",
(SELECT item_name FROM UNNEST(items)),
ARRAY(SELECT value.string_value FROM UNNEST(event_params) WHERE key="promotion_name") [SAFE_OFFSET(0)]) as promotion_name
, ARRAY(SELECT value.string_value FROM UNNEST(event_params) WHERE key="creative_name") [SAFE_OFFSET(0)] AS creative_name
, ARRAY(SELECT value.string_value FROM UNNEST(event_params) WHERE key="location_id") [SAFE_OFFSET(0)] AS location_id
, count (event_name) as view_count
FROM `xxx-770.analytics_xxx159.events_*`
where event_name = "view_promotion"
-- and _TABLE_SUFFIX >= FORMAT_DATE("%Y%m%d" ,CURRENT_DATE()-1 )
and _TABLE_SUFFIX >= FORMAT_DATE("%Y%m%d" ,date_sub(CURRENT_DATE-1 ,INTERVAL 2 DAY ) )
and platform = "IOS"
and ARRAY(SELECT value.string_value FROM UNNEST(event_params) WHERE key="location_id") [SAFE_OFFSET(0)] = "home"
group by 1,2,3,4,5,6
--order by view_count desc
UNION ALL
SELECT
--app_info.id
extract(date FROM timestamp_micros(event_timestamp) AT TIME ZONE "Japan" ) as date
, platform
, event_name
, ARRAY(SELECT value.string_value FROM UNNEST(event_params) WHERE key="promotion_name") [SAFE_OFFSET(0)] as promotion_name
, ARRAY(SELECT value.string_value FROM UNNEST(event_params) WHERE key="creative_name") [SAFE_OFFSET(0)] AS creative_name
, ARRAY(SELECT value.string_value FROM UNNEST(event_params) WHERE key="location_id") [SAFE_OFFSET(0)] AS location_id
, count (event_name) as view_count
FROM `xxx-770.analytics_xxx59.events_*`
where event_name = "view_promotion"
-- and _TABLE_SUFFIX >= FORMAT_DATE("%Y%m%d" ,CURRENT_DATE()-1 )
and _TABLE_SUFFIX >= FORMAT_DATE("%Y%m%d" ,date_sub(CURRENT_DATE-1 ,INTERVAL 2 DAY ) )
and platform = "ANDROID"
and ARRAY(SELECT value.string_value FROM UNNEST(event_params) WHERE key="location_id") [SAFE_OFFSET(0)] = "home"
group by 1,2,3,4,5,6
--order by view_count desc
)
, s as(
SELECT
--app_info.id
extract(date FROM timestamp_micros(event_timestamp) AT TIME ZONE "Japan" ) as date
, platform
, event_name
,if (ARRAY(SELECT value.string_value FROM UNNEST(event_params) WHERE key="promotion_name") [SAFE_OFFSET(0)] = "news",
(SELECT item_name FROM UNNEST(items)),
ARRAY(SELECT value.string_value FROM UNNEST(event_params) WHERE key="promotion_name") [SAFE_OFFSET(0)]) as promotion_name
, ARRAY(SELECT value.string_value FROM UNNEST(event_params) WHERE key="creative_name") [SAFE_OFFSET(0)] AS creative_name
, ARRAY(SELECT value.string_value FROM UNNEST(event_params) WHERE key="location_id") [SAFE_OFFSET(0)] AS location_id
, count (event_name) as select_count
FROM `xxxx-770.analytics_xxxx59.events_*`
where event_name = "select_promotion"
-- and _TABLE_SUFFIX >= FORMAT_DATE("%Y%m%d" ,CURRENT_DATE()-1 )
and _TABLE_SUFFIX >= FORMAT_DATE("%Y%m%d" ,date_sub(CURRENT_DATE-1 ,INTERVAL 2 DAY ) )
and platform = "IOS"
and ARRAY(SELECT value.string_value FROM UNNEST(event_params) WHERE key="location_id") [SAFE_OFFSET(0)] = "home"
group by 1,2,3,4,5,6
--order by select_count desc
UNION ALL
SELECT
--app_info.id
extract(date FROM timestamp_micros(event_timestamp) AT TIME ZONE "Japan" ) as date
, platform
, event_name
, ARRAY(SELECT value.string_value FROM UNNEST(event_params) WHERE key="promotion_name") [SAFE_OFFSET(0)] as promotion_name
, ARRAY(SELECT value.string_value FROM UNNEST(event_params) WHERE key="creative_name") [SAFE_OFFSET(0)] AS creative_name
, ARRAY(SELECT value.string_value FROM UNNEST(event_params) WHERE key="location_id") [SAFE_OFFSET(0)] AS location_id
, count (event_name) as select_count
FROM `xxxx.analytics_xxxx.events_*`
where event_name = "select_promotion"
-- and _TABLE_SUFFIX >= FORMAT_DATE("%Y%m%d" ,CURRENT_DATE()-1 )
and _TABLE_SUFFIX >= FORMAT_DATE("%Y%m%d" ,date_sub(CURRENT_DATE-1 ,INTERVAL 2 DAY ) )
and platform = "ANDROID"
and ARRAY(SELECT value.string_value FROM UNNEST(event_params) WHERE key="location_id") [SAFE_OFFSET(0)] = "home"
group by 1,2,3,4,5,6
--order by select_count desc
)
SELECT
v.date
, v.platform
, v.promotion_name
, v.creative_name
, v.location_id
, sum(view_count) as view_count
, sum(select_count) as select_count
, sum(purchase_count) as purchase_count
FROM v
FULL OUTER JOIN s ON CONCAT(v.date, v.platform, v.promotion_name, v.creative_name) =
CONCAT(s.date, s.platform, s.promotion_name, s.creative_name)
group by 1,2,3,4,5
order by view_count desc
SELECT from UNNEST
Very useful for when you just want a particular value from the nested values. The below is from BigQuery, Google Analytics App + Web (app) exports.
SELECT device.operating_system
, event_name
, (SELECT value.string_value from unnest (event_params) WHERE key = "service_method") as service_method
, count(event_name) as count
FROM table
WHERE event_name = "purchase"
Group by 1,2,3
Order by count desc
LIMIT 1000
Session data from visitors that, at some point, came from CPC
This query gets data from visitors that include a visit from CPC. I’m using this to troubleshoot broken sessions where the user starts with CPC and changes to a different source/medium creating an immediate new session.
SELECT
fullvisitorid,
CONCAT(CAST(fullvisitorId AS STRING),CAST(visitID AS STRING)) as uniqueVisitID,
timestamp_micros(visitStartTime),
hits.time,
hits.hitNumber,
trafficSource.medium,
trafficSource.source,
hits.type ,
hits.eventInfo.eventCategory ,
hits.eventInfo.eventAction
FROM
dataset1
,
UNNEST (hits) as hits
WHERE fullvisitorID IN (SELECT fullvisitorID
FROM dataset1
WHERE trafficSource.medium LIKE "cpc"
GROUP BY 1)
AND device.deviceCategory = "mobile"
ORDER BY fullvisitorID, uniqueVisitID, hits.hitNumber
LIMIT
1000
Getting the client’s UID from custom dimensions and the URL.
The background is a long story, but in essence I’m getting the value for the custom dimensions index 1 and 2 as well as searching for the “uid” parameter and extracting from the pagePath. The query is not ideal as it duplicates the hits to account for each custom dimension, but it answers the question.
SELECT
date,
fullVisitorId,
hits.hitNumber,
CONCAT(CAST(fullvisitorId AS STRING),CAST(visitID AS STRING)) AS uniqueVisitID,
CASE WHEN customDimensions.index = 1 THEN customDimensions.value END AS CD_1,
CASE WHEN customDimensions.index = 2 THEN customDimensions.value END AS CD_2,
MAX(REGEXP_EXTRACT(hits.page.pagePath, r"uid=([a-zA-Z0-9-.]+$)")) AS ID_in_URL,
hits.type,
hits.page.pagePath
FROM
dataset
,
UNNEST (customDimensions) AS customDimensions,
UNNEST (hits) AS hits
GROUP BY
date,
fullVisitorId,
hits.hitNumber,
uniqueVisitID,
CD_1,
CD_2,
hits.type,
hits.page.pagePath
ORDER BY
date,
uniqueVisitID,
hits.hitNumber
LIMIT
1000
Best way for UA, Custom Dimensions, I’ve used so far
Dividing the tables seems so simple… Likely applicable to GA4 as well.
SELECT extract (week from timestamp_seconds(withVisit.visitStartTime)) as week
, withVisit.device.deviceCategory
, withCD.index
--, withCD.value
--, eventInfo.eventCategory
--, eventInfo.eventAction
--, eventInfo.eventLabel
--, eventInfo.eventValue
, withHit.isInteraction
, count(withHit.isInteraction) as eventCount
FROM PROJECT.106561692.ga_sessions_2021071* withVisit
, unnest(withVisit.hits) as withHit
, unnest(withHit.customDimensions) as withCD
-- where device.deviceCategory = "mobile"
-- and eventInfo.eventCategory = "Product Page"
-- and hits.isInteraction = true
group by 1, 2, 3, 4--, 5--, 6,7
order by week, isInteraction, eventCount desc
LIMIT 10000
Leave a Reply