From 86504632871262d7bcd2cf8d05c7737934f471df Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sun, 21 Dec 2025 19:20:56 +0100 Subject: [PATCH 1/2] Revert deletion of reports for adoption, core web vitals, lighthouse, page weight, and technologies --- .../output/reports/cwv_tech_adoption.js | 46 ++++ .../reports/cwv_tech_core_web_vitals.js | 114 +++++++++ .../output/reports/cwv_tech_lighthouse.js | 91 +++++++ .../output/reports/cwv_tech_page_weight.js | 81 ++++++ definitions/output/reports/technologies.js | 236 ++++++++++++++++++ 5 files changed, 568 insertions(+) create mode 100644 definitions/output/reports/cwv_tech_adoption.js create mode 100644 definitions/output/reports/cwv_tech_core_web_vitals.js create mode 100644 definitions/output/reports/cwv_tech_lighthouse.js create mode 100644 definitions/output/reports/cwv_tech_page_weight.js create mode 100644 definitions/output/reports/technologies.js diff --git a/definitions/output/reports/cwv_tech_adoption.js b/definitions/output/reports/cwv_tech_adoption.js new file mode 100644 index 00000000..b5cb4749 --- /dev/null +++ b/definitions/output/reports/cwv_tech_adoption.js @@ -0,0 +1,46 @@ +const pastMonth = constants.fnPastMonth(constants.currentMonth) + +publish('cwv_tech_adoption', { + schema: 'reports', + type: 'incremental', + protected: true, + bigquery: { + partitionBy: 'date', + clusterBy: ['rank', 'geo'] + }, + tags: ['crux_ready'] +}).preOps(ctx => ` +DELETE FROM ${ctx.self()} +WHERE date = '${pastMonth}'; +`).query(ctx => ` +SELECT + date, + app AS technology, + rank, + geo, + STRUCT( + COALESCE(MAX(IF(client = 'desktop', origins, 0))) AS desktop, + COALESCE(MAX(IF(client = 'mobile', origins, 0))) AS mobile + ) AS adoption +FROM ${ctx.ref('core_web_vitals', 'technologies')} +WHERE date = '${pastMonth}' +GROUP BY + date, + app, + rank, + geo +`).postOps(ctx => ` + SELECT + reports.run_export_job( + JSON '''{ + "destination": "firestore", + "config": { +
"database": "tech-report-apis-${constants.environment}", + "collection": "adoption", + "type": "report", + "date": "${pastMonth}" + }, + "query": "SELECT STRING(date) AS date, * EXCEPT(date) FROM ${ctx.self()} WHERE date = '${pastMonth}'" + }''' + ); + `) diff --git a/definitions/output/reports/cwv_tech_core_web_vitals.js b/definitions/output/reports/cwv_tech_core_web_vitals.js new file mode 100644 index 00000000..04336ec0 --- /dev/null +++ b/definitions/output/reports/cwv_tech_core_web_vitals.js @@ -0,0 +1,114 @@ +const pastMonth = constants.fnPastMonth(constants.currentMonth) + +publish('cwv_tech_core_web_vitals', { + schema: 'reports', + type: 'incremental', + protected: true, + bigquery: { + partitionBy: 'date', + clusterBy: ['rank', 'geo'] + }, + tags: ['crux_ready'] +}).preOps(ctx => ` +CREATE TEMPORARY FUNCTION GET_VITALS( + records ARRAY>) +RETURNS ARRAY, + mobile STRUCT< + good_number INT64, + tested INT64 +>>> +LANGUAGE js AS ''' +const METRIC_MAP = { + overall: ['origins_with_good_cwv', 'origins_eligible_for_cwv'], + LCP: ['origins_with_good_lcp', 'origins_with_any_lcp'], + CLS: ['origins_with_good_cls', 'origins_with_any_cls'], + FID: ['origins_with_good_fid', 'origins_with_any_fid'], + FCP: ['origins_with_good_fcp', 'origins_with_any_fcp'], + TTFB: ['origins_with_good_ttfb', 'origins_with_any_ttfb'], + INP: ['origins_with_good_inp', 'origins_with_any_inp'] +}; + +// Initialize the vitals map. +const vitals = Object.fromEntries( + Object.keys(METRIC_MAP).map(metricName => { + return [metricName, {name: metricName}] +})); + +// Populate each client record. 
+records.forEach(record => { + Object.entries(METRIC_MAP).forEach( + ([metricName, [good_number, tested]]) => { + vitals[metricName][record.client] = {good_number: record[good_number], tested: record[tested]} +})}) + +return Object.values(vitals) +'''; + +DELETE FROM ${ctx.self()} +WHERE date = '${pastMonth}'; +`).query(ctx => ` +SELECT + date, + app AS technology, + rank, + geo, + GET_VITALS(ARRAY_AGG(STRUCT( + client, + origins_with_good_fid, + origins_with_good_cls, + origins_with_good_lcp, + origins_with_good_fcp, + origins_with_good_ttfb, + origins_with_good_inp, + origins_with_any_fid, + origins_with_any_cls, + origins_with_any_lcp, + origins_with_any_fcp, + origins_with_any_ttfb, + origins_with_any_inp, + origins_with_good_cwv, + origins_eligible_for_cwv + ))) AS vitals +FROM ${ctx.ref('core_web_vitals', 'technologies')} +WHERE date = '${pastMonth}' +GROUP BY + date, + app, + rank, + geo +`).postOps(ctx => ` + SELECT + reports.run_export_job( + JSON '''{ + "destination": "firestore", + "config": { + "database": "tech-report-apis-${constants.environment}", + "collection": "core_web_vitals", + "type": "report", + "date": "${pastMonth}" + }, + "query": "SELECT STRING(date) AS date, * EXCEPT(date) FROM ${ctx.self()} WHERE date = '${pastMonth}'" + }''' + ); + `) diff --git a/definitions/output/reports/cwv_tech_lighthouse.js b/definitions/output/reports/cwv_tech_lighthouse.js new file mode 100644 index 00000000..775a482d --- /dev/null +++ b/definitions/output/reports/cwv_tech_lighthouse.js @@ -0,0 +1,91 @@ +const pastMonth = constants.fnPastMonth(constants.currentMonth) + +publish('cwv_tech_lighthouse', { + schema: 'reports', + type: 'incremental', + protected: true, + bigquery: { + partitionBy: 'date', + clusterBy: ['rank', 'geo'] + }, + tags: ['crux_ready'] +}).preOps(ctx => ` +CREATE TEMPORARY FUNCTION GET_LIGHTHOUSE( + records ARRAY>) +RETURNS ARRAY, + mobile STRUCT< + median_score FLOAT64 +>>> +LANGUAGE js AS ''' +const METRIC_MAP = { + accessibility: 
'median_lighthouse_score_accessibility', + best_practices: 'median_lighthouse_score_best_practices', + performance: 'median_lighthouse_score_performance', + pwa: 'median_lighthouse_score_pwa', + seo: 'median_lighthouse_score_seo', +} + +// Initialize the Lighthouse map. +const lighthouse = Object.fromEntries(Object.keys(METRIC_MAP).map(metricName => { + return [metricName, {name: metricName}] +})); + +// Populate each client record. +records.forEach(record => { + Object.entries(METRIC_MAP).forEach(([metricName, median_score]) => { + lighthouse[metricName][record.client] = {median_score: record[median_score]} + }); +}); + +return Object.values(lighthouse) +'''; + +DELETE FROM ${ctx.self()} +WHERE date = '${pastMonth}'; +`).query(ctx => ` +SELECT + date, + app AS technology, + rank, + geo, + GET_LIGHTHOUSE(ARRAY_AGG(STRUCT( + client, + median_lighthouse_score_accessibility, + median_lighthouse_score_best_practices, + median_lighthouse_score_performance, + median_lighthouse_score_pwa, + median_lighthouse_score_seo + ))) AS lighthouse +FROM ${ctx.ref('core_web_vitals', 'technologies')} +WHERE date = '${pastMonth}' +GROUP BY + date, + app, + rank, + geo +`).postOps(ctx => ` + SELECT + reports.run_export_job( + JSON '''{ + "destination": "firestore", + "config": { + "database": "tech-report-apis-${constants.environment}", + "collection": "lighthouse", + "type": "report", + "date": "${pastMonth}" + }, + "query": "SELECT STRING(date) AS date, * EXCEPT(date) FROM ${ctx.self()} WHERE date = '${pastMonth}'" + }''' + ); + `) diff --git a/definitions/output/reports/cwv_tech_page_weight.js b/definitions/output/reports/cwv_tech_page_weight.js new file mode 100644 index 00000000..ea9e58e6 --- /dev/null +++ b/definitions/output/reports/cwv_tech_page_weight.js @@ -0,0 +1,81 @@ +const pastMonth = constants.fnPastMonth(constants.currentMonth) + +publish('cwv_tech_page_weight', { + schema: 'reports', + type: 'incremental', + protected: true, + bigquery: { + partitionBy: 'date', + 
clusterBy: ['rank', 'geo'] + }, + tags: ['crux_ready'] +}).preOps(ctx => ` +CREATE TEMPORARY FUNCTION GET_PAGE_WEIGHT( + records ARRAY>) +RETURNS ARRAY, + desktop STRUCT< + median_bytes INT64 +>>> +LANGUAGE js AS ''' +const METRICS = ['total', 'js', 'images'] + +// Initialize the page weight map. +const pageWeight = Object.fromEntries(METRICS.map(metricName => { +return [metricName, {name: metricName}] +})) + +// Populate each client record. +records.forEach(record => { + METRICS.forEach(metricName => { + pageWeight[metricName][record.client] = {median_bytes: record[metricName]} + }) +}) + +return Object.values(pageWeight) +'''; + +DELETE FROM ${ctx.self()} +WHERE date = '${pastMonth}'; +`).query(ctx => ` +SELECT + date, + app AS technology, + rank, + geo, + GET_PAGE_WEIGHT(ARRAY_AGG(STRUCT( + client, + median_bytes_total, + median_bytes_js, + median_bytes_image + ))) AS pageWeight +FROM ${ctx.ref('core_web_vitals', 'technologies')} +WHERE date = '${pastMonth}' +GROUP BY + date, + app, + rank, + geo +`).postOps(ctx => ` + SELECT + reports.run_export_job( + JSON '''{ + "destination": "firestore", + "config": { + "database": "tech-report-apis-${constants.environment}", + "collection": "page_weight", + "type": "report", + "date": "${pastMonth}" + }, + "query": "SELECT STRING(date) AS date, * EXCEPT(date) FROM ${ctx.self()} WHERE date = '${pastMonth}'" + }''' + ); + `) diff --git a/definitions/output/reports/technologies.js b/definitions/output/reports/technologies.js new file mode 100644 index 00000000..d307929a --- /dev/null +++ b/definitions/output/reports/technologies.js @@ -0,0 +1,236 @@ +const pastMonth = constants.fnPastMonth(constants.currentMonth) + +publish('technologies', { + schema: 'core_web_vitals', + type: 'incremental', + protected: true, + bigquery: { + partitionBy: 'date', + clusterBy: ['geo', 'app', 'rank', 'client'], + requirePartitionFilter: true + }, + tags: ['crux_ready'], + dependOnDependencyAssertions: true +}).preOps(ctx => ` +DELETE FROM 
${ctx.self()} +WHERE date = '${pastMonth}'; + +CREATE TEMP FUNCTION IS_GOOD( + good FLOAT64, + needs_improvement FLOAT64, + poor FLOAT64 +) RETURNS BOOL AS ( + SAFE_DIVIDE(good, good + needs_improvement + poor) >= 0.75 +); + +CREATE TEMP FUNCTION IS_NON_ZERO( + good FLOAT64, + needs_improvement FLOAT64, + poor FLOAT64 +) RETURNS BOOL AS ( + good + needs_improvement + poor > 0 +); +`).query(ctx => ` +WITH geo_summary AS ( + SELECT + CAST(REGEXP_REPLACE(CAST(yyyymm AS STRING), r'(\\d{4})(\\d{2})', r'\\1-\\2-01') AS DATE) AS date, + * EXCEPT (country_code), + \`chrome-ux-report\`.experimental.GET_COUNTRY(country_code) AS geo + FROM ${ctx.ref('chrome-ux-report', 'materialized', 'country_summary')} + WHERE + yyyymm = CAST(FORMAT_DATE('%Y%m', '${pastMonth}') AS INT64) AND + device IN ('desktop', 'phone') +UNION ALL + SELECT + * EXCEPT (yyyymmdd, p75_fid_origin, p75_cls_origin, p75_lcp_origin, p75_inp_origin), + 'ALL' AS geo + FROM ${ctx.ref('chrome-ux-report', 'materialized', 'device_summary')} + WHERE + date = '${pastMonth}' AND + device IN ('desktop', 'phone') +), + +crux AS ( + SELECT + geo, + CASE _rank + WHEN 100000000 THEN 'ALL' + WHEN 10000000 THEN 'Top 10M' + WHEN 1000000 THEN 'Top 1M' + WHEN 100000 THEN 'Top 100k' + WHEN 10000 THEN 'Top 10k' + WHEN 1000 THEN 'Top 1k' + END AS rank, + CONCAT(origin, '/') AS root_page, + IF(device = 'desktop', 'desktop', 'mobile') AS client, + + # CWV + IS_NON_ZERO(fast_fid, avg_fid, slow_fid) AS any_fid, + IS_GOOD(fast_fid, avg_fid, slow_fid) AS good_fid, + IS_NON_ZERO(small_cls, medium_cls, large_cls) AS any_cls, + IS_GOOD(small_cls, medium_cls, large_cls) AS good_cls, + IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AS any_lcp, + IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AS good_lcp, + + (IS_GOOD(fast_inp, avg_inp, slow_inp) OR fast_inp IS NULL) AND + IS_GOOD(small_cls, medium_cls, large_cls) AND + IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AS good_cwv_2024, + + (IS_GOOD(fast_fid, avg_fid, slow_fid) OR fast_fid IS NULL) AND + IS_GOOD(small_cls, 
medium_cls, large_cls) AND + IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AS good_cwv_2023, + + # WV + IS_NON_ZERO(fast_fcp, avg_fcp, slow_fcp) AS any_fcp, + IS_GOOD(fast_fcp, avg_fcp, slow_fcp) AS good_fcp, + IS_NON_ZERO(fast_ttfb, avg_ttfb, slow_ttfb) AS any_ttfb, + IS_GOOD(fast_ttfb, avg_ttfb, slow_ttfb) AS good_ttfb, + IS_NON_ZERO(fast_inp, avg_inp, slow_inp) AS any_inp, + IS_GOOD(fast_inp, avg_inp, slow_inp) AS good_inp + FROM geo_summary, + UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS _rank + WHERE rank <= _rank +), + +technologies AS ( + SELECT + technology.technology, + client, + page + FROM ${ctx.ref('crawl', 'pages')}, + UNNEST(technologies) AS technology + WHERE + date = '${pastMonth}' + ${constants.devRankFilter} AND + technology.technology IS NOT NULL AND + technology.technology != '' +UNION ALL + SELECT + 'ALL' AS technology, + client, + page + FROM ${ctx.ref('crawl', 'pages')} + WHERE + date = '${pastMonth}' + ${constants.devRankFilter} +), + +categories AS ( + SELECT + technology.technology, + ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category + FROM ${ctx.ref('crawl', 'pages')}, + UNNEST(technologies) AS technology, + UNNEST(technology.categories) AS category + WHERE + date = '${pastMonth}' + ${constants.devRankFilter} + GROUP BY technology +UNION ALL + SELECT + 'ALL' AS technology, + ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category + FROM ${ctx.ref('crawl', 'pages')}, + UNNEST(technologies) AS technology, + UNNEST(technology.categories) AS category + WHERE + date = '${pastMonth}' AND + client = 'mobile' + ${constants.devRankFilter} +), + +summary_stats AS ( + SELECT + client, + page, + root_page AS root_page, + SAFE.INT64(summary.bytesTotal) AS bytesTotal, + SAFE.INT64(summary.bytesJS) AS bytesJS, + SAFE.INT64(summary.bytesImg) AS bytesImg, + SAFE.FLOAT64(lighthouse.categories.accessibility.score) AS accessibility, + 
SAFE.FLOAT64(lighthouse.categories['best-practices'].score) AS best_practices, + SAFE.FLOAT64(lighthouse.categories.performance.score) AS performance, + SAFE.FLOAT64(lighthouse.categories.pwa.score) AS pwa, + SAFE.FLOAT64(lighthouse.categories.seo.score) AS seo + FROM ${ctx.ref('crawl', 'pages')} + WHERE + date = '${pastMonth}' + ${constants.devRankFilter} +), + +lab_data AS ( + SELECT + client, + root_page, + technology, + ANY_VALUE(category) AS category, + AVG(bytesTotal) AS bytesTotal, + AVG(bytesJS) AS bytesJS, + AVG(bytesImg) AS bytesImg, + AVG(accessibility) AS accessibility, + AVG(best_practices) AS best_practices, + AVG(performance) AS performance, + AVG(pwa) AS pwa, + AVG(seo) AS seo + FROM summary_stats + JOIN technologies + USING (client, page) + JOIN categories + USING (technology) + GROUP BY + client, + root_page, + technology +) + +SELECT + DATE('${pastMonth}') AS date, + geo, + rank, + ANY_VALUE(category) AS category, + technology AS app, + client, + COUNT(0) AS origins, + + # CrUX data + COUNTIF(good_fid) AS origins_with_good_fid, + COUNTIF(good_cls) AS origins_with_good_cls, + COUNTIF(good_lcp) AS origins_with_good_lcp, + COUNTIF(good_fcp) AS origins_with_good_fcp, + COUNTIF(good_ttfb) AS origins_with_good_ttfb, + COUNTIF(good_inp) AS origins_with_good_inp, + COUNTIF(any_fid) AS origins_with_any_fid, + COUNTIF(any_cls) AS origins_with_any_cls, + COUNTIF(any_lcp) AS origins_with_any_lcp, + COUNTIF(any_fcp) AS origins_with_any_fcp, + COUNTIF(any_ttfb) AS origins_with_any_ttfb, + COUNTIF(any_inp) AS origins_with_any_inp, + COUNTIF(good_cwv_2024) AS origins_with_good_cwv, + COUNTIF(good_cwv_2024) AS origins_with_good_cwv_2024, + COUNTIF(good_cwv_2023) AS origins_with_good_cwv_2023, + COUNTIF(any_lcp AND any_cls) AS origins_eligible_for_cwv, + SAFE_DIVIDE(COUNTIF(good_cwv_2024), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv, + SAFE_DIVIDE(COUNTIF(good_cwv_2024), COUNTIF(any_lcp AND any_cls)) AS 
pct_eligible_origins_with_good_cwv_2024, + SAFE_DIVIDE(COUNTIF(good_cwv_2023), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv_2023, + + # Lighthouse data + SAFE_CAST(APPROX_QUANTILES(accessibility, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_accessibility, + SAFE_CAST(APPROX_QUANTILES(best_practices, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_best_practices, + SAFE_CAST(APPROX_QUANTILES(performance, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_performance, + SAFE_CAST(APPROX_QUANTILES(pwa, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_pwa, + SAFE_CAST(APPROX_QUANTILES(seo, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_seo, + + # Page weight stats + SAFE_CAST(APPROX_QUANTILES(bytesTotal, 1000)[OFFSET(500)] AS INT64) AS median_bytes_total, + SAFE_CAST(APPROX_QUANTILES(bytesJS, 1000)[OFFSET(500)] AS INT64) AS median_bytes_js, + SAFE_CAST(APPROX_QUANTILES(bytesImg, 1000)[OFFSET(500)] AS INT64) AS median_bytes_image + +FROM lab_data +INNER JOIN crux +USING (client, root_page) +GROUP BY + app, + geo, + rank, + client +`) From e1f04bde63ad184e8fc7ae2a1cf2aa9ac963865c Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sun, 21 Dec 2025 19:58:37 +0100 Subject: [PATCH 2/2] Remove deprecated CWV report files and add legacy export queries to tech-report-apis database --- .../output/reports/cwv_tech_adoption.js | 46 ---- .../reports/cwv_tech_core_web_vitals.js | 114 --------- .../output/reports/cwv_tech_lighthouse.js | 91 ------- .../output/reports/cwv_tech_page_weight.js | 81 ------ .../output/reports/tech_report_adoption.js | 16 ++ .../reports/tech_report_core_web_vitals.js | 15 ++ .../output/reports/tech_report_lighthouse.js | 15 ++ .../output/reports/tech_report_page_weight.js | 15 ++ definitions/output/reports/technologies.js | 236 ------------------ 9 files changed, 61 insertions(+), 568 deletions(-) delete mode 100644 
definitions/output/reports/cwv_tech_adoption.js delete mode 100644 definitions/output/reports/cwv_tech_core_web_vitals.js delete mode 100644 definitions/output/reports/cwv_tech_lighthouse.js delete mode 100644 definitions/output/reports/cwv_tech_page_weight.js delete mode 100644 definitions/output/reports/technologies.js diff --git a/definitions/output/reports/cwv_tech_adoption.js b/definitions/output/reports/cwv_tech_adoption.js deleted file mode 100644 index b5cb4749..00000000 --- a/definitions/output/reports/cwv_tech_adoption.js +++ /dev/null @@ -1,46 +0,0 @@ -const pastMonth = constants.fnPastMonth(constants.currentMonth) - -publish('cwv_tech_adoption', { - schema: 'reports', - type: 'incremental', - protected: true, - bigquery: { - partitionBy: 'date', - clusterBy: ['rank', 'geo'] - }, - tags: ['crux_ready'] -}).preOps(ctx => ` -DELETE FROM ${ctx.self()} -WHERE date = '${pastMonth}'; -`).query(ctx => ` -SELECT - date, - app AS technology, - rank, - geo, - STRUCT( - COALESCE(MAX(IF(client = 'desktop', origins, 0))) AS desktop, - COALESCE(MAX(IF(client = 'mobile', origins, 0))) AS mobile - ) AS adoption -FROM ${ctx.ref('core_web_vitals', 'technologies')} -WHERE date = '${pastMonth}' -GROUP BY - date, - app, - rank, - geo -`).postOps(ctx => ` - SELECT - reports.run_export_job( - JSON '''{ - "destination": "firestore", - "config": { - "database": "tech-report-apis-${constants.environment}", - "collection": "adoption", - "type": "report", - "date": "${pastMonth}" - }, - "query": "SELECT STRING(date) AS date, * EXCEPT(date) FROM ${ctx.self()} WHERE date = '${pastMonth}'" - }''' - ); - `) diff --git a/definitions/output/reports/cwv_tech_core_web_vitals.js b/definitions/output/reports/cwv_tech_core_web_vitals.js deleted file mode 100644 index 04336ec0..00000000 --- a/definitions/output/reports/cwv_tech_core_web_vitals.js +++ /dev/null @@ -1,114 +0,0 @@ -const pastMonth = constants.fnPastMonth(constants.currentMonth) - -publish('cwv_tech_core_web_vitals', { - schema: 
'reports', - type: 'incremental', - protected: true, - bigquery: { - partitionBy: 'date', - clusterBy: ['rank', 'geo'] - }, - tags: ['crux_ready'] -}).preOps(ctx => ` -CREATE TEMPORARY FUNCTION GET_VITALS( - records ARRAY>) -RETURNS ARRAY, - mobile STRUCT< - good_number INT64, - tested INT64 ->>> -LANGUAGE js AS ''' -const METRIC_MAP = { - overall: ['origins_with_good_cwv', 'origins_eligible_for_cwv'], - LCP: ['origins_with_good_lcp', 'origins_with_any_lcp'], - CLS: ['origins_with_good_cls', 'origins_with_any_cls'], - FID: ['origins_with_good_fid', 'origins_with_any_fid'], - FCP: ['origins_with_good_fcp', 'origins_with_any_fcp'], - TTFB: ['origins_with_good_ttfb', 'origins_with_any_ttfb'], - INP: ['origins_with_good_inp', 'origins_with_any_inp'] -}; - -// Initialize the vitals map. -const vitals = Object.fromEntries( - Object.keys(METRIC_MAP).map(metricName => { - return [metricName, {name: metricName}] -})); - -// Populate each client record. -records.forEach(record => { - Object.entries(METRIC_MAP).forEach( - ([metricName, [good_number, tested]]) => { - vitals[metricName][record.client] = {good_number: record[good_number], tested: record[tested]} -})}) - -return Object.values(vitals) -'''; - -DELETE FROM ${ctx.self()} -WHERE date = '${pastMonth}'; -`).query(ctx => ` -SELECT - date, - app AS technology, - rank, - geo, - GET_VITALS(ARRAY_AGG(STRUCT( - client, - origins_with_good_fid, - origins_with_good_cls, - origins_with_good_lcp, - origins_with_good_fcp, - origins_with_good_ttfb, - origins_with_good_inp, - origins_with_any_fid, - origins_with_any_cls, - origins_with_any_lcp, - origins_with_any_fcp, - origins_with_any_ttfb, - origins_with_any_inp, - origins_with_good_cwv, - origins_eligible_for_cwv - ))) AS vitals -FROM ${ctx.ref('core_web_vitals', 'technologies')} -WHERE date = '${pastMonth}' -GROUP BY - date, - app, - rank, - geo -`).postOps(ctx => ` - SELECT - reports.run_export_job( - JSON '''{ - "destination": "firestore", - "config": { - "database": 
"tech-report-apis-${constants.environment}", - "collection": "core_web_vitals", - "type": "report", - "date": "${pastMonth}" - }, - "query": "SELECT STRING(date) AS date, * EXCEPT(date) FROM ${ctx.self()} WHERE date = '${pastMonth}'" - }''' - ); - `) diff --git a/definitions/output/reports/cwv_tech_lighthouse.js b/definitions/output/reports/cwv_tech_lighthouse.js deleted file mode 100644 index 775a482d..00000000 --- a/definitions/output/reports/cwv_tech_lighthouse.js +++ /dev/null @@ -1,91 +0,0 @@ -const pastMonth = constants.fnPastMonth(constants.currentMonth) - -publish('cwv_tech_lighthouse', { - schema: 'reports', - type: 'incremental', - protected: true, - bigquery: { - partitionBy: 'date', - clusterBy: ['rank', 'geo'] - }, - tags: ['crux_ready'] -}).preOps(ctx => ` -CREATE TEMPORARY FUNCTION GET_LIGHTHOUSE( - records ARRAY>) -RETURNS ARRAY, - mobile STRUCT< - median_score FLOAT64 ->>> -LANGUAGE js AS ''' -const METRIC_MAP = { - accessibility: 'median_lighthouse_score_accessibility', - best_practices: 'median_lighthouse_score_best_practices', - performance: 'median_lighthouse_score_performance', - pwa: 'median_lighthouse_score_pwa', - seo: 'median_lighthouse_score_seo', -} - -// Initialize the Lighthouse map. -const lighthouse = Object.fromEntries(Object.keys(METRIC_MAP).map(metricName => { - return [metricName, {name: metricName}] -})); - -// Populate each client record. 
-records.forEach(record => { - Object.entries(METRIC_MAP).forEach(([metricName, median_score]) => { - lighthouse[metricName][record.client] = {median_score: record[median_score]} - }); -}); - -return Object.values(lighthouse) -'''; - -DELETE FROM ${ctx.self()} -WHERE date = '${pastMonth}'; -`).query(ctx => ` -SELECT - date, - app AS technology, - rank, - geo, - GET_LIGHTHOUSE(ARRAY_AGG(STRUCT( - client, - median_lighthouse_score_accessibility, - median_lighthouse_score_best_practices, - median_lighthouse_score_performance, - median_lighthouse_score_pwa, - median_lighthouse_score_seo - ))) AS lighthouse -FROM ${ctx.ref('core_web_vitals', 'technologies')} -WHERE date = '${pastMonth}' -GROUP BY - date, - app, - rank, - geo -`).postOps(ctx => ` - SELECT - reports.run_export_job( - JSON '''{ - "destination": "firestore", - "config": { - "database": "tech-report-apis-${constants.environment}", - "collection": "lighthouse", - "type": "report", - "date": "${pastMonth}" - }, - "query": "SELECT STRING(date) AS date, * EXCEPT(date) FROM ${ctx.self()} WHERE date = '${pastMonth}'" - }''' - ); - `) diff --git a/definitions/output/reports/cwv_tech_page_weight.js b/definitions/output/reports/cwv_tech_page_weight.js deleted file mode 100644 index ea9e58e6..00000000 --- a/definitions/output/reports/cwv_tech_page_weight.js +++ /dev/null @@ -1,81 +0,0 @@ -const pastMonth = constants.fnPastMonth(constants.currentMonth) - -publish('cwv_tech_page_weight', { - schema: 'reports', - type: 'incremental', - protected: true, - bigquery: { - partitionBy: 'date', - clusterBy: ['rank', 'geo'] - }, - tags: ['crux_ready'] -}).preOps(ctx => ` -CREATE TEMPORARY FUNCTION GET_PAGE_WEIGHT( - records ARRAY>) -RETURNS ARRAY, - desktop STRUCT< - median_bytes INT64 ->>> -LANGUAGE js AS ''' -const METRICS = ['total', 'js', 'images'] - -// Initialize the page weight map. 
-const pageWeight = Object.fromEntries(METRICS.map(metricName => { -return [metricName, {name: metricName}] -})) - -// Populate each client record. -records.forEach(record => { - METRICS.forEach(metricName => { - pageWeight[metricName][record.client] = {median_bytes: record[metricName]} - }) -}) - -return Object.values(pageWeight) -'''; - -DELETE FROM ${ctx.self()} -WHERE date = '${pastMonth}'; -`).query(ctx => ` -SELECT - date, - app AS technology, - rank, - geo, - GET_PAGE_WEIGHT(ARRAY_AGG(STRUCT( - client, - median_bytes_total, - median_bytes_js, - median_bytes_image - ))) AS pageWeight -FROM ${ctx.ref('core_web_vitals', 'technologies')} -WHERE date = '${pastMonth}' -GROUP BY - date, - app, - rank, - geo -`).postOps(ctx => ` - SELECT - reports.run_export_job( - JSON '''{ - "destination": "firestore", - "config": { - "database": "tech-report-apis-${constants.environment}", - "collection": "page_weight", - "type": "report", - "date": "${pastMonth}" - }, - "query": "SELECT STRING(date) AS date, * EXCEPT(date) FROM ${ctx.self()} WHERE date = '${pastMonth}'" - }''' - ); - `) diff --git a/definitions/output/reports/tech_report_adoption.js b/definitions/output/reports/tech_report_adoption.js index 528cb00c..81303acc 100644 --- a/definitions/output/reports/tech_report_adoption.js +++ b/definitions/output/reports/tech_report_adoption.js @@ -45,4 +45,20 @@ SELECT "query": "SELECT STRING(date) AS date, * EXCEPT(date) FROM ${ctx.self()} WHERE date = '${pastMonth}'" }''' ); + +# Legacy export to the tech-report-apis database (version = 'ALL' rows only, version column dropped) +SELECT + reports.run_export_job( + JSON '''{ + "destination": "firestore", + "config": { + "database": "tech-report-apis-${constants.environment}", + "collection": "adoption", + "type": "report", + "date": "${pastMonth}" + }, + "query": "SELECT STRING(date) AS date, * EXCEPT(date, version) FROM ${ctx.self()} WHERE date = '${pastMonth}' AND version = 'ALL'" + }''' + ); + `) diff --git a/definitions/output/reports/tech_report_core_web_vitals.js
b/definitions/output/reports/tech_report_core_web_vitals.js index 8a358536..18d7f10a 100644 --- a/definitions/output/reports/tech_report_core_web_vitals.js +++ b/definitions/output/reports/tech_report_core_web_vitals.js @@ -113,4 +113,19 @@ GROUP BY "query": "SELECT STRING(date) AS date, * EXCEPT(date) FROM ${ctx.self()} WHERE date = '${pastMonth}'" }''' ); + + # Legacy export to the tech-report-apis database (version = 'ALL' rows only, version column dropped) + SELECT + reports.run_export_job( + JSON '''{ + "destination": "firestore", + "config": { + "database": "tech-report-apis-${constants.environment}", + "collection": "core_web_vitals", + "type": "report", + "date": "${pastMonth}" + }, + "query": "SELECT STRING(date) AS date, * EXCEPT(date, version) FROM ${ctx.self()} WHERE date = '${pastMonth}' AND version = 'ALL'" + }''' + ); `) diff --git a/definitions/output/reports/tech_report_lighthouse.js b/definitions/output/reports/tech_report_lighthouse.js index 37c6a89f..2e8f7dcf 100644 --- a/definitions/output/reports/tech_report_lighthouse.js +++ b/definitions/output/reports/tech_report_lighthouse.js @@ -89,4 +89,19 @@ GROUP BY "query": "SELECT STRING(date) AS date, * EXCEPT(date) FROM ${ctx.self()} WHERE date = '${pastMonth}'" }''' ); + + # Legacy export to the tech-report-apis database (version = 'ALL' rows only, version column dropped) + SELECT + reports.run_export_job( + JSON '''{ + "destination": "firestore", + "config": { + "database": "tech-report-apis-${constants.environment}", + "collection": "lighthouse", + "type": "report", + "date": "${pastMonth}" + }, + "query": "SELECT STRING(date) AS date, * EXCEPT(date, version) FROM ${ctx.self()} WHERE date = '${pastMonth}' AND version = 'ALL'" + }''' + ); `) diff --git a/definitions/output/reports/tech_report_page_weight.js b/definitions/output/reports/tech_report_page_weight.js index 3281961c..0baa7d98 100644 --- a/definitions/output/reports/tech_report_page_weight.js +++ b/definitions/output/reports/tech_report_page_weight.js @@ -82,4 +82,19 @@ GROUP BY "query": "SELECT STRING(date) AS date, * EXCEPT(date) FROM
${ctx.self()} WHERE date = '${pastMonth}'" }''' ); + + # Legacy export to the tech-report-apis database (version = 'ALL' rows only, version column dropped) + SELECT + reports.run_export_job( + JSON '''{ + "destination": "firestore", + "config": { + "database": "tech-report-apis-${constants.environment}", + "collection": "page_weight", + "type": "report", + "date": "${pastMonth}" + }, + "query": "SELECT STRING(date) AS date, * EXCEPT(date, version) FROM ${ctx.self()} WHERE date = '${pastMonth}' AND version = 'ALL'" + }''' + ); `) diff --git a/definitions/output/reports/technologies.js b/definitions/output/reports/technologies.js deleted file mode 100644 index d307929a..00000000 --- a/definitions/output/reports/technologies.js +++ /dev/null @@ -1,236 +0,0 @@ -const pastMonth = constants.fnPastMonth(constants.currentMonth) - -publish('technologies', { - schema: 'core_web_vitals', - type: 'incremental', - protected: true, - bigquery: { - partitionBy: 'date', - clusterBy: ['geo', 'app', 'rank', 'client'], - requirePartitionFilter: true - }, - tags: ['crux_ready'], - dependOnDependencyAssertions: true -}).preOps(ctx => ` -DELETE FROM ${ctx.self()} -WHERE date = '${pastMonth}'; - -CREATE TEMP FUNCTION IS_GOOD( - good FLOAT64, - needs_improvement FLOAT64, - poor FLOAT64 -) RETURNS BOOL AS ( - SAFE_DIVIDE(good, good + needs_improvement + poor) >= 0.75 -); - -CREATE TEMP FUNCTION IS_NON_ZERO( - good FLOAT64, - needs_improvement FLOAT64, - poor FLOAT64 -) RETURNS BOOL AS ( - good + needs_improvement + poor > 0 -); -`).query(ctx => ` -WITH geo_summary AS ( - SELECT - CAST(REGEXP_REPLACE(CAST(yyyymm AS STRING), r'(\\d{4})(\\d{2})', r'\\1-\\2-01') AS DATE) AS date, - * EXCEPT (country_code), - \`chrome-ux-report\`.experimental.GET_COUNTRY(country_code) AS geo - FROM ${ctx.ref('chrome-ux-report', 'materialized', 'country_summary')} - WHERE - yyyymm = CAST(FORMAT_DATE('%Y%m', '${pastMonth}') AS INT64) AND - device IN ('desktop', 'phone') -UNION ALL - SELECT - * EXCEPT (yyyymmdd, p75_fid_origin, p75_cls_origin, p75_lcp_origin, p75_inp_origin),
- 'ALL' AS geo - FROM ${ctx.ref('chrome-ux-report', 'materialized', 'device_summary')} - WHERE - date = '${pastMonth}' AND - device IN ('desktop', 'phone') -), - -crux AS ( - SELECT - geo, - CASE _rank - WHEN 100000000 THEN 'ALL' - WHEN 10000000 THEN 'Top 10M' - WHEN 1000000 THEN 'Top 1M' - WHEN 100000 THEN 'Top 100k' - WHEN 10000 THEN 'Top 10k' - WHEN 1000 THEN 'Top 1k' - END AS rank, - CONCAT(origin, '/') AS root_page, - IF(device = 'desktop', 'desktop', 'mobile') AS client, - - # CWV - IS_NON_ZERO(fast_fid, avg_fid, slow_fid) AS any_fid, - IS_GOOD(fast_fid, avg_fid, slow_fid) AS good_fid, - IS_NON_ZERO(small_cls, medium_cls, large_cls) AS any_cls, - IS_GOOD(small_cls, medium_cls, large_cls) AS good_cls, - IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AS any_lcp, - IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AS good_lcp, - - (IS_GOOD(fast_inp, avg_inp, slow_inp) OR fast_inp IS NULL) AND - IS_GOOD(small_cls, medium_cls, large_cls) AND - IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AS good_cwv_2024, - - (IS_GOOD(fast_fid, avg_fid, slow_fid) OR fast_fid IS NULL) AND - IS_GOOD(small_cls, medium_cls, large_cls) AND - IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AS good_cwv_2023, - - # WV - IS_NON_ZERO(fast_fcp, avg_fcp, slow_fcp) AS any_fcp, - IS_GOOD(fast_fcp, avg_fcp, slow_fcp) AS good_fcp, - IS_NON_ZERO(fast_ttfb, avg_ttfb, slow_ttfb) AS any_ttfb, - IS_GOOD(fast_ttfb, avg_ttfb, slow_ttfb) AS good_ttfb, - IS_NON_ZERO(fast_inp, avg_inp, slow_inp) AS any_inp, - IS_GOOD(fast_inp, avg_inp, slow_inp) AS good_inp - FROM geo_summary, - UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS _rank - WHERE rank <= _rank -), - -technologies AS ( - SELECT - technology.technology, - client, - page - FROM ${ctx.ref('crawl', 'pages')}, - UNNEST(technologies) AS technology - WHERE - date = '${pastMonth}' - ${constants.devRankFilter} AND - technology.technology IS NOT NULL AND - technology.technology != '' -UNION ALL - SELECT - 'ALL' AS technology, - client, - page - FROM ${ctx.ref('crawl', 'pages')} - 
WHERE - date = '${pastMonth}' - ${constants.devRankFilter} -), - -categories AS ( - SELECT - technology.technology, - ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category - FROM ${ctx.ref('crawl', 'pages')}, - UNNEST(technologies) AS technology, - UNNEST(technology.categories) AS category - WHERE - date = '${pastMonth}' - ${constants.devRankFilter} - GROUP BY technology -UNION ALL - SELECT - 'ALL' AS technology, - ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category - FROM ${ctx.ref('crawl', 'pages')}, - UNNEST(technologies) AS technology, - UNNEST(technology.categories) AS category - WHERE - date = '${pastMonth}' AND - client = 'mobile' - ${constants.devRankFilter} -), - -summary_stats AS ( - SELECT - client, - page, - root_page AS root_page, - SAFE.INT64(summary.bytesTotal) AS bytesTotal, - SAFE.INT64(summary.bytesJS) AS bytesJS, - SAFE.INT64(summary.bytesImg) AS bytesImg, - SAFE.FLOAT64(lighthouse.categories.accessibility.score) AS accessibility, - SAFE.FLOAT64(lighthouse.categories['best-practices'].score) AS best_practices, - SAFE.FLOAT64(lighthouse.categories.performance.score) AS performance, - SAFE.FLOAT64(lighthouse.categories.pwa.score) AS pwa, - SAFE.FLOAT64(lighthouse.categories.seo.score) AS seo - FROM ${ctx.ref('crawl', 'pages')} - WHERE - date = '${pastMonth}' - ${constants.devRankFilter} -), - -lab_data AS ( - SELECT - client, - root_page, - technology, - ANY_VALUE(category) AS category, - AVG(bytesTotal) AS bytesTotal, - AVG(bytesJS) AS bytesJS, - AVG(bytesImg) AS bytesImg, - AVG(accessibility) AS accessibility, - AVG(best_practices) AS best_practices, - AVG(performance) AS performance, - AVG(pwa) AS pwa, - AVG(seo) AS seo - FROM summary_stats - JOIN technologies - USING (client, page) - JOIN categories - USING (technology) - GROUP BY - client, - root_page, - technology -) - -SELECT - DATE('${pastMonth}') AS date, - geo, - rank, - ANY_VALUE(category) AS category, - 
technology AS app, - client, - COUNT(0) AS origins, - - # CrUX data - COUNTIF(good_fid) AS origins_with_good_fid, - COUNTIF(good_cls) AS origins_with_good_cls, - COUNTIF(good_lcp) AS origins_with_good_lcp, - COUNTIF(good_fcp) AS origins_with_good_fcp, - COUNTIF(good_ttfb) AS origins_with_good_ttfb, - COUNTIF(good_inp) AS origins_with_good_inp, - COUNTIF(any_fid) AS origins_with_any_fid, - COUNTIF(any_cls) AS origins_with_any_cls, - COUNTIF(any_lcp) AS origins_with_any_lcp, - COUNTIF(any_fcp) AS origins_with_any_fcp, - COUNTIF(any_ttfb) AS origins_with_any_ttfb, - COUNTIF(any_inp) AS origins_with_any_inp, - COUNTIF(good_cwv_2024) AS origins_with_good_cwv, - COUNTIF(good_cwv_2024) AS origins_with_good_cwv_2024, - COUNTIF(good_cwv_2023) AS origins_with_good_cwv_2023, - COUNTIF(any_lcp AND any_cls) AS origins_eligible_for_cwv, - SAFE_DIVIDE(COUNTIF(good_cwv_2024), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv, - SAFE_DIVIDE(COUNTIF(good_cwv_2024), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv_2024, - SAFE_DIVIDE(COUNTIF(good_cwv_2023), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv_2023, - - # Lighthouse data - SAFE_CAST(APPROX_QUANTILES(accessibility, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_accessibility, - SAFE_CAST(APPROX_QUANTILES(best_practices, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_best_practices, - SAFE_CAST(APPROX_QUANTILES(performance, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_performance, - SAFE_CAST(APPROX_QUANTILES(pwa, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_pwa, - SAFE_CAST(APPROX_QUANTILES(seo, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_seo, - - # Page weight stats - SAFE_CAST(APPROX_QUANTILES(bytesTotal, 1000)[OFFSET(500)] AS INT64) AS median_bytes_total, - SAFE_CAST(APPROX_QUANTILES(bytesJS, 1000)[OFFSET(500)] AS INT64) AS median_bytes_js, - SAFE_CAST(APPROX_QUANTILES(bytesImg, 1000)[OFFSET(500)] AS 
INT64) AS median_bytes_image - -FROM lab_data -INNER JOIN crux -USING (client, root_page) -GROUP BY - app, - geo, - rank, - client -`)