Skip to content

Commit

Permalink
PWA 2021 queries (#2272)
Browse files Browse the repository at this point in the history
* Initial conversion of queries to custom metrics

* Better Lighthouse queries

* Remove comma

* Add Service Worker over time for top 1k sites query

* Change adoption ranking to include all ranking categories

* Formatting

* Linting fixes

* Spacing improvements

* Missing queries

* Query updates

* Missed some

* Remove s

* Space

* Misc updates

* More Tweaks

* Spacing fixes

* Fix assestlinks query

* Missing DESC

* Group rankings properly

* Linting fixes

* Remove unnecessary extra join

* Spacing

* Additional queries

* Linting fixes

* Update Events to look at onEvents

* Better array merge

* Misc fixes

* Better JS

* Filtering false positives for install query

* Fixes to events query

* Adding comment for install query

* Expand on comment

* Review feedback

* Linting fixes

* Correct comment

* Apply suggestions from code review

Co-authored-by: David Fox <[email protected]>

* Review feedback

* Review feedback

* Change to COUNT(DISTINCT url)

Co-authored-by: demianrenzulli <[email protected]>
Co-authored-by: David Fox <[email protected]>
  • Loading branch information
3 people authored Sep 20, 2021
1 parent 82c9eb3 commit fc694b3
Show file tree
Hide file tree
Showing 26 changed files with 1,406 additions and 0 deletions.
61 changes: 61 additions & 0 deletions sql/2021/pwa/assetlink_usage.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#standardSQL
# assetlink usage

SELECT
'PWA sites' AS type,
_TABLE_SUFFIX AS client,
COUNT(0) AS freq,
total,
COUNT(0) / total AS pct
FROM
`httparchive.pages.2021_07_01_*`
JOIN
(
SELECT
_TABLE_SUFFIX,
COUNT(0) AS total
FROM
`httparchive.pages.2021_07_01_*`
WHERE
JSON_EXTRACT(payload, '$._pwa.manifests') != "[]" AND
JSON_EXTRACT(payload, '$._pwa.serviceWorkerHeuristic') = "true"
GROUP BY
_TABLE_SUFFIX
)
USING (_TABLE_SUFFIX)
WHERE
JSON_EXTRACT(payload, '$._pwa.serviceWorkerHeuristic') = "true" AND
JSON_EXTRACT(payload, '$._pwa.manifests') != "[]" AND
JSON_EXTRACT_SCALAR(JSON_VALUE(payload, "$._well-known"), "$['/.well-known/assetlinks.json'].found") = 'true'
GROUP BY
client,
total
UNION ALL
SELECT
'All sites' AS type,
_TABLE_SUFFIX AS client,
COUNT(0) AS freq,
total,
COUNT(0) / total AS pct
FROM
`httparchive.pages.2021_07_01_*`
JOIN
(
SELECT
_TABLE_SUFFIX,
COUNT(0) AS total
FROM
`httparchive.pages.2021_07_01_*`
GROUP BY
_TABLE_SUFFIX
)
USING (_TABLE_SUFFIX)
WHERE
JSON_EXTRACT_SCALAR(JSON_VALUE(payload, "$._well-known"), "$['/.well-known/assetlinks.json'].found") = 'true'
GROUP BY
client,
total
ORDER BY
type DESC,
freq / total DESC,
client
69 changes: 69 additions & 0 deletions sql/2021/pwa/install_events.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#standardSQL
# SW install events

CREATE TEMPORARY FUNCTION getInstallEvents(payload STRING)
RETURNS ARRAY<STRING> LANGUAGE js AS '''
try {
var payloadJSON = JSON.parse(payload);
/* YouTube iFrames account for a lot of these, so we exclude them */
/* Cannot use filter as it is a complex object and not a straight array */
function filterYouTube(info) {
var objectKeys = Object.keys(info);
objectKeys = objectKeys.trim().split(',');
for(var i = 0; i < objectKeys.length; i++) {
if(objectKeys[i].toLowerCase().includes('youtube')) {
delete info[objectKeys[i]];
}
}
return info;
}
var windowEventListenersInfo = Object.values(filterYouTube(payloadJSON.windowEventListenersInfo)).flat();
var windowPropertiesInfo = Object.values(filterYouTube(payloadJSON.windowPropertiesInfo)).flat()
return [...new Set([...windowEventListenersInfo ,...windowPropertiesInfo])];
} catch (e) {
return [];
}
''';

SELECT
_TABLE_SUFFIX AS client,
install_event,
COUNT(DISTINCT url) AS freq,
total,
COUNT(DISTINCT url) / total AS pct
FROM
`httparchive.pages.2021_07_01_*`,
UNNEST(getInstallEvents(JSON_EXTRACT(payload, '$._pwa'))) AS install_event
JOIN
(
SELECT
_TABLE_SUFFIX,
COUNT(0) AS total
FROM
`httparchive.pages.2021_07_01_*`
WHERE
-- This condition filters out tests that might have broken when running the 'pwa' metric
-- as even pages without any pwa capabilities will have a _pwa object with empty fields
JSON_EXTRACT(payload, '$._pwa') != "[]"
GROUP BY
_TABLE_SUFFIX
)
USING (_TABLE_SUFFIX)
WHERE
(
JSON_EXTRACT(payload, '$._pwa.windowEventListenersInfo') != "[]" OR
JSON_EXTRACT(payload, '$._pwa.windowPropertiesInfo') != "[]"
) AND
install_event != '' AND
install_event != '[]'
GROUP BY
client,
total,
install_event
ORDER BY
freq / total DESC,
client
67 changes: 67 additions & 0 deletions sql/2021/pwa/lighthouse_pwa_audits.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#standardSQL
# Get summary of all lighthouse PWA audits, for both PWA pages and all pages
# Note scores, weightings, groups and descriptions may be off in mixed months when new versions of Lighthouse roles out

CREATE TEMPORARY FUNCTION getAudits(auditRefs STRING, audits STRING)
RETURNS ARRAY<STRUCT<id STRING, weight INT64, audit_group STRING, title STRING, description STRING, score INT64>> LANGUAGE js AS '''
var auditrefs = JSON.parse(auditRefs);
var audits = JSON.parse(audits);
var results = [];
for (auditref of auditrefs) {
results.push({
id: auditref.id,
weight: auditref.weight,
audit_group: auditref.group,
description: audits[auditref.id].description,
score: audits[auditref.id].score
});
}
return results;
''';

SELECT
'PWA Sites' AS type,
audits.id AS id,
COUNTIF(audits.score > 0) AS num_pages,
COUNT(0) AS total,
COUNTIF(audits.score IS NOT NULL) AS total_applicable,
SAFE_DIVIDE(COUNTIF(audits.score > 0), COUNTIF(audits.score IS NOT NULL)) AS pct,
APPROX_QUANTILES(audits.weight, 100)[OFFSET(50)] AS median_weight,
MAX(audits.audit_group) AS audit_group,
MAX(audits.description) AS description
FROM
`httparchive.lighthouse.2021_07_01_mobile`,
UNNEST(getAudits(JSON_EXTRACT(report, '$.categories.pwa.auditRefs'), JSON_EXTRACT(report, '$.audits'))) AS audits
JOIN
(
SELECT
url
FROM
`httparchive.pages.2021_07_01_mobile`
WHERE
JSON_EXTRACT(payload, '$._pwa.serviceWorkerHeuristic') = "true" AND
JSON_EXTRACT(payload, '$._pwa.manifests') != "[]"
)
USING (url)
GROUP BY
audits.id
UNION ALL
SELECT
'ALL Sites' AS type,
audits.id AS id,
COUNTIF(audits.score > 0) AS num_pages,
COUNT(0) AS total,
COUNTIF(audits.score IS NOT NULL) AS total_applicable,
SAFE_DIVIDE(COUNTIF(audits.score > 0), COUNTIF(audits.score IS NOT NULL)) AS pct,
APPROX_QUANTILES(audits.weight, 100)[OFFSET(50)] AS median_weight,
MAX(audits.audit_group) AS audit_group,
MAX(audits.description) AS description
FROM
`httparchive.lighthouse.2021_07_01_mobile`,
UNNEST(getAudits(JSON_EXTRACT(report, '$.categories.pwa.auditRefs'), JSON_EXTRACT(report, '$.audits'))) AS audits
GROUP BY
audits.id
ORDER BY
type DESC,
median_weight DESC,
id
48 changes: 48 additions & 0 deletions sql/2021/pwa/lighthouse_pwa_score.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#standardSQL
# Percentiles of lighthouse pwa score
# This metric comes from Lighthouse only and is only available in mobile in HTTP Archive dataset
SELECT
'2021_07_01' AS date,
'PWA Sites' AS type,
percentile,
APPROX_QUANTILES(score, 1000)[OFFSET(percentile * 10)] * 100 AS score
FROM (
SELECT
url,
CAST(JSON_EXTRACT(report, '$.categories.pwa.score') AS NUMERIC) AS score
FROM
`httparchive.lighthouse.2021_07_01_mobile`)
JOIN
(
SELECT
url
FROM
`httparchive.pages.2021_07_01_mobile`
WHERE
JSON_EXTRACT(payload, '$._pwa.serviceWorkerHeuristic') = "true" AND
JSON_EXTRACT(payload, '$._pwa.manifests') != "[]"
)
USING (url),
UNNEST([10, 25, 50, 75, 90]) AS percentile
GROUP BY
date,
percentile
UNION ALL
SELECT
'2021_07_01' AS date,
'All Sites' AS type,
percentile,
APPROX_QUANTILES(score, 1000)[OFFSET(percentile * 10)] * 100 AS score
FROM (
SELECT
CAST(JSON_EXTRACT(report, '$.categories.pwa.score') AS NUMERIC) AS score
FROM
`httparchive.lighthouse.2021_07_01_mobile`),
UNNEST([10, 25, 50, 75, 90]) AS percentile
GROUP BY
date,
percentile
ORDER BY
date,
type DESC,
percentile
21 changes: 21 additions & 0 deletions sql/2021/pwa/manifests_and_service_workers.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#standardSQL
# Counting Manifests and Service Workers
SELECT
client,
SAFE_DIVIDE(SUM(ServiceWorker), SUM(COUNT(0)) OVER (PARTITION BY client)) AS ServiceWorkers,
SAFE_DIVIDE(SUM(manifests), SUM(COUNT(0)) OVER (PARTITION BY client)) AS Manifests,
SAFE_DIVIDE(COUNTIF(ServiceWorker > 0 OR manifests > 0), SUM(COUNT(0)) OVER (PARTITION BY client)) AS Either,
SAFE_DIVIDE(COUNTIF(ServiceWorker > 0 AND manifests > 0), SUM(COUNT(0)) OVER (PARTITION BY client)) AS Both,
SUM(COUNT(0)) OVER (PARTITION BY client) AS total
FROM (
SELECT
_TABLE_SUFFIX AS client,
IF(JSON_EXTRACT(payload, '$._pwa.serviceWorkerHeuristic') = "true", 1, 0) AS ServiceWorker,
IF(JSON_EXTRACT(payload, '$._pwa.manifests') != "[]", 1, 0) AS manifests
FROM
`httparchive.pages.2021_07_01_*`
)
GROUP BY
client
ORDER BY
client
52 changes: 52 additions & 0 deletions sql/2021/pwa/manifests_not_json_parsable.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#standardSQL
# Manifests that are not JSON parsable for service worker pages and all pages

CREATE TEMP FUNCTION canParseManifest(manifest STRING)
RETURNS BOOLEAN LANGUAGE js AS '''
try {
var manifestJSON = Object.values(JSON.parse(manifest))[0];
if (typeof manifestJSON === 'string' && manifestJSON.trim() != '') {
return false;
}
if (typeof manifestJSON === 'string' && manifestJSON.trim() === '') {
return null;
}
return true;
} catch {
return false;
}
''';

SELECT
'PWA Pages' AS type,
_TABLE_SUFFIX AS client,
canParseManifest(JSON_EXTRACT(payload, '$._pwa.manifests')) AS can_parse,
COUNT(0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS total,
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS pct
FROM
`httparchive.pages.2021_07_01_*`
WHERE
JSON_EXTRACT(payload, '$._pwa.manifests') != "[]" AND
JSON_EXTRACT(payload, '$._pwa.serviceWorkerHeuristic') = "true"
GROUP BY
client,
can_parse
UNION ALL
SELECT
'All Pages' AS type,
_TABLE_SUFFIX AS client,
canParseManifest(JSON_EXTRACT(payload, '$._pwa.manifests')) AS can_parse,
COUNT(0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS total,
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS pct
FROM
`httparchive.pages.2021_07_01_*`
GROUP BY
client,
can_parse
ORDER BY
type DESC,
freq / total DESC,
can_parse,
client
46 changes: 46 additions & 0 deletions sql/2021/pwa/manifests_preferring_native_apps.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#standardSQL
# % manifests preferring native apps for service worker pages and all pages

CREATE TEMP FUNCTION prefersNative(manifest STRING)
RETURNS BOOLEAN LANGUAGE js AS '''
try {
var $ = Object.values(JSON.parse(manifest))[0];
return $.prefer_related_applications == true && $.related_applications.length > 0;
} catch (e) {
return null;
}
''';

SELECT
'PWA Pages' AS type,
_TABLE_SUFFIX AS client,
prefersNative(JSON_EXTRACT(payload, '$._pwa.manifests')) AS prefersNative,
COUNT(0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS total,
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS pct
FROM
`httparchive.pages.2021_07_01_*`
WHERE
JSON_EXTRACT(payload, '$._pwa.manifests') != "[]" AND
JSON_EXTRACT(payload, '$._pwa.serviceWorkerHeuristic') = "true"
GROUP BY
client,
prefersNative
UNION ALL
SELECT
'All Pages' AS type,
_TABLE_SUFFIX AS client,
prefersNative(JSON_EXTRACT(payload, '$._pwa.manifests')) AS prefersNative,
COUNT(0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS total,
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS pct
FROM
`httparchive.pages.2021_07_01_*`
GROUP BY
client,
prefersNative
ORDER BY
type DESC,
freq / total DESC,
prefersNative,
client
Loading

0 comments on commit fc694b3

Please sign in to comment.