-
-
Notifications
You must be signed in to change notification settings - Fork 185
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* CMS 2020 * WIP: Start on the 2020 CMS Queries * update to 2020-08-01 * Core Web Vitals by CMS * CWV distribution * Lighthouse * add results sheet URL to README * new query for CMS adoption YoY without client breakdown * Update from 2019 to 2020 Co-authored-by: Artem Denysov <[email protected]> Co-authored-by: Greg Brimble <[email protected]> Co-authored-by: Artem Denysov <[email protected]>
- Loading branch information
1 parent
226b8b9
commit d4286b1
Showing
12 changed files
with
487 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# CMS 2020 | ||
|
||
Related materials: | ||
|
||
- [Planning doc](https://docs.google.com/document/d/1XL87z5MOk7QDLhpFsaMWT3DaIXdOQV32qs57noGwDVQ/edit?usp=sharing) | ||
- [Results spreadsheet](https://docs.google.com/spreadsheets/d/1vTf459CcCbBuYeGvgo-RSidppR62SfM-VTkW-dfS3K4/edit?usp=sharing) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#standardSQL | ||
# Adoption of image formats in CMSs | ||
SELECT | ||
client, | ||
format, | ||
COUNT(0) AS freq, | ||
SUM(COUNT(0)) OVER (PARTITION BY client) AS total, | ||
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct | ||
FROM ( | ||
SELECT | ||
client, | ||
format, | ||
page | ||
FROM | ||
`httparchive.almanac.requests` | ||
WHERE | ||
date = '2020-08-01' AND | ||
type = 'image') | ||
JOIN ( | ||
SELECT | ||
_TABLE_SUFFIX AS client, | ||
url AS page | ||
FROM | ||
`httparchive.technologies.2020_08_01_*` | ||
WHERE | ||
category = 'CMS') | ||
USING | ||
(client, page) | ||
GROUP BY | ||
client, | ||
format | ||
ORDER BY | ||
pct DESC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
#standardSQL | ||
# CMS adoptions, compared to 2019 | ||
SELECT | ||
_TABLE_SUFFIX AS client, | ||
2020 AS year, | ||
COUNT(0) AS freq, | ||
total, | ||
COUNT(0) / total AS pct | ||
FROM | ||
`httparchive.technologies.2020_08_01_*` | ||
JOIN ( | ||
SELECT | ||
_TABLE_SUFFIX, | ||
COUNT(0) AS total | ||
FROM | ||
`httparchive.summary_pages.2020_08_01_*` | ||
GROUP BY | ||
_TABLE_SUFFIX) | ||
USING | ||
(_TABLE_SUFFIX) | ||
WHERE | ||
category = 'CMS' | ||
GROUP BY | ||
client, | ||
total | ||
UNION ALL | ||
SELECT | ||
_TABLE_SUFFIX AS client, | ||
2019 AS year, | ||
COUNT(0) AS freq, | ||
total, | ||
COUNT(0) / total AS pct | ||
FROM | ||
`httparchive.technologies.2019_07_01_*` | ||
JOIN ( | ||
SELECT | ||
_TABLE_SUFFIX, | ||
COUNT(0) AS total | ||
FROM | ||
`httparchive.summary_pages.2019_07_01_*` | ||
GROUP BY | ||
_TABLE_SUFFIX) | ||
USING | ||
(_TABLE_SUFFIX) | ||
WHERE | ||
category = 'CMS' | ||
GROUP BY | ||
client, | ||
total | ||
ORDER BY | ||
year DESC, | ||
pct DESC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#standardSQL | ||
# Core Web Vitals distribution by CMS | ||
# | ||
# Note that this is an unweighted average of all sites per CMS. | ||
# Performance of sites with millions of visitors as weighted the same as small sites. | ||
SELECT | ||
client, | ||
cms, | ||
COUNT(DISTINCT origin) AS origins, | ||
|
||
SUM(fast_lcp) / (SUM(fast_lcp) + SUM(avg_lcp) + SUM(slow_lcp)) AS good_lcp, | ||
SUM(avg_lcp) / (SUM(fast_lcp) + SUM(avg_lcp) + SUM(slow_lcp)) AS ni_lcp, | ||
SUM(slow_lcp) / (SUM(fast_lcp) + SUM(avg_lcp) + SUM(slow_lcp)) AS poor_lcp, | ||
|
||
SUM(fast_fid) / (SUM(fast_fid) + SUM(avg_fid) + SUM(slow_fid)) AS good_fid, | ||
SUM(avg_fid) / (SUM(fast_fid) + SUM(avg_fid) + SUM(slow_fid)) AS ni_fid, | ||
SUM(slow_fid) / (SUM(fast_fid) + SUM(avg_fid) + SUM(slow_fid)) AS poor_fid, | ||
|
||
SUM(small_cls) / (SUM(small_cls) + SUM(medium_cls) + SUM(large_cls)) AS good_cls, | ||
SUM(medium_cls) / (SUM(small_cls) + SUM(medium_cls) + SUM(large_cls)) AS ni_cls, | ||
SUM(large_cls) / (SUM(small_cls) + SUM(medium_cls) + SUM(large_cls)) AS poor_cls, | ||
FROM | ||
`chrome-ux-report.materialized.device_summary` | ||
JOIN ( | ||
SELECT | ||
_TABLE_SUFFIX AS client, | ||
url, | ||
app AS cms | ||
FROM | ||
`httparchive.technologies.2020_08_01_*` | ||
WHERE | ||
category = 'CMS') | ||
ON | ||
CONCAT(origin, '/') = url AND | ||
IF(device = 'desktop', 'desktop', 'mobile') = client | ||
WHERE | ||
# The CrUX 202008 dataset is not available until September 8. | ||
date = '2020-07-01' | ||
GROUP BY | ||
client, | ||
cms | ||
ORDER BY | ||
origins DESC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
#standardSQL | ||
# Core Web Vitals performance by CMS | ||
CREATE TEMP FUNCTION IS_GOOD (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS ( | ||
good / (good + needs_improvement + poor) >= 0.75 | ||
); | ||
|
||
CREATE TEMP FUNCTION IS_NON_ZERO (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS ( | ||
good + needs_improvement + poor > 0 | ||
); | ||
|
||
|
||
SELECT | ||
client, | ||
cms, | ||
COUNT(DISTINCT origin) AS origins, | ||
# Origins with good LCP divided by origins with any LCP. | ||
SAFE_DIVIDE( | ||
COUNT(DISTINCT IF(IS_GOOD(fast_lcp, avg_lcp, slow_lcp), origin, NULL)), | ||
COUNT(DISTINCT IF(IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL))) AS pct_good_lcp, | ||
|
||
# Origins with good FID divided by origins with any FID. | ||
SAFE_DIVIDE( | ||
COUNT(DISTINCT IF(IS_GOOD(fast_fid, avg_fid, slow_fid), origin, NULL)), | ||
COUNT(DISTINCT IF(IS_NON_ZERO(fast_fid, avg_fid, slow_fid), origin, NULL))) AS pct_good_fid, | ||
|
||
# Origins with good CLS divided by origins with any CLS. | ||
SAFE_DIVIDE( | ||
COUNT(DISTINCT IF(IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)), | ||
COUNT(DISTINCT IF(IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_good_cls, | ||
|
||
# Origins with good LCP, FID, and CLS dividied by origins with any LCP, FID, and CLS. | ||
SAFE_DIVIDE( | ||
COUNT(DISTINCT IF( | ||
IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AND | ||
IS_GOOD(fast_fid, avg_fid, slow_fid) AND | ||
IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)), | ||
COUNT(DISTINCT IF( | ||
IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AND | ||
IS_NON_ZERO(fast_fid, avg_fid, slow_fid) AND | ||
IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_good_cwv | ||
FROM | ||
`chrome-ux-report.materialized.device_summary` | ||
JOIN ( | ||
SELECT | ||
_TABLE_SUFFIX AS client, | ||
url, | ||
app AS cms | ||
FROM | ||
`httparchive.technologies.2020_08_01_*` | ||
WHERE | ||
category = 'CMS') | ||
ON | ||
CONCAT(origin, '/') = url AND | ||
IF(device = 'desktop', 'desktop', 'mobile') = client | ||
WHERE | ||
# The CrUX 202008 dataset is not available until September 8. | ||
date = '2020-07-01' | ||
GROUP BY | ||
client, | ||
cms | ||
ORDER BY | ||
origins DESC |
47 changes: 47 additions & 0 deletions
47
sql/2020/15_CMS/distribution_of_cms_page_kilobytes_per_resource_type.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#standardSQL | ||
# Distribution of CMS page kilobytes per resource type | ||
SELECT | ||
percentile, | ||
client, | ||
type, | ||
APPROX_QUANTILES(requests, 1000)[OFFSET(percentile * 10)] AS requests, | ||
ROUND(APPROX_QUANTILES(bytes, 1000)[OFFSET(percentile * 10)] / 1024, 2) AS kbytes | ||
FROM ( | ||
SELECT | ||
client, | ||
type, | ||
COUNT(0) AS requests, | ||
SUM(respSize) AS bytes | ||
FROM ( | ||
SELECT | ||
client, | ||
page, | ||
type, | ||
respSize | ||
FROM | ||
`httparchive.almanac.requests` | ||
WHERE | ||
date = '2020-08-01') | ||
JOIN ( | ||
SELECT | ||
_TABLE_SUFFIX AS client, | ||
url AS page | ||
FROM | ||
`httparchive.technologies.2020_08_01_*` | ||
WHERE | ||
category = 'CMS') | ||
USING | ||
(client, page) | ||
GROUP BY | ||
client, | ||
type, | ||
page), | ||
UNNEST([10, 25, 50, 75, 90]) AS percentile | ||
GROUP BY | ||
percentile, | ||
client, | ||
type | ||
ORDER BY | ||
percentile, | ||
client, | ||
kbytes DESC |
51 changes: 51 additions & 0 deletions
51
sql/2020/15_CMS/distribution_of_page_weight_requests_and_co2_grams_per_cms_web_page.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
#standardSQL | ||
# Distribution of page weight, requests, and co2 grams per CMS web page | ||
# https://gitlab.com/wholegrain/carbon-api-2-0/-/blob/b498ec3bb239536d3612c5f3d758f46e0d2431a6/includes/carbonapi.php | ||
CREATE TEMP FUNCTION | ||
GREEN(url STRING) AS (FALSE); -- TODO: Investigate fetching from Green Web Foundation | ||
CREATE TEMP FUNCTION | ||
adjustDataTransfer(val INT64) AS (val * 0.75 + 0.02 * val * 0.25); | ||
CREATE TEMP FUNCTION | ||
energyConsumption(bytes FLOAT64) AS (bytes * 1.805 / 1073741824); | ||
CREATE TEMP FUNCTION | ||
getCo2Grid(energy FLOAT64) AS (energy * 475); | ||
CREATE TEMP FUNCTION | ||
getCo2Renewable(energy FLOAT64) AS (energy * 0.1008 * 33.4 + energy * 0.8992 * 475); | ||
CREATE TEMP FUNCTION | ||
CO2(url STRING, bytes INT64) AS ( | ||
IF | ||
(GREEN(url), | ||
getCo2Renewable(energyConsumption(adjustDataTransfer(bytes))), | ||
getCo2Grid(energyConsumption(adjustDataTransfer(bytes))))); | ||
|
||
SELECT | ||
percentile, | ||
client, | ||
APPROX_QUANTILES(requests, 1000)[OFFSET(percentile * 10)] AS requests, | ||
ROUND(APPROX_QUANTILES(bytes, 1000)[OFFSET(percentile * 10)] / 1024 / 1024, 2) AS mbytes, | ||
APPROX_QUANTILES(co2grams, 1000)[OFFSET(percentile * 10)] AS co2grams | ||
FROM ( | ||
SELECT | ||
_TABLE_SUFFIX AS client, | ||
reqTotal AS requests, | ||
bytesTotal AS bytes, | ||
CO2(url, bytesTotal) AS co2grams | ||
FROM | ||
`httparchive.summary_pages.2020_08_01_*` | ||
JOIN ( | ||
SELECT | ||
_TABLE_SUFFIX, | ||
url | ||
FROM | ||
`httparchive.technologies.2020_08_01_*` | ||
WHERE | ||
category = 'CMS') | ||
USING | ||
(_TABLE_SUFFIX, url)), | ||
UNNEST([10, 25, 50, 75, 90]) AS percentile | ||
GROUP BY | ||
percentile, | ||
client | ||
ORDER BY | ||
percentile, | ||
client |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#standardSQL | ||
# Image bytes on CMSs | ||
SELECT | ||
percentile, | ||
_TABLE_SUFFIX AS client, | ||
APPROX_QUANTILES(reqImg, 1000)[OFFSET(percentile * 10)] AS image_count, | ||
APPROX_QUANTILES(bytesImg, 1000)[OFFSET(percentile * 10)] / 1024 AS image_kbytes | ||
FROM | ||
`httparchive.summary_pages.2020_08_01_*` | ||
JOIN | ||
`httparchive.technologies.2020_08_01_*` | ||
USING | ||
(_TABLE_SUFFIX, url), | ||
UNNEST([10, 25, 50, 75, 90]) AS percentile | ||
WHERE | ||
category = 'CMS' | ||
GROUP BY | ||
percentile, | ||
client | ||
ORDER BY | ||
percentile, | ||
client |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#standardSQL | ||
# Lighthouse category scores per CMS | ||
SELECT | ||
app AS cms, | ||
COUNT(DISTINCT url) AS freq, | ||
# See https://github.com/HTTPArchive/almanac.httparchive.org/pull/1087#issuecomment-684983795 | ||
#APPROX_QUANTILES(CAST(JSON_EXTRACT_SCALAR(report, '$.categories.performance.score') AS NUMERIC), 1000)[OFFSET(500)] AS median_performance, | ||
APPROX_QUANTILES(CAST(JSON_EXTRACT_SCALAR(report, '$.categories.accessibility.score') AS NUMERIC), 1000)[OFFSET(500)] AS median_accessibility, | ||
APPROX_QUANTILES(CAST(JSON_EXTRACT_SCALAR(report, '$.categories.pwa.score') AS NUMERIC), 1000)[OFFSET(500)] AS median_pwa | ||
FROM | ||
`httparchive.lighthouse.2020_08_01_mobile` | ||
JOIN | ||
`httparchive.technologies.2020_08_01_mobile` | ||
USING | ||
(url) | ||
WHERE | ||
category = 'CMS' | ||
GROUP BY | ||
cms | ||
ORDER BY | ||
freq DESC |
Oops, something went wrong.