Skip to content

Commit

Permalink
CMS 2020 queries (#1087)
Browse files Browse the repository at this point in the history
* CMS 2020

* WIP: Start on the 2020 CMS Queries

* update to 2020-08-01

* Core Web Vitals by CMS

* CWV distribution

* Lighthouse

* add results sheet URL to README

* new query for CMS adoption YoY without client breakdown

* Update from 2019 to 2020

Co-authored-by: Artem Denysov <[email protected]>

Co-authored-by: Greg Brimble <[email protected]>
Co-authored-by: Artem Denysov <[email protected]>
  • Loading branch information
3 people authored and sudheendrachari committed Sep 15, 2020
1 parent 226b8b9 commit d4286b1
Show file tree
Hide file tree
Showing 12 changed files with 487 additions and 0 deletions.
6 changes: 6 additions & 0 deletions sql/2020/15_CMS/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# CMS 2020

Related materials:

- [Planning doc](https://docs.google.com/document/d/1XL87z5MOk7QDLhpFsaMWT3DaIXdOQV32qs57noGwDVQ/edit?usp=sharing)
- [Results spreadsheet](https://docs.google.com/spreadsheets/d/1vTf459CcCbBuYeGvgo-RSidppR62SfM-VTkW-dfS3K4/edit?usp=sharing)
33 changes: 33 additions & 0 deletions sql/2020/15_CMS/adoption_of_image_formats_in_cmss.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#standardSQL
# Adoption of image formats in CMSs
SELECT
client,
format,
COUNT(0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
FROM (
SELECT
client,
format,
page
FROM
`httparchive.almanac.requests`
WHERE
date = '2020-08-01' AND
type = 'image')
JOIN (
SELECT
_TABLE_SUFFIX AS client,
url AS page
FROM
`httparchive.technologies.2020_08_01_*`
WHERE
category = 'CMS')
USING
(client, page)
GROUP BY
client,
format
ORDER BY
pct DESC
52 changes: 52 additions & 0 deletions sql/2020/15_CMS/cms_adoption_compared_to_2019.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#standardSQL
# CMS adoptions, compared to 2019
SELECT
_TABLE_SUFFIX AS client,
2020 AS year,
COUNT(0) AS freq,
total,
COUNT(0) / total AS pct
FROM
`httparchive.technologies.2020_08_01_*`
JOIN (
SELECT
_TABLE_SUFFIX,
COUNT(0) AS total
FROM
`httparchive.summary_pages.2020_08_01_*`
GROUP BY
_TABLE_SUFFIX)
USING
(_TABLE_SUFFIX)
WHERE
category = 'CMS'
GROUP BY
client,
total
UNION ALL
SELECT
_TABLE_SUFFIX AS client,
2019 AS year,
COUNT(0) AS freq,
total,
COUNT(0) / total AS pct
FROM
`httparchive.technologies.2019_07_01_*`
JOIN (
SELECT
_TABLE_SUFFIX,
COUNT(0) AS total
FROM
`httparchive.summary_pages.2019_07_01_*`
GROUP BY
_TABLE_SUFFIX)
USING
(_TABLE_SUFFIX)
WHERE
category = 'CMS'
GROUP BY
client,
total
ORDER BY
year DESC,
pct DESC
43 changes: 43 additions & 0 deletions sql/2020/15_CMS/core_web_vitals_distribution.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#standardSQL
# Core Web Vitals distribution by CMS
#
# Note that this is an unweighted average of all sites per CMS.
# Performance of sites with millions of visitors as weighted the same as small sites.
SELECT
client,
cms,
COUNT(DISTINCT origin) AS origins,

SUM(fast_lcp) / (SUM(fast_lcp) + SUM(avg_lcp) + SUM(slow_lcp)) AS good_lcp,
SUM(avg_lcp) / (SUM(fast_lcp) + SUM(avg_lcp) + SUM(slow_lcp)) AS ni_lcp,
SUM(slow_lcp) / (SUM(fast_lcp) + SUM(avg_lcp) + SUM(slow_lcp)) AS poor_lcp,

SUM(fast_fid) / (SUM(fast_fid) + SUM(avg_fid) + SUM(slow_fid)) AS good_fid,
SUM(avg_fid) / (SUM(fast_fid) + SUM(avg_fid) + SUM(slow_fid)) AS ni_fid,
SUM(slow_fid) / (SUM(fast_fid) + SUM(avg_fid) + SUM(slow_fid)) AS poor_fid,

SUM(small_cls) / (SUM(small_cls) + SUM(medium_cls) + SUM(large_cls)) AS good_cls,
SUM(medium_cls) / (SUM(small_cls) + SUM(medium_cls) + SUM(large_cls)) AS ni_cls,
SUM(large_cls) / (SUM(small_cls) + SUM(medium_cls) + SUM(large_cls)) AS poor_cls,
FROM
`chrome-ux-report.materialized.device_summary`
JOIN (
SELECT
_TABLE_SUFFIX AS client,
url,
app AS cms
FROM
`httparchive.technologies.2020_08_01_*`
WHERE
category = 'CMS')
ON
CONCAT(origin, '/') = url AND
IF(device = 'desktop', 'desktop', 'mobile') = client
WHERE
# The CrUX 202008 dataset is not available until September 8.
date = '2020-07-01'
GROUP BY
client,
cms
ORDER BY
origins DESC
62 changes: 62 additions & 0 deletions sql/2020/15_CMS/core_web_vitals_passing.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#standardSQL
# Core Web Vitals performance by CMS
CREATE TEMP FUNCTION IS_GOOD (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
good / (good + needs_improvement + poor) >= 0.75
);

CREATE TEMP FUNCTION IS_NON_ZERO (good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
good + needs_improvement + poor > 0
);


SELECT
client,
cms,
COUNT(DISTINCT origin) AS origins,
# Origins with good LCP divided by origins with any LCP.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_lcp, avg_lcp, slow_lcp), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL))) AS pct_good_lcp,

# Origins with good FID divided by origins with any FID.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(fast_fid, avg_fid, slow_fid), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(fast_fid, avg_fid, slow_fid), origin, NULL))) AS pct_good_fid,

# Origins with good CLS divided by origins with any CLS.
SAFE_DIVIDE(
COUNT(DISTINCT IF(IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)),
COUNT(DISTINCT IF(IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_good_cls,

# Origins with good LCP, FID, and CLS dividied by origins with any LCP, FID, and CLS.
SAFE_DIVIDE(
COUNT(DISTINCT IF(
IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AND
IS_GOOD(fast_fid, avg_fid, slow_fid) AND
IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)),
COUNT(DISTINCT IF(
IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AND
IS_NON_ZERO(fast_fid, avg_fid, slow_fid) AND
IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))) AS pct_good_cwv
FROM
`chrome-ux-report.materialized.device_summary`
JOIN (
SELECT
_TABLE_SUFFIX AS client,
url,
app AS cms
FROM
`httparchive.technologies.2020_08_01_*`
WHERE
category = 'CMS')
ON
CONCAT(origin, '/') = url AND
IF(device = 'desktop', 'desktop', 'mobile') = client
WHERE
# The CrUX 202008 dataset is not available until September 8.
date = '2020-07-01'
GROUP BY
client,
cms
ORDER BY
origins DESC
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#standardSQL
# Distribution of CMS page kilobytes per resource type
SELECT
percentile,
client,
type,
APPROX_QUANTILES(requests, 1000)[OFFSET(percentile * 10)] AS requests,
ROUND(APPROX_QUANTILES(bytes, 1000)[OFFSET(percentile * 10)] / 1024, 2) AS kbytes
FROM (
SELECT
client,
type,
COUNT(0) AS requests,
SUM(respSize) AS bytes
FROM (
SELECT
client,
page,
type,
respSize
FROM
`httparchive.almanac.requests`
WHERE
date = '2020-08-01')
JOIN (
SELECT
_TABLE_SUFFIX AS client,
url AS page
FROM
`httparchive.technologies.2020_08_01_*`
WHERE
category = 'CMS')
USING
(client, page)
GROUP BY
client,
type,
page),
UNNEST([10, 25, 50, 75, 90]) AS percentile
GROUP BY
percentile,
client,
type
ORDER BY
percentile,
client,
kbytes DESC
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#standardSQL
# Distribution of page weight, requests, and co2 grams per CMS web page
# https://gitlab.com/wholegrain/carbon-api-2-0/-/blob/b498ec3bb239536d3612c5f3d758f46e0d2431a6/includes/carbonapi.php
CREATE TEMP FUNCTION
GREEN(url STRING) AS (FALSE); -- TODO: Investigate fetching from Green Web Foundation
CREATE TEMP FUNCTION
adjustDataTransfer(val INT64) AS (val * 0.75 + 0.02 * val * 0.25);
CREATE TEMP FUNCTION
energyConsumption(bytes FLOAT64) AS (bytes * 1.805 / 1073741824);
CREATE TEMP FUNCTION
getCo2Grid(energy FLOAT64) AS (energy * 475);
CREATE TEMP FUNCTION
getCo2Renewable(energy FLOAT64) AS (energy * 0.1008 * 33.4 + energy * 0.8992 * 475);
CREATE TEMP FUNCTION
CO2(url STRING, bytes INT64) AS (
IF
(GREEN(url),
getCo2Renewable(energyConsumption(adjustDataTransfer(bytes))),
getCo2Grid(energyConsumption(adjustDataTransfer(bytes)))));

SELECT
percentile,
client,
APPROX_QUANTILES(requests, 1000)[OFFSET(percentile * 10)] AS requests,
ROUND(APPROX_QUANTILES(bytes, 1000)[OFFSET(percentile * 10)] / 1024 / 1024, 2) AS mbytes,
APPROX_QUANTILES(co2grams, 1000)[OFFSET(percentile * 10)] AS co2grams
FROM (
SELECT
_TABLE_SUFFIX AS client,
reqTotal AS requests,
bytesTotal AS bytes,
CO2(url, bytesTotal) AS co2grams
FROM
`httparchive.summary_pages.2020_08_01_*`
JOIN (
SELECT
_TABLE_SUFFIX,
url
FROM
`httparchive.technologies.2020_08_01_*`
WHERE
category = 'CMS')
USING
(_TABLE_SUFFIX, url)),
UNNEST([10, 25, 50, 75, 90]) AS percentile
GROUP BY
percentile,
client
ORDER BY
percentile,
client
22 changes: 22 additions & 0 deletions sql/2020/15_CMS/image_bytes_on_cmss.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#standardSQL
# Image bytes on CMSs
SELECT
percentile,
_TABLE_SUFFIX AS client,
APPROX_QUANTILES(reqImg, 1000)[OFFSET(percentile * 10)] AS image_count,
APPROX_QUANTILES(bytesImg, 1000)[OFFSET(percentile * 10)] / 1024 AS image_kbytes
FROM
`httparchive.summary_pages.2020_08_01_*`
JOIN
`httparchive.technologies.2020_08_01_*`
USING
(_TABLE_SUFFIX, url),
UNNEST([10, 25, 50, 75, 90]) AS percentile
WHERE
category = 'CMS'
GROUP BY
percentile,
client
ORDER BY
percentile,
client
21 changes: 21 additions & 0 deletions sql/2020/15_CMS/median_lighthouse_score.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#standardSQL
# Lighthouse category scores per CMS
SELECT
app AS cms,
COUNT(DISTINCT url) AS freq,
# See https://github.com/HTTPArchive/almanac.httparchive.org/pull/1087#issuecomment-684983795
#APPROX_QUANTILES(CAST(JSON_EXTRACT_SCALAR(report, '$.categories.performance.score') AS NUMERIC), 1000)[OFFSET(500)] AS median_performance,
APPROX_QUANTILES(CAST(JSON_EXTRACT_SCALAR(report, '$.categories.accessibility.score') AS NUMERIC), 1000)[OFFSET(500)] AS median_accessibility,
APPROX_QUANTILES(CAST(JSON_EXTRACT_SCALAR(report, '$.categories.pwa.score') AS NUMERIC), 1000)[OFFSET(500)] AS median_pwa
FROM
`httparchive.lighthouse.2020_08_01_mobile`
JOIN
`httparchive.technologies.2020_08_01_mobile`
USING
(url)
WHERE
category = 'CMS'
GROUP BY
cms
ORDER BY
freq DESC
Loading

0 comments on commit d4286b1

Please sign in to comment.