Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fonts 2021 sql #2338

Merged
merged 34 commits into from
Oct 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
2911d07
Create 05_01.web_fonts_usage.sql
konfirmed Sep 20, 2021
bbd4e69
Create 05_02.web_fonts_usage_by_country.sql
konfirmed Sep 20, 2021
5e0c160
Rename 05_01.web_fonts_usage.sql to web_fonts_usage.sql
konfirmed Sep 20, 2021
7ffbc52
Rename 05_02.web_fonts_usage_by_country.sql to .web_fonts_usage_by_co…
konfirmed Sep 20, 2021
b413e65
Rename .web_fonts_usage_by_country.sql to web_fonts_usage_by_country.sql
konfirmed Sep 20, 2021
eb60449
Merge branch 'HTTPArchive:main' into fonts-2021-sql
konfirmed Sep 23, 2021
cc917ac
Update web_fonts_usage_by_country.sql
konfirmed Sep 29, 2021
6a02e9d
Merge branch 'HTTPArchive:main' into fonts-2021-sql
konfirmed Sep 29, 2021
960d8f0
Create fonts_format.sql
konfirmed Sep 29, 2021
6b5ed72
Merge branch 'HTTPArchive:main' into fonts-2021-sql
konfirmed Oct 2, 2021
4b928fe
Create impact_on_core_web_vitals.sql
konfirmed Oct 2, 2021
20015a1
Update and rename impact_on_core_web_vitals.sql to self_hosted_vs_hos…
konfirmed Oct 2, 2021
302f670
Create web_font_usage_breakdown_with_fcp_lcp.sql
konfirmed Oct 2, 2021
bf1d16f
Delete self_hosted_vs_hosted_with_fcp_lcp.sql
konfirmed Oct 2, 2021
955dd98
Create popular_typeface.sql
konfirmed Oct 2, 2021
046805b
Create variable_font_adoption.sql
konfirmed Oct 2, 2021
3c0fa1d
Create color_fonts.sql
konfirmed Oct 2, 2021
8a8eaed
Create variable_font_axis.sql
konfirmed Oct 2, 2021
f2b5250
Rename variable_font_axis.sql to variable_font_axes_used.sql
konfirmed Oct 2, 2021
d10f9ea
Create self_hosted_vs_hosted_with_fcp_lcp.sql
konfirmed Oct 2, 2021
dee4c99
Update self_hosted_vs_hosted_with_fcp_lcp.sql
konfirmed Oct 2, 2021
7b18062
Create font_display.sql
konfirmed Oct 2, 2021
b6b636e
Update self_hosted_vs_hosted_with_fcp_lcp.sql
konfirmed Oct 2, 2021
7aa5cf9
Update self_hosted_vs_hosted_with_fcp_lcp.sql
konfirmed Oct 2, 2021
cad9390
Update self_hosted_vs_hosted_with_fcp_lcp.sql
konfirmed Oct 2, 2021
c44f740
Update self_hosted_vs_hosted_with_fcp_lcp.sql
konfirmed Oct 2, 2021
e149bcf
Update self_hosted_vs_hosted_with_fcp_lcp.sql
konfirmed Oct 2, 2021
d121924
Update self_hosted_vs_hosted_with_fcp_lcp.sql
konfirmed Oct 2, 2021
f294f02
Update self_hosted_vs_hosted_with_fcp_lcp.sql
konfirmed Oct 2, 2021
c003453
Update self_hosted_vs_hosted_with_fcp_lcp.sql
konfirmed Oct 2, 2021
60eb996
Update sql/2021/fonts/fonts_format.sql
konfirmed Oct 4, 2021
418ade3
Update sql/2021/fonts/fonts_format.sql
konfirmed Oct 4, 2021
da4fa0a
Update sql/2021/fonts/variable_font_axes_used.sql
konfirmed Oct 4, 2021
0821e31
Merge branch 'HTTPArchive:main' into fonts-2021-sql
konfirmed Oct 4, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions sql/2021/fonts/color_fonts.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#standardSQL
#color_fonts
# Pages per client that request at least one color font, broken down by the
# color table found in the font, together with the share of all crawled pages.
#
# Output columns: client, format, pages_color, total_page, pct_color.
SELECT
  client,
  format,
  COUNT(DISTINCT page) AS pages_color,
  total_page,
  COUNT(DISTINCT page) / total_page AS pct_color
FROM (
  # Only the columns needed downstream are projected. The requests table's own
  # `format` column is intentionally left out: it was never used and shares its
  # name with the UNNEST alias below, which is the value this query reports.
  SELECT
    client,
    page,
    payload
  FROM
    `httparchive.almanac.requests`
  WHERE
    date = '2021-07-01' AND
    type = 'font')
JOIN (
  # Denominator: number of pages in the crawl, per client.
  SELECT
    _TABLE_SUFFIX AS client,
    COUNT(0) AS total_page
  FROM
    `httparchive.summary_pages.2021_07_01_*`
  GROUP BY
    _TABLE_SUFFIX)
USING
  (client),
  # Color fonts have any of sbix, cbdt, svg or colr tables.
  UNNEST(REGEXP_EXTRACT_ALL(JSON_EXTRACT(payload, '$._font_details.table_sizes'), '(?i)(sbix|CBDT|SVG|COLR)')) AS format
GROUP BY
  client,
  total_page,
  format
ORDER BY
  pages_color DESC
70 changes: 70 additions & 0 deletions sql/2021/fonts/font_display.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#standardSQL
#font_display
# Returns every `font-display` value declared inside @font-face rules of a
# parsed stylesheet.
#
# css: JSON text of a parsed stylesheet, read as `stylesheet.rules[]`, where a
#      rule may carry nested `rules` and/or a `declarations` array.
# Returns: the declared values in document order; `[NULL]` when the input
#          cannot be parsed or walked.
CREATE TEMPORARY FUNCTION getFontDisplay(css STRING)
RETURNS ARRAY<STRING> LANGUAGE js AS '''
try {
  var reduceValues = (values, rule) => {
    // Rules that contain other rules are walked recursively.
    if ('rules' in rule) {
      return rule.rules.reduce(reduceValues, values);
    }
    if (!('declarations' in rule)) {
      return values;
    }
    if (rule.type != 'font-face') {
      return values;
    }
    rule.declarations.forEach(d => {
      // Entries without a property name are skipped instead of throwing;
      // one such entry would otherwise send the whole stylesheet to the
      // catch block and lose every value already collected.
      // NOTE(review): presumably these are comment nodes emitted by the CSS
      // parser - confirm against the parsed_css producer.
      if (d.property && d.property.toLowerCase() == 'font-display') {
        values.push(d.value);
      }
    });
    return values;
  };
  var $ = JSON.parse(css);
  return $.stylesheet.rules.reduce(reduceValues, []);
} catch (e) {
  return [null];
}
''';

# Usage of each `font-display` value per client, with the approximate median
# FCP and LCP of the pages that declare it.
#
# Output columns: client, font_display, pages, total, pct_display,
# median_fcp, median_lcp.
WITH page_font_display AS (
  # One row per (client, page, font-display value). The LEFT JOIN keeps pages
  # whose stylesheets yield no value, with font_display = NULL.
  SELECT DISTINCT
    client,
    page,
    font_display
  FROM
    `httparchive.almanac.parsed_css`
  LEFT JOIN
    UNNEST(getFontDisplay(css)) AS font_display
  WHERE
    date = '2021-07-01'
),

page_timings AS (
  # FCP / LCP read from the page payload, one row per distinct
  # (client, url, payload).
  SELECT
    _TABLE_SUFFIX AS client,
    url AS page,
    CAST(JSON_EXTRACT_SCALAR(payload, "$['_chromeUserTiming.firstContentfulPaint']") AS INT64) AS fcp,
    CAST(JSON_EXTRACT_SCALAR(payload, "$['_chromeUserTiming.LargestContentfulPaint']") AS INT64) AS lcp
  FROM
    `httparchive.pages.2021_07_01_*`
  GROUP BY
    _TABLE_SUFFIX,
    url,
    payload
)

SELECT
  client,
  font_display,
  COUNT(DISTINCT page) AS pages,
  # `total` sums the per-value page counts of the client, so a page declaring
  # several values contributes once per value.
  SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS total,
  COUNT(DISTINCT page) / SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS pct_display,
  APPROX_QUANTILES(fcp, 1000)[OFFSET(500)] AS median_fcp,
  APPROX_QUANTILES(lcp, 1000)[OFFSET(500)] AS median_lcp
FROM
  page_font_display
INNER JOIN
  page_timings
USING
  (client, page)
GROUP BY
  client,
  font_display
ORDER BY
  pages DESC
20 changes: 20 additions & 0 deletions sql/2021/fonts/fonts_format.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#standardSQL
#font_formats
# Distribution of font formats per client, derived from the response MIME type
# of font requests.
#
# Output columns: client, mime_type, freq, total, pct.
WITH typed_font_requests AS (
  SELECT
    client,
    # Keep what follows the slash, minus an optional `x-` and/or `font-`
    # prefix. When the pattern does not match, REGEXP_EXTRACT yields NULL and
    # the `ext` column is used instead.
    LOWER(IFNULL(REGEXP_EXTRACT(mimeType, '/(?:x-)?(?:font-)?(.*)'), ext)) AS mime_type
  FROM
    `httparchive.almanac.requests`
  WHERE
    date = '2021-07-01' AND
    type = 'font' AND
    mimeType != ''
)

SELECT
  client,
  mime_type,
  COUNT(0) AS freq,
  # Number of font requests of the client across all of its formats.
  SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
FROM
  typed_font_requests
GROUP BY
  client,
  mime_type
ORDER BY
  client,
  pct DESC
48 changes: 48 additions & 0 deletions sql/2021/fonts/popular_typeface.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#standardSQL
#popular_typeface
# Returns the `font-family` value of every @font-face rule in a parsed
# stylesheet, with single and double quotes removed.
#
# css: JSON text of a parsed stylesheet, read as `stylesheet.rules[]`, where a
#      rule may carry nested `rules` and/or a `declarations` array.
# Returns: one entry per @font-face rule that declares a non-empty family
#          (its first `font-family` declaration); an empty array when the
#          input cannot be parsed or walked.
CREATE TEMPORARY FUNCTION getFontFamilies(css STRING)
RETURNS ARRAY<STRING> LANGUAGE js AS '''
try {
  var collectFamilies = (families, rule) => {
    // Descend into rules that contain other rules, so that an @font-face
    // nested inside them is not missed.
    if ('rules' in rule) {
      return rule.rules.reduce(collectFamilies, families);
    }
    if (rule.type != 'font-face' || !rule.declarations) {
      return families;
    }
    // CSS property names are case-insensitive; entries without a property
    // name are ignored.
    var decl = rule.declarations.find(d => d.property && d.property.toLowerCase() == 'font-family');
    var family = decl && decl.value && decl.value.replace(/[\'"]/g, '');
    if (family) {
      families.push(family);
    }
    return families;
  };
  var $ = JSON.parse(css);
  return $.stylesheet.rules.reduce(collectFamilies, []);
} catch (e) {
  return [];
}
''';

# Font families declared via @font-face, with the number and share of pages
# per client that declare them. Families below 0.4% of pages are dropped.
#
# Output columns: client, font_family, pages, total, pct.
WITH family_pages AS (
  # Distinct pages per (client, family) among the parsed stylesheets.
  SELECT
    client,
    font_family,
    COUNT(DISTINCT page) AS pages
  FROM
    `httparchive.almanac.parsed_css`
  CROSS JOIN
    UNNEST(getFontFamilies(css)) AS font_family
  WHERE
    date = '2021-07-01'
  GROUP BY
    client,
    font_family
),

client_totals AS (
  # Denominator: number of pages in the crawl, per client.
  SELECT
    _TABLE_SUFFIX AS client,
    COUNT(0) AS total
  FROM
    `httparchive.summary_pages.2021_07_01_*`
  GROUP BY
    _TABLE_SUFFIX
)

SELECT
  client,
  font_family,
  pages,
  total,
  pages / total AS pct
FROM
  family_pages
INNER JOIN
  client_totals
USING
  (client)
WHERE
  pages / total >= 0.004
ORDER BY
  pct DESC
42 changes: 42 additions & 0 deletions sql/2021/fonts/self_hosted_vs_hosted_with_fcp_lcp.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#standardSQL #self_hosted_vs_hosted_with_fcp
# Pages per client classified by where their fonts come from (same host as the
# page, a different host, or a mix), with the approximate median FCP and LCP of
# each class.
#
# Output columns: client, font_host, pages, total, pct, median_fcp, median_lcp.
WITH font_hosting AS (
  # Fraction of each page's font requests whose host equals the page host.
  SELECT
    client,
    page,
    COUNTIF(NET.HOST(page) = NET.HOST(url)) / COUNT(0) AS pct_self_hosted_hosted
  FROM
    `httparchive.almanac.requests`
  WHERE
    date = '2021-07-01' AND
    type = 'font'
  GROUP BY
    client,
    page
),

page_timings AS (
  # FCP / LCP read from the page payload.
  SELECT
    _TABLE_SUFFIX AS client,
    url AS page,
    CAST(JSON_EXTRACT_SCALAR(payload, "$['_chromeUserTiming.firstContentfulPaint']") AS INT64) AS fcp,
    CAST(JSON_EXTRACT_SCALAR(payload, "$['_chromeUserTiming.LargestContentfulPaint']") AS INT64) AS lcp
  FROM
    `httparchive.pages.2021_07_01_*`
)

SELECT
  client,
  # 1 means every font request is same-host, 0 means none is; anything in
  # between is a page using both.
  CASE pct_self_hosted_hosted
    WHEN 1 THEN 'self-hosted'
    WHEN 0 THEN 'external'
    ELSE 'both'
  END AS font_host,
  COUNT(DISTINCT page) AS pages,
  SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS total,
  COUNT(DISTINCT page) / SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS pct,
  APPROX_QUANTILES(fcp, 1000)[OFFSET(500)] AS median_fcp,
  APPROX_QUANTILES(lcp, 1000)[OFFSET(500)] AS median_lcp
FROM
  font_hosting
INNER JOIN
  page_timings
USING
  (client, page)
GROUP BY
  client,
  font_host
ORDER BY
  font_host,
  client
53 changes: 53 additions & 0 deletions sql/2021/fonts/variable_font_adoption.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#standardSQL
#variable_font_with_fcp
# Picks a usable family name out of a font's `_font_details` JSON.
#
# font_details: JSON text whose `names` member holds the candidate strings.
# Returns: the first candidate that, once trimmed, is longer than two
#          characters, is not a number, and does not match the metadata
#          pattern (licence / copyright text, version markers, foundry and
#          designer names, bare style names, hex-only identifiers). Yields no
#          name when nothing qualifies or the input cannot be parsed.
CREATE TEMP FUNCTION getName(font_details STRING) RETURNS STRING LANGUAGE js AS '''
try {
  // Written as a regex literal rather than RegExp(string): inside a string
  // literal the digit-class escape loses its backslash before the regex is
  // compiled. The parentheses around the copyright marker are escaped so the
  // pattern matches the literal three-character marker and not any letter c.
  const metadata = /(not to be used for anything other than web font use!|web use only|web_use_only|:|;|^google$|copyright|©|\\(c\\)|rights reserved|published by|generated by|property of|trademark|version|v\\d+|release|untitled|^bold$|^light$|^semibold$|^defaults$|^normal$|^regular$|^[a-f0-9]+$|Vernon Adams|Jan Kovarik|Mark Simonson|Paul D. Hunt|Kai Bernau|Kris Sowersby|Joshua Darden|Jos Buivenga|Yugo Kajiwara|Moslem Ebrahimi|Hadrien Boyer|Russell Benson|Ryan Martinson|Joen Asmussen|Olivier Gourvat|Hannes von Doehren|René Bieder|House Industries|GoDaddy|TypeSquare|Dalton Maag Ltd|_null_name_substitute_|^font$|Moveable Type)/i;
  return Object.values(JSON.parse(font_details).names).find(name => {
    name = name.trim();
    return name.length > 2 &&
      !metadata.test(name) &&
      isNaN(Number(name));
  });
} catch (e) {
  return null;
}
''';
# Variable fonts (fonts whose table list contains `gvar`) by family name: the
# number of pages per client that load them and their share of all crawled
# pages of that client. Names seen on 100 pages or fewer are dropped.
#
# Output columns: client, name, freq_vf, total_page, pct_vf.
SELECT
  client,
  name,
  COUNT(DISTINCT page) AS freq_vf,
  total_page,
  COUNT(DISTINCT page) / total_page AS pct_vf
FROM (
  SELECT
    client,
    page,
    getName(JSON_EXTRACT(payload, '$._font_details')) AS name
  FROM
    `httparchive.almanac.requests`
  WHERE
    date = '2021-07-01' AND
    type = 'font' AND
    REGEXP_CONTAINS(JSON_EXTRACT(payload, '$._font_details.table_sizes'), '(?i)gvar'))
JOIN (
  # Denominator: number of pages in the crawl, per client. It must be grouped
  # by client only; grouping by page as well makes the count per-page and
  # turns pct_vf into a copy of freq_vf instead of a share.
  SELECT
    _TABLE_SUFFIX AS client,
    COUNT(0) AS total_page
  FROM
    `httparchive.pages.2021_07_01_*`
  GROUP BY
    _TABLE_SUFFIX)
USING
  (client)
WHERE
  name IS NOT NULL
GROUP BY
  client,
  name,
  total_page
HAVING
  freq_vf > 100
ORDER BY
  freq_vf DESC
41 changes: 41 additions & 0 deletions sql/2021/fonts/variable_font_axes_used.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#standardSQL
#VF_axis_value
# Returns the value of every `font-variation-settings` declaration found in a
# parsed stylesheet, in any rule at any nesting depth.
#
# css: JSON text of a parsed stylesheet, read as `stylesheet.rules[]`, where a
#      rule may carry nested `rules` and/or a `declarations` array.
# Returns: the declared values in document order; an empty array when the
#          input cannot be parsed or walked.
CREATE TEMPORARY FUNCTION getFontVariationSettings(css STRING)
RETURNS ARRAY<STRING> LANGUAGE js AS '''
try {
  var reduceValues = (values, rule) => {
    // Rules that contain other rules are walked recursively.
    if ('rules' in rule) {
      return rule.rules.reduce(reduceValues, values);
    }
    if (!('declarations' in rule)) {
      return values;
    }
    // Entries without a property name are skipped instead of throwing; one
    // such entry would otherwise send the whole stylesheet to the catch block
    // and lose every value already collected.
    // NOTE(review): presumably these are comment nodes emitted by the CSS
    // parser - confirm against the parsed_css producer.
    return values.concat(
      rule.declarations
        .filter(d => d.property && d.property.toLowerCase() == 'font-variation-settings')
        .map(d => d.value));
  };
  var $ = JSON.parse(css);
  return $.stylesheet.rules.reduce(reduceValues, []);
} catch (e) {
  return [];
}
''';
# Variation axes set through `font-variation-settings`, with the value given to
# each axis and the number of pages per client using that axis/value pair.
#
# Output columns: client, axis, num_axis, pages, total, pct.
SELECT
  client,
  # Four-character axis tag, lower-cased, taken from between the quotes.
  REGEXP_EXTRACT(LOWER(axis_setting), '[\'"]([\\w]{4})[\'"]') AS axis,
  # The number that follows the tag in the SAME comma-separated piece. Reading
  # it from the whole declaration would give every axis the first number of
  # the declaration. Sign and decimals are kept (e.g. a negative slant).
  CAST(REGEXP_EXTRACT(axis_setting, '[\'"][\\w]{4}[\'"]\\s*\\+?(-?\\d+(?:\\.\\d+)?)') AS NUMERIC) AS num_axis,
  COUNT(DISTINCT page) AS pages,
  SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS total,
  COUNT(DISTINCT page) / SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS pct
FROM
  `httparchive.almanac.parsed_css`,
  # One row per font-variation-settings declaration ...
  UNNEST(getFontVariationSettings(css)) AS settings,
  # ... then one row per comma-separated axis entry within it.
  UNNEST(SPLIT(settings, ',')) AS axis_setting
WHERE
  date = '2021-07-01'
GROUP BY
  client,
  axis,
  num_axis
HAVING
  axis IS NOT NULL
ORDER BY
  pages DESC
41 changes: 41 additions & 0 deletions sql/2021/fonts/web_font_usage_breakdown_with_fcp_lcp.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#standardSQL
#web_font_usage_breakdown_with_fcp_lcp
# Hosts that serve fonts to pages on a different host: pages per client using
# each host, and the approximate median FCP and LCP of those pages. Hosts used
# by fewer than 1000 pages are dropped.
#
# Output columns: client, host, pages, total, pct, median_fcp, median_lcp.
WITH external_font_requests AS (
  # Distinct (client, page, font url) where the font is not served from the
  # page's own host.
  SELECT DISTINCT
    client,
    page,
    url
  FROM
    `httparchive.almanac.requests`
  WHERE
    date = '2021-07-01' AND
    type = 'font' AND
    NET.HOST(page) != NET.HOST(url)
),

page_timings AS (
  # FCP / LCP read from the page payload.
  SELECT
    _TABLE_SUFFIX AS client,
    url AS page,
    CAST(JSON_EXTRACT_SCALAR(payload, "$['_chromeUserTiming.firstContentfulPaint']") AS INT64) AS fcp,
    CAST(JSON_EXTRACT_SCALAR(payload, "$['_chromeUserTiming.LargestContentfulPaint']") AS INT64) AS lcp
  FROM
    `httparchive.pages.2021_07_01_*`
)

SELECT
  client,
  NET.HOST(url) AS host,
  COUNT(DISTINCT page) AS pages,
  SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS total,
  COUNT(DISTINCT page) / SUM(COUNT(DISTINCT page)) OVER (PARTITION BY client) AS pct,
  APPROX_QUANTILES(fcp, 1000)[OFFSET(500)] AS median_fcp,
  APPROX_QUANTILES(lcp, 1000)[OFFSET(500)] AS median_lcp
FROM
  external_font_requests
INNER JOIN
  page_timings
USING
  (client, page)
GROUP BY
  client,
  host
HAVING
  pages >= 1000
ORDER BY
  pct DESC
19 changes: 19 additions & 0 deletions sql/2021/fonts/web_fonts_usage.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#standardSQL
#web_fonts_usage
# Share of pages that request at least one font, per client and crawl date,
# across every summary_pages table.
#
# Output columns: client, date, freq_fonts, total, pct_fonts.
WITH page_font_stats AS (
  SELECT
    # The table suffix ends in the client name and starts with the crawl date
    # written with underscores.
    CASE
      WHEN ENDS_WITH(_TABLE_SUFFIX, 'desktop') THEN 'desktop'
      ELSE 'mobile'
    END AS client,
    REGEXP_REPLACE(_TABLE_SUFFIX, r'(\d+)_(\d+)_(\d+).*', r'\1-\2-\3') AS date,
    reqFont
  FROM
    `httparchive.summary_pages.*`
  WHERE
    # Rows missing either font metric are excluded from the totals.
    reqFont IS NOT NULL AND
    bytesFont IS NOT NULL
)

SELECT
  client,
  date,
  COUNTIF(reqFont > 0) AS freq_fonts,
  COUNT(0) AS total,
  COUNTIF(reqFont > 0) / COUNT(0) AS pct_fonts
FROM
  page_font_stats
GROUP BY
  client,
  date
ORDER BY
  date DESC,
  client
Loading