Skip to content

Commit

Permalink
Change Markup queries to order by pct first (#2538)
Browse files Browse the repository at this point in the history
* Change Markup queries to order by pct first

* Apply the same ordering change to a query that was missed

* Add '(not set)' labels for empty values
  • Loading branch information
tunetheweb authored Nov 17, 2021
1 parent cfbc0eb commit f8c97f5
Show file tree
Hide file tree
Showing 11 changed files with 28 additions and 10 deletions.
2 changes: 2 additions & 0 deletions sql/2021/markup/attributes.sql
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ SELECT
_TABLE_SUFFIX AS client,
almanac_attribute_info.name,
SUM(almanac_attribute_info.freq) AS freq, # total count from all pages
SUM(SUM(almanac_attribute_info.freq)) OVER (PARTITION BY _TABLE_SUFFIX) AS total,
SUM(almanac_attribute_info.freq) / SUM(SUM(almanac_attribute_info.freq)) OVER (PARTITION BY _TABLE_SUFFIX) AS pct_ratio
FROM
`httparchive.pages.2021_07_01_*`,
Expand All @@ -30,6 +31,7 @@ GROUP BY
client,
almanac_attribute_info.name
ORDER BY
pct_ratio DESC,
client,
freq DESC
LIMIT 1000
7 changes: 4 additions & 3 deletions sql/2021/markup/buttons.sql
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,16 @@ SELECT
_TABLE_SUFFIX AS client,
button_type_info.name AS button_type,
COUNTIF(button_type_info.freq > 0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
COUNTIF(button_type_info.freq > 0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct_page_with_button_type
SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS total,
COUNTIF(button_type_info.freq > 0) / SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS pct_page_with_button_type
FROM
`httparchive.pages.2021_07_01_*`,
UNNEST(get_markup_buttons_info(JSON_EXTRACT_SCALAR(payload, '$._markup'))) AS button_type_info
GROUP BY
client,
button_type
ORDER BY
pct_page_with_button_type DESC,
client,
freq_page_with_button DESC
freq DESC
LIMIT 1000
1 change: 1 addition & 0 deletions sql/2021/markup/content_encoding.sql
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@ GROUP BY
mimeType,
content_encoding
ORDER BY
pct DESC,
client,
freq DESC
1 change: 1 addition & 0 deletions sql/2021/markup/data_attributes.sql
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ GROUP BY
client,
almanac_attribute_info.name
ORDER BY
pct_ratio DESC,
client,
freq DESC
LIMIT 1000
1 change: 1 addition & 0 deletions sql/2021/markup/doctype.sql
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ GROUP BY
client,
doctype
ORDER BY
pct DESC,
client,
freq DESC
LIMIT 100
1 change: 1 addition & 0 deletions sql/2021/markup/element_popularity.sql
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ GROUP BY
total,
element_type
ORDER BY
pct DESC,
client,
pages DESC
LIMIT 1000
1 change: 1 addition & 0 deletions sql/2021/markup/favicons.sql
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ GROUP BY
client,
image_type_extension
ORDER BY
pct DESC,
client,
freq DESC
LIMIT 1000
13 changes: 10 additions & 3 deletions sql/2021/markup/html_lang.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
CREATE TEMPORARY FUNCTION get_almanac_html_lang(almanac_string STRING)
RETURNS STRING LANGUAGE js AS '''
try {
var almanac = JSON.parse(almanac_string);
var almanac = JSON.parse(almanac_string);
if (Array.isArray(almanac) || typeof almanac != 'object') return '';
Expand All @@ -20,8 +20,14 @@ return '';

SELECT
client,
IF(IFNULL(TRIM(almanac_html_lang), '') = '', '(not set)', almanac_html_lang) AS html_lang_country,
IF(
IFNULL(TRIM(SUBSTR(almanac_html_lang, 0, LENGTH(almanac_html_lang) - STRPOS(almanac_html_lang, '-'))), '') = '',
'(not set)',
SUBSTR(almanac_html_lang, 0, LENGTH(almanac_html_lang) - STRPOS(almanac_html_lang, '-'))
) AS html_lang,
COUNT(0) AS freq,
almanac_html_lang AS html_lang,
SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
FROM
(
Expand All @@ -33,7 +39,8 @@ FROM
)
GROUP BY
client,
html_lang
almanac_html_lang
ORDER BY
pct DESC,
client,
freq DESC
9 changes: 5 additions & 4 deletions sql/2021/markup/meta_nodes_name.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ try {
var $ = JSON.parse(payload);
var almanac = JSON.parse($._almanac);
return almanac['meta-nodes'].nodes.map(n => n.name || n.property);
} catch (e) {
} catch (e) {
return [];
}
''' ;

SELECT
_TABLE_SUFFIX AS client,
name,
IF(IFNULL(TRIM(name), '') = '', '(not set)', name) AS name,
COUNT(0) AS freq,
COUNT(0) / SUM(COUNT(0)) OVER () AS pct
FROM
Expand All @@ -27,6 +27,7 @@ GROUP BY
HAVING
freq > 1
ORDER BY
pct DESC,
client,
freq DESC
LIMIT 100
name
LIMIT 200
1 change: 1 addition & 0 deletions sql/2021/markup/meta_viewport.sql
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ GROUP BY
client,
meta_viewport
ORDER BY
pct DESC,
client,
freq DESC
LIMIT 100
1 change: 1 addition & 0 deletions sql/2021/markup/top_elements.sql
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ GROUP BY
client,
element_type_info.name
ORDER BY
pct DESC,
client,
freq DESC
LIMIT 1000

0 comments on commit f8c97f5

Please sign in to comment.