-
-
Notifications
You must be signed in to change notification settings - Fork 1.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
SSR: Stringify VNodes (II) #1344
Changes from 85 commits
145e331
3c01c22
169a3e2
9eb65ce
47789d9
bf59552
e17cf3c
e32c9d6
d46372e
c7d2615
051917c
de7a3b0
08bfaa5
decfd19
abb01d7
772211e
3291ec1
0f3b7f0
961a507
7373898
98a2b83
19c7f58
cea6a7b
aad03df
2da0666
9deee41
e289b08
2e65887
f5f7872
bd555c9
bd2c5ae
931550b
bd4935a
1ba0c88
ee9d8c9
e9ce780
cbff3a6
6fbce93
ee0e1f4
cd171d6
c4175e1
1f7c915
f159803
ce8523c
5f78a32
459aa6d
998403b
fb275b4
53e23c8
5734cf7
3ccf96e
68fac3d
959f36b
98c867a
324a23a
bd0091a
1b2602b
a91b402
c53419d
12e28e6
e2f1e5d
dc0b2df
32c6032
131b60a
52f7511
329c14c
99057bf
f5bc8b4
858f621
7804cff
34e286a
d5c3c7b
acac47d
e5fe5c1
5c2a60f
12dd296
2d3d032
93e90f6
d21a7db
a783503
215119c
dbcfe49
ac5e6f2
3de8c2f
7d9e907
b79f55b
c7986c9
b5d87b3
3358255
e31ac77
04dbbad
ffca187
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,230 @@ | ||
//! This module contains utilities for parsing or validating strings relating | ||
//! to tags. | ||
|
||
use lazy_static::lazy_static; | ||
use std::collections::HashSet; | ||
use std::iter::FromIterator; | ||
|
||
lazy_static! { | ||
static ref DISALLOWED_CUSTOM_ELEMENT_TAGS: HashSet<&'static str> = HashSet::from_iter( | ||
vec![ | ||
"annotation-xml", | ||
"color-profile", | ||
"font-face", | ||
"font-face-src", | ||
"font-face-uri", | ||
"font-face-format", | ||
"font-face-name", | ||
"missing-glyph", | ||
] | ||
.iter() | ||
.map(|tag| tag.clone()) | ||
); | ||
} | ||
|
||
/// Returns true when the character provided is a "control" as defined | ||
/// in [the WhatWG spec](https://infra.spec.whatwg.org/#control) | ||
fn is_control(c: char) -> bool { | ||
match c { | ||
'\u{007F}'..='\u{009F}' => true, | ||
_ => is_c0_control(c), | ||
} | ||
} | ||
|
||
/// Reports whether `c` is a "C0 control" code point in the sense of
/// [the WhatWG Infra spec](https://infra.spec.whatwg.org/#c0-control),
/// i.e. a code point in `U+0000..=U+001F`.
fn is_c0_control(c: char) -> bool {
    // U+0000 is the smallest `char`, so only the upper bound needs checking.
    c <= '\u{001F}'
}
|
||
/// Reports whether the character `c` is a "noncharacter" in the sense of
/// [the WhatWG Infra spec](https://infra.spec.whatwg.org/#noncharacter).
///
/// Noncharacters are the contiguous run `U+FDD0..=U+FDEF` plus every code
/// point whose low 16 bits are `FFFE` or `FFFF` (`U+FFFE`, `U+FFFF`,
/// `U+1FFFE`, `U+1FFFF`, ... up to `U+10FFFE`, `U+10FFFF`).
fn is_noncharacter(c: char) -> bool {
    let code_point = u32::from(c);

    let in_contiguous_run = (0xFDD0..=0xFDEF).contains(&code_point);
    // Masking with 0xFFFE ignores the lowest bit, so this matches both the
    // ...FFFE and ...FFFF code points regardless of the upper bits.
    let low_bits_are_fffe_or_ffff = (code_point & 0xFFFE) == 0xFFFE;

    in_contiguous_run || low_bits_are_fffe_or_ffff
}
|
||
/// Returns true when the string provided is a valid "attribute name" as defined | ||
/// in [the WhatWG spec](https://html.spec.whatwg.org/multipage/syntax.html#syntax-attribute-name) | ||
pub fn is_valid_html_attribute_name(attr: &str) -> bool { | ||
for c in attr.chars() { | ||
if is_noncharacter(c) | ||
|| is_control(c) | ||
|| c == '\u{0020}' | ||
|| c == '\u{0022}' | ||
|| c == '\u{0027}' | ||
|| c == '\u{003E}' | ||
|| c == '\u{002F}' | ||
|| c == '\u{003D}' | ||
{ | ||
return false; | ||
} | ||
} | ||
true | ||
} | ||
|
||
/// Reports whether `c` is a valid PCENChar as defined in
/// [the WhatWG spec](https://html.spec.whatwg.org/multipage/custom-elements.html#prod-pcenchar).
fn is_pcen_char(c: char) -> bool {
    // Inclusive `(low, high)` bounds of every permitted range; single
    // characters are written as a range of length one.
    const PERMITTED: &[(char, char)] = &[
        ('-', '-'),
        ('.', '.'),
        ('0', '9'),
        ('_', '_'),
        ('a', 'z'),
        ('\u{B7}', '\u{B7}'),
        ('\u{C0}', '\u{D6}'),
        ('\u{D8}', '\u{F6}'),
        ('\u{F8}', '\u{37D}'),
        ('\u{37F}', '\u{1FFF}'),
        ('\u{200C}', '\u{200D}'),
        ('\u{203F}', '\u{2040}'),
        ('\u{2070}', '\u{218F}'),
        ('\u{2C00}', '\u{2FEF}'),
        ('\u{3001}', '\u{D7FF}'),
        ('\u{F900}', '\u{FDCF}'),
        ('\u{FDF0}', '\u{FFFD}'),
        ('\u{10000}', '\u{EFFFF}'),
    ];

    PERMITTED.iter().any(|&(low, high)| low <= c && c <= high)
}
|
||
/// Returns true when the tag name provided would be a valid "custom element" per | ||
/// [the WhatWG spec](https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name) | ||
fn is_valid_html_custom_element_name(tag: &str) -> bool { | ||
if DISALLOWED_CUSTOM_ELEMENT_TAGS.contains(&tag) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we need this check. Yes, these names must not be used for custom elements but that's only because they are already reserved for non-custom tags. This function isn't public and is only exposed through We can add a note to the documentation to make it clear that this function doesn't handle disallowed custom element tags. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since the public interface is agnostic to these tags, I see no reason to remove the code. The function is meant to represent the way WhatWG defines a custom tag; to stray outside of that bounds would set a precedent in the codebase for not matching the spec exactly, which is a fraught path to go down when writing code that's meant to be secure. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. While I would usually agree, in this case it makes absolutely no sense to incur the performance and space penalty. This is just dead code that Rust can't eliminate. |
||
return false; | ||
} | ||
|
||
let mut chars = tag.chars(); | ||
let first_char = chars.next(); | ||
|
||
match first_char { | ||
None => false, | ||
Some(first_char) => { | ||
// must begin with [a-z] | ||
if first_char < 'a' || first_char > 'z' { | ||
return false; | ||
} | ||
|
||
let mut seen_hyphen = false; | ||
for c in chars { | ||
if c == '-' { | ||
seen_hyphen = true | ||
} | ||
|
||
// all characters must be valid PCENChar's | ||
if !is_pcen_char(c) { | ||
return false; | ||
} | ||
} | ||
|
||
// must contain at least one hyphen | ||
seen_hyphen | ||
} | ||
} | ||
} | ||
|
||
/// Reports whether `tag` looks like a valid non-custom HTML element name or a
/// valid SVG element name. There's no official spec here, it's just arbitrary.
///
/// Accepted names consist solely of ASCII letters, ASCII digits and hyphens,
/// and contain at least one character that is not a hyphen. The empty string
/// is rejected.
fn is_valid_html_standard_ish_element_name(tag: &str) -> bool {
    let only_permitted_chars = tag
        .chars()
        .all(|c| c.is_ascii_alphanumeric() || c == '-');

    // Also rules out the empty string, which has no characters at all.
    let has_non_hyphen = tag.chars().any(|c| c != '-');

    only_permitted_chars && has_non_hyphen
}
|
||
/// Returns true when you could validly construct a tag using this name in an HTML document | ||
pub fn is_valid_sgml_tag(tag: &str) -> bool { | ||
is_valid_html_standard_ish_element_name(tag) || is_valid_html_custom_element_name(tag) | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use super::*;

    // With the `wasm_test` feature enabled, `#[test]` resolves to
    // `wasm_bindgen_test` and the tests are configured to run in a browser.
    #[cfg(feature = "wasm_test")]
    use wasm_bindgen_test::{wasm_bindgen_test as test, wasm_bindgen_test_configure};

    #[cfg(feature = "wasm_test")]
    wasm_bindgen_test_configure!(run_in_browser);

    #[test]
    fn valid_custom_element() {
        for name in &["foo-bar", "foo-", "bar-baz"] {
            assert!(is_valid_html_custom_element_name(name));
        }
    }

    #[test]
    fn invalid_custom_element() {
        for name in &["foobar", "-bar", "foo bar", "", "foo\nbar", "-"] {
            assert!(!is_valid_html_custom_element_name(name));
        }
    }

    #[test]
    fn valid_html_element() {
        for name in &["section", "h2", "applet", "appLET", "aPPlet", "foo-bar"] {
            assert!(is_valid_html_standard_ish_element_name(name));
        }
    }

    #[test]
    fn invalid_html_element() {
        for name in &[" foo", "foo ", "-", "!doctype"] {
            assert!(!is_valid_html_standard_ish_element_name(name));
        }
    }

    #[test]
    fn valid_html_attribute() {
        // "foo<bar" being accepted is shocking but true.
        for name in &["-foo-bar", "data-foobar", "foo<bar"] {
            assert!(is_valid_html_attribute_name(name));
        }
    }

    #[test]
    fn invalid_html_attribute() {
        for name in &["foo=bar", "\"foo\"", "foo bar", "foo>bar"] {
            assert!(!is_valid_html_attribute_name(name));
        }
    }

    #[test]
    fn invalid_sgml_tag() {
        for name in &["f>bar", "f<bar", "/>"] {
            assert!(!is_valid_sgml_tag(name));
        }
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No need to `iter` and `clone` here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please note that if you agree with my comment further down then this list will no longer be necessary.
At that point we can remove the `lazy_static` dependency.