From 643fd162e36ae58085b92ff4c0fec0bafe5a46a7 Mon Sep 17 00:00:00 2001 From: Maciej Mionskowski Date: Thu, 19 Oct 2023 20:16:20 +0000 Subject: [PATCH] html: fix SOLIDUS '/' handling in attribute parsing Calling the Tokenizer with HTML elements containing a SOLIDUS (/) character in the attribute name results in incorrect tokenization. This is due to a violation of the following rule transitions in the WHATWG spec: - https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state, where we are not reconsuming the character if '/' is encountered - https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-name-state, where we are not switching to the self-closing start tag state Fixes golang/go#63402 Change-Id: I90d998dd8decde877bd63aa664f3657aa6161024 GitHub-Last-Rev: 3546db808c5fbf46ea25a10cdadb2802f763b6de GitHub-Pull-Request: golang/net#195 Reviewed-on: https://go-review.googlesource.com/c/net/+/533518 LUCI-TryBot-Result: Go LUCI Auto-Submit: Michael Pratt Reviewed-by: Roland Shoemaker Reviewed-by: David Chase --- html/token.go | 12 ++++++++---- html/token_test.go | 15 +++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/html/token.go b/html/token.go index de67f938a..3c57880d6 100644 --- a/html/token.go +++ b/html/token.go @@ -910,9 +910,6 @@ func (z *Tokenizer) readTagAttrKey() { return } switch c { - case ' ', '\n', '\r', '\t', '\f', '/': - z.pendingAttr[0].end = z.raw.end - 1 - return case '=': if z.pendingAttr[0].start+1 == z.raw.end { // WHATWG 13.2.5.32, if we see an equals sign before the attribute name @@ -920,7 +917,9 @@ func (z *Tokenizer) readTagAttrKey() { continue } fallthrough - case '>': + case ' ', '\n', '\r', '\t', '\f', '/', '>': + // WHATWG 13.2.5.33 Attribute name state + // We need to reconsume the char in the after attribute name state to support the / character z.raw.end-- z.pendingAttr[0].end = z.raw.end return @@ -939,6 +938,11 @@ func (z *Tokenizer) readTagAttrVal() { if z.err != nil { return } + if c == '/' { + 
// WHATWG 13.2.5.34 After attribute name state + // U+002F SOLIDUS (/) - Switch to the self-closing start tag state. + return + } if c != '=' { z.raw.end-- return diff --git a/html/token_test.go b/html/token_test.go index b2383a951..8b0d5aab6 100644 --- a/html/token_test.go +++ b/html/token_test.go @@ -601,6 +601,21 @@ var tokenTests = []tokenTest{ `

`, `

`, }, + { + "forward slash before attribute name", + `

`, + `

`, + }, + { + "forward slash before attribute name with spaces around", + `

`, + `

`, + }, + { + "forward slash after attribute name followed by a character", + `

`, + `

`, + }, } func TestTokenizer(t *testing.T) {