From 3214e03f8dc135070c00ebf77e7389dd45ab9d35 Mon Sep 17 00:00:00 2001 From: "Michael[tm] Smith" Date: Tue, 27 Sep 2022 18:41:55 +0900 Subject: [PATCH 1/3] Move the self-closing-tag warning to the TreeBuilder code --- .../impl/ErrorReportingTokenizer.java | 12 ---------- .../validator/htmlparser/impl/Tokenizer.java | 4 ---- .../htmlparser/impl/TreeBuilder.java | 22 +++++++++++++++++++ .../validator/htmlparser/sax/HtmlParser.java | 1 + 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java b/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java index fcd2128e..77dc9090 100644 --- a/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java +++ b/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java @@ -710,18 +710,6 @@ private boolean isAstralPrivateUse(int c) { note("xhtml1", "Unquoted attribute value."); } - @Override - protected void noteSelfClosingTag() throws SAXException { - note("html-strict", - "Self-closing tag syntax in text/html documents is widely" - + " discouraged; it’s unnecessary and interacts badly" - + " with other HTML features (e.g., unquoted attribute" - + " values). If you’re using a tool that injects" - + " self-closing tag syntax into all void elements," - + " without any option to prevent it from doing so," - + " then consider switching to a different tool."); - } - /** * Sets the transitionHandler. * diff --git a/src/nu/validator/htmlparser/impl/Tokenizer.java b/src/nu/validator/htmlparser/impl/Tokenizer.java index 72d42af9..a197757a 100755 --- a/src/nu/validator/htmlparser/impl/Tokenizer.java +++ b/src/nu/validator/htmlparser/impl/Tokenizer.java @@ -2292,7 +2292,6 @@ private void ensureBufferSpace(int inputLength) throws SAXException { * flag of the current tag token. Emit the current * tag token. 
*/ - noteSelfClosingTag(); state = transition(state, emitCurrentTagToken(true, pos), reconsume, pos); if (shouldSuspend) { break stateloop; @@ -7588,9 +7587,6 @@ protected void noteAttributeWithoutValue() throws SAXException { protected void noteUnquotedAttributeValue() throws SAXException { } - protected void noteSelfClosingTag() throws SAXException { - } - /** * Sets the encodingDeclarationHandler. * diff --git a/src/nu/validator/htmlparser/impl/TreeBuilder.java b/src/nu/validator/htmlparser/impl/TreeBuilder.java index 1f437bf7..cb2a5e59 100644 --- a/src/nu/validator/htmlparser/impl/TreeBuilder.java +++ b/src/nu/validator/htmlparser/impl/TreeBuilder.java @@ -438,6 +438,8 @@ public abstract class TreeBuilder implements TokenHandler, private boolean reportingDoctype = true; + private HashMap errorProfileMap = null; + private XmlViolationPolicy namePolicy = XmlViolationPolicy.ALTER_INFOSET; private final Map idLocations = new HashMap(); @@ -1458,6 +1460,17 @@ public final void startTag(ElementName elementName, flushCharacters(); // [NOCPP[ + if (contextNamespace == "http://www.w3.org/1999/xhtml" && + selfClosing && errorProfileMap != null && + errorProfileMap.get("html-strict") != null) { + warn("Self-closing tag syntax in text/html documents is widely" + + " discouraged; it’s unnecessary and interacts badly" + + " with other HTML features (e.g., unquoted attribute" + + " values). If you’re using a tool that injects" + + " self-closing tag syntax into all void elements," + + " without any option to prevent it from doing so," + + " then consider switching to a different tool."); + } if (errorHandler != null) { // ID uniqueness @IdType String id = attributes.getId(); @@ -5818,6 +5831,15 @@ public void setNamePolicy(XmlViolationPolicy namePolicy) { this.namePolicy = namePolicy; } + /** + * Sets the errorProfile. 
+ * + * @param errorProfile + */ + public void setErrorProfile(HashMap errorProfileMap) { + this.errorProfileMap = errorProfileMap; + } + /** * Sets the documentModeHandler. * diff --git a/src/nu/validator/htmlparser/sax/HtmlParser.java b/src/nu/validator/htmlparser/sax/HtmlParser.java index 8ff8167d..b778f000 100644 --- a/src/nu/validator/htmlparser/sax/HtmlParser.java +++ b/src/nu/validator/htmlparser/sax/HtmlParser.java @@ -199,6 +199,7 @@ private void lazyInit() { this.treeBuilder.setScriptingEnabled(scriptingEnabled); this.treeBuilder.setReportingDoctype(reportingDoctype); this.treeBuilder.setNamePolicy(namePolicy); + this.treeBuilder.setErrorProfile(errorProfileMap); if (saxStreamer != null) { saxStreamer.setContentHandler(contentHandler == null ? new DefaultHandler() : contentHandler); From 7c849481ce8c3a28f0b207959ff34faed13cce2b Mon Sep 17 00:00:00 2001 From: "Michael[tm] Smith" Date: Tue, 27 Sep 2022 19:51:27 +0900 Subject: [PATCH 2/3] Re-work self-closing-tag checking to avoid redundant error messages --- .../impl/ErrorReportingTokenizer.java | 9 +++ .../validator/htmlparser/impl/Tokenizer.java | 11 +++ .../htmlparser/impl/TreeBuilder.java | 73 +++++++++++++------ .../validator/htmlparser/sax/HtmlParser.java | 1 - 4 files changed, 71 insertions(+), 23 deletions(-) diff --git a/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java b/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java index 77dc9090..6c8e7617 100644 --- a/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java +++ b/src/nu/validator/htmlparser/impl/ErrorReportingTokenizer.java @@ -134,6 +134,15 @@ public void setErrorProfile(HashMap errorProfileMap) { this.errorProfileMap = errorProfileMap; } + /** + * Gets the errorProfile. + * + * @return the errorProfile map + */ + @Override public HashMap getErrorProfile() { + return errorProfileMap; + } + /** * Reports on an event based on profile selected.
* diff --git a/src/nu/validator/htmlparser/impl/Tokenizer.java b/src/nu/validator/htmlparser/impl/Tokenizer.java index a197757a..1fa44b87 100755 --- a/src/nu/validator/htmlparser/impl/Tokenizer.java +++ b/src/nu/validator/htmlparser/impl/Tokenizer.java @@ -35,6 +35,8 @@ package nu.validator.htmlparser.impl; +import java.util.HashMap; + import org.xml.sax.ErrorHandler; import org.xml.sax.Locator; import org.xml.sax.ext.Locator2; @@ -686,6 +688,15 @@ public ErrorHandler getErrorHandler() { return this.errorHandler; } + /** + * Gets the errorProfile. + * + * @return the errorProfile; this base implementation returns null + */ + public HashMap getErrorProfile() { + return null; + } + /** * Sets the commentPolicy. * diff --git a/src/nu/validator/htmlparser/impl/TreeBuilder.java b/src/nu/validator/htmlparser/impl/TreeBuilder.java index cb2a5e59..14b2a14f 100644 --- a/src/nu/validator/htmlparser/impl/TreeBuilder.java +++ b/src/nu/validator/htmlparser/impl/TreeBuilder.java @@ -438,8 +438,6 @@ public abstract class TreeBuilder implements TokenHandler, private boolean reportingDoctype = true; - private HashMap errorProfileMap = null; - private XmlViolationPolicy namePolicy = XmlViolationPolicy.ALTER_INFOSET; private final Map idLocations = new HashMap(); @@ -1460,17 +1458,8 @@ public final void startTag(ElementName elementName, flushCharacters(); // [NOCPP[ - if (contextNamespace == "http://www.w3.org/1999/xhtml" && - selfClosing && errorProfileMap != null && - errorProfileMap.get("html-strict") != null) { - warn("Self-closing tag syntax in text/html documents is widely" - + " discouraged; it’s unnecessary and interacts badly" - + " with other HTML features (e.g., unquoted attribute" - + " values).
If you’re using a tool that injects" - + " self-closing tag syntax into all void elements," - + " without any option to prevent it from doing so," - + " then consider switching to a different tool."); - } + boolean wasSelfClosing = selfClosing; + boolean voidElement = false; if (errorHandler != null) { // ID uniqueness @IdType String id = attributes.getId(); @@ -1593,6 +1582,9 @@ public final void startTag(ElementName elementName, elementName, attributes); selfClosing = false; + // [NOCPP[ + voidElement = true; + // ]NOCPP] attributes = null; // CPP break starttagloop; case TITLE: @@ -1605,6 +1597,9 @@ public final void startTag(ElementName elementName, elementName, attributes); selfClosing = false; + // [NOCPP[ + voidElement = true; + // ]NOCPP] attributes = null; // CPP break starttagloop; case SCRIPT: @@ -1791,6 +1786,9 @@ public final void startTag(ElementName elementName, attributes, formPointer); selfClosing = false; + // [NOCPP[ + voidElement = true; + // ]NOCPP] attributes = null; // CPP break starttagloop; case FORM: @@ -2137,6 +2135,9 @@ public final void startTag(ElementName elementName, elementName, attributes); selfClosing = false; + // [NOCPP[ + voidElement = true; + // ]NOCPP] attributes = null; // CPP break starttagloop; case HR: @@ -2145,6 +2146,9 @@ public final void startTag(ElementName elementName, elementName, attributes); selfClosing = false; + // [NOCPP[ + voidElement = true; + // ]NOCPP] attributes = null; // CPP break starttagloop; case IMAGE: @@ -2158,6 +2162,9 @@ public final void startTag(ElementName elementName, elementName, attributes, formPointer); selfClosing = false; + // [NOCPP[ + voidElement = true; + // ]NOCPP] attributes = null; // CPP break starttagloop; case TEXTAREA: @@ -2337,6 +2344,9 @@ public final void startTag(ElementName elementName, elementName, attributes); selfClosing = false; + // [NOCPP[ + voidElement = true; + // ]NOCPP] attributes = null; // CPP break starttagloop; case META: @@ -2404,6 +2414,9 @@ public final 
void startTag(ElementName elementName, elementName, attributes); selfClosing = false; + // [NOCPP[ + voidElement = true; + // ]NOCPP] attributes = null; // CPP break starttagloop; case META: @@ -2412,6 +2425,9 @@ public final void startTag(ElementName elementName, elementName, attributes); selfClosing = false; + // [NOCPP[ + voidElement = true; + // ]NOCPP] attributes = null; // CPP break starttagloop; case STYLE: @@ -2451,6 +2467,9 @@ public final void startTag(ElementName elementName, elementName, attributes); selfClosing = false; + // [NOCPP[ + voidElement = true; + // ]NOCPP] attributes = null; // CPP break starttagloop; case TEMPLATE: @@ -2585,6 +2604,9 @@ public final void startTag(ElementName elementName, elementName, attributes); selfClosing = false; + // [NOCPP[ + voidElement = true; + // ]NOCPP] attributes = null; // CPP break starttagloop; default: @@ -2758,6 +2780,9 @@ public final void startTag(ElementName elementName, elementName, attributes); selfClosing = false; + // [NOCPP[ + voidElement = true; + // ]NOCPP] pop(); // head attributes = null; // CPP break starttagloop; @@ -2769,6 +2794,9 @@ public final void startTag(ElementName elementName, elementName, attributes); selfClosing = false; + // [NOCPP[ + voidElement = true; + // ]NOCPP] pop(); // head attributes = null; // CPP break starttagloop; @@ -2857,6 +2885,16 @@ public final void startTag(ElementName elementName, } if (selfClosing) { errSelfClosing(); + } else if (wasSelfClosing && voidElement + && tokenizer.getErrorProfile() != null + && tokenizer.getErrorProfile().get("html-strict") != null) { + warn("Self-closing tag syntax in text/html documents is widely" + + " discouraged; it’s unnecessary and interacts badly" + + " with other HTML features (e.g., unquoted attribute" + + " values). 
If you’re using a tool that injects" + + " self-closing tag syntax into all void elements," + + " without any option to prevent it from doing so," + + " then consider switching to a different tool."); } // CPPONLY: if (mBuilder == null && attributes != HtmlAttributes.EMPTY_ATTRIBUTES) { // CPPONLY: Portability.delete(attributes); @@ -5831,15 +5869,6 @@ public void setNamePolicy(XmlViolationPolicy namePolicy) { this.namePolicy = namePolicy; } - /** - * Sets the errorProfile. - * - * @param errorProfile - */ - public void setErrorProfile(HashMap errorProfileMap) { - this.errorProfileMap = errorProfileMap; - } - /** * Sets the documentModeHandler. * diff --git a/src/nu/validator/htmlparser/sax/HtmlParser.java b/src/nu/validator/htmlparser/sax/HtmlParser.java index b778f000..8ff8167d 100644 --- a/src/nu/validator/htmlparser/sax/HtmlParser.java +++ b/src/nu/validator/htmlparser/sax/HtmlParser.java @@ -199,7 +199,6 @@ private void lazyInit() { this.treeBuilder.setScriptingEnabled(scriptingEnabled); this.treeBuilder.setReportingDoctype(reportingDoctype); this.treeBuilder.setNamePolicy(namePolicy); - this.treeBuilder.setErrorProfile(errorProfileMap); if (saxStreamer != null) { saxStreamer.setContentHandler(contentHandler == null ? 
new DefaultHandler() : contentHandler); From 2e3445a7eb743d6b1203081b1dab5642bde2375c Mon Sep 17 00:00:00 2001 From: "Michael[tm] Smith" Date: Tue, 27 Sep 2022 20:03:11 +0900 Subject: [PATCH 3/3] Put self-closing-tag else into a NOCPP block --- src/nu/validator/htmlparser/impl/TreeBuilder.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/nu/validator/htmlparser/impl/TreeBuilder.java b/src/nu/validator/htmlparser/impl/TreeBuilder.java index 14b2a14f..967b53f6 100644 --- a/src/nu/validator/htmlparser/impl/TreeBuilder.java +++ b/src/nu/validator/htmlparser/impl/TreeBuilder.java @@ -2885,6 +2885,7 @@ public final void startTag(ElementName elementName, } if (selfClosing) { errSelfClosing(); + // [NOCPP[ } else if (wasSelfClosing && voidElement && tokenizer.getErrorProfile() != null && tokenizer.getErrorProfile().get("html-strict") != null) { @@ -2895,6 +2896,7 @@ public final void startTag(ElementName elementName, + " self-closing tag syntax into all void elements," + " without any option to prevent it from doing so," + " then consider switching to a different tool."); + // ]NOCPP] } // CPPONLY: if (mBuilder == null && attributes != HtmlAttributes.EMPTY_ATTRIBUTES) { // CPPONLY: Portability.delete(attributes);