diff --git a/CHANGELOG.md b/CHANGELOG.md
index db09299..cf29ce4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [Unreleased]
+### Added
+- Support for parsing `` exactly to close the tag
+ -- * UNLESS it is contained in a JavaScript string or comment
+ --
+ -- In essence, we provide partial JavaScript parser here
+ Parser.succeed (Element name attributes)
+ |. Parser.chompIf ((==) '>')
+ |= consumeJavaScriptUntilClosingTag
+
else
Parser.succeed (Element name attributes)
|. Parser.chompIf ((==) '>')
@@ -267,6 +278,103 @@ element =
)
+consumeJavaScriptUntilClosingTag : Parser (List Node)
+consumeJavaScriptUntilClosingTag =
+ Parser.loop [] <|
+ \acc ->
+ let
+ accumulate newNode =
+ Parser.Loop <|
+ case ( acc, newNode ) of
+ ( [], first ) ->
+ [ first ]
+
+ ( (Text accChunk) :: tail, Text newChunk ) ->
+ -- Merge top-most text node unless HTML comment nodes are interleaved
+ Text (accChunk ++ newChunk) :: tail
+
+ ( nonTextNode :: tail, _ ) ->
+ newNode :: nonTextNode :: tail
+ in
+ Parser.oneOf
+ [ -- HTML comments are, albeit considered a bad practice recently,
+ -- allowed inside "
+ )
+ )
+ |. Parser.chompWhile isSpaceCharacter
+ |. Parser.token ">"
+
+
+-- Parse a JavaScript literal that opens and closes with `terminatorChar`
+-- (the same character is used as both the opening and closing delimiter).
+--
+-- The delimiters are consumed while parsing, so the result re-attaches one on
+-- each side: the returned string is the literal as it appeared in the input,
+-- delimiters included. The body between the delimiters is collected by
+-- `stringHelp`.
+javaScriptStringLike : Char -> Parser String
+javaScriptStringLike terminatorChar =
+    let
+        delimiter =
+            String.fromChar terminatorChar
+
+        -- Put the consumed delimiters back around the collected body.
+        requote inner =
+            delimiter ++ inner ++ delimiter
+    in
+    Parser.succeed requote
+        |. Parser.token delimiter
+        |= Parser.loop "" (stringHelp terminatorChar delimiter)
+
+
+-- One step of the loop that collects the body of a delimited literal.
+--
+-- `terminatorChar` and `terminatorStr` are the same delimiter as a `Char` and
+-- as a `String`; `acc` is the text collected so far. Exactly one of three
+-- things happens per step:
+--
+--   * a backslash followed by any single character is appended verbatim
+--     (backslash included) and the loop continues, so an escaped delimiter
+--     does not end the literal;
+--   * the delimiter ends the loop, yielding `acc` without the delimiter;
+--   * a non-empty run of characters that are neither a backslash nor the
+--     delimiter is appended and the loop continues.
+stringHelp : Char -> String -> String -> Parser (Parser.Step String String)
+stringHelp terminatorChar terminatorStr acc =
+    let
+        isPlainChar c =
+            not (c == '\\' || c == terminatorChar)
+
+        continueWith piece =
+            Parser.Loop (acc ++ piece)
+
+        escapeSequence =
+            Parser.succeed (\escaped -> continueWith ("\\" ++ escaped))
+                |. Parser.token "\\"
+                |= justOneChar
+
+        closingDelimiter =
+            Parser.map (always (Parser.Done acc)) (Parser.token terminatorStr)
+
+        plainRun =
+            Parser.map continueWith
+                (Parser.getChompedString (chompOneOrMore isPlainChar))
+    in
+    Parser.oneOf
+        [ escapeSequence
+        , closingDelimiter
+        , plainRun
+        ]
+
+
+-- Consume exactly one character, whatever it is, and return it as a string.
+-- Fails if there is no character left to consume.
+--
+-- No loop is needed: `Parser.chompIf` consumes at most one character, and
+-- `Parser.getChompedString` returns precisely what was consumed.
+justOneChar : Parser String
+justOneChar =
+    Parser.chompIf (always True)
+        |> Parser.getChompedString
+
+
tagName : Parser String
tagName =
Parser.getChompedString
diff --git a/tests/Main.elm b/tests/MainTests.elm
similarity index 72%
rename from tests/Main.elm
rename to tests/MainTests.elm
index 8ca06b5..8656a67 100644
--- a/tests/Main.elm
+++ b/tests/MainTests.elm
@@ -1,4 +1,4 @@
-module Main exposing (suite)
+module MainTests exposing (suite)
import Dict
import Expect exposing (Expectation)
@@ -164,9 +164,117 @@ documentTests =
[ test "minimal" (testParseDocument "" (Html.Parser.Document [] "" [] ( [], [] ) []))
, test "example1" (testParseDocument "
Got it.
" { doctype = "LEGACY \"My legacy string stuff\"", document = ( [], [ Element "p" [] [ Text "Got it." ], Element "br" [] [] ] ), postdocComments = [ "Smelly feet" ], preambleComments = [ "Early!" ], predocComments = [ "Teehee!" ] })
, test "recapitalized1" (testParseDocument "gOt It.
" { doctype = "lEgAcY \"mY LeGaCy StRiNg StUfF\"", document = ( [], [ Element "p" [] [ Text "gOt It." ], Element "br" [] [] ] ), postdocComments = [ "sMeLlY fEeT" ], preambleComments = [ "EaRlY!" ], predocComments = [ "tEeHeE!" ] })
+ , test "realWorld1"
+ (testParseDocument realWorld1
+ { preambleComments = []
+ , doctype = ""
+ , predocComments = []
+ , postdocComments = []
+ , document =
+ ( []
+ , [ Text "\n "
+ , Element "head"
+ []
+ [ Text "\n "
+ , Element "meta" [ ( "charset", "utf-8" ) ] []
+ , Text "\n "
+ , Element "title" [] [ Text "Title" ]
+ , Text "\n "
+ , Element "link" [ ( "rel", "stylesheet" ), ( "href", "/style.css" ) ] []
+ , Text "\n "
+ , Element "link" [ ( "rel", "canonical" ), ( "href", "https://example.com" ) ] []
+ , Text "\n "
+ , Element "script" [ ( "async", "" ), ( "type", "text/javascript" ), ( "src", "https://external.example.com/script.js" ) ] []
+ , Text "\n "
+ , Comment " Google Analytics "
+ , Text "\n "
+ , Element "script" [ ( "async", "" ), ( "src", "https://www.googletagmanager.com/gtag/js?id=xxxxxxxx" ) ] []
+ , Text "\n "
+ , Element "script" [] [ Text """
+ /**
+ Block comments
+ */
+ window.dataLayer = window.dataLayer || [];
+ function gtag(){dataLayer.push(arguments);}
+ gtag('js', new Date());
+ gtag('config', 'xxxxxxxx');
+ """ ]
+ , Text "\n "
+ ]
+ , Text "\n "
+ , Element "body"
+ []
+ [ Text "\n "
+ , Element "div" [ ( "id", "root" ) ] []
+ , Text "\n "
+ , Element "script" [] [ Comment """
+ // Ancient Browser Workaround
+ // Hiding ');
+ //""" ]
+ , Text "\n "
+ , Element "script" [] [ Text """
+ var dqStringWithScript = " inside JavaScript double-quoted string must be ignored";
+ var sqStringWithScript = ' inside JavaScript single-quoted string must be ignored';
+ var templateWithScript = ` inside JavaScript template literal must be ignored; ${"even interpolated "}`;
+ // inside JavaScript line comment must be ignored
+ /*
+ inside JavaScript multiline comment must be ignored
+ */
+ """ ]
+ , Text "\n "
+ ]
+ , Text "\n"
+ ]
+ )
+ }
+ )
]
+realWorld1 : String
+realWorld1 =
+ """
+
+
+
+ Title
+
+
+
+
+
+
+
+
+
+ ');
+ //-->
+ inside JavaScript double-quoted string must be ignored";
+ var sqStringWithScript = ' inside JavaScript single-quoted string must be ignored';
+ var templateWithScript = ` inside JavaScript template literal must be ignored; ${"even interpolated "}`;
+ // inside JavaScript line comment must be ignored
+ /*
+ inside JavaScript multiline comment must be ignored
+ */
+
+
+
+"""
+
+
documentToStringTests : Test
documentToStringTests =
describe "documentToString"
@@ -240,6 +348,8 @@ errorTests =
, test "wrong DOCTYPE keyword" (testDocumentError "")
, test "wrong DOCTYPE" (testDocumentError "")
, test "wrong html tag" (testDocumentError "")
+ , test "incomplete script1" (testDocumentError "