diff --git a/CHANGELOG.md b/CHANGELOG.md index db09299..cf29ce4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Added +- Support for parsing `` exactly to close the tag + -- * UNLESS it is contained in a JavaScript string or comment + -- + -- In essence, we provide partial JavaScript parser here + Parser.succeed (Element name attributes) + |. Parser.chompIf ((==) '>') + |= consumeJavaScriptUntilClosingTag + else Parser.succeed (Element name attributes) |. Parser.chompIf ((==) '>') @@ -267,6 +278,103 @@ element = ) +consumeJavaScriptUntilClosingTag : Parser (List Node) +consumeJavaScriptUntilClosingTag = + Parser.loop [] <| + \acc -> + let + accumulate newNode = + Parser.Loop <| + case ( acc, newNode ) of + ( [], first ) -> + [ first ] + + ( (Text accChunk) :: tail, Text newChunk ) -> + -- Merge top-most text node unless HTML comment nodes are interleaved + Text (accChunk ++ newChunk) :: tail + + ( nonTextNode :: tail, _ ) -> + newNode :: nonTextNode :: tail + in + Parser.oneOf + [ -- HTML comments are, albeit considered a bad practice recently, + -- allowed inside " + ) + ) + |. Parser.chompWhile isSpaceCharacter + |. Parser.token ">" + + +javaScriptStringLike : Char -> Parser String +javaScriptStringLike terminatorChar = + let + terminatorStr = + String.fromChar terminatorChar + in + Parser.succeed identity + |. Parser.token terminatorStr + |= Parser.loop "" (stringHelp terminatorChar terminatorStr) + -- Restoring original shape + |> Parser.map (\chunk -> terminatorStr ++ chunk ++ terminatorStr) + + +stringHelp : Char -> String -> String -> Parser (Parser.Step String String) +stringHelp terminatorChar terminatorStr acc = + Parser.oneOf + [ Parser.succeed (\char -> Parser.Loop (acc ++ "\\" ++ char)) + |. Parser.token "\\" + |= justOneChar + , Parser.token terminatorStr + |> Parser.map (\_ -> Parser.Done acc) + , chompOneOrMore (\char -> char /= '\\' && char /= terminatorChar) + |> Parser.getChompedString + |> Parser.map (\chunk -> Parser.Loop (acc ++ chunk)) + ] + + +justOneChar : Parser String +justOneChar = + Parser.loop () <| + \_ -> + Parser.chompIf (always True) + |> Parser.getChompedString + |> Parser.map Parser.Done + + tagName : Parser String tagName = Parser.getChompedString diff --git a/tests/Main.elm b/tests/MainTests.elm similarity index 72% rename from tests/Main.elm rename to tests/MainTests.elm index 8ca06b5..8656a67 100644 --- a/tests/Main.elm +++ b/tests/MainTests.elm @@ -1,4 +1,4 @@ -module Main exposing (suite) +module MainTests exposing (suite) import Dict import Expect exposing (Expectation) @@ -164,9 +164,117 @@ documentTests = [ test "minimal" (testParseDocument "" (Html.Parser.Document [] "" [] ( [], [] ) [])) , test "example1" (testParseDocument "

Got it.


" { doctype = "LEGACY \"My legacy string stuff\"", document = ( [], [ Element "p" [] [ Text "Got it." ], Element "br" [] [] ] ), postdocComments = [ "Smelly feet" ], preambleComments = [ "Early!" ], predocComments = [ "Teehee!" ] }) , test "recapitalized1" (testParseDocument "

gOt It.


" { doctype = "lEgAcY \"mY LeGaCy StRiNg StUfF\"", document = ( [], [ Element "p" [] [ Text "gOt It." ], Element "br" [] [] ] ), postdocComments = [ "sMeLlY fEeT" ], preambleComments = [ "EaRlY!" ], predocComments = [ "tEeHeE!" ] }) + , test "realWorld1" + (testParseDocument realWorld1 + { preambleComments = [] + , doctype = "" + , predocComments = [] + , postdocComments = [] + , document = + ( [] + , [ Text "\n " + , Element "head" + [] + [ Text "\n " + , Element "meta" [ ( "charset", "utf-8" ) ] [] + , Text "\n " + , Element "title" [] [ Text "Title" ] + , Text "\n " + , Element "link" [ ( "rel", "stylesheet" ), ( "href", "/style.css" ) ] [] + , Text "\n " + , Element "link" [ ( "rel", "canonical" ), ( "href", "https://example.com" ) ] [] + , Text "\n " + , Element "script" [ ( "async", "" ), ( "type", "text/javascript" ), ( "src", "https://external.example.com/script.js" ) ] [] + , Text "\n " + , Comment " Google Analytics " + , Text "\n " + , Element "script" [ ( "async", "" ), ( "src", "https://www.googletagmanager.com/gtag/js?id=xxxxxxxx" ) ] [] + , Text "\n " + , Element "script" [] [ Text """ + /** + Block comments + */ + window.dataLayer = window.dataLayer || []; + function gtag(){dataLayer.push(arguments);} + gtag('js', new Date()); + gtag('config', 'xxxxxxxx'); + """ ] + , Text "\n " + ] + , Text "\n " + , Element "body" + [] + [ Text "\n " + , Element "div" [ ( "id", "root" ) ] [] + , Text "\n " + , Element "script" [] [ Comment """ + // Ancient Browser Workaround + // Hiding '); + //""" ] + , Text "\n " + , Element "script" [] [ Text """ + var dqStringWithScript = " inside JavaScript double-quoted string must be ignored"; + var sqStringWithScript = ' inside JavaScript single-quoted string must be ignored'; + var templateWithScript = ` inside JavaScript template literal must be ignored; ${"even interpolated "}`; + // inside JavaScript line comment must be ignored + /* + inside JavaScript multiline comment must be ignored + */ + """ ] + , Text "\n " + ] + , Text "\n" + ] + ) + } + ) ] +realWorld1 : String +realWorld1 = + """ + + + + Title + + + + + + + + +
+ + inside JavaScript double-quoted string must be ignored"; + var sqStringWithScript = ' inside JavaScript single-quoted string must be ignored'; + var templateWithScript = ` inside JavaScript template literal must be ignored; ${"even interpolated "}`; + // inside JavaScript line comment must be ignored + /* + inside JavaScript multiline comment must be ignored + */ + + + +""" + + documentToStringTests : Test documentToStringTests = describe "documentToString" @@ -240,6 +348,8 @@ errorTests = , test "wrong DOCTYPE keyword" (testDocumentError "") , test "wrong DOCTYPE" (testDocumentError "") , test "wrong html tag" (testDocumentError "") + , test "incomplete script1" (testDocumentError "