diff --git a/src/main/java/org/pegdown/Extensions.java b/src/main/java/org/pegdown/Extensions.java index 7644da3..d56040c 100644 --- a/src/main/java/org/pegdown/Extensions.java +++ b/src/main/java/org/pegdown/Extensions.java @@ -37,7 +37,7 @@ public interface Extensions { /** * All of the smartypants prettyfications. Equivalent to SMARTS + QUOTES. - * + * * @see Smartypants */ static final int SMARTYPANTS = SMARTS + QUOTES; @@ -71,7 +71,7 @@ public interface Extensions { * @see MultiMarkdown */ static final int TABLES = 0x20; - + /** * PHP Markdown Extra style definition lists. * Additionally supports the small extension proposed in the article referenced below. @@ -153,6 +153,13 @@ public interface Extensions { */ static final int EXTANCHORLINKS = 0x00400000; + /** + * Generate anchor links for headers using complete contents of the header. + * Spaces and non-alphanumerics replaced by `-`, multiple dashes trimmed to one. + * Anchor link is added wrapping the header content as without EXTANCHORLINKS: `

header a

` + */ + static final int EXTANCHORLINKS_WRAP = 0x00800000; + /** * All Optionals other than Suppress and FORCELISTITEMPARA which is a backwards compatibility extension * diff --git a/src/main/java/org/pegdown/LinkRenderer.java b/src/main/java/org/pegdown/LinkRenderer.java index 851bcc7..049c2c3 100644 --- a/src/main/java/org/pegdown/LinkRenderer.java +++ b/src/main/java/org/pegdown/LinkRenderer.java @@ -3,13 +3,14 @@ import org.parboiled.common.StringUtils; import org.pegdown.ast.*; -import static org.pegdown.FastEncoder.*; - import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; +import static org.pegdown.FastEncoder.encode; +import static org.pegdown.FastEncoder.obfuscate; + /** * A LinkRenderer is responsible for turning an AST node representing a link into a {@link LinkRenderer.Rendering} * instance, which hold the actual properties of the link as it is going to be rendered. @@ -50,6 +51,20 @@ public Rendering withAttribute(String name, String value) { } public Rendering withAttribute(Attribute attr) { + int iMax = attributes.size(); + + // vsch: a little wasteful, a Map would be better, but we don't have too many attributes and + // this will not break code for those that have implemented their own derived ToHtmlSerializers. + for (int i = 0; i < iMax; i++) { + Attribute attribute = attributes.get(i); + if (attribute.name.equals(attr.name)) { + // vsch: need to handle setting multiple classes, works for values too + // concatenate them with space between values, as for class + attr = new Attribute(attr.name, attribute.value + " " + attr.value); + attributes.remove(i); + break; + } + } attributes.add(attr); return this; } diff --git a/src/main/java/org/pegdown/Parser.java b/src/main/java/org/pegdown/Parser.java index 90b0ca7..cedd86e 100644 --- a/src/main/java/org/pegdown/Parser.java +++ b/src/main/java/org/pegdown/Parser.java @@ -50,7 +50,7 @@ */ @SuppressWarnings( {"InfiniteRecursion"}) public class Parser extends BaseParser implements Extensions { - + protected static final char CROSSED_OUT = '\uffff'; public interface ParseRunnerProvider { @@ -171,7 +171,7 @@ public Rule Verbatim() { return NodeSequence( OneOrMore( ZeroOrMore(BlankLine(), line.append('\n')), - Indent(), push(currentIndex()), + Indent(), push(currentIndex()), OneOrMore( FirstOf( Sequence('\t', line.append(repeat(' ', 4-(currentIndex()-1-(Integer)peek())%4))), @@ -184,11 +184,12 @@ public Rule Verbatim() { push(new VerbatimNode(text.getString())) ); } - + public Rule FencedCodeBlock() { StringBuilderVar text = new StringBuilderVar(); Var markerLength = new Var(); return NodeSequence( + // vsch: test to see if what appears to be a code fence is just inline code CodeFence(markerLength), TestNot(CodeFence(markerLength)), // prevent empty matches ZeroOrMore(BlankLine(), text.append('\n')), @@ -206,12 +207,12 @@ public Rule CodeFence(Var markerLength) { (markerLength.isSet() && matchLength() == markerLength.get()) || (markerLength.isNotSet() && markerLength.set(matchLength())), Sp(), - ZeroOrMore(TestNot(Newline()), ANY), // GFM code type identifier + ZeroOrMore(TestNot(FirstOf(Newline(), '~', '`')), ANY), // GFM code type identifier but exclude fenced code markers push(match()), Newline() ); } - + public Rule HorizontalRule() { return NodeSequence( NonindentSpace(), @@ -320,10 +321,18 @@ public boolean wrapInAnchor() { collectChildrensText(node, nodeInfo); String text = nodeInfo.text.toString().trim(); if (text.length() > 0) { - AnchorLinkNode anchor = new AnchorLinkNode(text, ""); - anchor.setStartIndex(nodeInfo.startIndex); - anchor.setEndIndex(nodeInfo.endIndex); - children.add(0, anchor); + if (ext(EXTANCHORLINKS_WRAP)) { + AnchorLinkNode anchor = new AnchorLinkNode(text, text); + anchor.setStartIndex(nodeInfo.startIndex); + anchor.setEndIndex(nodeInfo.endIndex); + children.clear(); + children.add(0, anchor); + } else { + AnchorLinkNode anchor = new AnchorLinkNode(text, ""); + anchor.setStartIndex(nodeInfo.startIndex); + anchor.setEndIndex(nodeInfo.endIndex); + children.add(0, anchor); + } } } } else { @@ -358,7 +367,7 @@ public void collectChildrensText(SuperNode node, AnchorNodeInfo nodeInfo) { } //************** Definition Lists ************ - + public Rule DefinitionList() { return NodeSequence( // test for successful definition list match before actually building it to reduce backtracking @@ -379,7 +388,7 @@ public Rule DefinitionList() { ) ); } - + public Rule DefListTerm() { return NodeSequence( TestNot(Spacechar()), @@ -390,7 +399,7 @@ public Rule DefListTerm() { Newline() ); } - + public Rule DefTermInline() { return Sequence( NotNewline(), @@ -398,7 +407,7 @@ public Rule DefTermInline() { Inline() ); } - + public Rule Definition() { SuperNodeCreator itemNodeCreator = new SuperNodeCreator() { public SuperNode create(Node child) { @@ -407,7 +416,7 @@ public SuperNode create(Node child) { }; return ListItem(DefListBullet(), itemNodeCreator); } - + public Rule DefListBullet() { return Sequence(NonindentSpace(), AnyOf(":~"), OneOrMore(Spacechar())); } @@ -617,7 +626,7 @@ public Rule TestNotItem() { ) ); } - + public Rule TestNotListItem() { return TestNot( FirstOf(new ArrayBuilder() @@ -635,7 +644,7 @@ public Rule Enumerator() { public Rule Bullet() { return Sequence(TestNot(HorizontalRule()), NonindentSpace(), AnyOf("+*-"), OneOrMore(Spacechar())); } - + //************* LIST ITEM ACTIONS **************** boolean appendCrossed(StringBuilderVar block) { @@ -694,7 +703,7 @@ boolean wrapFirstItemInPara(SuperNode item) { item.getChildren().set(0, rootNode); return true; } - + SuperNode wrapFirstSubItemInPara(SuperNode item) { Node firstItemFirstChild = item.getChildren().get(0); if (firstItemFirstChild.getChildren().size() == 1) { @@ -826,7 +835,7 @@ public Rule InlineOrIntermediateEndline() { Sequence(Endline(), Test(Inline())) ); } - + @MemoMismatches public Rule Inline() { return Sequence( @@ -898,7 +907,7 @@ public Rule UlOrStarLine() { public Rule CharLine(char c) { return FirstOf(NOrMore(c, 4), Sequence(Spacechar(), OneOrMore(c), Test(Spacechar()))); } - + public Rule StrongOrEmph() { return Sequence( Test(AnyOf("*_")), @@ -951,7 +960,7 @@ public Rule EmphOrStrong(String chars) { Optional(Sequence(EmphOrStrongClose(chars), setClosed())) ); } - + public Rule EmphOrStrongOpen(String chars) { return Sequence( TestNot(CharLine(chars.charAt(0))), @@ -979,10 +988,10 @@ public Rule EmphOrStrongClose(String chars) { ) ); } - + /** * This method checks if the parser can enter an emph or strong sequence - * Emph only allows Strong as direct child, Strong only allows Emph as + * Emph only allows Strong as direct child, Strong only allows Emph as * direct child. */ protected boolean mayEnterEmphOrStrong(String chars){ @@ -990,16 +999,16 @@ protected boolean mayEnterEmphOrStrong(String chars){ return false; } - Object parent = peek(2); + Object parent = peek(2); boolean isStrong = ( chars.length()==2 ); - + if( StrongEmphSuperNode.class.equals( parent.getClass() ) ){ if( ((StrongEmphSuperNode) parent).isStrong() == isStrong ) return false; } return true; } - + /** * This method checks if current position is a legal start position for a * strong or emph sequence by checking the last parsed character(-sequence). @@ -1017,16 +1026,16 @@ protected boolean isLegalEmphOrStrongStartPos(){ if(supernode.getChildren().size() < 1 ) return true; - + lastItem = supernode.getChildren().get( supernode.getChildren().size()-1 ); lastClass = lastItem.getClass(); } - + return ( TextNode.class.equals(lastClass) && ( (TextNode) lastItem).getText().endsWith(" ") ) || ( SimpleNode.class.equals(lastClass) ) || ( java.lang.Integer.class.equals(lastClass) ); } - + /** * Mark the current StrongEmphSuperNode as closed sequence */ @@ -1035,9 +1044,9 @@ protected boolean setClosed(){ node.setClosed(true); return true; } - + /** - * This method checks if current parent is a strong parent based on param `chars`. If so, it checks if the + * This method checks if current parent is a strong parent based on param `chars`. If so, it checks if the * latest inline node to be added as child does not end with a closing character of the parent. When this * is true, a next test should check if the closing character(s) of the child should become (part of) the * closing character(s) of the parent. @@ -1047,7 +1056,7 @@ protected boolean isStrongCloseCharStolen( String chars ){ return false; Object childClass = peek().getClass(); - + //checks if last `inline` to be added as child is not a StrongEmphSuperNode //that eats up a closing character for the parent StrongEmphSuperNode if( StrongEmphSuperNode.class.equals( childClass ) ){ @@ -1060,14 +1069,14 @@ protected boolean isStrongCloseCharStolen( String chars ){ return true; } } - + return false; } /** * Steals the last close char by marking a previously closed emph/strong node as unclosed. */ - protected boolean stealBackStrongCloseChar(){ + protected boolean stealBackStrongCloseChar(){ StrongEmphSuperNode child = (StrongEmphSuperNode) peek(); child.setClosed(false); addAsChild(); @@ -1075,7 +1084,7 @@ protected boolean stealBackStrongCloseChar(){ push(new ValidEmphOrStrongCloseNode()); return true; } - + /** * This method checks if the last parsed character or sequence is a valid prefix for a closing char for * an emph or strong sequence. @@ -1084,23 +1093,23 @@ protected boolean isLegalEmphOrStrongClosePos(){ Object lastItem = peek(); if ( StrongEmphSuperNode.class.equals( lastItem.getClass() ) ){ List children = ((StrongEmphSuperNode) lastItem).getChildren(); - + if(children.size() < 1) return true; lastItem = children.get( children.size()-1 ); Class lastClass = lastItem.getClass(); - + if( TextNode.class.equals(lastClass) ) return !((TextNode) lastItem).getText().endsWith(" "); if( SimpleNode.class.equals(lastClass) ) return !((SimpleNode) lastItem).getType().equals(SimpleNode.Type.Linebreak); - + } return true; } - + //************* LINKS **************** @@ -1517,7 +1526,7 @@ public Rule EscapableChar() { public Rule NotNewline() { return TestNot(AnyOf("\n\r")); } - + public Rule Newline() { return FirstOf('\n', Sequence('\r', Optional('\n'))); } @@ -1579,8 +1588,8 @@ public Rule Table() { TableDivider(node), Optional( NodeSequence( - TableRow(), push(1, new TableBodyNode()) && addAsChild(), - ZeroOrMore(TableRow(), addAsChild()) + TableRowAfterDivider(), push(1, new TableBodyNode()) && addAsChild(), + ZeroOrMore(TableRowAfterDivider(), addAsChild()) ), addAsChild() // add the TableHeaderNode to the TableNode ), @@ -1654,6 +1663,18 @@ public Rule TableRow() { ); } + public Rule TableRowAfterDivider() { + Var leadingPipe = new Var(Boolean.FALSE); + return NodeSequence( + push(new TableRowNode()), + Optional('|', leadingPipe.set(Boolean.TRUE)), + OneOrMore(TableCellAfterDivider(), addAsChild()), + leadingPipe.get() || ((Node) peek()).getChildren().size() > 1 || + getContext().getInputBuffer().charAt(matchEnd() - 1) == '|', + Sp(), Newline() + ); + } + // vsch: #183 Exclude the trailing || from TableCellNode node, leading ones are not included, it makes it more intuitive // that the TableCell will include only the text of the cell. public Rule TableCell() { @@ -1673,6 +1694,23 @@ public Rule TableCell() { ); } + // vsch: if a table divider was seen then we can have cells that look like the divider cell, it is not a divider + public Rule TableCellAfterDivider() { + return Sequence( + NodeSequence( + push(new TableCellNode()), + Optional(Sp(), TestNot('|'), NotNewline()), + OneOrMore( + TestNot('|'), TestNot(Sp(), Newline()), Inline(), + addAsChild(), + Optional(Sp(), Test('|'), Test(Newline())) + ) + ), + ZeroOrMore('|'), + ((TableCellNode) peek()).setColSpan(Math.max(1, matchLength())) + ); + } + //************* SMARTS **************** public Rule Smarts() { @@ -1728,11 +1766,11 @@ public Rule DoubleAngleQuoted() { } //************* HELPERS **************** - + public Rule NOrMore(char c, int n) { return Sequence(repeat(c, n), ZeroOrMore(c)); } - + public Rule NodeSequence(Object... nodeRules) { return Sequence( push(getContext().getCurrentIndex()), @@ -1740,14 +1778,14 @@ public Rule NodeSequence(Object... nodeRules) { setIndices() ); } - + public boolean setIndices() { AbstractNode node = (AbstractNode) peek(); node.setStartIndex((Integer)pop(1)); node.setEndIndex(currentIndex()); return true; } - + public boolean addAsChild() { SuperNode parent = (SuperNode) peek(1); List children = parent.getChildren(); @@ -1766,7 +1804,7 @@ public boolean addAsChild() { children.add(child); return true; } - + public Node popAsNode() { return (Node) pop(); } @@ -1778,12 +1816,12 @@ public String popAsString() { public boolean ext(int extension) { return (options & extension) > 0; } - + // called for inner parses for list items and blockquotes public RootNode parseInternal(StringBuilderVar block) { char[] chars = block.getChars(); int[] ixMap = new int[chars.length + 1]; // map of cleaned indices to original indices - + // strip out CROSSED_OUT characters and build index map StringBuilder clean = new StringBuilder(); for (int i = 0; i < chars.length; i++) { @@ -1794,15 +1832,15 @@ public RootNode parseInternal(StringBuilderVar block) { } } ixMap[clean.length()] = chars.length; - + // run inner parse char[] cleaned = new char[clean.length()]; clean.getChars(0, cleaned.length, cleaned, 0); RootNode rootNode = parseInternal(cleaned); - + // correct AST indices with index map fixIndices(rootNode, ixMap); - + return rootNode; } @@ -1824,7 +1862,7 @@ public RootNode parseInternal(char[] source) { } return (RootNode) result.resultValue; } - + ParsingResult parseToParsingResult(char[] source) { parsingStartTimeStamp = System.currentTimeMillis(); return parseRunnerProvider.get(Root()).run(source); diff --git a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.ast b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.ast index c430abf..5027046 100644 --- a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.ast +++ b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.ast @@ -1,4 +1,4 @@ -RootNode [0-1235] +RootNode [0-1336] ParaNode [0-20] SuperNode [0-20] TextNode [0-19] 'A fenced code block' @@ -18,3 +18,14 @@ RootNode [0-1235] SuperNode [787-822] TextNode [787-822] 'test opening with more than 3 ticks' VerbatimNode [824-1235] 'public static String message(@PropertyKey(resourceBundle = BUNDLE_NAME) String key, Object... params) {\n return CommonBundle.message(BUNDLE, key, params);\n}\n\npublic static String messageOrBlank(@PropertyKey(resourceBundle = BUNDLE_NAME) String key, Object... params) {\n return CommonBundle.messageOrDefault(BUNDLE, key, "", params);\n}\n```\nshould still be fenced\n`````\nshould still be fenced\n' + ParaNode [1236-1303] + SuperNode [1236-1303] + TextNode [1236-1265] 'test false opening code fence' + SimpleNode [1265-1266] Linebreak + CodeNode [1266-1283] 'inline code' + SimpleNode [1283-1284] Linebreak + CodeNode [1284-1303] 'inline code 2' + ParaNode [1305-1313] + SuperNode [1305-1313] + TextNode [1305-1313] 'not code' + VerbatimNode [1315-1336] 'fenced block\n' diff --git a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.html b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.html index a6e4267..66b05e7 100644 --- a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.html +++ b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.html @@ -48,3 +48,13 @@ ````` should still be fenced + +

test false opening code fence
+inline code
+inline code 2

+

not code

+
+
+fenced block
+
+
diff --git a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.md b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.md index c198d9d..a3ddb5e 100644 --- a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.md +++ b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.md @@ -47,4 +47,14 @@ public static String messageOrBlank(@PropertyKey(resourceBundle = BUNDLE_NAME) S should still be fenced ````` should still be fenced -```` \ No newline at end of file +```` + +test false opening code fence +```inline code``` +```inline code 2``` + +not code + +``` +fenced block +``` diff --git a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_all.html b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_all.html index a389360..01f3643 100644 --- a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_all.html +++ b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_all.html @@ -57,4 +57,12 @@ +

test false opening code fence inline code +inline code 2

+

not code

+
+
 
+kcolb decnef
+
+
diff --git a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_scala.html b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_scala.html index 96f575c..a4cf044 100644 --- a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_scala.html +++ b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_scala.html @@ -50,3 +50,11 @@ should still be fenced +

test false opening code fence inline code +inline code 2

+

not code

+
+
+fenced block
+
+
diff --git a/src/test/resources/pegdown/Tables.html b/src/test/resources/pegdown/Tables.html index 2548c4e..42afc2f 100644 --- a/src/test/resources/pegdown/Tables.html +++ b/src/test/resources/pegdown/Tables.html @@ -100,4 +100,30 @@

Tables

this is the caption - \ No newline at end of file +

Tables with divider looking table cell:

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Name Firstname Age
Fox Peter 42
—————–—-:
Guy Ritchie ca. 60
diff --git a/src/test/resources/pegdown/Tables.md b/src/test/resources/pegdown/Tables.md index 5a12e4d..8e9caaf 100644 --- a/src/test/resources/pegdown/Tables.md +++ b/src/test/resources/pegdown/Tables.md @@ -38,3 +38,12 @@ Content | **Cell** | Cell | New section | More | Data | And more | | And more | [this is the caption] + +Tables with divider looking table cell: + + Name | Firstname | Age +------|-----------|----: + Fox | Peter | 42 +------|-----------|----: + Guy | Ritchie | ca. 60 + diff --git a/src/test/scala/org/pegdown/AbstractPegDownSpec.scala b/src/test/scala/org/pegdown/AbstractPegDownSpec.scala index 501a9f8..b557744 100644 --- a/src/test/scala/org/pegdown/AbstractPegDownSpec.scala +++ b/src/test/scala/org/pegdown/AbstractPegDownSpec.scala @@ -88,6 +88,8 @@ abstract class AbstractPegDownSpec extends Specification { out.toString } - def normalize(string: String) = string.replace("\r\n", "\n").replace("\r", "\n") + // vsch: seems like there is a bug in Tidy, passing in HTML with
\n results in
\n\n, and passing one with
\n\n results in
\n + // didn't look too deep into it but the following for now solves the problem. + def normalize(string: String) = string.replace("\r\n", "\n").replace("\r", "\n").replace("
\n\n", "
\n") }