diff --git a/src/main/java/org/pegdown/Extensions.java b/src/main/java/org/pegdown/Extensions.java
index 7644da3..d56040c 100644
--- a/src/main/java/org/pegdown/Extensions.java
+++ b/src/main/java/org/pegdown/Extensions.java
@@ -37,7 +37,7 @@ public interface Extensions {
/**
* All of the smartypants prettyfications. Equivalent to SMARTS + QUOTES.
- *
+ *
* @see Smartypants
*/
static final int SMARTYPANTS = SMARTS + QUOTES;
@@ -71,7 +71,7 @@ public interface Extensions {
* @see MultiMarkdown
*/
static final int TABLES = 0x20;
-
+
/**
* PHP Markdown Extra style definition lists.
* Additionally supports the small extension proposed in the article referenced below.
@@ -153,6 +153,13 @@ public interface Extensions {
*/
static final int EXTANCHORLINKS = 0x00400000;
+ /**
+ * Generate anchor links for headers using complete contents of the header.
+ * Spaces and non-alphanumerics replaced by `-`, multiple dashes trimmed to one.
+ * Anchor link is added wrapping the header content as without EXTANCHORLINKS: `
`
+ */
+ static final int EXTANCHORLINKS_WRAP = 0x00800000;
+
/**
* All Optionals other than Suppress and FORCELISTITEMPARA which is a backwards compatibility extension
*
diff --git a/src/main/java/org/pegdown/LinkRenderer.java b/src/main/java/org/pegdown/LinkRenderer.java
index 851bcc7..049c2c3 100644
--- a/src/main/java/org/pegdown/LinkRenderer.java
+++ b/src/main/java/org/pegdown/LinkRenderer.java
@@ -3,13 +3,14 @@
import org.parboiled.common.StringUtils;
import org.pegdown.ast.*;
-import static org.pegdown.FastEncoder.*;
-
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
+import static org.pegdown.FastEncoder.encode;
+import static org.pegdown.FastEncoder.obfuscate;
+
/**
* A LinkRenderer is responsible for turning an AST node representing a link into a {@link LinkRenderer.Rendering}
* instance, which hold the actual properties of the link as it is going to be rendered.
@@ -50,6 +51,20 @@ public Rendering withAttribute(String name, String value) {
}
public Rendering withAttribute(Attribute attr) {
+ int iMax = attributes.size();
+
+ // vsch: a little wasteful, a Map would be better, but we don't have too many attributes and
+ // this will not break code for those that have implemented their own derived ToHtmlSerializers.
+ for (int i = 0; i < iMax; i++) {
+ Attribute attribute = attributes.get(i);
+ if (attribute.name.equals(attr.name)) {
+ // vsch: need to handle setting multiple classes, works for values too
+ // concatenate them with space between values, as for class
+ attr = new Attribute(attr.name, attribute.value + " " + attr.value);
+ attributes.remove(i);
+ break;
+ }
+ }
attributes.add(attr);
return this;
}
diff --git a/src/main/java/org/pegdown/Parser.java b/src/main/java/org/pegdown/Parser.java
index 90b0ca7..cedd86e 100644
--- a/src/main/java/org/pegdown/Parser.java
+++ b/src/main/java/org/pegdown/Parser.java
@@ -50,7 +50,7 @@
*/
@SuppressWarnings( {"InfiniteRecursion"})
public class Parser extends BaseParser implements Extensions {
-
+
protected static final char CROSSED_OUT = '\uffff';
public interface ParseRunnerProvider {
@@ -171,7 +171,7 @@ public Rule Verbatim() {
return NodeSequence(
OneOrMore(
ZeroOrMore(BlankLine(), line.append('\n')),
- Indent(), push(currentIndex()),
+ Indent(), push(currentIndex()),
OneOrMore(
FirstOf(
Sequence('\t', line.append(repeat(' ', 4-(currentIndex()-1-(Integer)peek())%4))),
@@ -184,11 +184,12 @@ public Rule Verbatim() {
push(new VerbatimNode(text.getString()))
);
}
-
+
public Rule FencedCodeBlock() {
StringBuilderVar text = new StringBuilderVar();
Var markerLength = new Var();
return NodeSequence(
+ // vsch: test to see if what appears to be a code fence is just inline code
CodeFence(markerLength),
TestNot(CodeFence(markerLength)), // prevent empty matches
ZeroOrMore(BlankLine(), text.append('\n')),
@@ -206,12 +207,12 @@ public Rule CodeFence(Var markerLength) {
(markerLength.isSet() && matchLength() == markerLength.get()) ||
(markerLength.isNotSet() && markerLength.set(matchLength())),
Sp(),
- ZeroOrMore(TestNot(Newline()), ANY), // GFM code type identifier
+ ZeroOrMore(TestNot(FirstOf(Newline(), '~', '`')), ANY), // GFM code type identifier but exclude fenced code markers
push(match()),
Newline()
);
}
-
+
public Rule HorizontalRule() {
return NodeSequence(
NonindentSpace(),
@@ -320,10 +321,18 @@ public boolean wrapInAnchor() {
collectChildrensText(node, nodeInfo);
String text = nodeInfo.text.toString().trim();
if (text.length() > 0) {
- AnchorLinkNode anchor = new AnchorLinkNode(text, "");
- anchor.setStartIndex(nodeInfo.startIndex);
- anchor.setEndIndex(nodeInfo.endIndex);
- children.add(0, anchor);
+ if (ext(EXTANCHORLINKS_WRAP)) {
+ AnchorLinkNode anchor = new AnchorLinkNode(text, text);
+ anchor.setStartIndex(nodeInfo.startIndex);
+ anchor.setEndIndex(nodeInfo.endIndex);
+ children.clear();
+ children.add(0, anchor);
+ } else {
+ AnchorLinkNode anchor = new AnchorLinkNode(text, "");
+ anchor.setStartIndex(nodeInfo.startIndex);
+ anchor.setEndIndex(nodeInfo.endIndex);
+ children.add(0, anchor);
+ }
}
}
} else {
@@ -358,7 +367,7 @@ public void collectChildrensText(SuperNode node, AnchorNodeInfo nodeInfo) {
}
//************** Definition Lists ************
-
+
public Rule DefinitionList() {
return NodeSequence(
// test for successful definition list match before actually building it to reduce backtracking
@@ -379,7 +388,7 @@ public Rule DefinitionList() {
)
);
}
-
+
public Rule DefListTerm() {
return NodeSequence(
TestNot(Spacechar()),
@@ -390,7 +399,7 @@ public Rule DefListTerm() {
Newline()
);
}
-
+
public Rule DefTermInline() {
return Sequence(
NotNewline(),
@@ -398,7 +407,7 @@ public Rule DefTermInline() {
Inline()
);
}
-
+
public Rule Definition() {
SuperNodeCreator itemNodeCreator = new SuperNodeCreator() {
public SuperNode create(Node child) {
@@ -407,7 +416,7 @@ public SuperNode create(Node child) {
};
return ListItem(DefListBullet(), itemNodeCreator);
}
-
+
public Rule DefListBullet() {
return Sequence(NonindentSpace(), AnyOf(":~"), OneOrMore(Spacechar()));
}
@@ -617,7 +626,7 @@ public Rule TestNotItem() {
)
);
}
-
+
public Rule TestNotListItem() {
return TestNot(
FirstOf(new ArrayBuilder()
@@ -635,7 +644,7 @@ public Rule Enumerator() {
public Rule Bullet() {
return Sequence(TestNot(HorizontalRule()), NonindentSpace(), AnyOf("+*-"), OneOrMore(Spacechar()));
}
-
+
//************* LIST ITEM ACTIONS ****************
boolean appendCrossed(StringBuilderVar block) {
@@ -694,7 +703,7 @@ boolean wrapFirstItemInPara(SuperNode item) {
item.getChildren().set(0, rootNode);
return true;
}
-
+
SuperNode wrapFirstSubItemInPara(SuperNode item) {
Node firstItemFirstChild = item.getChildren().get(0);
if (firstItemFirstChild.getChildren().size() == 1) {
@@ -826,7 +835,7 @@ public Rule InlineOrIntermediateEndline() {
Sequence(Endline(), Test(Inline()))
);
}
-
+
@MemoMismatches
public Rule Inline() {
return Sequence(
@@ -898,7 +907,7 @@ public Rule UlOrStarLine() {
public Rule CharLine(char c) {
return FirstOf(NOrMore(c, 4), Sequence(Spacechar(), OneOrMore(c), Test(Spacechar())));
}
-
+
public Rule StrongOrEmph() {
return Sequence(
Test(AnyOf("*_")),
@@ -951,7 +960,7 @@ public Rule EmphOrStrong(String chars) {
Optional(Sequence(EmphOrStrongClose(chars), setClosed()))
);
}
-
+
public Rule EmphOrStrongOpen(String chars) {
return Sequence(
TestNot(CharLine(chars.charAt(0))),
@@ -979,10 +988,10 @@ public Rule EmphOrStrongClose(String chars) {
)
);
}
-
+
/**
* This method checks if the parser can enter an emph or strong sequence
- * Emph only allows Strong as direct child, Strong only allows Emph as
+ * Emph only allows Strong as direct child, Strong only allows Emph as
* direct child.
*/
protected boolean mayEnterEmphOrStrong(String chars){
@@ -990,16 +999,16 @@ protected boolean mayEnterEmphOrStrong(String chars){
return false;
}
- Object parent = peek(2);
+ Object parent = peek(2);
boolean isStrong = ( chars.length()==2 );
-
+
if( StrongEmphSuperNode.class.equals( parent.getClass() ) ){
if( ((StrongEmphSuperNode) parent).isStrong() == isStrong )
return false;
}
return true;
}
-
+
/**
* This method checks if current position is a legal start position for a
* strong or emph sequence by checking the last parsed character(-sequence).
@@ -1017,16 +1026,16 @@ protected boolean isLegalEmphOrStrongStartPos(){
if(supernode.getChildren().size() < 1 )
return true;
-
+
lastItem = supernode.getChildren().get( supernode.getChildren().size()-1 );
lastClass = lastItem.getClass();
}
-
+
return ( TextNode.class.equals(lastClass) && ( (TextNode) lastItem).getText().endsWith(" ") )
|| ( SimpleNode.class.equals(lastClass) )
|| ( java.lang.Integer.class.equals(lastClass) );
}
-
+
/**
* Mark the current StrongEmphSuperNode as closed sequence
*/
@@ -1035,9 +1044,9 @@ protected boolean setClosed(){
node.setClosed(true);
return true;
}
-
+
/**
- * This method checks if current parent is a strong parent based on param `chars`. If so, it checks if the
+ * This method checks if current parent is a strong parent based on param `chars`. If so, it checks if the
* latest inline node to be added as child does not end with a closing character of the parent. When this
* is true, a next test should check if the closing character(s) of the child should become (part of) the
* closing character(s) of the parent.
@@ -1047,7 +1056,7 @@ protected boolean isStrongCloseCharStolen( String chars ){
return false;
Object childClass = peek().getClass();
-
+
//checks if last `inline` to be added as child is not a StrongEmphSuperNode
//that eats up a closing character for the parent StrongEmphSuperNode
if( StrongEmphSuperNode.class.equals( childClass ) ){
@@ -1060,14 +1069,14 @@ protected boolean isStrongCloseCharStolen( String chars ){
return true;
}
}
-
+
return false;
}
/**
* Steals the last close char by marking a previously closed emph/strong node as unclosed.
*/
- protected boolean stealBackStrongCloseChar(){
+ protected boolean stealBackStrongCloseChar(){
StrongEmphSuperNode child = (StrongEmphSuperNode) peek();
child.setClosed(false);
addAsChild();
@@ -1075,7 +1084,7 @@ protected boolean stealBackStrongCloseChar(){
push(new ValidEmphOrStrongCloseNode());
return true;
}
-
+
/**
* This method checks if the last parsed character or sequence is a valid prefix for a closing char for
* an emph or strong sequence.
@@ -1084,23 +1093,23 @@ protected boolean isLegalEmphOrStrongClosePos(){
Object lastItem = peek();
if ( StrongEmphSuperNode.class.equals( lastItem.getClass() ) ){
List children = ((StrongEmphSuperNode) lastItem).getChildren();
-
+
if(children.size() < 1)
return true;
lastItem = children.get( children.size()-1 );
Class> lastClass = lastItem.getClass();
-
+
if( TextNode.class.equals(lastClass) )
return !((TextNode) lastItem).getText().endsWith(" ");
if( SimpleNode.class.equals(lastClass) )
return !((SimpleNode) lastItem).getType().equals(SimpleNode.Type.Linebreak);
-
+
}
return true;
}
-
+
//************* LINKS ****************
@@ -1517,7 +1526,7 @@ public Rule EscapableChar() {
public Rule NotNewline() {
return TestNot(AnyOf("\n\r"));
}
-
+
public Rule Newline() {
return FirstOf('\n', Sequence('\r', Optional('\n')));
}
@@ -1579,8 +1588,8 @@ public Rule Table() {
TableDivider(node),
Optional(
NodeSequence(
- TableRow(), push(1, new TableBodyNode()) && addAsChild(),
- ZeroOrMore(TableRow(), addAsChild())
+ TableRowAfterDivider(), push(1, new TableBodyNode()) && addAsChild(),
+ ZeroOrMore(TableRowAfterDivider(), addAsChild())
),
addAsChild() // add the TableHeaderNode to the TableNode
),
@@ -1654,6 +1663,18 @@ public Rule TableRow() {
);
}
+ public Rule TableRowAfterDivider() {
+ Var leadingPipe = new Var(Boolean.FALSE);
+ return NodeSequence(
+ push(new TableRowNode()),
+ Optional('|', leadingPipe.set(Boolean.TRUE)),
+ OneOrMore(TableCellAfterDivider(), addAsChild()),
+ leadingPipe.get() || ((Node) peek()).getChildren().size() > 1 ||
+ getContext().getInputBuffer().charAt(matchEnd() - 1) == '|',
+ Sp(), Newline()
+ );
+ }
+
// vsch: #183 Exclude the trailing || from TableCellNode node, leading ones are not included, it makes it more intuitive
// that the TableCell will include only the text of the cell.
public Rule TableCell() {
@@ -1673,6 +1694,23 @@ public Rule TableCell() {
);
}
+ // vsch: if a table divider was seen then we can have cells that look like the divider cell, it is not a divider
+ public Rule TableCellAfterDivider() {
+ return Sequence(
+ NodeSequence(
+ push(new TableCellNode()),
+ Optional(Sp(), TestNot('|'), NotNewline()),
+ OneOrMore(
+ TestNot('|'), TestNot(Sp(), Newline()), Inline(),
+ addAsChild(),
+ Optional(Sp(), Test('|'), Test(Newline()))
+ )
+ ),
+ ZeroOrMore('|'),
+ ((TableCellNode) peek()).setColSpan(Math.max(1, matchLength()))
+ );
+ }
+
//************* SMARTS ****************
public Rule Smarts() {
@@ -1728,11 +1766,11 @@ public Rule DoubleAngleQuoted() {
}
//************* HELPERS ****************
-
+
public Rule NOrMore(char c, int n) {
return Sequence(repeat(c, n), ZeroOrMore(c));
}
-
+
public Rule NodeSequence(Object... nodeRules) {
return Sequence(
push(getContext().getCurrentIndex()),
@@ -1740,14 +1778,14 @@ public Rule NodeSequence(Object... nodeRules) {
setIndices()
);
}
-
+
public boolean setIndices() {
AbstractNode node = (AbstractNode) peek();
node.setStartIndex((Integer)pop(1));
node.setEndIndex(currentIndex());
return true;
}
-
+
public boolean addAsChild() {
SuperNode parent = (SuperNode) peek(1);
List children = parent.getChildren();
@@ -1766,7 +1804,7 @@ public boolean addAsChild() {
children.add(child);
return true;
}
-
+
public Node popAsNode() {
return (Node) pop();
}
@@ -1778,12 +1816,12 @@ public String popAsString() {
public boolean ext(int extension) {
return (options & extension) > 0;
}
-
+
// called for inner parses for list items and blockquotes
public RootNode parseInternal(StringBuilderVar block) {
char[] chars = block.getChars();
int[] ixMap = new int[chars.length + 1]; // map of cleaned indices to original indices
-
+
// strip out CROSSED_OUT characters and build index map
StringBuilder clean = new StringBuilder();
for (int i = 0; i < chars.length; i++) {
@@ -1794,15 +1832,15 @@ public RootNode parseInternal(StringBuilderVar block) {
}
}
ixMap[clean.length()] = chars.length;
-
+
// run inner parse
char[] cleaned = new char[clean.length()];
clean.getChars(0, cleaned.length, cleaned, 0);
RootNode rootNode = parseInternal(cleaned);
-
+
// correct AST indices with index map
fixIndices(rootNode, ixMap);
-
+
return rootNode;
}
@@ -1824,7 +1862,7 @@ public RootNode parseInternal(char[] source) {
}
return (RootNode) result.resultValue;
}
-
+
ParsingResult parseToParsingResult(char[] source) {
parsingStartTimeStamp = System.currentTimeMillis();
return parseRunnerProvider.get(Root()).run(source);
diff --git a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.ast b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.ast
index c430abf..5027046 100644
--- a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.ast
+++ b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.ast
@@ -1,4 +1,4 @@
-RootNode [0-1235]
+RootNode [0-1336]
ParaNode [0-20]
SuperNode [0-20]
TextNode [0-19] 'A fenced code block'
@@ -18,3 +18,14 @@ RootNode [0-1235]
SuperNode [787-822]
TextNode [787-822] 'test opening with more than 3 ticks'
VerbatimNode [824-1235] 'public static String message(@PropertyKey(resourceBundle = BUNDLE_NAME) String key, Object... params) {\n return CommonBundle.message(BUNDLE, key, params);\n}\n\npublic static String messageOrBlank(@PropertyKey(resourceBundle = BUNDLE_NAME) String key, Object... params) {\n return CommonBundle.messageOrDefault(BUNDLE, key, "", params);\n}\n```\nshould still be fenced\n`````\nshould still be fenced\n'
+ ParaNode [1236-1303]
+ SuperNode [1236-1303]
+ TextNode [1236-1265] 'test false opening code fence'
+ SimpleNode [1265-1266] Linebreak
+ CodeNode [1266-1283] 'inline code'
+ SimpleNode [1283-1284] Linebreak
+ CodeNode [1284-1303] 'inline code 2'
+ ParaNode [1305-1313]
+ SuperNode [1305-1313]
+ TextNode [1305-1313] 'not code'
+ VerbatimNode [1315-1336] 'fenced block\n'
diff --git a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.html b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.html
index a6e4267..66b05e7 100644
--- a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.html
+++ b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.html
@@ -48,3 +48,13 @@
`````
should still be fenced
+
+test false opening code fence
+inline code
+inline code 2
+not code
+
+
+fenced block
+
+
diff --git a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.md b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.md
index c198d9d..a3ddb5e 100644
--- a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.md
+++ b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.md
@@ -47,4 +47,14 @@ public static String messageOrBlank(@PropertyKey(resourceBundle = BUNDLE_NAME) S
should still be fenced
`````
should still be fenced
-````
\ No newline at end of file
+````
+
+test false opening code fence
+```inline code```
+```inline code 2```
+
+not code
+
+```
+fenced block
+```
diff --git a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_all.html b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_all.html
index a389360..01f3643 100644
--- a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_all.html
+++ b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_all.html
@@ -57,4 +57,12 @@
+test false opening code fence inline code
+inline code 2
+not code
+
+
+kcolb decnef
+
+
diff --git a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_scala.html b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_scala.html
index 96f575c..a4cf044 100644
--- a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_scala.html
+++ b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_scala.html
@@ -50,3 +50,11 @@
should still be fenced
+test false opening code fence inline code
+inline code 2
+not code
+
+
+fenced block
+
+
diff --git a/src/test/resources/pegdown/Tables.html b/src/test/resources/pegdown/Tables.html
index 2548c4e..42afc2f 100644
--- a/src/test/resources/pegdown/Tables.html
+++ b/src/test/resources/pegdown/Tables.html
@@ -100,4 +100,30 @@
this is the caption
-
\ No newline at end of file
+Tables with divider looking table cell:
+
+
+
+ Name
+ Firstname
+ Age
+
+
+
+
+ Fox
+ Peter
+ 42
+
+
+ ——
+ ———–
+ —-:
+
+
+ Guy
+ Ritchie
+ ca. 60
+
+
+
diff --git a/src/test/resources/pegdown/Tables.md b/src/test/resources/pegdown/Tables.md
index 5a12e4d..8e9caaf 100644
--- a/src/test/resources/pegdown/Tables.md
+++ b/src/test/resources/pegdown/Tables.md
@@ -38,3 +38,12 @@ Content | **Cell** | Cell |
New section | More | Data |
And more | | And more |
[this is the caption]
+
+Tables with divider looking table cell:
+
+ Name | Firstname | Age
+------|-----------|----:
+ Fox | Peter | 42
+------|-----------|----:
+ Guy | Ritchie | ca. 60
+
diff --git a/src/test/scala/org/pegdown/AbstractPegDownSpec.scala b/src/test/scala/org/pegdown/AbstractPegDownSpec.scala
index 501a9f8..b557744 100644
--- a/src/test/scala/org/pegdown/AbstractPegDownSpec.scala
+++ b/src/test/scala/org/pegdown/AbstractPegDownSpec.scala
@@ -88,6 +88,8 @@ abstract class AbstractPegDownSpec extends Specification {
out.toString
}
- def normalize(string: String) = string.replace("\r\n", "\n").replace("\r", "\n")
+ // vsch: seems like there is a bug in Tidy, passing in HTML with \n results in \n\n, and passing one with \n\n results in \n
+ // didn't look too deep into it but the following for now solves the problem.
+ def normalize(string: String) = string.replace("\r\n", "\n").replace("\r", "\n").replace(" \n\n", " \n")
}