Skip to content

Commit

Permalink
feat: add xml support.
Browse files Browse the repository at this point in the history
  • Loading branch information
jrfaller committed Nov 27, 2023
1 parent 642aef6 commit 6584227
Show file tree
Hide file tree
Showing 5 changed files with 195 additions and 1 deletion.
1 change: 1 addition & 0 deletions dist/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ dependencies {
implementation project(':gen.ruby')
implementation project(':gen.srcml')
implementation project(':gen.treesitter')
implementation project(':gen.xml')
implementation project(':gen.yaml')
}

Expand Down
5 changes: 5 additions & 0 deletions gen.xml/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Build configuration for the gen.xml module.
description = 'GumTree tree generator for XML code (JSoup based).'

dependencies {
// jsoup supplies the XML parser that this module's tree generator is built on.
implementation 'org.jsoup:jsoup:1.17.1'
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
/*
* This file is part of GumTree.
*
* GumTree is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GumTree is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with GumTree. If not, see <http://www.gnu.org/licenses/>.
*
* Copyright 2023 Jean-Rémy Falleri <[email protected]>
*/

package com.github.gumtreediff.gen.xml;

import com.github.gumtreediff.gen.Register;
import com.github.gumtreediff.utils.Registry;
import com.github.gumtreediff.gen.TreeGenerator;
import com.github.gumtreediff.tree.Tree;
import com.github.gumtreediff.tree.TreeContext;
import org.jsoup.Jsoup;
import org.jsoup.nodes.*;
import org.jsoup.parser.Parser;
import org.jsoup.select.NodeVisitor;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Stack;

import static com.github.gumtreediff.tree.TypeSet.type;

@Register(id = "xml-jsoup", accept = {"\\.xml$"}, priority = Registry.Priority.MAXIMUM)
public class XmlTreeGenerator extends TreeGenerator {
private Stack<Tree> trees;
private TreeContext ctx;

@Override
public TreeContext generate(Reader r) throws IOException {
ctx = new TreeContext();
trees = new Stack<>();
StringBuilder builder = new StringBuilder();

try (Reader reader = new BufferedReader(r)) {
char[] charBuffer = new char[8 * 1024];
int numCharsRead;
while ((numCharsRead = reader.read(charBuffer, 0,
charBuffer.length)) != -1) {
builder.append(charBuffer, 0, numCharsRead);
}
}

try (InputStream inputStream = new ByteArrayInputStream(
builder.toString().getBytes(StandardCharsets.UTF_8))) {
Parser parser = Parser.xmlParser();
parser.setTrackPosition(true);
Document doc = Jsoup.parse(inputStream, StandardCharsets.UTF_8.name(), "", parser);
doc.traverse(new GumtreeNodeVisitor());
}

ctx.setRoot(ctx.getRoot().getChild(0));

return ctx;
}

private class GumtreeNodeVisitor implements NodeVisitor {
@Override
public void head(Node node, int depth) {
Tree tree;
if (node instanceof Element)
tree = asTree((Element) node);
else if (node instanceof DataNode)
tree = asTree((DataNode) node);
else if (node instanceof TextNode)
tree = asTree((TextNode) node);
else
throw new IllegalArgumentException();

insertTree(tree);
}

private void insertTree(Tree tree) {
if (!trees.isEmpty()) {
if (!dummyTextNode(tree))
trees.peek().addChild(tree);
}
else
ctx.setRoot(tree);

trees.push(tree);
}

private boolean dummyTextNode(Tree tree) {
return tree.getType() == type("xml-text")
&& tree.getLabel().trim().isEmpty();
}

@Override
public void tail(Node node, int depth) {
Tree tree = trees.pop();
}

private Tree asTree(Element element) {
Tree tree = ctx.createTree(type(element.tagName()));
int startPos = element.sourceRange().startPos();
int length = element.endSourceRange().endPos() - startPos;
tree.setPos(startPos);
tree.setLength(length);
for (Attribute attrXml : element.attributes()) {
Tree attrTree = ctx.createTree(type(attrXml.getKey()), attrXml.getValue());
startPos = attrXml.sourceRange().nameRange().startPos();
length = attrXml.sourceRange().valueRange().endPos() - startPos;
attrTree.setPos(startPos);
attrTree.setLength(length);
tree.addChild(attrTree);
}
return tree;
}

private Tree asTree(DataNode dataNode) {
Tree tree = ctx.createTree(type("xml-data"));
int startPos = dataNode.sourceRange().startPos();
int length = dataNode.sourceRange().endPos() - startPos;
tree.setPos(startPos);
tree.setLength(length);
return tree;
}

private Tree asTree(TextNode textXml) {
Tree tree = ctx.createTree(type("xml-text"), textXml.text());
int startPos = textXml.sourceRange().startPos();
int length = textXml.sourceRange().endPos() - startPos;
tree.setPos(startPos);
tree.setLength(length);
return tree;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* This file is part of GumTree.
*
* GumTree is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GumTree is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with GumTree. If not, see <http://www.gnu.org/licenses/>.
*
* Copyright 2023 Jean-Rémy Falleri <[email protected]>
*/

package com.github.gumtreediff.gen.xml;

import java.io.IOException;

import com.github.gumtreediff.gen.SyntaxException;
import com.github.gumtreediff.tree.*;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

import static com.github.gumtreediff.tree.TypeSet.type;
import static org.junit.jupiter.api.Assertions.*;

/** Tests for {@link XmlTreeGenerator}. */
public class TestXmlTreeGenerator {
    /**
     * Checks the tree produced for a small well-formed document: the root
     * element, its element children, a text child and attribute children.
     */
    @Test
    public void testSimpleSyntax() throws IOException {
        // The closing tag must match the opening <students> tag for the sample
        // to be well-formed XML.
        String input = "<students>\n"
                + " <list>Students list</list>\n"
                + " <student name=\"foo\"/>\n"
                + " <student name=\"bar\"/>\n"
                + " <student name=\"baz\"/>\n"
                + "</students>";
        Tree t = new XmlTreeGenerator().generateFrom().string(input).getRoot();

        // Whitespace-only text between elements is dropped by the generator,
        // so only the four child elements remain under the root.
        assertEquals(type("students"), t.getType());
        assertEquals(4, t.getChildren().size());

        Tree list = t.getChild(0);
        assertEquals(type("list"), list.getType());
        assertEquals(1, list.getChildren().size());
        assertEquals(type("xml-text"), list.getChild(0).getType());
        assertEquals("Students list", list.getChild(0).getLabel());

        String[] expectedNames = {"foo", "bar", "baz"};
        for (int i = 0; i < expectedNames.length; i++) {
            Tree student = t.getChild(i + 1);
            assertEquals(type("student"), student.getType());
            assertEquals(1, student.getChildren().size());
            // Attributes are children typed by their key and labelled by their value.
            assertEquals(type("name"), student.getChild(0).getType());
            assertEquals(expectedNames[i], student.getChild(0).getLabel());
        }
    }
}
3 changes: 2 additions & 1 deletion settings.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ include 'benchmark',
'gen.ruby',
'gen.srcml',
'gen.treesitter',
'gen.yaml'
'gen.yaml',
'gen.xml'

rootProject.name = "gumtree"

0 comments on commit 6584227

Please sign in to comment.