Skip to content

Commit

Permalink
[NOID] Fixes #4138: Add support for loading Gephi GEXF file format (#…
Browse files Browse the repository at this point in the history
…4171) (#4260)

* [NOID] Fixes #4138: Add support for loading Gephi GEXF file format (#4171)

* Fixes #4138: Add support for loading Gephi GEXF file format

* removed unused imports

* Fixed RollupTest

* [NOID] fix tests

* [NOID] fix 4.4 implementation

* [NOID] fix tests

* [NOID] fix implementation
  • Loading branch information
vga91 authored Jan 21, 2025
1 parent f8142df commit 0a5127f
Show file tree
Hide file tree
Showing 14 changed files with 1,045 additions and 189 deletions.
54 changes: 45 additions & 9 deletions core/src/main/java/apoc/export/graphml/XmlGraphMLReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
*/
package apoc.export.graphml;

import static apoc.util.ConvertUtil.toValidValue;

import apoc.export.util.BatchTransaction;
import apoc.export.util.ExportConfig;
import apoc.export.util.Reporter;
Expand Down Expand Up @@ -218,13 +220,39 @@ public Object parseValue(String input) {
// Attribute names, as QNames, that the reader looks up on XML elements.
// NOTE(review): TYPE and LIST are presumably read from <key> definitions — their use is outside this view, confirm.
public static final QName TYPE = QName.valueOf("attr.type");
public static final QName LIST = QName.valueOf("attr.list");
// Attribute naming the property key on a GraphML "data" element (see ReaderType.GRAPHML).
public static final QName KEY = QName.valueOf("key");
// Attribute holding the property value on the data element when the reader type is not GRAPHML.
public static final QName VALUE = QName.valueOf("value");
// Attribute holding the declared type of a GEXF "attribute" element.
public static final QName DATA_TYPE = QName.valueOf("type");
// Attribute used as the edge label when the reader type is GEXF (see ReaderType.GEXF).
public static final QName KIND = QName.valueOf("kind");

/**
 * Creates a reader bound to the given database and transaction.
 *
 * @param db the database service stored on this reader
 * @param tx the transaction stored on this reader
 */
public XmlGraphMLReader(GraphDatabaseService db, Transaction tx) {
    // The two assignments are independent; both references are simply retained.
    this.tx = tx;
    this.db = db;
}

/**
 * The XML dialects this reader can parse, together with the element and
 * attribute names that differ between them.
 *
 * <p>Enum constants are shared by every user of the class, so all fields are
 * {@code final}: the per-format configuration cannot be reassigned at runtime.
 */
public enum ReaderType {
    GRAPHML("data", KEY, LABEL, LABELS),
    GEXF("attvalue", FOR, KIND, LABEL);

    /** Local name of the element that carries a property value. */
    public final String data;

    /** Attribute of the {@link #data} element that identifies the property key. */
    public final QName key;

    /** Attribute read from an {@code edge} element to obtain its label. */
    public final QName label;

    /** Attribute read from a {@code node} element to obtain its labels. */
    public final QName labels;

    /**
     * @param data   local name of the property-value element
     * @param key    attribute identifying the property key on that element
     * @param label  attribute carrying the label of an edge
     * @param labels attribute carrying the labels of a node
     */
    ReaderType(String data, QName key, QName label, QName labels) {
        this.data = data;
        this.key = key;
        this.label = label;
        this.labels = labels;
    }
}

/**
 * Parses the given input using the {@link ReaderType#GRAPHML} reader type.
 *
 * @param input            the XML source to read
 * @param terminationGuard guard forwarded to the three-argument overload
 * @return the value returned by the three-argument overload
 * @throws XMLStreamException if the three-argument overload throws it
 */
public long parseXML(Reader input, TerminationGuard terminationGuard) throws XMLStreamException {
    final ReaderType defaultReaderType = ReaderType.GRAPHML;
    return parseXML(input, terminationGuard, defaultReaderType);
}

public long parseXML(Reader input, TerminationGuard terminationGuard, ReaderType readerType)
throws XMLStreamException {
Map<String, Object> dataMap = new HashMap<>();
Map<String, Long> cache = new HashMap<>(1024 * 32);
XMLInputFactory inputFactory = XMLInputFactory.newInstance();
inputFactory.setProperty("javax.xml.stream.isCoalescing", true);
Expand All @@ -238,7 +266,6 @@ public long parseXML(Reader input, TerminationGuard terminationGuard) throws XML
int count = 0;
BatchTransaction tx = new BatchTransaction(db, batchSize * 10, reporter);
try {

while (reader.hasNext()) {
terminationGuard.check();
XMLEvent event;
Expand All @@ -257,11 +284,15 @@ public long parseXML(Reader input, TerminationGuard terminationGuard) throws XML
continue;
}
if (event.isStartElement()) {

StartElement element = event.asStartElement();
String name = element.getName().getLocalPart();

if (name.equals("graphml") || name.equals("graph")) continue;
boolean isNameGexf = readerType.equals(ReaderType.GEXF) && name.equals("gexf");
if (name.equals("graphml") || name.equals("graph") || isNameGexf) continue;
if (readerType.equals(ReaderType.GEXF) && name.equals("attribute")) {
String id = getAttribute(element, ID);
String type = getAttribute(element, DATA_TYPE);
dataMap.put(id, type);
}
if (name.equals("key")) {
String id = getAttribute(element, ID);
Key key = new Key(
Expand All @@ -284,19 +315,24 @@ public long parseXML(Reader input, TerminationGuard terminationGuard) throws XML
else relKeys.put(id, key);
continue;
}
if (name.equals("data")) {
if (name.equals(readerType.data)) {
if (last == null) continue;
String id = getAttribute(element, KEY);
String id = getAttribute(element, readerType.key);
boolean isNode = last instanceof Node;
Key key = isNode ? nodeKeys.get(id) : relKeys.get(id);
if (key == null) key = Key.defaultKey(id, isNode);
final Map.Entry<XMLEvent, Object> eventEntry = getDataEventEntry(reader, key);
final XMLEvent next = eventEntry.getKey();
final Object value = eventEntry.getValue();
Object value = readerType.equals(ReaderType.GRAPHML)
? eventEntry.getValue()
: getAttribute(element, VALUE);
if (value != null) {
if (this.labels && isNode && id.equals("labels")) {
addLabels((Node) last, value.toString());
} else if (!this.labels || isNode || !id.equals("label")) {
value = readerType.equals(ReaderType.GRAPHML)
? value
: toValidValue(value, key.name, dataMap);
last.setProperty(key.name, value);
if (reporter != null) reporter.update(0, 0, 1);
}
Expand All @@ -311,7 +347,7 @@ public long parseXML(Reader input, TerminationGuard terminationGuard) throws XML
String id = getAttribute(element, ID);
Node node = tx.getTransaction().createNode();
if (this.labels) {
String labels = getAttribute(element, LABELS);
String labels = getAttribute(element, readerType.labels);
addLabels(node, labels);
}
if (storeNodeIds) node.setProperty("id", id);
Expand All @@ -324,7 +360,7 @@ public long parseXML(Reader input, TerminationGuard terminationGuard) throws XML
}
if (name.equals("edge")) {
tx.increment();
String label = getAttribute(element, LABEL);
String label = getAttribute(element, readerType.label);
Node from = getByNodeId(cache, tx.getTransaction(), element, XmlNodeExport.NodeType.SOURCE);
Node to = getByNodeId(cache, tx.getTransaction(), element, XmlNodeExport.NodeType.TARGET);

Expand Down
39 changes: 25 additions & 14 deletions core/src/main/java/apoc/load/Xml.java
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ public Stream<MapResult> xml(
@Name(value = "config", defaultValue = "{}") Map<String, Object> config,
@Name(value = "simple", defaultValue = "false") boolean simpleMode)
throws Exception {
return xmlXpathToMapResult(urlOrBinary, simpleMode, path, config);
return xmlXpathToMapResult(urlOrBinary, simpleMode, path, config, terminationGuard);
}

@UserFunction("apoc.xml.parse")
Expand All @@ -128,29 +128,39 @@ public Map<String, Object> parse(
throws Exception {
if (config == null) config = Collections.emptyMap();
boolean failOnError = (boolean) config.getOrDefault("failOnError", true);
return parse(new ByteArrayInputStream(data.getBytes(Charset.forName("UTF-8"))), simpleMode, path, failOnError)
return parse(
new ByteArrayInputStream(data.getBytes(Charset.forName("UTF-8"))),
simpleMode,
path,
failOnError,
terminationGuard)
.map(mr -> mr.value)
.findFirst()
.orElse(null);
}

private Stream<MapResult> xmlXpathToMapResult(
@Name("urlOrBinary") Object urlOrBinary, boolean simpleMode, String path, Map<String, Object> config)
public static Stream<MapResult> xmlXpathToMapResult(
@Name("urlOrBinary") Object urlOrBinary,
boolean simpleMode,
String path,
Map<String, Object> config,
TerminationGuard terminationGuard)
throws Exception {
if (config == null) config = Collections.emptyMap();
boolean failOnError = (boolean) config.getOrDefault("failOnError", true);
try {
Map<String, Object> headers = (Map) config.getOrDefault("headers", Collections.emptyMap());
CountingInputStream is = FileUtils.inputStreamFor(
urlOrBinary, headers, null, (String) config.getOrDefault(COMPRESSION, CompressionAlgo.NONE.name()));
return parse(is, simpleMode, path, failOnError);
return parse(is, simpleMode, path, failOnError, terminationGuard);
} catch (Exception e) {
if (!failOnError) return Stream.of(new MapResult(Collections.emptyMap()));
else throw e;
}
}

private Stream<MapResult> parse(InputStream data, boolean simpleMode, String path, boolean failOnError)
public static Stream<MapResult> parse(
InputStream data, boolean simpleMode, String path, boolean failOnError, TerminationGuard terminationGuard)
throws Exception {
List<MapResult> result = new ArrayList<>();
try {
Expand All @@ -173,7 +183,7 @@ private Stream<MapResult> parse(InputStream data, boolean simpleMode, String pat
for (int i = 0; i < nodeList.getLength(); i++) {
final Deque<Map<String, Object>> stack = new LinkedList<>();

handleNode(stack, nodeList.item(i), simpleMode);
handleNode(stack, nodeList.item(i), simpleMode, terminationGuard);
for (int index = 0; index < stack.size(); index++) {
result.add(new MapResult(stack.pollFirst()));
}
Expand Down Expand Up @@ -223,15 +233,16 @@ private boolean proceedReader(XMLStreamReader reader) throws XMLStreamException
}
}

private void handleNode(Deque<Map<String, Object>> stack, Node node, boolean simpleMode) {
private static void handleNode(
Deque<Map<String, Object>> stack, Node node, boolean simpleMode, TerminationGuard terminationGuard) {
terminationGuard.check();

// Handle document node
if (node.getNodeType() == Node.DOCUMENT_NODE) {
NodeList children = node.getChildNodes();
for (int i = 0; i < children.getLength(); i++) {
if (children.item(i).getLocalName() != null) {
handleNode(stack, children.item(i), simpleMode);
handleNode(stack, children.item(i), simpleMode, terminationGuard);
return;
}
}
Expand All @@ -248,7 +259,7 @@ private void handleNode(Deque<Map<String, Object>> stack, Node node, boolean sim

// This is to deal with text between xml tags for example new line characters
if (child.getNodeType() != Node.TEXT_NODE && child.getNodeType() != Node.CDATA_SECTION_NODE) {
handleNode(stack, child, simpleMode);
handleNode(stack, child, simpleMode, terminationGuard);
count++;
} else {
// Deal with text nodes
Expand Down Expand Up @@ -290,7 +301,7 @@ private void handleNode(Deque<Map<String, Object>> stack, Node node, boolean sim
* @param node
* @param elementMap
*/
private void handleTypeAndAttributes(Node node, Map<String, Object> elementMap) {
private static void handleTypeAndAttributes(Node node, Map<String, Object> elementMap) {
// Set type
if (node.getLocalName() != null) {
elementMap.put("_type", node.getLocalName());
Expand All @@ -312,7 +323,7 @@ private void handleTypeAndAttributes(Node node, Map<String, Object> elementMap)
* @param node
* @param elementMap
*/
private void handleTextNode(Node node, Map<String, Object> elementMap) {
private static void handleTextNode(Node node, Map<String, Object> elementMap) {
Object text = "";
int nodeType = node.getNodeType();
switch (nodeType) {
Expand Down Expand Up @@ -344,7 +355,7 @@ private void handleTextNode(Node node, Map<String, Object> elementMap) {
* @param text
* @return
*/
private String normalizeText(String text) {
private static String normalizeText(String text) {
String[] tokens = StringUtils.split(text, "\n");
for (int i = 0; i < tokens.length; i++) {
tokens[i] = tokens[i].trim();
Expand Down Expand Up @@ -682,7 +693,7 @@ private void setPropertyIfNotNull(org.neo4j.graphdb.Node root, String propertyKe
}
}

private RuntimeException generateXmlDoctypeException() {
/**
 * Signals that an XML document containing a DOCTYPE was rejected.
 *
 * <p>Despite the declared return type, this method never returns normally: it
 * always throws. The return type lets call sites write
 * {@code throw generateXmlDoctypeException();} if they wish.
 *
 * @return never returns
 * @throws RuntimeException always
 */
private static RuntimeException generateXmlDoctypeException() {
    final String message = "XML documents with a DOCTYPE are not allowed.";
    throw new RuntimeException(message);
}
}
Loading

0 comments on commit 0a5127f

Please sign in to comment.