Skip to content

Commit

Permalink
Fixes #2659: Apoc.import.graphml doesn't work for edges (#2853)
Browse files Browse the repository at this point in the history
  • Loading branch information
vga91 authored May 26, 2022
1 parent 7c6f970 commit 3a17e32
Show file tree
Hide file tree
Showing 10 changed files with 417 additions and 7 deletions.
2 changes: 2 additions & 0 deletions core/src/main/java/apoc/export/graphml/ExportGraphML.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ public Stream<ProgressInfo> file(@Name("urlOrBinaryFile") Object urlOrBinaryFile
XmlGraphMLReader graphMLReader = new XmlGraphMLReader(db, tx).reporter(reporter)
.batchSize(exportConfig.getBatchSize())
.relType(exportConfig.defaultRelationshipType())
.source(exportConfig.getSource())
.target(exportConfig.getTarget())
.nodeLabels(exportConfig.readLabels());

if (exportConfig.storeNodeIds()) graphMLReader.storeNodeIds();
Expand Down
47 changes: 43 additions & 4 deletions core/src/main/java/apoc/export/graphml/XmlGraphMLReader.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package apoc.export.graphml;

import apoc.export.util.BatchTransaction;
import apoc.export.util.ExportConfig;
import apoc.export.util.Reporter;
import apoc.util.JsonUtil;
import org.apache.commons.lang3.StringUtils;
Expand All @@ -19,6 +20,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;

/**
Expand All @@ -31,6 +33,8 @@ public class XmlGraphMLReader {
private final Transaction tx;
private boolean storeNodeIds;
private RelationshipType defaultRelType = RelationshipType.withName("UNKNOWN");
private ExportConfig.NodeConfig source;
private ExportConfig.NodeConfig target;
private int batchSize = 40000;
private Reporter reporter;
private boolean labels;
Expand All @@ -55,12 +59,28 @@ public XmlGraphMLReader nodeLabels(boolean readLabels) {
return this;
}

/**
 * Stores the configuration used to look up the start node of each imported edge.
 *
 * @param sourceConfig the source-node configuration to keep on this reader
 * @return this reader, so that calls can be chained
 */
public XmlGraphMLReader source(ExportConfig.NodeConfig sourceConfig) {
    source = sourceConfig;
    return this;
}

/**
 * Stores the configuration used to look up the end node of each imported edge.
 *
 * @param targetConfig the target-node configuration to keep on this reader
 * @return this reader, so that calls can be chained
 */
public XmlGraphMLReader target(ExportConfig.NodeConfig targetConfig) {
    target = targetConfig;
    return this;
}

/**
 * Stores the given reporter on this reader.
 *
 * @param reporter the reporter to keep on this reader
 * @return this reader, so that calls can be chained
 */
public XmlGraphMLReader reporter(Reporter reporter) {
this.reporter = reporter;
return this;
}

/**
 * @return the source-node configuration previously passed to {@code source(...)},
 *         or {@code null} if none was set
 */
public ExportConfig.NodeConfig getSource() {
    return this.source;
}

/**
 * @return the target-node configuration previously passed to {@code target(...)},
 *         or {@code null} if none was set
 */
public ExportConfig.NodeConfig getTarget() {
    return this.target;
}

enum Type {
BOOLEAN() {
Expand Down Expand Up @@ -255,11 +275,9 @@ public long parseXML(Reader input) throws XMLStreamException {
}
if (name.equals("edge")) {
tx.increment();
String source = getAttribute(element, SOURCE);
String target = getAttribute(element, TARGET);
String label = getAttribute(element, LABEL);
Node from = tx.getTransaction().getNodeById(cache.get(source));
Node to = tx.getTransaction().getNodeById(cache.get(target));
Node from = getByNodeId(cache, tx.getTransaction(), element, XmlNodeExport.NodeType.SOURCE);
Node to = getByNodeId(cache, tx.getTransaction(), element, XmlNodeExport.NodeType.TARGET);

RelationshipType relationshipType = label == null ? getRelationshipType(reader) : RelationshipType.withName(label);
Relationship relationship = from.createRelationshipTo(to, relationshipType);
Expand All @@ -274,6 +292,27 @@ public long parseXML(Reader input) throws XMLStreamException {
return count;
}

/**
 * Resolves the start or end node referenced by an {@code edge} element.
 * <p>
 * Without a source/target config (or with a config that has no label) the attribute value is
 * looked up in {@code cache} and the node is fetched by its internal id. Otherwise the node is
 * searched by the configured label and by the configured property ({@code "id"} by default),
 * whose value is the attribute value parsed according to the optional
 * {@code sourceType}/{@code targetType} attribute.
 *
 * @param cache    maps the ids read from the file to internal node ids
 * @param tx       the transaction used to look up the node
 * @param element  the {@code edge} start element
 * @param nodeType whether the source or the target node has to be resolved
 * @return the resolved node, never {@code null}
 * @throws IllegalStateException if no node can be resolved for the attribute value
 */
private Node getByNodeId(Map<String, Long> cache, Transaction tx, StartElement element, XmlNodeExport.NodeType nodeType) {
    final ExportConfig.NodeConfig nodeConfig = nodeType.get().getNodeConfigReader(this);
    final String attributeName = nodeType.getName();
    final String sourceTargetValue = getAttribute(element, QName.valueOf(attributeName));

    // without source/target config, we look for the internal id;
    // the config is null when source(...)/target(...) was never called on this reader
    if (nodeConfig == null || StringUtils.isBlank(nodeConfig.label)) {
        final Long id = cache.get(sourceTargetValue);
        if (id == null) {
            // fail with a readable message instead of a NullPointerException while unboxing
            throw new IllegalStateException(
                    "Cannot create the relationship: no imported node found for " + attributeName
                            + " '" + sourceTargetValue + "'");
        }
        return tx.getNodeById(id);
    }

    // with source/target configured, we search a node with the specified label
    // and with the type specified in sourceType/targetType, if present, or string by default
    final String valueType = getAttribute(element, QName.valueOf(nodeType.getNameType()));
    final Object value = valueType == null
            ? sourceTargetValue
            : Type.forType(valueType).parse(sourceTargetValue);

    final String property = Optional.ofNullable(nodeConfig.id).orElse("id");
    final Node node = tx.findNode(Label.label(nodeConfig.label), property, value);
    if (node == null) {
        // findNode returns null when nothing matches; report it here rather than
        // letting the caller fail later on a null node
        throw new IllegalStateException(
                "Cannot create the relationship: no node found with label " + nodeConfig.label
                        + " and property " + property + " = " + value);
    }
    return node;
}

private RelationshipType getRelationshipType(XMLEventReader reader) throws XMLStreamException {
if (this.labels) {
XMLEvent peek = reader.peek();
Expand Down
30 changes: 28 additions & 2 deletions core/src/main/java/apoc/export/graphml/XmlGraphMLWriter.java
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package apoc.export.graphml;

import apoc.export.util.*;
import org.apache.commons.lang3.StringUtils;
import org.neo4j.cypher.export.SubGraph;
import org.neo4j.graphdb.Entity;
import org.neo4j.graphdb.Label;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.NotFoundException;
import org.neo4j.graphdb.Relationship;

import javax.xml.stream.XMLOutputFactory;
Expand Down Expand Up @@ -131,8 +134,8 @@ private void writeLabelsAsData(XMLStreamWriter writer, Node node, ExportConfig c
private int writeRelationship(XMLStreamWriter writer, Relationship rel, ExportConfig config) throws XMLStreamException {
writer.writeStartElement("edge");
writer.writeAttribute("id", id(rel));
writer.writeAttribute("source", id(rel.getStartNode()));
writer.writeAttribute("target", id(rel.getEndNode()));
getNodeAttribute(writer, XmlNodeExport.NodeType.SOURCE, config, rel);
getNodeAttribute(writer, XmlNodeExport.NodeType.TARGET, config, rel);
if (config.getFormat() == ExportFormat.TINKERPOP) {
writeData(writer, "labelE", rel.getType().name());
} else {
Expand All @@ -147,6 +150,29 @@ private int writeRelationship(XMLStreamWriter writer, Relationship rel, ExportCo
return props;
}

/**
 * Writes the {@code source} or {@code target} attribute of the current {@code edge} element.
 * <p>
 * Without an {@code id} in the source/target config the attribute holds the internal node id.
 * Otherwise it holds the value of the configured node property, and an additional
 * {@code sourceType}/{@code targetType} attribute carries the type of that value.
 *
 * @param writer   the writer positioned inside the {@code edge} element
 * @param nodeType whether the source or the target attribute has to be written
 * @param config   the export configuration holding the source/target configs
 * @param rel      the relationship being exported
 * @throws XMLStreamException if the writer fails
 * @throws RuntimeException   if the node does not have the configured property
 */
private void getNodeAttribute(XMLStreamWriter writer, XmlNodeExport.NodeType nodeType, ExportConfig config, Relationship rel) throws XMLStreamException {
    final XmlNodeExport.ExportNode xmlNodeInterface = nodeType.get();
    final Node node = xmlNodeInterface.getNode(rel);
    final String name = nodeType.getName();
    final ExportConfig.NodeConfig nodeConfig = xmlNodeInterface.getNodeConfig(config);

    // without an id in the source/target config, we leverage the internal node id
    if (StringUtils.isBlank(nodeConfig.id)) {
        writer.writeAttribute(name, id(node));
        return;
    }

    // with an id configured, we write the property value as source/target
    // and the type of that value as sourceType/targetType
    final Object nodeProperty;
    try {
        nodeProperty = node.getProperty(nodeConfig.id);
    } catch (NotFoundException e) {
        // keep the original exception as cause, so the stack trace is not lost
        throw new RuntimeException(
                "The config source and/or target cannot be used because the node with id " + node.getId() + " doesn't have property " + nodeConfig.id,
                e);
    }
    writer.writeAttribute(name, nodeProperty.toString());
    writer.writeAttribute(nodeType.getNameType(), MetaInformation.typeFor(nodeProperty.getClass(), MetaInformation.GRAPHML_ALLOWED));
}

private String id(Relationship rel) {
return "e" + rel.getId();
}
Expand Down
72 changes: 72 additions & 0 deletions core/src/main/java/apoc/export/graphml/XmlNodeExport.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package apoc.export.graphml;

import apoc.export.util.ExportConfig;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;

import static apoc.export.util.ExportConfig.NodeConfig;

/**
 * Helpers describing the two ends of a relationship (source and target),
 * shared by the GraphML export and import code.
 */
public class XmlNodeExport {

    /**
     * Gives access to everything that depends on the side of the relationship:
     * the node config of an {@link ExportConfig}, the node config of an
     * {@link XmlGraphMLReader} and the node of a {@link Relationship}.
     */
    public interface ExportNode {
        NodeConfig getNodeConfig(ExportConfig config);
        NodeConfig getNodeConfigReader(XmlGraphMLReader reader);
        Node getNode(Relationship rel);
    }

    /** The two sides of a relationship, each with its attribute name and its accessor. */
    enum NodeType {
        SOURCE("source", new SourceNode()),
        TARGET("target", new TargetNode());

        private final String name;
        private final ExportNode exportNode;

        NodeType(String name, ExportNode exportNode) {
            this.name = name;
            this.exportNode = exportNode;
        }

        /** @return the attribute name, i.e. {@code source} or {@code target} */
        public String getName() {
            return name;
        }

        /** @return the attribute name followed by {@code Type}, i.e. {@code sourceType} or {@code targetType} */
        public String getNameType() {
            return getName() + "Type";
        }

        /** @return the accessor bound to this side of the relationship */
        ExportNode get() {
            return exportNode;
        }
    }

    /** Accessor for the start side of a relationship. */
    private static final class SourceNode implements ExportNode {
        @Override
        public NodeConfig getNodeConfig(ExportConfig config) {
            return config.getSource();
        }

        @Override
        public NodeConfig getNodeConfigReader(XmlGraphMLReader reader) {
            return reader.getSource();
        }

        @Override
        public Node getNode(Relationship rel) {
            return rel.getStartNode();
        }
    }

    /** Accessor for the end side of a relationship. */
    private static final class TargetNode implements ExportNode {
        @Override
        public NodeConfig getNodeConfig(ExportConfig config) {
            return config.getTarget();
        }

        @Override
        public NodeConfig getNodeConfigReader(XmlGraphMLReader reader) {
            return reader.getTarget();
        }

        @Override
        public Node getNode(Relationship rel) {
            return rel.getEndNode();
        }
    }
}
24 changes: 24 additions & 0 deletions core/src/main/java/apoc/export/util/ExportConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,18 @@
* @since 19.01.14
*/
public class ExportConfig extends CompressionConfig {

/**
 * Holds the {@code label} and {@code id} entries of a {@code source} or {@code target}
 * configuration map; both are {@code null} when the entry (or the whole map) is missing.
 */
public static class NodeConfig {
    public String label;
    public String id;

    /**
     * @param config the source/target configuration map, may be {@code null}
     */
    public NodeConfig(Map<String, String> config) {
        final Map<String, String> entries = config != null ? config : Collections.emptyMap();
        label = entries.get("label");
        id = entries.get("id");
    }
}

public static final char QUOTECHAR = '"';
public static final String NONE_QUOTES = "none";
public static final String ALWAYS_QUOTES = "always";
Expand All @@ -26,6 +38,8 @@ public class ExportConfig extends CompressionConfig {
public static final String DEFAULT_QUOTES = ALWAYS_QUOTES;
private final boolean streamStatements;
private final boolean ifNotExists;
private final NodeConfig source;
private final NodeConfig target;

private int batchSize;
private boolean silent;
Expand Down Expand Up @@ -113,6 +127,8 @@ public ExportConfig(Map<String,Object> config) {
this.samplingConfig = (Map<String, Object>) config.getOrDefault("samplingConfig", new HashMap<>());
this.unwindBatchSize = ((Number)getOptimizations().getOrDefault("unwindBatchSize", DEFAULT_UNWIND_BATCH_SIZE)).intValue();
this.awaitForIndexes = ((Number)config.getOrDefault("awaitForIndexes", 300)).longValue();
this.source = new NodeConfig((Map<String, String>) config.get("source"));
this.target = new NodeConfig((Map<String, String>) config.get("target"));
validate();
}

Expand Down Expand Up @@ -154,6 +170,14 @@ public String defaultRelationshipType() {
return config.getOrDefault("defaultRelationshipType","RELATED").toString();
}

/**
 * @return the node config built from the {@code source} entry of the configuration
 */
public NodeConfig getSource() {
    return this.source;
}

/**
 * @return the node config built from the {@code target} entry of the configuration
 */
public NodeConfig getTarget() {
    return this.target;
}

/**
 * @return the {@code readLabels} entry of the configuration converted with {@code toBoolean},
 *         using {@code false} when the entry is missing
 */
public boolean readLabels() {
    final Object readLabels = config.getOrDefault("readLabels", false);
    return toBoolean(readLabels);
}
Expand Down
85 changes: 85 additions & 0 deletions core/src/test/java/apoc/export/graphml/ExportGraphMLTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,91 @@ public void testImportGraphML() throws Exception {

TestUtil.testCall(db, "MATCH (c:Bar {age: 12, values: [1,2,3]}) RETURN COUNT(c) AS c", null, (r) -> assertEquals(1L, r.get("c")));
}

/**
 * Exports without a source/target config and re-imports the separated files
 * with {@code storeNodeIds} enabled and a label configured for source and target.
 */
@Test
public void testRoundTripWithSeparatedImport() {
    final Map<String, Object> configForExport = map("useTypes", true);

    // we didn't specify a source/target in the export config,
    // so we have to store the node ids and look for them during the relationship import
    final Map<String, Object> fooSource = map("label", "Foo");
    final Map<String, Object> barTarget = map("label", "Bar");
    final Map<String, Object> configForImport = map(
            "readLabels", true,
            "storeNodeIds", true,
            "source", fooSource,
            "target", barTarget);

    separatedFileCommons(configForExport, configForImport);
}

/**
 * Exports with a custom id property configured for source and target and re-imports
 * the separated files searching the nodes by those properties.
 */
@Test
public void testImportSeparatedFilesWithCustomId() {
    final Map<String, Object> exportSource = map("id", "name");
    final Map<String, Object> exportTarget = map("id", "age");
    final Map<String, Object> configForExport = map(
            "useTypes", true,
            "source", exportSource,
            "target", exportTarget);

    // we specified a source/target in the export config, so the storeNodeIds config
    // is unnecessary and we search the nodes by the properties Foo.name and Bar.age
    final Map<String, Object> importSource = map("label", "Foo", "id", "name");
    final Map<String, Object> importTarget = map("label", "Bar", "id", "age");
    final Map<String, Object> configForImport = map(
            "readLabels", true,
            "source", importSource,
            "target", importTarget);

    separatedFileCommons(configForExport, configForImport);
}

/**
 * Shared body of the separated-files tests: creates two Foo-KNOWS-Bar paths, exports the Foo nodes,
 * the Bar nodes and the KNOWS relationships to three different files, deletes everything,
 * imports the three files again and checks the resulting paths.
 *
 * @param exportConfig the config passed to the export procedures
 * @param importConfig the config passed to {@code apoc.import.graphml}
 */
private void separatedFileCommons(Map<String, Object> exportConfig, Map<String, Object> importConfig) {
    db.executeTransactionally("CREATE (:Foo {name: 'zzz'})-[:KNOWS]->(:Bar {age: 0}), (:Foo {name: 'aaa'})-[:KNOWS {id: 1}]->(:Bar {age: 666})");

    // three output files: one for the start nodes, one for the end nodes, one for the relationships
    final String fooNodesFile = new File(directory, "queryNodesFoo.graphml").getAbsolutePath();
    final String barNodesFile = new File(directory, "queryNodesBar.graphml").getAbsolutePath();
    final String relationshipsFile = new File(directory, "queryRelationship.graphml").getAbsolutePath();

    // export
    TestUtil.testCall(db, "CALL apoc.export.graphml.query('MATCH (start:Foo)-[:KNOWS]->(:Bar) RETURN start',$file, $config)",
            map("file", fooNodesFile, "config", exportConfig),
            row -> assertEquals(3L, row.get("nodes")));

    TestUtil.testCall(db, "CALL apoc.export.graphml.query('MATCH (:Foo)-[:KNOWS]->(end:Bar) RETURN end', $file, $config) ",
            map("file", barNodesFile, "config", exportConfig),
            row -> assertEquals(3L, row.get("nodes")));

    TestUtil.testCall(db, "MATCH (:Foo)-[rel:KNOWS]->(:Bar) WITH collect(rel) as rels \n" +
                    "call apoc.export.graphml.data([], rels, $file, $config) " +
                    "YIELD nodes, relationships RETURN nodes, relationships",
            map("file", relationshipsFile, "config", exportConfig),
            row -> assertEquals(3L, row.get("relationships")));

    // wipe the current entities, then import the three files again
    db.executeTransactionally("MATCH (n) DETACH DELETE n");

    final String importQuery = "CALL apoc.import.graphml($file, $config)";

    TestUtil.testCall(db, importQuery,
            map("file", fooNodesFile, "config", importConfig),
            row -> assertEquals(3L, row.get("nodes")));

    TestUtil.testCall(db, importQuery,
            map("file", barNodesFile, "config", importConfig),
            row -> assertEquals(3L, row.get("nodes")));

    TestUtil.testCall(db, importQuery,
            map("file", relationshipsFile, "config", importConfig),
            row -> assertEquals(3L, row.get("relationships")));

    // the re-imported paths, ordered by the name of the start node
    TestUtil.testResult(db, "MATCH (start:Foo)-[rel:KNOWS]->(end:Bar) \n" +
                    "RETURN start.name AS startName, rel.id AS relId, end.age AS endAge \n" +
                    "ORDER BY start.name",
            result -> {
                assertions(result.next(), "aaa", 1L, 666L);
                assertions(result.next(), "foo", null, 42L);
                assertions(result.next(), "zzz", null, 0L);
                assertFalse(result.hasNext());
            });
}

/**
 * Checks one result row against the expected start name, relationship id and end age.
 *
 * @param row            the result row, read through the keys {@code startName}, {@code relId} and {@code endAge}
 * @param expectedSource the expected value of {@code startName}
 * @param expectedRel    the expected value of {@code relId}, may be {@code null}
 * @param expectedTarget the expected value of {@code endAge}
 */
private void assertions(Map<String, Object> row, String expectedSource, Long expectedRel, Long expectedTarget) {
assertEquals(expectedSource, row.get("startName"));
assertEquals(expectedRel, row.get("relId"));
assertEquals(expectedTarget, row.get("endAge"));
}

@Test
public void testImportGraphMLLargeFile() {
Expand Down
7 changes: 7 additions & 0 deletions docs/asciidoc/modules/ROOT/pages/export/graphml.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ The procedures support the following config parameters:
| defaultRelationshipType | "RELATED" | set relationship type (import/export graphml)
| separateFiles | false | export results in separated file by type (nodes, relationships..)
| stream | false | stream the xml directly to the client into the `data` field
| useTypes | false | Write the attribute type information to the graphml output
| source | Empty map | A `Map<String,String>` to be used together with `target` to import (via `apoc.import.graphml`) a relationships-only file. In this case, the `source` and `target` attributes of the `edge` tag are not based on the internal ids of the nodes but on a custom property value. +
For example, with a path like `(:Foo {name: "aaa"})-[:KNOWS]->(:Bar {age: 666})`, we can export the `KNOWS` rel with a config `{source: {id: "name"}, target: {id: "age"}}`, which produces `<edge id="e2" source="aaa" sourceType="string" target="666" targetType="long" label="KNOWS"><data key="label">KNOWS</data><data key="id">1</data></edge>`. Note the additional `sourceType`/`targetType` attributes, which are used to detect the right type during the import.
| target | Empty map | A `Map<String,String>`. Same as `source`, but for the end node.
|===

[[export-graphml-file-export]]
Expand Down Expand Up @@ -93,6 +97,9 @@ The Neo4j Browser visualization below shows the imported graph:

image::play-movies.png[title="Movies Graph Visualization"]

[[roundtip-separated-files]]
include::partial$roundtripSeparatedGraphml.adoc[]

[[export-graphml-whole-database]]
=== Export whole database to GraphML

Expand Down
Loading

0 comments on commit 3a17e32

Please sign in to comment.