diff --git a/core/src/main/java/apoc/export/graphml/ExportGraphML.java b/core/src/main/java/apoc/export/graphml/ExportGraphML.java index 002f029568..e1f1be3617 100644 --- a/core/src/main/java/apoc/export/graphml/ExportGraphML.java +++ b/core/src/main/java/apoc/export/graphml/ExportGraphML.java @@ -69,6 +69,8 @@ public Stream file(@Name("urlOrBinaryFile") Object urlOrBinaryFile XmlGraphMLReader graphMLReader = new XmlGraphMLReader(db, tx).reporter(reporter) .batchSize(exportConfig.getBatchSize()) .relType(exportConfig.defaultRelationshipType()) + .source(exportConfig.getSource()) + .target(exportConfig.getTarget()) .nodeLabels(exportConfig.readLabels()); if (exportConfig.storeNodeIds()) graphMLReader.storeNodeIds(); diff --git a/core/src/main/java/apoc/export/graphml/XmlGraphMLReader.java b/core/src/main/java/apoc/export/graphml/XmlGraphMLReader.java index 1ba854d264..57b40b4b9f 100644 --- a/core/src/main/java/apoc/export/graphml/XmlGraphMLReader.java +++ b/core/src/main/java/apoc/export/graphml/XmlGraphMLReader.java @@ -1,6 +1,7 @@ package apoc.export.graphml; import apoc.export.util.BatchTransaction; +import apoc.export.util.ExportConfig; import apoc.export.util.Reporter; import apoc.util.JsonUtil; import org.apache.commons.lang3.StringUtils; @@ -19,6 +20,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.function.Function; /** @@ -31,6 +33,8 @@ public class XmlGraphMLReader { private final Transaction tx; private boolean storeNodeIds; private RelationshipType defaultRelType = RelationshipType.withName("UNKNOWN"); + private ExportConfig.NodeConfig source; + private ExportConfig.NodeConfig target; private int batchSize = 40000; private Reporter reporter; private boolean labels; @@ -55,12 +59,28 @@ public XmlGraphMLReader nodeLabels(boolean readLabels) { return this; } + public XmlGraphMLReader source(ExportConfig.NodeConfig sourceConfig) { + this.source = sourceConfig; + return this; + } + + 
public XmlGraphMLReader target(ExportConfig.NodeConfig targetConfig) { + this.target = targetConfig; + return this; + } + public XmlGraphMLReader reporter(Reporter reporter) { this.reporter = reporter; return this; } + public ExportConfig.NodeConfig getSource() { + return source; + } + public ExportConfig.NodeConfig getTarget() { + return target; + } enum Type { BOOLEAN() { @@ -255,11 +275,9 @@ public long parseXML(Reader input) throws XMLStreamException { } if (name.equals("edge")) { tx.increment(); - String source = getAttribute(element, SOURCE); - String target = getAttribute(element, TARGET); String label = getAttribute(element, LABEL); - Node from = tx.getTransaction().getNodeById(cache.get(source)); - Node to = tx.getTransaction().getNodeById(cache.get(target)); + Node from = getByNodeId(cache, tx.getTransaction(), element, XmlNodeExport.NodeType.SOURCE); + Node to = getByNodeId(cache, tx.getTransaction(), element, XmlNodeExport.NodeType.TARGET); RelationshipType relationshipType = label == null ? 
getRelationshipType(reader) : RelationshipType.withName(label); Relationship relationship = from.createRelationshipTo(to, relationshipType); @@ -274,6 +292,27 @@ public long parseXML(Reader input) throws XMLStreamException { return count; } + private Node getByNodeId(Map cache, Transaction tx, StartElement element, XmlNodeExport.NodeType nodeType) { + final XmlNodeExport.ExportNode xmlNodeInterface = nodeType.get(); + final ExportConfig.NodeConfig nodeConfig = xmlNodeInterface.getNodeConfigReader(this); + + final String sourceTargetValue = getAttribute(element, QName.valueOf(nodeType.getName())); + + final Long id = cache.get(sourceTargetValue); + // without source/target config, we look for the internal id + if (StringUtils.isBlank(nodeConfig.label)) { + return tx.getNodeById(id); + } + // with source/target configured, we search a node with a specified label + // and with a type specified in sourceType, if present, or string by default + final String attribute = getAttribute(element, QName.valueOf(nodeType.getNameType())); + final Object value = attribute == null + ? 
sourceTargetValue + : Type.forType(attribute).parse(sourceTargetValue); + + return tx.findNode(Label.label(nodeConfig.label), Optional.ofNullable(nodeConfig.id).orElse("id"), value); + } + private RelationshipType getRelationshipType(XMLEventReader reader) throws XMLStreamException { if (this.labels) { XMLEvent peek = reader.peek(); diff --git a/core/src/main/java/apoc/export/graphml/XmlGraphMLWriter.java b/core/src/main/java/apoc/export/graphml/XmlGraphMLWriter.java index d3c41b35ea..0eaeb63df9 100644 --- a/core/src/main/java/apoc/export/graphml/XmlGraphMLWriter.java +++ b/core/src/main/java/apoc/export/graphml/XmlGraphMLWriter.java @@ -1,9 +1,12 @@ package apoc.export.graphml; import apoc.export.util.*; +import org.apache.commons.lang3.StringUtils; import org.neo4j.cypher.export.SubGraph; import org.neo4j.graphdb.Entity; +import org.neo4j.graphdb.Label; import org.neo4j.graphdb.Node; +import org.neo4j.graphdb.NotFoundException; import org.neo4j.graphdb.Relationship; import javax.xml.stream.XMLOutputFactory; @@ -131,8 +134,8 @@ private void writeLabelsAsData(XMLStreamWriter writer, Node node, ExportConfig c private int writeRelationship(XMLStreamWriter writer, Relationship rel, ExportConfig config) throws XMLStreamException { writer.writeStartElement("edge"); writer.writeAttribute("id", id(rel)); - writer.writeAttribute("source", id(rel.getStartNode())); - writer.writeAttribute("target", id(rel.getEndNode())); + getNodeAttribute(writer, XmlNodeExport.NodeType.SOURCE, config, rel); + getNodeAttribute(writer, XmlNodeExport.NodeType.TARGET, config, rel); if (config.getFormat() == ExportFormat.TINKERPOP) { writeData(writer, "labelE", rel.getType().name()); } else { @@ -147,6 +150,29 @@ private int writeRelationship(XMLStreamWriter writer, Relationship rel, ExportCo return props; } + private void getNodeAttribute(XMLStreamWriter writer, XmlNodeExport.NodeType nodeType, ExportConfig config, Relationship rel) throws XMLStreamException { + + final XmlNodeExport.ExportNode 
xmlNodeInterface = nodeType.get(); + final Node node = xmlNodeInterface.getNode(rel); + final String name = nodeType.getName(); + final ExportConfig.NodeConfig nodeConfig = xmlNodeInterface.getNodeConfig(config); + // without config the source/target configs, we leverage the internal node id + if (StringUtils.isBlank(nodeConfig.id)) { + writer.writeAttribute(name, id(node)); + return; + } + // with source/target with an id configured + // we put a source with the property value and a sourceType with the prop type of node + try { + final Object nodeProperty = node.getProperty(nodeConfig.id); + writer.writeAttribute(name, nodeProperty.toString()); + writer.writeAttribute(nodeType.getNameType(), MetaInformation.typeFor(nodeProperty.getClass(), MetaInformation.GRAPHML_ALLOWED)); + } catch (NotFoundException e) { + throw new RuntimeException( + "The config source and/or target cannot be used because the node with id " + node.getId() + " doesn't have property " + nodeConfig.id); + } + } + private String id(Relationship rel) { return "e" + rel.getId(); } diff --git a/core/src/main/java/apoc/export/graphml/XmlNodeExport.java b/core/src/main/java/apoc/export/graphml/XmlNodeExport.java new file mode 100644 index 0000000000..7dd56157dd --- /dev/null +++ b/core/src/main/java/apoc/export/graphml/XmlNodeExport.java @@ -0,0 +1,72 @@ +package apoc.export.graphml; + +import apoc.export.util.ExportConfig; +import org.neo4j.graphdb.Node; +import org.neo4j.graphdb.Relationship; + +import static apoc.export.util.ExportConfig.NodeConfig; + +public class XmlNodeExport { + + public interface ExportNode { + NodeConfig getNodeConfig(ExportConfig config); + NodeConfig getNodeConfigReader(XmlGraphMLReader reader); + Node getNode(Relationship rel); + } + + enum NodeType { + SOURCE("source", new ExportNode() { + @Override + public ExportConfig.NodeConfig getNodeConfig(ExportConfig config) { + return config.getSource(); + } + + @Override + public Node getNode(Relationship rel) { + return 
rel.getStartNode(); + } + + @Override + public NodeConfig getNodeConfigReader(XmlGraphMLReader reader) { + return reader.getSource(); + } + }), + + TARGET("target", new ExportNode() { + @Override + public ExportConfig.NodeConfig getNodeConfig(ExportConfig config) { + return config.getTarget(); + } + + @Override + public Node getNode(Relationship rel) { + return rel.getEndNode(); + } + + @Override + public NodeConfig getNodeConfigReader(XmlGraphMLReader reader) { + return reader.getTarget(); + } + }); + + private final String name; + private final ExportNode exportNode; + + NodeType(String name, ExportNode exportNode) { + this.name = name; + this.exportNode = exportNode; + } + + public String getName() { + return name; + } + + public String getNameType() { + return name + "Type"; + } + + ExportNode get() { + return exportNode; + } + } +} diff --git a/core/src/main/java/apoc/export/util/ExportConfig.java b/core/src/main/java/apoc/export/util/ExportConfig.java index 415a222d2e..1c7298af25 100644 --- a/core/src/main/java/apoc/export/util/ExportConfig.java +++ b/core/src/main/java/apoc/export/util/ExportConfig.java @@ -14,6 +14,18 @@ * @since 19.01.14 */ public class ExportConfig extends CompressionConfig { + + public static class NodeConfig { + public String label; + public String id; + + public NodeConfig(Map config) { + config = config == null ? 
Collections.emptyMap() : config; + this.label = config.get("label"); + this.id = config.get("id"); + } + } + public static final char QUOTECHAR = '"'; public static final String NONE_QUOTES = "none"; public static final String ALWAYS_QUOTES = "always"; @@ -26,6 +38,8 @@ public class ExportConfig extends CompressionConfig { public static final String DEFAULT_QUOTES = ALWAYS_QUOTES; private final boolean streamStatements; private final boolean ifNotExists; + private final NodeConfig source; + private final NodeConfig target; private int batchSize; private boolean silent; @@ -113,6 +127,8 @@ public ExportConfig(Map config) { this.samplingConfig = (Map) config.getOrDefault("samplingConfig", new HashMap<>()); this.unwindBatchSize = ((Number)getOptimizations().getOrDefault("unwindBatchSize", DEFAULT_UNWIND_BATCH_SIZE)).intValue(); this.awaitForIndexes = ((Number)config.getOrDefault("awaitForIndexes", 300)).longValue(); + this.source = new NodeConfig((Map) config.get("source")); + this.target = new NodeConfig((Map) config.get("target")); validate(); } @@ -154,6 +170,14 @@ public String defaultRelationshipType() { return config.getOrDefault("defaultRelationshipType","RELATED").toString(); } + public NodeConfig getSource() { + return source; + } + + public NodeConfig getTarget() { + return target; + } + public boolean readLabels() { return toBoolean(config.getOrDefault("readLabels",false)); } diff --git a/core/src/test/java/apoc/export/graphml/ExportGraphMLTest.java b/core/src/test/java/apoc/export/graphml/ExportGraphMLTest.java index 26e2579494..c4fd8d062c 100644 --- a/core/src/test/java/apoc/export/graphml/ExportGraphMLTest.java +++ b/core/src/test/java/apoc/export/graphml/ExportGraphMLTest.java @@ -218,6 +218,91 @@ public void testImportGraphML() throws Exception { TestUtil.testCall(db, "MATCH (c:Bar {age: 12, values: [1,2,3]}) RETURN COUNT(c) AS c", null, (r) -> assertEquals(1L, r.get("c"))); } + + @Test + public void testRoundTripWithSeparatedImport() { + Map 
exportConfig = map("useTypes", true); + + Map importConfig = map("readLabels", true, "storeNodeIds", true, + "source", map("label", "Foo"), + "target", map("label", "Bar")); + + // we didn't specified a source/target in export config + // so we have to store the nodeIds and looking for them during relationship import + separatedFileCommons(exportConfig, importConfig); + } + + @Test + public void testImportSeparatedFilesWithCustomId() { + Map exportConfig = map("useTypes", true, + "source", map("id", "name"), + "target", map("id", "age")); + + Map importConfig = map("readLabels", true, + "source", map("label", "Foo", "id", "name"), + "target", map("label", "Bar", "id", "age")); + + // we specified a source/target in export config + // so storeNodeIds config is unnecessary and we search nodes by properties Foo.name and Bar.age + separatedFileCommons(exportConfig, importConfig); + } + + private void separatedFileCommons(Map exportConfig, Map importConfig) { + db.executeTransactionally("CREATE (:Foo {name: 'zzz'})-[:KNOWS]->(:Bar {age: 0}), (:Foo {name: 'aaa'})-[:KNOWS {id: 1}]->(:Bar {age: 666})"); + + // we export 3 files: 1 for source nodes, 1 for end nodes, 1 for relationships + String outputNodesFoo = new File(directory, "queryNodesFoo.graphml").getAbsolutePath(); + String outputNodesBar = new File(directory, "queryNodesBar.graphml").getAbsolutePath(); + String outputRelationships = new File(directory, "queryRelationship.graphml").getAbsolutePath(); + + TestUtil.testCall(db, "CALL apoc.export.graphml.query('MATCH (start:Foo)-[:KNOWS]->(:Bar) RETURN start',$file, $config)", + map("file", outputNodesFoo, "config", exportConfig), + (r) -> assertEquals(3L, r.get("nodes"))); + + TestUtil.testCall(db, "CALL apoc.export.graphml.query('MATCH (:Foo)-[:KNOWS]->(end:Bar) RETURN end', $file, $config) ", + map("file", outputNodesBar, "config", exportConfig), + (r) -> assertEquals(3L, r.get("nodes"))); + + TestUtil.testCall(db, "MATCH (:Foo)-[rel:KNOWS]->(:Bar) WITH 
collect(rel) as rels \n" + + "call apoc.export.graphml.data([], rels, $file, $config) " + + "YIELD nodes, relationships RETURN nodes, relationships", + map("file", outputRelationships, "config", exportConfig), + (r) -> assertEquals(3L, r.get("relationships"))); + + // delete current entities and re-import + db.executeTransactionally("MATCH (n) DETACH DELETE n"); + + TestUtil.testCall(db, "CALL apoc.import.graphml($file, $config)", + map("file", outputNodesFoo, "config", importConfig), + (r) -> assertEquals(3L, r.get("nodes"))); + + TestUtil.testCall(db, "CALL apoc.import.graphml($file, $config)", + map("file", outputNodesBar, "config", importConfig), + (r) -> assertEquals(3L, r.get("nodes"))); + + TestUtil.testCall(db, "CALL apoc.import.graphml($file, $config)", + map("file", outputRelationships, "config", importConfig), + (r) -> assertEquals(3L, r.get("relationships"))); + + TestUtil.testResult(db, "MATCH (start:Foo)-[rel:KNOWS]->(end:Bar) \n" + + "RETURN start.name AS startName, rel.id AS relId, end.age AS endAge \n" + + "ORDER BY start.name", + (r) -> { + Map row = r.next(); + assertions(row, "aaa", 1L, 666L); + row = r.next(); + assertions(row, "foo", null, 42L); + row = r.next(); + assertions(row, "zzz", null, 0L); + assertFalse(r.hasNext()); + }); + } + + private void assertions(Map row, String expectedSource, Long expectedRel, Long expectedTarget) { + assertEquals(expectedSource, row.get("startName")); + assertEquals(expectedRel, row.get("relId")); + assertEquals(expectedTarget, row.get("endAge")); + } @Test public void testImportGraphMLLargeFile() { diff --git a/docs/asciidoc/modules/ROOT/pages/export/graphml.adoc b/docs/asciidoc/modules/ROOT/pages/export/graphml.adoc index 12dd34614b..36daeb88f7 100644 --- a/docs/asciidoc/modules/ROOT/pages/export/graphml.adoc +++ b/docs/asciidoc/modules/ROOT/pages/export/graphml.adoc @@ -64,6 +64,10 @@ The procedures support the following config parameters: | defaultRelationshipType | "RELATED" | set relationship type 
(import/export graphml) | separateFiles | false | export results in separated file by type (nodes, relationships..) | stream | false | stream the xml directly to the client into the `data` field +| useTypes | false | Write the attribute type information to the graphml output +| source | Empty map | A map, to be used together with `target`, to import (via `apoc.import.graphml`) a relationships-only file. In this case the `source` and `target` attributes of the `edge` tag are based not on the internal node id but on a custom property value. + +For example, with a path like `(:Foo {name: "aaa"})-[:KNOWS]->(:Bar {age: 666})`, we can export the `KNOWS` rel with a config `{source: {id: 'name'}, target: {id: 'age'}}`, obtaining an `edge` tag with `source="aaa"` and `target="666"`. Note the additional `sourceType`/`targetType` attributes, used to detect the right type during the import. +| target | Empty map | Same as `source`, for the end node. |=== [[export-graphml-file-export]] @@ -93,6 +97,9 @@ The Neo4j Browser visualization below shows the imported graph: image::play-movies.png[title="Movies Graph Visualization"] +[[roundtip-separated-files]] +include::partial$roundtripSeparatedGraphml.adoc[] + [[export-graphml-whole-database]] === Export whole database to GraphML diff --git a/docs/asciidoc/modules/ROOT/partials/roundtripSeparatedGraphml.adoc b/docs/asciidoc/modules/ROOT/partials/roundtripSeparatedGraphml.adoc new file mode 100644 index 0000000000..bc6eb0f1b6 --- /dev/null +++ b/docs/asciidoc/modules/ROOT/partials/roundtripSeparatedGraphml.adoc @@ -0,0 +1,138 @@ +== Round trip separated GraphML files + +With this dataset: + +[source,cypher] +---- +CREATE (f:Foo:Foo2:Foo0 {name:'foo', born:Date('2018-10-10'), place:point({ longitude: 56.7, latitude: 12.78, height: 100 })})-[:KNOWS]->(b:Bar {name:'bar',age:42, place:point({ longitude: 56.7, latitude: 12.78})}); +CREATE (:Foo {name: 'zzz'})-[:KNOWS]->(:Bar {age: 0}); +CREATE (:Foo {name: 'aaa'})-[:KNOWS {id: 1}]->(:Bar {age: 666}); +---- + +we can execute these 3 export queries: + +[source,cypher] +---- +// Foo nodes +call 
apoc.export.graphml.query('MATCH (start:Foo)-[:KNOWS]->(:Bar) RETURN start', 'queryNodesFoo.graphml', {useTypes: true}); + +// Bar nodes +call apoc.export.graphml.query('MATCH (:Foo)-[:KNOWS]->(end:Bar) RETURN end', 'queryNodesBar.graphml', {useTypes: true}); + +// KNOWS rels +MATCH (:Foo)-[rel:KNOWS]->(:Bar) +WITH collect(rel) as rels +call apoc.export.graphml.data([], rels, 'queryRelationship.graphml', {useTypes: true}) +YIELD nodes, relationships RETURN nodes, relationships; +---- + + +In this case we will have these 3 files: +.queryNodesFoo.graphml +[source,xml] +---- + + + + + + + +:Foo:Foo0:Foo22018-10-10foo{"crs":"wgs-84-3d","latitude":12.78,"longitude":56.7,"height":100.0} +:Foozzz +:Fooaaa + + +---- + +.queryNodesBar.graphml +[source,xml] +---- + + + + + + + +:Barbar42{"crs":"wgs-84","latitude":12.78,"longitude":56.7,"height":null} +:Bar0 +:Bar666 + + +---- + +.queryRelationship.graphml +[source,xml] +---- + + + + + +KNOWS +KNOWS +KNOWS1 + + +---- + +We can then import these files into another db, recreating the original dataset, with these queries: +[source,cypher] +---- +CALL apoc.import.graphml('queryNodesFoo.graphml', {readLabels: true, storeNodeIds: true}); +CALL apoc.import.graphml('queryNodesBar.graphml', {readLabels: true, storeNodeIds: true}); +CALL apoc.import.graphml('queryRelationship.graphml', {readLabels: true, source: {label: 'Foo'}, target: {label: 'Bar'}}); +---- + +Note that we have to import the nodes first, +and that we used `storeNodeIds: true` to import the `id` attribute of the `node` tags as a property and `readLabels` to populate the nodes with labels. 
+ + +=== With custom property key + +Otherwise, we can leverage a custom property and avoid importing the `id` attribute (via `storeNodeIds: true`) +in this way (same dataset and nodes export query as before): + +[source,cypher] +---- +// KNOWS rels +MATCH (:Foo)-[rel:KNOWS]->(:Bar) +WITH collect(rel) as rels +call apoc.export.graphml.data([], rels, 'queryRelationship.graphml', + {useTypes: true, source: {id: 'name'}, target: {id: 'age'}}) +YIELD nodes, relationships RETURN nodes, relationships; +---- + +[NOTE] +==== +It is strongly recommended to use a unique constraint to ensure uniqueness, +in this case on label `Foo` and property `name` and on label `Bar` and property `age`. +==== + + +The above query generates this rel file: + +.queryRelationship.graphml +[source,xml] +---- + + + + + +KNOWS +KNOWS +KNOWS1 + + +---- + +Finally, we can import the files using the same id (name and age) as above: +[source,cypher] +---- +CALL apoc.import.graphml('queryNodesFoo.graphml', {readLabels: true}); +CALL apoc.import.graphml('queryNodesBar.graphml', {readLabels: true}); +CALL apoc.import.graphml('queryRelationship.graphml', + {readLabels: true, source: {label: 'Foo', id: 'name'}, target: {label: 'Bar', id: 'age'}}); +---- diff --git a/docs/asciidoc/modules/ROOT/partials/usage/apoc.import.graphml.adoc b/docs/asciidoc/modules/ROOT/partials/usage/apoc.import.graphml.adoc index 5f262bf161..295ca7c149 100644 --- a/docs/asciidoc/modules/ROOT/partials/usage/apoc.import.graphml.adoc +++ b/docs/asciidoc/modules/ROOT/partials/usage/apoc.import.graphml.adoc @@ -229,3 +229,6 @@ RETURN source, format, nodes, relationships, properties | source | format | nodes | relationships | properties | "binary" | "graphml" | 2 | 1 | 7 |=== + +[[roundtip-separated-files]] +include::partial$roundtripSeparatedGraphml.adoc[] \ No newline at end of file diff --git a/docs/asciidoc/modules/ROOT/partials/usage/config/apoc.import.graphml.adoc 
b/docs/asciidoc/modules/ROOT/partials/usage/config/apoc.import.graphml.adoc index 08ba184c2e..4e0562ba9d 100644 --- a/docs/asciidoc/modules/ROOT/partials/usage/config/apoc.import.graphml.adoc +++ b/docs/asciidoc/modules/ROOT/partials/usage/config/apoc.import.graphml.adoc @@ -9,5 +9,19 @@ The procedure support the following config parameters: | storeNodeIds | Boolean | false | store the `id` property of `node` elements | batchSize | Integer | 20000 | The number of elements to process per transaction | compression | `Enum[NONE, BYTES, GZIP, BZIP2, DEFLATE, BLOCK_LZ4, FRAMED_SNAPPY]` | `null` | Allow taking binary data, either not compressed (value: `NONE`) or compressed (other values) +| source | Map | Empty map | See below +| target | Map | Empty map | See below See the xref::overview/apoc.load/apoc.load.csv.adoc#_binary_file[Binary file example] -|=== \ No newline at end of file +|=== + +=== source / target config + +Allows the import of relationships when the source and/or target nodes are not present in the file, searching for the nodes via a custom label and property. +To do this, we can insert into the config map `source: {label: 'SourceLabel', id: 'sourceProperty'}` and/or `target: {label: 'TargetLabel', id: 'targetProperty'}`. +In this way, we can search start and end nodes via the `source` and `target` attributes of the `edge` tag. + +For example, with a config map `{source: {id: 'myId', label: 'Foo'}, target: {id: 'other', label: 'Bar'}}` +and an edge row like `<edge id="e0" source="n0" target="n1" label="KNOWS"><data key="label">KNOWS</data></edge>` +we search a source node `(:Foo {myId: 'n0'})` and an end node `(:Bar {other: 'n1'})`. +The id key is optional (the default is `'id'`). +