-
Notifications
You must be signed in to change notification settings - Fork 494
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fixes #4138: Add support for loading Gephi GEXF file format
- Loading branch information
Showing
12 changed files
with
1,448 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,222 @@ | ||
[[gexf]] | ||
= Load GEXF (Graph Exchange XML Format) | ||
:description: This section describes procedures that can be used to import data from GEXF files. | ||
|
||
|
||
|
||
Many existing applications and data integrations use GEXF to describes a graph with nodes and edges. | ||
For further information, you should visit the https://gexf.net/[official documentation]. | ||
|
||
It is possible to load or import nodes and relationship from a GEXF file with the procedures | ||
`apoc.load.gexf` and `apoc.import.gexf`. You need to: | ||
|
||
* provide a path to a GEXF file | ||
* provide configuration (optional) | ||
|
||
The `apoc.import.gexf` read as the `apoc.load.gexf` but also create nodes and relationships in Neo4j. | ||
|
||
For reading from files you'll have to enable the config option: | ||
|
||
---- | ||
apoc.import.file.enabled=true | ||
---- | ||
|
||
By default file paths are global, for paths relative to the `import` directory set: | ||
|
||
---- | ||
apoc.import.file.use_neo4j_config=true | ||
---- | ||
|
||
== Examples for apoc.load.gexf | ||
|
||
.load.gexf | ||
---- | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<gexf version="1.2"> | ||
<graph defaultedgetype="directed"> | ||
<nodes> | ||
<node foo="bar"> | ||
<attvalues> | ||
<attvalue for="0" value="http://gephi.org"/> | ||
</attvalues> | ||
</node> | ||
</nodes> | ||
</graph> | ||
</gexf> | ||
---- | ||
|
||
[source, cypher] | ||
---- | ||
CALL apoc.load.gexf('load.gexf') | ||
---- | ||
|
||
.Results | ||
[opts="header"] | ||
|=== | ||
| value | ||
| {_type: gexf, _children: [{_type: graph, defaultedgetype: directed, _children: [{_type: nodes, _children: [{_type: node, _children: [{_type: attvalues, _children: [{_type: attvalue, for: 0, value: http://gephi.org}]}], foo: bar}]}]}], version: 1.2} | ||
|=== | ||
|
||
== Examples for apoc.import.gexf | ||
|
||
Besides the file you can pass in a config map: | ||
|
||
.Config parameters | ||
[opts=header] | ||
|=== | ||
| name | type | default | description | ||
| readLabels | Boolean | false | Creates node labels based on the value in the `labels` property of `node` elements | ||
| defaultRelationshipType | String | RELATED | The default relationship type to use if none is specified in the GraphML file | ||
| storeNodeIds | Boolean | false | store the `id` property of `node` elements | ||
| batchSize | Integer | 20000 | The number of elements to process per transaction | ||
| compression | `Enum[NONE, BYTES, GZIP, BZIP2, DEFLATE, BLOCK_LZ4, FRAMED_SNAPPY]` | `null` | Allow taking binary data, either not compressed (value: `NONE`) or compressed (other values) | ||
| source | Map<String,String> | Empty map | See `source / target config` parameter below | ||
| target | Map<String,String> | Empty map | See `source / target config` parameter below | ||
See the xref::overview/apoc.load/apoc.load.csv.adoc#_binary_file[Binary file example] | ||
|=== | ||
|
||
|
||
With the following file will be created: | ||
|
||
* 1 node with label Gephi | ||
* 2 nodes with label Webatlas | ||
* 1 node with label RTGI | ||
* 1 node with label BarabasiLab | ||
* 6 relationships of kind KNOWS | ||
* 1 relationship of kind HAS_TICKET | ||
* 1 relationship of kind BAZ | ||
|
||
.data.gexf | ||
---- | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<gexf xmlns="http://gexf.net/1.3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://gexf.net/1.3 http://gexf.net/1.3/gexf.xsd" version="1.2"> | ||
<meta lastmodifieddate="2009-03-20"> | ||
<creator>Gephi.org</creator> | ||
<description>A Web network</description> | ||
</meta> | ||
<graph defaultedgetype="directed"> | ||
<attributes class="node"> | ||
<attribute id="0" title="url" type="string"/> | ||
<attribute id="room" title="room" type="integer"/> | ||
<attribute id="projects" title="projects" type="long"/> | ||
<attribute id="price" title="price" type="double"/> | ||
<attribute id="1" title="indegree" type="float"/> | ||
<attribute id="members" title="members" type="liststring"/> | ||
<attribute id="pins" title="pins" type="listboolean"/> | ||
<attribute id="2" title="frog" type="boolean"> | ||
<default>true</default> | ||
</attribute> | ||
</attributes> | ||
<attributes class="edge"> | ||
<attribute id="score" title="score" type="float"/> | ||
</attributes> | ||
<nodes> | ||
<node id="0" label="Gephi"> | ||
<attvalues> | ||
<attvalue for="0" value="http://gephi.org"/> | ||
<attvalue for="1" value="1"/> | ||
<attvalue for="room" value="10"/> | ||
<attvalue for="price" value="10.02"/> | ||
<attvalue for="projects" value="300"/> | ||
<attvalue for="members" value="[Altomare, Sterpeto, Lino]"/> | ||
<attvalue for="pins" value="[true, false, true, false]"/> | ||
</attvalues> | ||
</node> | ||
<node id="5" label="Gephi"> | ||
<attvalues> | ||
<attvalue for="0" value="http://test.gephi.org"/> | ||
<attvalue for="1" value="2"/> | ||
</attvalues> | ||
</node> | ||
<node id="1" label="Webatlas"> | ||
<attvalues> | ||
<attvalue for="0" value="http://webatlas.fr"/> | ||
<attvalue for="1" value="2"/> | ||
</attvalues> | ||
</node> | ||
<node id="2" label="RTGI"> | ||
<attvalues> | ||
<attvalue for="0" value="http://rtgi.fr"/> | ||
<attvalue for="1" value="1"/> | ||
</attvalues> | ||
</node> | ||
<node id="3" label=":BarabasiLab:Webatlas"> | ||
<attvalues> | ||
<attvalue for="0" value="http://barabasilab.com"/> | ||
<attvalue for="1" value="1"/> | ||
<attvalue for="2" value="false"/> | ||
</attvalues> | ||
</node> | ||
</nodes> | ||
<edges> | ||
<edge source="0" target="1" kind="KNOWS"> | ||
<attvalues> | ||
<attvalue for="score" value="1.5"/> | ||
</attvalues> | ||
</edge> | ||
<edge source="0" target="0" kind="BAZ"> | ||
<attvalues> | ||
<attvalue for="foo" value="bar"/> | ||
<attvalue for="score" value="2"/> | ||
</attvalues> | ||
</edge> | ||
<edge source="0" target="2" kind="HAS_TICKET"> | ||
<attvalues> | ||
<attvalue for="ajeje" value="brazorf"/> | ||
<attvalue for="score" value="3"/> | ||
</attvalues> | ||
</edge> | ||
<edge source="0" target="2" kind="KNOWS" /> | ||
<edge source="1" target="0" kind="KNOWS" /> | ||
<edge source="2" target="1" kind="KNOWS" /> | ||
<edge source="0" target="3" kind="KNOWS" /> | ||
<edge source="5" target="3" kind="KNOWS" /> | ||
</edges> | ||
</graph> | ||
</gexf> | ||
---- | ||
|
||
[source, cypher] | ||
---- | ||
CALL apoc.import.gexf('data.gexf', {readLabels:true}) | ||
---- | ||
|
||
.Results | ||
[opts="header"] | ||
|=== | ||
| value | ||
| { | ||
"relationships" : 8, | ||
"batches" : 0, | ||
"file" : "file:/../data.gexf", | ||
"nodes" : 5, | ||
"format" : "gexf", | ||
"source" : "file", | ||
"time" : 9736, | ||
"rows" : 0, | ||
"batchSize" : -1, | ||
"done" : true, | ||
"properties" : 21 | ||
} | ||
|=== | ||
|
||
We can also store the node IDs by executing: | ||
[source, cypher] | ||
---- | ||
CALL apoc.import.gexf('data.gexf', {readLabels:true, storeNodeIds: true}) | ||
---- | ||
|
||
=== source / target config | ||
|
||
Allows the import of relations in case the source and / or target nodes are not present in the file, searching for nodes via a custom label and property. | ||
To do this, we can insert into the config map `source: {label: '<MY_SOURCE_LABEL>', id: `'<MY_SOURCE_ID>'`}` and/or `source: {label: '<MY_TARGET_LABEL>', id: `'<MY_TARGET_ID>'`}` | ||
In this way, we can search start and end nodes via the source and end attribute of `edge` tag. | ||
|
||
For example, with a config map `{source: {id: 'myId', label: 'Foo'}, target: {id: 'other', label: 'Bar'}}` | ||
with a edge row like `<edge id="e0" source="n0" target="n1" label="KNOWS"><data key="label">KNOWS</data></edge>` | ||
we search a source node `(:Foo {myId: 'n0'})` and an end node `(:Bar {other: 'n1'})`. | ||
The id key is optional (the default is `'id'`). | ||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
package apoc.load; | ||
|
||
import apoc.Extended; | ||
import apoc.Pools; | ||
import apoc.export.util.CountingReader; | ||
import apoc.export.util.ExportConfig; | ||
import apoc.export.util.ProgressReporter; | ||
import apoc.load.util.XmlReadUtil.Import; | ||
import apoc.result.MapResult; | ||
import apoc.result.ProgressInfo; | ||
import apoc.util.FileUtils; | ||
import apoc.util.Util; | ||
import org.neo4j.graphdb.GraphDatabaseService; | ||
import org.neo4j.graphdb.security.URLAccessChecker; | ||
import org.neo4j.procedure.Context; | ||
import org.neo4j.procedure.Description; | ||
import org.neo4j.procedure.Mode; | ||
import org.neo4j.procedure.Name; | ||
import org.neo4j.procedure.Procedure; | ||
import org.neo4j.procedure.TerminationGuard; | ||
|
||
import java.util.Map; | ||
import java.util.stream.Stream; | ||
|
||
import static apoc.load.util.XmlReadUtil.Load.xmlXpathToMapResult; | ||
|
||
@Extended | ||
public class Gexf { | ||
|
||
@Context | ||
public GraphDatabaseService db; | ||
|
||
@Context | ||
public URLAccessChecker urlAccessChecker; | ||
|
||
@Context | ||
public TerminationGuard terminationGuard; | ||
|
||
@Context | ||
public Pools pools; | ||
|
||
@Procedure("apoc.load.gexf") | ||
@Description("apoc.load.gexf(urlOrBinary, path, $config) - load Gexf file from URL or binary source") | ||
public Stream<MapResult> gexf( | ||
@Name("urlOrBinary") Object urlOrBinary, | ||
@Name(value = "config", defaultValue = "{}") Map<String, Object> config | ||
) throws Exception { | ||
return xmlXpathToMapResult(urlOrBinary, urlAccessChecker, terminationGuard, config); | ||
} | ||
|
||
@Procedure(name = "apoc.import.gexf", mode = Mode.WRITE) | ||
@Description("Imports a graph from the provided GraphML file.") | ||
public Stream<ProgressInfo> importGexf( | ||
@Name("urlOrBinaryFile") Object urlOrBinaryFile, @Name("config") Map<String, Object> config) { | ||
ProgressInfo result = Util.inThread(pools, () -> { | ||
ExportConfig exportConfig = new ExportConfig(config); | ||
String file = null; | ||
String source = "binary"; | ||
if (urlOrBinaryFile instanceof String) { | ||
file = (String) urlOrBinaryFile; | ||
source = "file"; | ||
} | ||
ProgressReporter reporter = new ProgressReporter(null, null, new ProgressInfo(file, source, "gexf")); | ||
Import graphReader = new Import(db) | ||
.reporter(reporter) | ||
.batchSize(exportConfig.getBatchSize()) | ||
.relType(exportConfig.defaultRelationshipType()) | ||
.source(exportConfig.getSource()) | ||
.target(exportConfig.getTarget()) | ||
.nodeLabels(exportConfig.readLabels()); | ||
|
||
if (exportConfig.storeNodeIds()) graphReader.storeNodeIds(); | ||
|
||
try (CountingReader reader = | ||
FileUtils.readerFor(urlOrBinaryFile, exportConfig.getCompressionAlgo(), urlAccessChecker)) { | ||
graphReader.parseXML(reader, terminationGuard); | ||
} | ||
|
||
return reporter.getTotal(); | ||
}); | ||
return Stream.of(result); | ||
} | ||
} |
Oops, something went wrong.