Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NOID] Fixes #4138: Add support for loading Gephi GEXF file format (#4171) #4260

Merged
merged 5 commits into from
Jan 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 45 additions & 9 deletions core/src/main/java/apoc/export/graphml/XmlGraphMLReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
*/
package apoc.export.graphml;

import static apoc.util.ConvertUtil.toValidValue;

import apoc.export.util.BatchTransaction;
import apoc.export.util.ExportConfig;
import apoc.export.util.Reporter;
Expand Down Expand Up @@ -218,13 +220,39 @@ public Object parseValue(String input) {
public static final QName TYPE = QName.valueOf("attr.type");
public static final QName LIST = QName.valueOf("attr.list");
public static final QName KEY = QName.valueOf("key");
public static final QName VALUE = QName.valueOf("value");
public static final QName DATA_TYPE = QName.valueOf("type");
public static final QName KIND = QName.valueOf("kind");

/**
 * Creates a reader that keeps a reference to the given database service and transaction.
 *
 * @param db the database service stored on this reader
 * @param tx the transaction stored on this reader
 */
public XmlGraphMLReader(GraphDatabaseService db, Transaction tx) {
    // The two assignments are independent of each other.
    (this).tx = tx;
    (this).db = db;
}

/**
 * XML dialects the reader can parse. Each constant carries the element and attribute names
 * that differ between the dialects, so the parsing loop can stay format-agnostic.
 *
 * <p>The fields are {@code final}: enum constants are shared singletons, so a reassignment
 * would silently change parsing behaviour for every user of the constant.
 */
public enum ReaderType {
    GRAPHML("data", KEY, LABEL, LABELS),
    GEXF("attvalue", FOR, KIND, LABEL);

    /** Local name of the element that carries a property value. */
    public final String data;

    /** Attribute of the {@link #data} element that holds the property key id. */
    public final QName key;

    /** Attribute read from an {@code edge} element as its label. */
    public final QName label;

    /** Attribute read from a node element as its labels. */
    public final QName labels;

    ReaderType(String data, QName key, QName label, QName labels) {
        this.data = data;
        this.key = key;
        this.label = label;
        this.labels = labels;
    }
}

/**
 * Parses the input using the {@link ReaderType#GRAPHML} dialect.
 *
 * <p>Convenience overload that delegates to
 * {@code parseXML(Reader, TerminationGuard, ReaderType)}.
 *
 * @param input the XML source to read
 * @param terminationGuard guard passed through to the delegated call
 * @return the value returned by the delegated call
 * @throws XMLStreamException if the delegated call fails to read the XML stream
 */
public long parseXML(Reader input, TerminationGuard terminationGuard) throws XMLStreamException {
    final ReaderType defaultFormat = ReaderType.GRAPHML;
    return parseXML(input, terminationGuard, defaultFormat);
}

public long parseXML(Reader input, TerminationGuard terminationGuard, ReaderType readerType)
throws XMLStreamException {
Map<String, Object> dataMap = new HashMap<>();
Map<String, Long> cache = new HashMap<>(1024 * 32);
XMLInputFactory inputFactory = XMLInputFactory.newInstance();
inputFactory.setProperty("javax.xml.stream.isCoalescing", true);
Expand All @@ -238,7 +266,6 @@ public long parseXML(Reader input, TerminationGuard terminationGuard) throws XML
int count = 0;
BatchTransaction tx = new BatchTransaction(db, batchSize * 10, reporter);
try {

while (reader.hasNext()) {
terminationGuard.check();
XMLEvent event;
Expand All @@ -257,11 +284,15 @@ public long parseXML(Reader input, TerminationGuard terminationGuard) throws XML
continue;
}
if (event.isStartElement()) {

StartElement element = event.asStartElement();
String name = element.getName().getLocalPart();

if (name.equals("graphml") || name.equals("graph")) continue;
boolean isNameGexf = readerType.equals(ReaderType.GEXF) && name.equals("gexf");
if (name.equals("graphml") || name.equals("graph") || isNameGexf) continue;
if (readerType.equals(ReaderType.GEXF) && name.equals("attribute")) {
String id = getAttribute(element, ID);
String type = getAttribute(element, DATA_TYPE);
dataMap.put(id, type);
}
if (name.equals("key")) {
String id = getAttribute(element, ID);
Key key = new Key(
Expand All @@ -284,19 +315,24 @@ public long parseXML(Reader input, TerminationGuard terminationGuard) throws XML
else relKeys.put(id, key);
continue;
}
if (name.equals("data")) {
if (name.equals(readerType.data)) {
if (last == null) continue;
String id = getAttribute(element, KEY);
String id = getAttribute(element, readerType.key);
boolean isNode = last instanceof Node;
Key key = isNode ? nodeKeys.get(id) : relKeys.get(id);
if (key == null) key = Key.defaultKey(id, isNode);
final Map.Entry<XMLEvent, Object> eventEntry = getDataEventEntry(reader, key);
final XMLEvent next = eventEntry.getKey();
final Object value = eventEntry.getValue();
Object value = readerType.equals(ReaderType.GRAPHML)
? eventEntry.getValue()
: getAttribute(element, VALUE);
if (value != null) {
if (this.labels && isNode && id.equals("labels")) {
addLabels((Node) last, value.toString());
} else if (!this.labels || isNode || !id.equals("label")) {
value = readerType.equals(ReaderType.GRAPHML)
? value
: toValidValue(value, key.name, dataMap);
last.setProperty(key.name, value);
if (reporter != null) reporter.update(0, 0, 1);
}
Expand All @@ -311,7 +347,7 @@ public long parseXML(Reader input, TerminationGuard terminationGuard) throws XML
String id = getAttribute(element, ID);
Node node = tx.getTransaction().createNode();
if (this.labels) {
String labels = getAttribute(element, LABELS);
String labels = getAttribute(element, readerType.labels);
addLabels(node, labels);
}
if (storeNodeIds) node.setProperty("id", id);
Expand All @@ -324,7 +360,7 @@ public long parseXML(Reader input, TerminationGuard terminationGuard) throws XML
}
if (name.equals("edge")) {
tx.increment();
String label = getAttribute(element, LABEL);
String label = getAttribute(element, readerType.label);
Node from = getByNodeId(cache, tx.getTransaction(), element, XmlNodeExport.NodeType.SOURCE);
Node to = getByNodeId(cache, tx.getTransaction(), element, XmlNodeExport.NodeType.TARGET);

Expand Down
39 changes: 25 additions & 14 deletions core/src/main/java/apoc/load/Xml.java
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ public Stream<MapResult> xml(
@Name(value = "config", defaultValue = "{}") Map<String, Object> config,
@Name(value = "simple", defaultValue = "false") boolean simpleMode)
throws Exception {
return xmlXpathToMapResult(urlOrBinary, simpleMode, path, config);
return xmlXpathToMapResult(urlOrBinary, simpleMode, path, config, terminationGuard);
}

@UserFunction("apoc.xml.parse")
Expand All @@ -128,29 +128,39 @@ public Map<String, Object> parse(
throws Exception {
if (config == null) config = Collections.emptyMap();
boolean failOnError = (boolean) config.getOrDefault("failOnError", true);
return parse(new ByteArrayInputStream(data.getBytes(Charset.forName("UTF-8"))), simpleMode, path, failOnError)
return parse(
new ByteArrayInputStream(data.getBytes(Charset.forName("UTF-8"))),
simpleMode,
path,
failOnError,
terminationGuard)
.map(mr -> mr.value)
.findFirst()
.orElse(null);
}

private Stream<MapResult> xmlXpathToMapResult(
@Name("urlOrBinary") Object urlOrBinary, boolean simpleMode, String path, Map<String, Object> config)
public static Stream<MapResult> xmlXpathToMapResult(
@Name("urlOrBinary") Object urlOrBinary,
boolean simpleMode,
String path,
Map<String, Object> config,
TerminationGuard terminationGuard)
throws Exception {
if (config == null) config = Collections.emptyMap();
boolean failOnError = (boolean) config.getOrDefault("failOnError", true);
try {
Map<String, Object> headers = (Map) config.getOrDefault("headers", Collections.emptyMap());
CountingInputStream is = FileUtils.inputStreamFor(
urlOrBinary, headers, null, (String) config.getOrDefault(COMPRESSION, CompressionAlgo.NONE.name()));
return parse(is, simpleMode, path, failOnError);
return parse(is, simpleMode, path, failOnError, terminationGuard);
} catch (Exception e) {
if (!failOnError) return Stream.of(new MapResult(Collections.emptyMap()));
else throw e;
}
}

private Stream<MapResult> parse(InputStream data, boolean simpleMode, String path, boolean failOnError)
public static Stream<MapResult> parse(
InputStream data, boolean simpleMode, String path, boolean failOnError, TerminationGuard terminationGuard)
throws Exception {
List<MapResult> result = new ArrayList<>();
try {
Expand All @@ -173,7 +183,7 @@ private Stream<MapResult> parse(InputStream data, boolean simpleMode, String pat
for (int i = 0; i < nodeList.getLength(); i++) {
final Deque<Map<String, Object>> stack = new LinkedList<>();

handleNode(stack, nodeList.item(i), simpleMode);
handleNode(stack, nodeList.item(i), simpleMode, terminationGuard);
for (int index = 0; index < stack.size(); index++) {
result.add(new MapResult(stack.pollFirst()));
}
Expand Down Expand Up @@ -223,15 +233,16 @@ private boolean proceedReader(XMLStreamReader reader) throws XMLStreamException
}
}

private void handleNode(Deque<Map<String, Object>> stack, Node node, boolean simpleMode) {
private static void handleNode(
Deque<Map<String, Object>> stack, Node node, boolean simpleMode, TerminationGuard terminationGuard) {
terminationGuard.check();

// Handle document node
if (node.getNodeType() == Node.DOCUMENT_NODE) {
NodeList children = node.getChildNodes();
for (int i = 0; i < children.getLength(); i++) {
if (children.item(i).getLocalName() != null) {
handleNode(stack, children.item(i), simpleMode);
handleNode(stack, children.item(i), simpleMode, terminationGuard);
return;
}
}
Expand All @@ -248,7 +259,7 @@ private void handleNode(Deque<Map<String, Object>> stack, Node node, boolean sim

// This is to deal with text between xml tags for example new line characters
if (child.getNodeType() != Node.TEXT_NODE && child.getNodeType() != Node.CDATA_SECTION_NODE) {
handleNode(stack, child, simpleMode);
handleNode(stack, child, simpleMode, terminationGuard);
count++;
} else {
// Deal with text nodes
Expand Down Expand Up @@ -290,7 +301,7 @@ private void handleNode(Deque<Map<String, Object>> stack, Node node, boolean sim
* @param node
* @param elementMap
*/
private void handleTypeAndAttributes(Node node, Map<String, Object> elementMap) {
private static void handleTypeAndAttributes(Node node, Map<String, Object> elementMap) {
// Set type
if (node.getLocalName() != null) {
elementMap.put("_type", node.getLocalName());
Expand All @@ -312,7 +323,7 @@ private void handleTypeAndAttributes(Node node, Map<String, Object> elementMap)
* @param node
* @param elementMap
*/
private void handleTextNode(Node node, Map<String, Object> elementMap) {
private static void handleTextNode(Node node, Map<String, Object> elementMap) {
Object text = "";
int nodeType = node.getNodeType();
switch (nodeType) {
Expand Down Expand Up @@ -344,7 +355,7 @@ private void handleTextNode(Node node, Map<String, Object> elementMap) {
* @param text
* @return
*/
private String normalizeText(String text) {
private static String normalizeText(String text) {
String[] tokens = StringUtils.split(text, "\n");
for (int i = 0; i < tokens.length; i++) {
tokens[i] = tokens[i].trim();
Expand Down Expand Up @@ -682,7 +693,7 @@ private void setPropertyIfNotNull(org.neo4j.graphdb.Node root, String propertyKe
}
}

private RuntimeException generateXmlDoctypeException() {
private static RuntimeException generateXmlDoctypeException() {
throw new RuntimeException("XML documents with a DOCTYPE are not allowed.");
}
}
Loading
Loading