diff --git a/regress/expected/cypher_delete.out b/regress/expected/cypher_delete.out index d7d198261..0a44ded2b 100644 --- a/regress/expected/cypher_delete.out +++ b/regress/expected/cypher_delete.out @@ -62,9 +62,9 @@ SELECT * FROM cypher('cypher_delete', $$CREATE (:v)-[:e]->(:v)$$) AS (a agtype); --Should Fail SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[e]->(n2) DELETE n1 RETURN n1$$) AS (a agtype); -ERROR: Cannot delete vertex n1, because it still has edges attached. To delete this vertex, you must first delete the attached edges. +ERROR: Cannot delete a vertex that has edge(s). Delete the edge(s) first, or try DETACH DELETE. SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[e]->(n2) DELETE n2 RETURN n2$$) AS (a agtype); -ERROR: Cannot delete vertex n2, because it still has edges attached. To delete this vertex, you must first delete the attached edges. +ERROR: Cannot delete a vertex that has edge(s). Delete the edge(s) first, or try DETACH DELETE. SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[e]->(n2) DELETE e RETURN e$$) AS (a agtype); a ------------------------------------------------------------------------------------------------------------------------ @@ -188,7 +188,7 @@ SELECT * FROM cypher('cypher_delete', $$CREATE (n:v)-[:e]->(:v) CREATE (n)-[:e]- (0 rows) SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[]->() DELETE n1 RETURN n1$$) AS (a agtype); -ERROR: Cannot delete vertex n1, because it still has edges attached. To delete this vertex, you must first delete the attached edges. +ERROR: Cannot delete a vertex that has edge(s). Delete the edge(s) first, or try DETACH DELETE. --Cleanup SELECT * FROM cypher('cypher_delete', $$MATCH(n) DETACH DELETE n RETURN n$$) AS (a agtype); a @@ -234,7 +234,7 @@ SELECT * FROM cypher('cypher_delete', $$CREATE (n:v)-[:e]->(:v)$$) AS (a agtype) (0 rows) SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[e]->() DELETE n1, e RETURN n1$$) AS (a agtype); -ERROR: Cannot delete vertex n1, because it still has edges attached. To delete this vertex, you must first delete the attached edges. +ERROR: Cannot delete a vertex that has edge(s). Delete the edge(s) first, or try DETACH DELETE. --Cleanup SELECT * FROM cypher('cypher_delete', $$MATCH(n) DETACH DELETE n RETURN n$$) AS (a agtype); a @@ -651,6 +651,113 @@ SELECT * FROM cypher('cypher_delete', $$MATCH (u:vertices) RETURN u $$) AS (resu -------- (0 rows) +-- +-- Detach Delete +-- +SELECT create_graph('detach_delete'); +NOTICE: graph "detach_delete" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('detach_delete', +$$ + CREATE (x:Label3{name:'x', delete: true}), + (y:Label3{name:'y', delete: true}), + (a:Label1{name:'a', delete: true}), + (b:Label5{name:'b'}), + (c:Label5{name:'c'}), + (d:Label5{name:'d'}), + (m:Label7{name:'m', delete: true}), + (n:Label2{name:'n'}), + (p:Label2{name:'p'}), + (q:Label2{name:'q'}), + (a)-[:rel1{name:'ab'}]->(b), + (c)-[:rel2{name:'cd'}]->(d), + (n)-[:rel3{name:'nm'}]->(m), + (a)-[:rel4{name:'am'}]->(m), + (p)-[:rel5{name:'pq'}]->(q) +$$ +) as (a agtype); + a +--- +(0 rows) + +-- no vertices or edges are deleted (error is expected) +SELECT * FROM cypher('detach_delete', $$ MATCH (x:Label1), (y:Label3), (z:Label7) DELETE x, y, z RETURN 1 $$) as (a agtype); +ERROR: Cannot delete a vertex that has edge(s). Delete the edge(s) first, or try DETACH DELETE. +SELECT * FROM cypher('detach_delete', $$ MATCH (v) RETURN v.name $$) as (vname agtype); + vname +------- + "x" + "y" + "a" + "b" + "c" + "d" + "m" + "n" + "p" + "q" +(10 rows) + +SELECT * FROM cypher('detach_delete', $$ MATCH ()-[e]->() RETURN e.name $$) as (ename agtype); + ename +------- + "ab" + "cd" + "nm" + "am" + "pq" +(5 rows) + +-- x, y, a, m, ab, nm, am are deleted +SELECT * FROM cypher('detach_delete', $$ MATCH (x:Label1), (y:Label3), (z:Label7) DETACH DELETE x, y, z RETURN 1 $$) as (a agtype); + a +--- + 1 + 1 +(2 rows) + +SELECT * FROM cypher('detach_delete', $$ MATCH (v) RETURN v.name $$) as (vname agtype); + vname +------- + "b" + "c" + "d" + "n" + "p" + "q" +(6 rows) + +SELECT * FROM cypher('detach_delete', $$ MATCH ()-[e]->() RETURN e.name $$) as (ename agtype); + ename +------- + "cd" + "pq" +(2 rows) + +SELECT drop_graph('detach_delete', true); +NOTICE: drop cascades to 12 other objects +DETAIL: drop cascades to table detach_delete._ag_label_vertex +drop cascades to table detach_delete._ag_label_edge +drop cascades to table detach_delete."Label3" +drop cascades to table detach_delete."Label1" +drop cascades to table detach_delete."Label5" +drop cascades to table detach_delete."Label7" +drop cascades to table detach_delete."Label2" +drop cascades to table detach_delete.rel1 +drop cascades to table detach_delete.rel2 +drop cascades to table detach_delete.rel3 +drop cascades to table detach_delete.rel4 +drop cascades to table detach_delete.rel5 +NOTICE: graph "detach_delete" has been dropped + drop_graph +------------ + +(1 row) + -- -- Clean up -- diff --git a/regress/sql/cypher_delete.sql b/regress/sql/cypher_delete.sql index 38d911683..f0d98b6ec 100644 --- a/regress/sql/cypher_delete.sql +++ b/regress/sql/cypher_delete.sql @@ -246,6 +246,43 @@ END; SELECT * FROM cypher('cypher_delete', $$MATCH (u:vertices) RETURN u $$) AS (result agtype); +-- +-- Detach Delete +-- + +SELECT create_graph('detach_delete'); +SELECT * FROM cypher('detach_delete', +$$ + CREATE (x:Label3{name:'x', delete: true}), + (y:Label3{name:'y', delete: true}), + (a:Label1{name:'a', delete: true}), + (b:Label5{name:'b'}), + (c:Label5{name:'c'}), + (d:Label5{name:'d'}), + (m:Label7{name:'m', delete: true}), + (n:Label2{name:'n'}), + (p:Label2{name:'p'}), + (q:Label2{name:'q'}), + (a)-[:rel1{name:'ab'}]->(b), + (c)-[:rel2{name:'cd'}]->(d), + (n)-[:rel3{name:'nm'}]->(m), + (a)-[:rel4{name:'am'}]->(m), + (p)-[:rel5{name:'pq'}]->(q) +$$ +) as (a agtype); + +-- no vertices or edges are deleted (error is expected) +SELECT * FROM cypher('detach_delete', $$ MATCH (x:Label1), (y:Label3), (z:Label7) DELETE x, y, z RETURN 1 $$) as (a agtype); +SELECT * FROM cypher('detach_delete', $$ MATCH (v) RETURN v.name $$) as (vname agtype); +SELECT * FROM cypher('detach_delete', $$ MATCH ()-[e]->() RETURN e.name $$) as (ename agtype); + +-- x, y, a, m, ab, nm, am are deleted +SELECT * FROM cypher('detach_delete', $$ MATCH (x:Label1), (y:Label3), (z:Label7) DETACH DELETE x, y, z RETURN 1 $$) as (a agtype); +SELECT * FROM cypher('detach_delete', $$ MATCH (v) RETURN v.name $$) as (vname agtype); +SELECT * FROM cypher('detach_delete', $$ MATCH ()-[e]->() RETURN e.name $$) as (ename agtype); + +SELECT drop_graph('detach_delete', true); + -- -- Clean up -- diff --git a/src/backend/executor/cypher_delete.c b/src/backend/executor/cypher_delete.c index 68fafae77..2ffc3b847 100644 --- a/src/backend/executor/cypher_delete.c +++ b/src/backend/executor/cypher_delete.c @@ -32,6 +32,7 @@ #include "parser/parsetree.h" #include "storage/bufmgr.h" #include "utils/rel.h" +#include "common/hashfn.h" #include "catalog/ag_label.h" #include "executor/cypher_executor.h" @@ -48,9 +49,7 @@ static void rescan_cypher_delete(CustomScanState *node); static void process_delete_list(CustomScanState *node); -static void find_connected_edges(CustomScanState *node, char *graph_name, - List *labels, char *var_name, graphid id, - bool detach_delete); +static void check_for_connected_edges(CustomScanState *node); static agtype_value *extract_entity(CustomScanState *node, TupleTableSlot *scanTupleSlot, int entity_position); @@ -83,6 +82,7 @@ static void begin_cypher_delete(CustomScanState *node, EState *estate, cypher_delete_custom_scan_state *css = (cypher_delete_custom_scan_state *)node; Plan *subplan; + HASHCTL hashctl; Assert(list_length(css->cs->custom_plans) == 1); @@ -112,6 +112,16 @@ static void begin_cypher_delete(CustomScanState *node, EState *estate, */ css->edge_labels = get_all_edge_labels_per_graph(estate, css->delete_data->graph_oid); + /* init vertex_id_htab */ + MemSet(&hashctl, 0, sizeof(hashctl)); + hashctl.keysize = sizeof(graphid); + hashctl.entrysize = + sizeof(graphid); // entries are not used, but entrysize must >= keysize + hashctl.hash = tag_hash; + css->vertex_id_htab = hash_create(DELETE_VERTEX_HTAB_NAME, + DELETE_VERTEX_HTAB_SIZE, &hashctl, + HASH_ELEM | HASH_FUNCTION); + /* * Postgres does not assign the es_output_cid in queries that do * not write to disk, ie: SELECT commands. We need the command id @@ -194,6 +204,10 @@ static TupleTableSlot *exec_cypher_delete(CustomScanState *node) */ static void end_cypher_delete(CustomScanState *node) { + check_for_connected_edges(node); + + hash_destroy(((cypher_delete_custom_scan_state *)node)->vertex_id_htab); + ExecEndNode(node->ss.ps.lefttree); } @@ -443,15 +457,15 @@ static void process_delete_list(CustomScanState *node) } /* - * For vertices, we need to check if the vertex is connected to any - * edges, * if there are, we need to delete them or throw an error, - * depending on if the query specified the DETACH option. + * For vertices, we insert the vertex ID in the hashtable + * vertex_id_htab. This hashtable is used later to process + * connected edges. */ if (original_entity_value->type == AGTV_VERTEX) { - find_connected_edges(node, css->delete_data->graph_name, - css->edge_labels, item->var_name, - id->val.int_value, css->delete_data->detach); + bool found; + hash_search(css->vertex_id_htab, (void *)&(id->val.int_value), + HASH_ENTER, &found); } /* At this point, we are ready to delete the node/vertex. */ @@ -464,30 +478,20 @@ static void process_delete_list(CustomScanState *node) } /* - * Find the edges connected to the given node. If there is any edges either - * delete them or throw an error, depending on the detach delete option. + * Scans the edge tables and checks if the deleted vertices are connected to + * any edge(s). For DETACH DELETE, the connected edges are deleted. Otherwise, + * an error is thrown. */ -static void find_connected_edges(CustomScanState *node, char *graph_name, - List *labels, char *var_name, graphid id, - bool detach_delete) +static void check_for_connected_edges(CustomScanState *node) { + ListCell *lc; cypher_delete_custom_scan_state *css = (cypher_delete_custom_scan_state *)node; EState *estate = css->css.ss.ps.state; - ListCell *lc; + char *graph_name = css->delete_data->graph_name; - Increment_Estate_CommandId(estate); - - /* - * We need to scan through all the edges to see if this vertex has - * any edges attached to it. - * - * XXX: If we implement an on-disc graph storage system. Such as - * an adjacency matrix, the performance of this check can be massively - * improved. However, right now we have to scan every edge to see if - * one has this vertex as a start or end vertex. - */ - foreach(lc, labels) + /* scans each label from css->edge_labels */ + foreach (lc, css->edge_labels) { char *label_name = lfirst(lc); ResultRelInfo *resultRelInfo; @@ -495,54 +499,66 @@ static void find_connected_edges(CustomScanState *node, char *graph_name, HeapTuple tuple; TupleTableSlot *slot; - resultRelInfo = create_entity_result_rel_info(estate, - graph_name, label_name); - + resultRelInfo = create_entity_result_rel_info(estate, graph_name, + label_name); scan_desc = table_beginscan(resultRelInfo->ri_RelationDesc, estate->es_snapshot, 0, NULL); - slot = ExecInitExtraTupleSlot( estate, RelationGetDescr(resultRelInfo->ri_RelationDesc), &TTSOpsHeapTuple); - // scan the table - while(true) + /* for each row */ + while (true) { - graphid startid, endid; + graphid startid; + graphid endid; bool isNull; + bool found_startid = false; + bool found_endid = false; tuple = heap_getnext(scan_desc, ForwardScanDirection); - // no more tuples to process, break and scan the next label. + /* no more tuples to process, break and scan the next label. */ if (!HeapTupleIsValid(tuple)) + { break; + } ExecStoreHeapTuple(tuple, slot, false); - startid = GRAPHID_GET_DATUM(slot_getattr(slot, Anum_ag_label_edge_table_start_id, &isNull)); - endid = GRAPHID_GET_DATUM(slot_getattr(slot, Anum_ag_label_edge_table_end_id, &isNull)); + startid = GRAPHID_GET_DATUM(slot_getattr( + slot, Anum_ag_label_edge_table_start_id, &isNull)); + endid = GRAPHID_GET_DATUM( + slot_getattr(slot, Anum_ag_label_edge_table_end_id, &isNull)); + + hash_search(css->vertex_id_htab, (void *)&startid, HASH_FIND, + &found_startid); - if (id == startid || id == endid) + if (!found_startid) { - /* - * We have found an edge that uses the vertex. Either delete the - * edge or throw an error. Depending on whether the DETACH - * option was specified in the query. - */ - if (detach_delete) + hash_search(css->vertex_id_htab, (void *)&endid, HASH_FIND, + &found_endid); + } + + if (found_startid || found_endid) + { + if (css->delete_data->detach) + { delete_entity(estate, resultRelInfo, tuple); + } else - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("Cannot delete vertex %s, because it still has edges attached. " - "To delete this vertex, you must first delete the attached edges.", - var_name))); + { + ereport( + ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg( + "Cannot delete a vertex that has edge(s). " + "Delete the edge(s) first, or try DETACH DELETE."))); + } } } table_endscan(scan_desc); destroy_entity_result_rel_info(resultRelInfo); } - - Decrement_Estate_CommandId(estate); } diff --git a/src/include/executor/cypher_utils.h b/src/include/executor/cypher_utils.h index 58dd8fa9c..056f1a967 100644 --- a/src/include/executor/cypher_utils.h +++ b/src/include/executor/cypher_utils.h @@ -50,6 +50,9 @@ estate->es_output_cid--; \ estate->es_snapshot->curcid--; +#define DELETE_VERTEX_HTAB_NAME "delete_vertex_htab" +#define DELETE_VERTEX_HTAB_SIZE 1000000 + typedef struct cypher_create_custom_scan_state { CustomScanState css; @@ -76,6 +79,24 @@ typedef struct cypher_delete_custom_scan_state cypher_delete_information *delete_data; int flags; List *edge_labels; + + /* + * Deleted vertex IDs are stored in this hashtable. + * + * When a vertex item is deleted, it must be checked if there is any edges + * connected to it. The connected edges are either deleted or an error is + * thrown depending on the DETACH option. However, the check for connected + * edges is not done immediately. Instead the deleted vertex IDs are stored + * in the hashtable. Once all vertices are deleted, this hashtable is used + * to process the connected edges with only one scan of the edge tables. + * + * Note on performance: Additional performance gain may be possible if + * the standard DELETE .. USING .. command can be used instead of this + * hashtable. Because Postgres may create a better plan to execute that + * command depending on the statistics and available indexes on start_id + * and end_id column. + */ + HTAB *vertex_id_htab; } cypher_delete_custom_scan_state; typedef struct cypher_merge_custom_scan_state