From bf807bae1c5ae45fbd4531f39fca70bce9b6daa6 Mon Sep 17 00:00:00 2001 From: Rafsun Masud Date: Mon, 16 Oct 2023 17:16:19 -0700 Subject: [PATCH] Optimize performance of detach delete (#1276) Previously, for each vertex to be deleted, all edge tables were scanned once to process the connected edges. Now, this task is postponed until all vertices are deleted. So, the connected edges can be processed in only one scan of the edge tables regardless of the number of deleted vertices. --- regress/expected/cypher_delete.out | 115 +++++++++++++++++++++++++- regress/sql/cypher_delete.sql | 37 +++++++++ src/backend/executor/cypher_delete.c | 118 +++++++++++++++------------ src/include/executor/cypher_utils.h | 21 +++++ 4 files changed, 236 insertions(+), 55 deletions(-) diff --git a/regress/expected/cypher_delete.out b/regress/expected/cypher_delete.out index d7d198261..0a44ded2b 100644 --- a/regress/expected/cypher_delete.out +++ b/regress/expected/cypher_delete.out @@ -62,9 +62,9 @@ SELECT * FROM cypher('cypher_delete', $$CREATE (:v)-[:e]->(:v)$$) AS (a agtype); --Should Fail SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[e]->(n2) DELETE n1 RETURN n1$$) AS (a agtype); -ERROR: Cannot delete vertex n1, because it still has edges attached. To delete this vertex, you must first delete the attached edges. +ERROR: Cannot delete a vertex that has edge(s). Delete the edge(s) first, or try DETACH DELETE. SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[e]->(n2) DELETE n2 RETURN n2$$) AS (a agtype); -ERROR: Cannot delete vertex n2, because it still has edges attached. To delete this vertex, you must first delete the attached edges. +ERROR: Cannot delete a vertex that has edge(s). Delete the edge(s) first, or try DETACH DELETE. SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[e]->(n2) DELETE e RETURN e$$) AS (a agtype); a ------------------------------------------------------------------------------------------------------------------------ @@ -188,7 +188,7 @@ SELECT * FROM cypher('cypher_delete', $$CREATE (n:v)-[:e]->(:v) CREATE (n)-[:e]- (0 rows) SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[]->() DELETE n1 RETURN n1$$) AS (a agtype); -ERROR: Cannot delete vertex n1, because it still has edges attached. To delete this vertex, you must first delete the attached edges. +ERROR: Cannot delete a vertex that has edge(s). Delete the edge(s) first, or try DETACH DELETE. --Cleanup SELECT * FROM cypher('cypher_delete', $$MATCH(n) DETACH DELETE n RETURN n$$) AS (a agtype); a @@ -234,7 +234,7 @@ SELECT * FROM cypher('cypher_delete', $$CREATE (n:v)-[:e]->(:v)$$) AS (a agtype) (0 rows) SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[e]->() DELETE n1, e RETURN n1$$) AS (a agtype); -ERROR: Cannot delete vertex n1, because it still has edges attached. To delete this vertex, you must first delete the attached edges. +ERROR: Cannot delete a vertex that has edge(s). Delete the edge(s) first, or try DETACH DELETE. --Cleanup SELECT * FROM cypher('cypher_delete', $$MATCH(n) DETACH DELETE n RETURN n$$) AS (a agtype); a @@ -651,6 +651,113 @@ SELECT * FROM cypher('cypher_delete', $$MATCH (u:vertices) RETURN u $$) AS (resu -------- (0 rows) +-- +-- Detach Delete +-- +SELECT create_graph('detach_delete'); +NOTICE: graph "detach_delete" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('detach_delete', +$$ + CREATE (x:Label3{name:'x', delete: true}), + (y:Label3{name:'y', delete: true}), + (a:Label1{name:'a', delete: true}), + (b:Label5{name:'b'}), + (c:Label5{name:'c'}), + (d:Label5{name:'d'}), + (m:Label7{name:'m', delete: true}), + (n:Label2{name:'n'}), + (p:Label2{name:'p'}), + (q:Label2{name:'q'}), + (a)-[:rel1{name:'ab'}]->(b), + (c)-[:rel2{name:'cd'}]->(d), + (n)-[:rel3{name:'nm'}]->(m), + (a)-[:rel4{name:'am'}]->(m), + (p)-[:rel5{name:'pq'}]->(q) +$$ +) as (a agtype); + a +--- +(0 rows) + +-- no vertices or edges are deleted (error is expected) +SELECT * FROM cypher('detach_delete', $$ MATCH (x:Label1), (y:Label3), (z:Label7) DELETE x, y, z RETURN 1 $$) as (a agtype); +ERROR: Cannot delete a vertex that has edge(s). Delete the edge(s) first, or try DETACH DELETE. +SELECT * FROM cypher('detach_delete', $$ MATCH (v) RETURN v.name $$) as (vname agtype); + vname +------- + "x" + "y" + "a" + "b" + "c" + "d" + "m" + "n" + "p" + "q" +(10 rows) + +SELECT * FROM cypher('detach_delete', $$ MATCH ()-[e]->() RETURN e.name $$) as (ename agtype); + ename +------- + "ab" + "cd" + "nm" + "am" + "pq" +(5 rows) + +-- x, y, a, m, ab, nm, am are deleted +SELECT * FROM cypher('detach_delete', $$ MATCH (x:Label1), (y:Label3), (z:Label7) DETACH DELETE x, y, z RETURN 1 $$) as (a agtype); + a +--- + 1 + 1 +(2 rows) + +SELECT * FROM cypher('detach_delete', $$ MATCH (v) RETURN v.name $$) as (vname agtype); + vname +------- + "b" + "c" + "d" + "n" + "p" + "q" +(6 rows) + +SELECT * FROM cypher('detach_delete', $$ MATCH ()-[e]->() RETURN e.name $$) as (ename agtype); + ename +------- + "cd" + "pq" +(2 rows) + +SELECT drop_graph('detach_delete', true); +NOTICE: drop cascades to 12 other objects +DETAIL: drop cascades to table detach_delete._ag_label_vertex +drop cascades to table detach_delete._ag_label_edge +drop cascades to table detach_delete."Label3" +drop cascades to table detach_delete."Label1" +drop cascades to table detach_delete."Label5" +drop cascades to table detach_delete."Label7" +drop cascades to table detach_delete."Label2" +drop cascades to table detach_delete.rel1 +drop cascades to table detach_delete.rel2 +drop cascades to table detach_delete.rel3 +drop cascades to table detach_delete.rel4 +drop cascades to table detach_delete.rel5 +NOTICE: graph "detach_delete" has been dropped + drop_graph +------------ + +(1 row) + -- -- Clean up -- diff --git a/regress/sql/cypher_delete.sql b/regress/sql/cypher_delete.sql index 38d911683..f0d98b6ec 100644 --- a/regress/sql/cypher_delete.sql +++ b/regress/sql/cypher_delete.sql @@ -246,6 +246,43 @@ END; SELECT * FROM cypher('cypher_delete', $$MATCH (u:vertices) RETURN u $$) AS (result agtype); +-- +-- Detach Delete +-- + +SELECT create_graph('detach_delete'); +SELECT * FROM cypher('detach_delete', +$$ + CREATE (x:Label3{name:'x', delete: true}), + (y:Label3{name:'y', delete: true}), + (a:Label1{name:'a', delete: true}), + (b:Label5{name:'b'}), + (c:Label5{name:'c'}), + (d:Label5{name:'d'}), + (m:Label7{name:'m', delete: true}), + (n:Label2{name:'n'}), + (p:Label2{name:'p'}), + (q:Label2{name:'q'}), + (a)-[:rel1{name:'ab'}]->(b), + (c)-[:rel2{name:'cd'}]->(d), + (n)-[:rel3{name:'nm'}]->(m), + (a)-[:rel4{name:'am'}]->(m), + (p)-[:rel5{name:'pq'}]->(q) +$$ +) as (a agtype); + +-- no vertices or edges are deleted (error is expected) +SELECT * FROM cypher('detach_delete', $$ MATCH (x:Label1), (y:Label3), (z:Label7) DELETE x, y, z RETURN 1 $$) as (a agtype); +SELECT * FROM cypher('detach_delete', $$ MATCH (v) RETURN v.name $$) as (vname agtype); +SELECT * FROM cypher('detach_delete', $$ MATCH ()-[e]->() RETURN e.name $$) as (ename agtype); + +-- x, y, a, m, ab, nm, am are deleted +SELECT * FROM cypher('detach_delete', $$ MATCH (x:Label1), (y:Label3), (z:Label7) DETACH DELETE x, y, z RETURN 1 $$) as (a agtype); +SELECT * FROM cypher('detach_delete', $$ MATCH (v) RETURN v.name $$) as (vname agtype); +SELECT * FROM cypher('detach_delete', $$ MATCH ()-[e]->() RETURN e.name $$) as (ename agtype); + +SELECT drop_graph('detach_delete', true); + -- -- Clean up -- diff --git a/src/backend/executor/cypher_delete.c b/src/backend/executor/cypher_delete.c index 68fafae77..2ffc3b847 100644 --- a/src/backend/executor/cypher_delete.c +++ b/src/backend/executor/cypher_delete.c @@ -32,6 +32,7 @@ #include "parser/parsetree.h" #include "storage/bufmgr.h" #include "utils/rel.h" +#include "common/hashfn.h" #include "catalog/ag_label.h" #include "executor/cypher_executor.h" @@ -48,9 +49,7 @@ static void rescan_cypher_delete(CustomScanState *node); static void process_delete_list(CustomScanState *node); -static void find_connected_edges(CustomScanState *node, char *graph_name, - List *labels, char *var_name, graphid id, - bool detach_delete); +static void check_for_connected_edges(CustomScanState *node); static agtype_value *extract_entity(CustomScanState *node, TupleTableSlot *scanTupleSlot, int entity_position); @@ -83,6 +82,7 @@ static void begin_cypher_delete(CustomScanState *node, EState *estate, cypher_delete_custom_scan_state *css = (cypher_delete_custom_scan_state *)node; Plan *subplan; + HASHCTL hashctl; Assert(list_length(css->cs->custom_plans) == 1); @@ -112,6 +112,16 @@ static void begin_cypher_delete(CustomScanState *node, EState *estate, */ css->edge_labels = get_all_edge_labels_per_graph(estate, css->delete_data->graph_oid); + /* init vertex_id_htab */ + MemSet(&hashctl, 0, sizeof(hashctl)); + hashctl.keysize = sizeof(graphid); + hashctl.entrysize = + sizeof(graphid); // entries are not used, but entrysize must >= keysize + hashctl.hash = tag_hash; + css->vertex_id_htab = hash_create(DELETE_VERTEX_HTAB_NAME, + DELETE_VERTEX_HTAB_SIZE, &hashctl, + HASH_ELEM | HASH_FUNCTION); + /* * Postgres does not assign the es_output_cid in queries that do * not write to disk, ie: SELECT commands. We need the command id @@ -194,6 +204,10 @@ static TupleTableSlot *exec_cypher_delete(CustomScanState *node) */ static void end_cypher_delete(CustomScanState *node) { + check_for_connected_edges(node); + + hash_destroy(((cypher_delete_custom_scan_state *)node)->vertex_id_htab); + ExecEndNode(node->ss.ps.lefttree); } @@ -443,15 +457,15 @@ static void process_delete_list(CustomScanState *node) } /* - * For vertices, we need to check if the vertex is connected to any - * edges, * if there are, we need to delete them or throw an error, - * depending on if the query specified the DETACH option. + * For vertices, we insert the vertex ID in the hashtable + * vertex_id_htab. This hashtable is used later to process + * connected edges. */ if (original_entity_value->type == AGTV_VERTEX) { - find_connected_edges(node, css->delete_data->graph_name, - css->edge_labels, item->var_name, - id->val.int_value, css->delete_data->detach); + bool found; + hash_search(css->vertex_id_htab, (void *)&(id->val.int_value), + HASH_ENTER, &found); } /* At this point, we are ready to delete the node/vertex. */ @@ -464,30 +478,20 @@ static void process_delete_list(CustomScanState *node) } /* - * Find the edges connected to the given node. If there is any edges either - * delete them or throw an error, depending on the detach delete option. + * Scans the edge tables and checks if the deleted vertices are connected to + * any edge(s). For DETACH DELETE, the connected edges are deleted. Otherwise, + * an error is thrown. */ -static void find_connected_edges(CustomScanState *node, char *graph_name, - List *labels, char *var_name, graphid id, - bool detach_delete) +static void check_for_connected_edges(CustomScanState *node) { + ListCell *lc; cypher_delete_custom_scan_state *css = (cypher_delete_custom_scan_state *)node; EState *estate = css->css.ss.ps.state; - ListCell *lc; + char *graph_name = css->delete_data->graph_name; - Increment_Estate_CommandId(estate); - - /* - * We need to scan through all the edges to see if this vertex has - * any edges attached to it. - * - * XXX: If we implement an on-disc graph storage system. Such as - * an adjacency matrix, the performance of this check can be massively - * improved. However, right now we have to scan every edge to see if - * one has this vertex as a start or end vertex. - */ - foreach(lc, labels) + /* scans each label from css->edge_labels */ + foreach (lc, css->edge_labels) { char *label_name = lfirst(lc); ResultRelInfo *resultRelInfo; @@ -495,54 +499,66 @@ static void find_connected_edges(CustomScanState *node, char *graph_name, HeapTuple tuple; TupleTableSlot *slot; - resultRelInfo = create_entity_result_rel_info(estate, - graph_name, label_name); - + resultRelInfo = create_entity_result_rel_info(estate, graph_name, + label_name); scan_desc = table_beginscan(resultRelInfo->ri_RelationDesc, estate->es_snapshot, 0, NULL); - slot = ExecInitExtraTupleSlot( estate, RelationGetDescr(resultRelInfo->ri_RelationDesc), &TTSOpsHeapTuple); - // scan the table - while(true) + /* for each row */ + while (true) { - graphid startid, endid; + graphid startid; + graphid endid; bool isNull; + bool found_startid = false; + bool found_endid = false; tuple = heap_getnext(scan_desc, ForwardScanDirection); - // no more tuples to process, break and scan the next label. + /* no more tuples to process, break and scan the next label. */ if (!HeapTupleIsValid(tuple)) + { break; + } ExecStoreHeapTuple(tuple, slot, false); - startid = GRAPHID_GET_DATUM(slot_getattr(slot, Anum_ag_label_edge_table_start_id, &isNull)); - endid = GRAPHID_GET_DATUM(slot_getattr(slot, Anum_ag_label_edge_table_end_id, &isNull)); + startid = GRAPHID_GET_DATUM(slot_getattr( + slot, Anum_ag_label_edge_table_start_id, &isNull)); + endid = GRAPHID_GET_DATUM( + slot_getattr(slot, Anum_ag_label_edge_table_end_id, &isNull)); + + hash_search(css->vertex_id_htab, (void *)&startid, HASH_FIND, + &found_startid); - if (id == startid || id == endid) + if (!found_startid) { - /* - * We have found an edge that uses the vertex. Either delete the - * edge or throw an error. Depending on whether the DETACH - * option was specified in the query. - */ - if (detach_delete) + hash_search(css->vertex_id_htab, (void *)&endid, HASH_FIND, + &found_endid); + } + + if (found_startid || found_endid) + { + if (css->delete_data->detach) + { delete_entity(estate, resultRelInfo, tuple); + } else - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("Cannot delete vertex %s, because it still has edges attached. " - "To delete this vertex, you must first delete the attached edges.", - var_name))); + { + ereport( + ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg( + "Cannot delete a vertex that has edge(s). " + "Delete the edge(s) first, or try DETACH DELETE."))); + } } } table_endscan(scan_desc); destroy_entity_result_rel_info(resultRelInfo); } - - Decrement_Estate_CommandId(estate); } diff --git a/src/include/executor/cypher_utils.h b/src/include/executor/cypher_utils.h index 58dd8fa9c..056f1a967 100644 --- a/src/include/executor/cypher_utils.h +++ b/src/include/executor/cypher_utils.h @@ -50,6 +50,9 @@ estate->es_output_cid--; \ estate->es_snapshot->curcid--; +#define DELETE_VERTEX_HTAB_NAME "delete_vertex_htab" +#define DELETE_VERTEX_HTAB_SIZE 1000000 + typedef struct cypher_create_custom_scan_state { CustomScanState css; @@ -76,6 +79,24 @@ typedef struct cypher_delete_custom_scan_state cypher_delete_information *delete_data; int flags; List *edge_labels; + + /* + * Deleted vertex IDs are stored in this hashtable. + * + * When a vertex item is deleted, it must be checked if there is any edges + * connected to it. The connected edges are either deleted or an error is + * thrown depending on the DETACH option. However, the check for connected + * edges is not done immediately. Instead the deleted vertex IDs are stored + * in the hashtable. Once all vertices are deleted, this hashtable is used + * to process the connected edges with only one scan of the edge tables. + * + * Note on performance: Additional performance gain may be possible if + * the standard DELETE .. USING .. command can be used instead of this + * hashtable. Because Postgres may create a better plan to execute that + * command depending on the statistics and available indexes on start_id + * and end_id column. + */ + HTAB *vertex_id_htab; } cypher_delete_custom_scan_state; typedef struct cypher_merge_custom_scan_state