Skip to content

Commit

Permalink
Update age_load to load scalar property values with appropriate type (#…
Browse files Browse the repository at this point in the history
…2048)

Previously, property values from csv files were always loaded as strings. This
patch converts a value to an appropriate scalar type (i.e. string, bool,
numeric, null) while loading. It uses the agtype_value_from_cstring()
function for conversion.

Additional change(s):
-------------------
 - Fix: for csv rows in edge files, create_agtype_from_list_i()'s start_index
   is corrected to 4
  • Loading branch information
rafsun42 authored Aug 16, 2024
1 parent db98357 commit cf1ce46
Show file tree
Hide file tree
Showing 8 changed files with 168 additions and 5 deletions.
7 changes: 7 additions & 0 deletions regress/age_load/data/conversion_edges.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
start_id, start_vertex_type, end_id, end_vertex_type, string, bool, numeric,
1, Person1, 1, Person2, "John Smith", "true", 1
1, Person1, 1, Person2, "John", "false", "-2"
1, Person1, 1, Person2, John Smith, true, 1.4
1, Person1, 1, Person2, """John""", false, -1e10
1, Person1, 1, Person2, null, false, 0
1, Person1, 1, Person2, nUll, false, "3.14"
7 changes: 7 additions & 0 deletions regress/age_load/data/conversion_vertices.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
id, string, bool, numeric,
1, "John Smith", "true", 1
2, "John", "false", "-2"
3, John Smith, true, 1.4
4, """John""", false, -1e10
5, null, false, 0
6, nUll, false, "3.14"
84 changes: 84 additions & 0 deletions regress/expected/age_load.out
Original file line number Diff line number Diff line change
Expand Up @@ -233,3 +233,87 @@ NOTICE: graph "agload_test_graph" has been dropped

(1 row)

--
-- Test property type conversion
--
SELECT create_graph('agload_conversion');
NOTICE: graph "agload_conversion" has been created
create_graph
--------------

(1 row)

SELECT create_vlabel('agload_conversion','Person1');
NOTICE: VLabel "Person1" has been created
create_vlabel
---------------

(1 row)

SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv');
load_labels_from_file
-----------------------

(1 row)

SELECT * FROM cypher('agload_conversion', $$ MATCH (n) RETURN properties(n) $$) as (a agtype);
a
------------------------------------------------------------------------------------
{"id": 1, "bool": true, "__id__": 1, "string": "John Smith", "numeric": 1}
{"id": 2, "bool": false, "__id__": 2, "string": "John", "numeric": -2}
{"id": 3, "bool": true, "__id__": 3, "string": "John Smith", "numeric": 1.4}
{"id": 4, "bool": false, "__id__": 4, "string": "John", "numeric": -10000000000.0}
{"id": 5, "bool": false, "__id__": 5, "string": null, "numeric": 0}
{"id": 6, "bool": false, "__id__": 6, "string": "nUll", "numeric": 3.14}
(6 rows)

SELECT create_vlabel('agload_conversion','Person2');
NOTICE: VLabel "Person2" has been created
create_vlabel
---------------

(1 row)

SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv');
load_labels_from_file
-----------------------

(1 row)

SELECT create_elabel('agload_conversion','Edges');
NOTICE: ELabel "Edges" has been created
create_elabel
---------------

(1 row)

SELECT load_edges_from_file('agload_conversion', 'Edges', 'age_load/conversion_edges.csv');
load_edges_from_file
----------------------

(1 row)

SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e]->() RETURN properties(e) $$) as (a agtype);
a
--------------------------------------------------------------
{"bool": true, "string": "John Smith", "numeric": 1}
{"bool": false, "string": "John", "numeric": -2}
{"bool": true, "string": "John Smith", "numeric": 1.4}
{"bool": false, "string": "John", "numeric": -10000000000.0}
{"bool": false, "string": null, "numeric": 0}
{"bool": false, "string": "nUll", "numeric": 3.14}
(6 rows)

SELECT drop_graph('agload_conversion', true);
NOTICE: drop cascades to 5 other objects
DETAIL: drop cascades to table agload_conversion._ag_label_vertex
drop cascades to table agload_conversion._ag_label_edge
drop cascades to table agload_conversion."Person1"
drop cascades to table agload_conversion."Person2"
drop cascades to table agload_conversion."Edges"
NOTICE: graph "agload_conversion" has been dropped
drop_graph
------------

(1 row)

18 changes: 18 additions & 0 deletions regress/sql/age_load.sql
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,21 @@ SELECT * FROM cypher('agload_test_graph', $$
$$) AS (result_1 agtype, result_2 agtype);

SELECT drop_graph('agload_test_graph', true);

--
-- Test property type conversion
--
SELECT create_graph('agload_conversion');

SELECT create_vlabel('agload_conversion','Person1');
SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv');
SELECT * FROM cypher('agload_conversion', $$ MATCH (n) RETURN properties(n) $$) as (a agtype);

SELECT create_vlabel('agload_conversion','Person2');
SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv');

SELECT create_elabel('agload_conversion','Edges');
SELECT load_edges_from_file('agload_conversion', 'Edges', 'age_load/conversion_edges.csv');
SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e]->() RETURN properties(e) $$) as (a agtype);

SELECT drop_graph('agload_conversion', true);
3 changes: 1 addition & 2 deletions src/backend/utils/adt/agtype.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ typedef enum /* type categories for datum_to_agtype */
} agt_type_category;

static inline Datum agtype_from_cstring(char *str, int len);
static inline agtype_value *agtype_value_from_cstring(char *str, int len);
size_t check_string_length(size_t len);
static void agtype_in_agtype_annotation(void *pstate, char *annotation);
static void agtype_in_object_start(void *pstate);
Expand Down Expand Up @@ -355,7 +354,7 @@ Datum agtype_out(PG_FUNCTION_ARGS)
* Uses the agtype parser (with hooks) to construct an agtype.
*/

static inline agtype_value *agtype_value_from_cstring(char *str, int len)
agtype_value *agtype_value_from_cstring(char *str, int len)
{
agtype_lex_context *lex;
agtype_in_state state;
Expand Down
2 changes: 1 addition & 1 deletion src/backend/utils/load/ag_load_edges.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ void edge_row_cb(int delim __attribute__((unused)), void *data)
end_vertex_graph_id = make_graphid(end_vertex_type_id, end_id_int);

props = create_agtype_from_list_i(cr->header, cr->fields,
n_fields, 3);
n_fields, 4);

insert_edge_simple(cr->graph_oid, cr->object_name,
object_graph_id, start_vertex_graph_id,
Expand Down
51 changes: 49 additions & 2 deletions src/backend/utils/load/age_load.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,14 @@
*/

#include "postgres.h"
#include "utils/json.h"

#include "utils/load/ag_load_edges.h"
#include "utils/load/ag_load_labels.h"
#include "utils/load/age_load.h"

static agtype_value *csv_value_to_agtype_value(char *csv_val);

agtype *create_empty_agtype(void)
{
agtype* out;
Expand All @@ -40,6 +43,50 @@ agtype *create_empty_agtype(void)
return out;
}

/*
* Converts the given csv value to an agtype_value.
*
* If csv_val is not a valid json, it is wrapped by double-quotes to make it a
* string value. Because agtype is jsonb-like, the token should be a valid
* json in order to be parsed into an agtype_value of appropriate type.
* Finally, agtype_value_from_cstring() is called for parsing.
*/
static agtype_value *csv_value_to_agtype_value(char *csv_val)
{
char *new_csv_val;
agtype_value *res;

if (!json_validate(cstring_to_text(csv_val), false, false))
{
// wrap the string with double-quote
int oldlen;
int newlen;

oldlen = strlen(csv_val);
newlen = oldlen + 2; // +2 for double-quotes
new_csv_val = (char *)palloc(sizeof(char) * (newlen + 1));

new_csv_val[0] = '"';
strncpy(&new_csv_val[1], csv_val, oldlen);
new_csv_val[oldlen + 1] = '"';
new_csv_val[oldlen + 2] = '\0';
}
else
{
new_csv_val = csv_val;
}

res = agtype_value_from_cstring(new_csv_val, strlen(new_csv_val));

// extract from top-level row scalar array
if (res->type == AGTV_ARRAY && res->val.array.raw_scalar)
{
res = &res->val.array.elems[0];
}

return res;
}

agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len,
int64 vertex_id)
{
Expand Down Expand Up @@ -74,7 +121,7 @@ agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len,
WAGT_KEY,
key_agtype);

value_agtype = string_to_agtype_value(fields[i]);
value_agtype = csv_value_to_agtype_value(fields[i]);
result.res = push_agtype_value(&result.parse_state,
WAGT_VALUE,
value_agtype);
Expand Down Expand Up @@ -117,7 +164,7 @@ agtype* create_agtype_from_list_i(char **header, char **fields,
result.res = push_agtype_value(&result.parse_state,
WAGT_KEY,
key_agtype);
value_agtype = string_to_agtype_value(fields[i]);
value_agtype = csv_value_to_agtype_value(fields[i]);
result.res = push_agtype_value(&result.parse_state,
WAGT_VALUE,
value_agtype);
Expand Down
1 change: 1 addition & 0 deletions src/include/utils/agtype.h
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,7 @@ agtype_iterator *get_next_list_element(agtype_iterator *it,
void pfree_agtype_value(agtype_value* value);
void pfree_agtype_value_content(agtype_value* value);
void pfree_agtype_in_state(agtype_in_state* value);
agtype_value *agtype_value_from_cstring(char *str, int len);

/* Oid accessors for AGTYPE */
Oid get_AGTYPEOID(void);
Expand Down

0 comments on commit cf1ce46

Please sign in to comment.