From 94254f890a4fc7e97ee55667fc51bf0e3ff8990e Mon Sep 17 00:00:00 2001 From: Shubham Dhama Date: Thu, 3 Nov 2022 21:58:03 +0530 Subject: [PATCH 1/2] Add option to output numeric data types as string. Data types like `numeric`, `real`, `double precision` supports `Infinity`, `-Infinity` and `NaN` values. Currently these values output as `null` because JSON specification does not recognize them as valid numeric values. This will create problems for the users of wal2json who need these values to maintain data integerity. --- Makefile | 2 +- README.md | 2 + expected/numeric_data_types_as_string.out | 127 ++++++++++++++++++++++ sql/numeric_data_types_as_string.sql | 41 +++++++ wal2json.c | 59 ++++++++-- 5 files changed, 221 insertions(+), 10 deletions(-) create mode 100644 expected/numeric_data_types_as_string.out create mode 100644 sql/numeric_data_types_as_string.sql diff --git a/Makefile b/Makefile index 6b71871..c0effef 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ REGRESS = cmdline insert1 update1 update2 update3 update4 delete1 delete2 \ delete3 delete4 savepoint specialvalue toast bytea message typmod \ filtertable selecttable include_timestamp include_lsn include_xids \ include_domain_data_type truncate type_oid actions position default \ - pk rename_column + pk rename_column numeric_data_types_as_string PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) diff --git a/README.md b/README.md index 7d75704..ddb8ccd 100644 --- a/README.md +++ b/README.md @@ -109,6 +109,8 @@ Parameters * `include-not-null`: add _not null_ information as _columnoptionals_. Default is _false_. * `include-default`: add default expression. Default is _false_. * `include-pk`: add _primary key_ information as _pk_. Column name and data type is included. Default is _false_. +* `numeric-data-types-as-string`: use strings for numeric data types. JSON specification does not recognize `Infinity` and `NaN` as valid numeric values. There might be [potential interoperability problems](https://datatracker.ietf.org/doc/html/rfc7159#section-6) for double precision numbers. Default is _false_. +* output numeric data types as string. Default is _false_. * `pretty-print`: add spaces and indentation to JSON structures. Default is _false_. * `write-in-chunks`: write after every change instead of every changeset. Only used when `format-version` is `1`. Default is _false_. * `include-lsn`: add _nextlsn_ to each changeset. Default is _false_. diff --git a/expected/numeric_data_types_as_string.out b/expected/numeric_data_types_as_string.out new file mode 100644 index 0000000..b83844d --- /dev/null +++ b/expected/numeric_data_types_as_string.out @@ -0,0 +1,127 @@ +\set VERBOSITY terse +-- predictability +SET synchronous_commit = on; +SET extra_float_digits = 0; +CREATE TABLE table_integer (a smallserial, b smallint, c int, d bigint); +CREATE TABLE table_decimal (a real, b double precision, c numeric); +CREATE TABLE table_others ( +a char(10), +b varchar(30), +c text, +d bit varying(20), +e timestamp, +f date, +g boolean, +h json, +i tsvector +); +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'wal2json'); + ?column? +---------- + init +(1 row) + +BEGIN; +INSERT INTO table_integer (b, c, d) VALUES(32767, 2147483647, 9223372036854775807); +INSERT INTO table_integer (b, c, d) VALUES(-32768, -2147483648, -9223372036854775808); +INSERT INTO table_decimal (a, b) VALUES('Infinity', 'Infinity'); +INSERT INTO table_decimal (a, b) VALUES('-Infinity', '-Infinity'); +INSERT INTO table_decimal (a, b, c) VALUES('NaN', 'NaN', 'NaN'); +INSERT INTO table_decimal (a, b, c) VALUES(123.456, 123456789.012345, 1234567890987654321.1234567890987654321); +INSERT INTO table_decimal (a, b, c) VALUES(-123.456, -123456789.012345, -1234567890987654321.1234567890987654321); +INSERT INTO table_others (a, b, c, d, e, f, g, h, i) VALUES('teste', 'testando', 'um texto longo', B'001110010101010', '2013-11-02 17:30:52', '2013-02-04', true, '{ "a": 123 }', 'Old Old Parr'::tsvector); +COMMIT; +SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '1', 'pretty-print', '1', 'numeric-data-types-as-string', '1'); + data +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + { + + "change": [ + + { + + "kind": "insert", + + "schema": "public", + + "table": "table_integer", + + "columnnames": ["a", "b", "c", "d"], + + "columntypes": ["smallint", "smallint", "integer", "bigint"], + + "columnvalues": ["1", "32767", "2147483647", "9223372036854775807"] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_integer", + + "columnnames": ["a", "b", "c", "d"], + + "columntypes": ["smallint", "smallint", "integer", "bigint"], + + "columnvalues": ["2", "-32768", "-2147483648", "-9223372036854775808"] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": ["Infinity", "Infinity", null] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": ["-Infinity", "-Infinity", null] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": ["NaN", "NaN", "NaN"] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": ["123.456", "123456789.012345", "1234567890987654321.1234567890987654321"] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": ["-123.456", "-123456789.012345", "-1234567890987654321.1234567890987654321"] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_others", + + "columnnames": ["a", "b", "c", "d", "e", "f", "g", "h", "i"], + + "columntypes": ["character(10)", "character varying(30)", "text", "bit varying(20)", "timestamp without time zone", "date", "boolean", "json", "tsvector"], + + "columnvalues": ["teste ", "testando", "um texto longo", "001110010101010", "Sat Nov 02 17:30:52 2013", "02-04-2013", true, "{ \"a\": 123 }", "'Old' 'Parr'"]+ + } + + ] + + } +(1 row) + +SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '2', 'pretty-print', '1', 'numeric-data-types-as-string', '1'); + data +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"action":"B"} + {"action":"I","schema":"public","table":"table_integer","columns":[{"name":"a","type":"smallint","value":"1"},{"name":"b","type":"smallint","value":"32767"},{"name":"c","type":"integer","value":"2147483647"},{"name":"d","type":"bigint","value":"9223372036854775807"}]} + {"action":"I","schema":"public","table":"table_integer","columns":[{"name":"a","type":"smallint","value":"2"},{"name":"b","type":"smallint","value":"-32768"},{"name":"c","type":"integer","value":"-2147483648"},{"name":"d","type":"bigint","value":"-9223372036854775808"}]} + {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":"Infinity"},{"name":"b","type":"double precision","value":"Infinity"},{"name":"c","type":"numeric","value":null}]} + {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":"-Infinity"},{"name":"b","type":"double precision","value":"-Infinity"},{"name":"c","type":"numeric","value":null}]} + {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":"NaN"},{"name":"b","type":"double precision","value":"NaN"},{"name":"c","type":"numeric","value":"NaN"}]} + {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":"123.456"},{"name":"b","type":"double precision","value":"123456789.012345"},{"name":"c","type":"numeric","value":"1234567890987654321.1234567890987654321"}]} + {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":"-123.456"},{"name":"b","type":"double precision","value":"-123456789.012345"},{"name":"c","type":"numeric","value":"-1234567890987654321.1234567890987654321"}]} + {"action":"I","schema":"public","table":"table_others","columns":[{"name":"a","type":"character(10)","value":"teste "},{"name":"b","type":"character varying(30)","value":"testando"},{"name":"c","type":"text","value":"um texto longo"},{"name":"d","type":"bit varying(20)","value":"001110010101010"},{"name":"e","type":"timestamp without time zone","value":"Sat Nov 02 17:30:52 2013"},{"name":"f","type":"date","value":"02-04-2013"},{"name":"g","type":"boolean","value":true},{"name":"h","type":"json","value":"{ \"a\": 123 }"},{"name":"i","type":"tsvector","value":"'Old' 'Parr'"}]} + {"action":"C"} +(10 rows) + +SELECT 'stop' FROM pg_drop_replication_slot('regression_slot'); + ?column? +---------- + stop +(1 row) + diff --git a/sql/numeric_data_types_as_string.sql b/sql/numeric_data_types_as_string.sql new file mode 100644 index 0000000..73624f5 --- /dev/null +++ b/sql/numeric_data_types_as_string.sql @@ -0,0 +1,41 @@ +\set VERBOSITY terse + +-- predictability +SET synchronous_commit = on; +SET extra_float_digits = 0; + + + +CREATE TABLE table_integer (a smallserial, b smallint, c int, d bigint); +CREATE TABLE table_decimal (a real, b double precision, c numeric); +CREATE TABLE table_others ( +a char(10), +b varchar(30), +c text, +d bit varying(20), +e timestamp, +f date, +g boolean, +h json, +i tsvector +); + +SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'wal2json'); + +BEGIN; +INSERT INTO table_integer (b, c, d) VALUES(32767, 2147483647, 9223372036854775807); +INSERT INTO table_integer (b, c, d) VALUES(-32768, -2147483648, -9223372036854775808); + +INSERT INTO table_decimal (a, b) VALUES('Infinity', 'Infinity'); +INSERT INTO table_decimal (a, b) VALUES('-Infinity', '-Infinity'); +INSERT INTO table_decimal (a, b, c) VALUES('NaN', 'NaN', 'NaN'); +INSERT INTO table_decimal (a, b, c) VALUES(123.456, 123456789.012345, 1234567890987654321.1234567890987654321); +INSERT INTO table_decimal (a, b, c) VALUES(-123.456, -123456789.012345, -1234567890987654321.1234567890987654321); + +INSERT INTO table_others (a, b, c, d, e, f, g, h, i) VALUES('teste', 'testando', 'um texto longo', B'001110010101010', '2013-11-02 17:30:52', '2013-02-04', true, '{ "a": 123 }', 'Old Old Parr'::tsvector); +COMMIT; + +SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '1', 'pretty-print', '1', 'numeric-data-types-as-string', '1'); +SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '2', 'pretty-print', '1', 'numeric-data-types-as-string', '1'); + +SELECT 'stop' FROM pg_drop_replication_slot('regression_slot'); diff --git a/wal2json.c b/wal2json.c index 462882f..ae143f9 100644 --- a/wal2json.c +++ b/wal2json.c @@ -69,6 +69,7 @@ typedef struct bool pretty_print; /* pretty-print JSON? */ bool write_in_chunks; /* write in chunks? (v1) */ + bool numeric_data_types_as_string; /* use strings for numeric data types */ JsonAction actions; /* output only these actions */ @@ -261,6 +262,7 @@ pg_decode_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt, bool is data->include_typmod = true; data->include_domain_data_type = false; data->include_column_positions = false; + data->numeric_data_types_as_string = false; data->pretty_print = false; data->write_in_chunks = false; data->include_lsn = false; @@ -480,6 +482,19 @@ pg_decode_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt, bool is errmsg("could not parse value \"%s\" for parameter \"%s\"", strVal(elem->arg), elem->defname))); } + else if (strcmp(elem->defname, "numeric-data-types-as-string") == 0) + { + if (elem->arg == NULL) + { + elog(DEBUG1, "numeric-data-types-as-string argument is null"); + data->numeric_data_types_as_string = true; + } + else if (!parse_bool(strVal(elem->arg), &data->numeric_data_types_as_string)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not parse value \"%s\" for parameter \"%s\"", + strVal(elem->arg), elem->defname))); + } else if (strcmp(elem->defname, "pretty-print") == 0) { if (elem->arg == NULL) @@ -1257,8 +1272,9 @@ tuple_to_stringinfo(LogicalDecodingContext *ctx, TupleDesc tupdesc, HeapTuple tu * Data types are printed with quotes unless they are number, true, * false, null, an array or an object. * - * The NaN and Infinity are not valid JSON symbols. Hence, - * regardless of sign they are represented as the string null. + * The NaN and Infinity are not valid JSON numeric values. Hence, + * regardless of sign they are represented as null when + * data->numeric_data_types_as_string is not used. */ switch (typid) { @@ -1269,7 +1285,18 @@ tuple_to_stringinfo(LogicalDecodingContext *ctx, TupleDesc tupdesc, HeapTuple tu case FLOAT4OID: case FLOAT8OID: case NUMERICOID: - if (pg_strncasecmp(outputstr, "NaN", 3) == 0 || + if (data->numeric_data_types_as_string) { + if (strspn(outputstr, "0123456789+-eE.") == strlen(outputstr) || + pg_strncasecmp(outputstr, "NaN", 3) == 0 || + pg_strncasecmp(outputstr, "Infinity", 8) == 0 || + pg_strncasecmp(outputstr, "-Infinity", 9) == 0) { + appendStringInfo(&colvalues, "%s", comma); + escape_json(&colvalues, outputstr); + } else { + elog(ERROR, "%s is not a number", outputstr); + } + } + else if (pg_strncasecmp(outputstr, "NaN", 3) == 0 || pg_strncasecmp(outputstr, "Infinity", 8) == 0 || pg_strncasecmp(outputstr, "-Infinity", 9) == 0) { @@ -1846,9 +1873,12 @@ pg_decode_change_v1(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, static void pg_decode_write_value(LogicalDecodingContext *ctx, Datum value, bool isnull, Oid typid) { - Oid typoutfunc; - bool isvarlena; - char *outstr; + JsonDecodingData *data; + Oid typoutfunc; + bool isvarlena; + char *outstr; + + data = ctx->output_plugin_private; if (isnull) { @@ -1883,8 +1913,9 @@ pg_decode_write_value(LogicalDecodingContext *ctx, Datum value, bool isnull, Oid * Data types are printed with quotes unless they are number, true, false, * null, an array or an object. * - * The NaN an Infinity are not valid JSON symbols. Hence, regardless of - * sign they are represented as the string null. + * The NaN and Infinity are not valid JSON numeric values. Hence, + * regardless of sign they are represented as null when + * data->numeric_data_types_as_string is not used. */ switch (typid) { @@ -1895,7 +1926,17 @@ pg_decode_write_value(LogicalDecodingContext *ctx, Datum value, bool isnull, Oid case FLOAT4OID: case FLOAT8OID: case NUMERICOID: - if (pg_strncasecmp(outstr, "NaN", 3) == 0 || + if (data->numeric_data_types_as_string) { + if (strspn(outstr, "0123456789+-eE.") == strlen(outstr) || + pg_strncasecmp(outstr, "NaN", 3) == 0 || + pg_strncasecmp(outstr, "Infinity", 8) == 0 || + pg_strncasecmp(outstr, "-Infinity", 9) == 0) { + escape_json(ctx->out, outstr); + } else { + elog(ERROR, "%s is not a number", outstr); + } + } + else if (pg_strncasecmp(outstr, "NaN", 3) == 0 || pg_strncasecmp(outstr, "Infinity", 8) == 0 || pg_strncasecmp(outstr, "-Infinity", 9) == 0) { From b598c1e951823ca14ada3f3c70597e1b959dbc09 Mon Sep 17 00:00:00 2001 From: Shubham Dhama Date: Tue, 15 Nov 2022 13:46:04 +0530 Subject: [PATCH 2/2] Address review comments. --- README.md | 3 +- expected/numeric_data_types_as_string.out | 234 ++++++++++++++-------- sql/numeric_data_types_as_string.sql | 22 +- wal2json.c | 26 ++- 4 files changed, 171 insertions(+), 114 deletions(-) diff --git a/README.md b/README.md index ddb8ccd..3fd55ed 100644 --- a/README.md +++ b/README.md @@ -109,8 +109,7 @@ Parameters * `include-not-null`: add _not null_ information as _columnoptionals_. Default is _false_. * `include-default`: add default expression. Default is _false_. * `include-pk`: add _primary key_ information as _pk_. Column name and data type is included. Default is _false_. -* `numeric-data-types-as-string`: use strings for numeric data types. JSON specification does not recognize `Infinity` and `NaN` as valid numeric values. There might be [potential interoperability problems](https://datatracker.ietf.org/doc/html/rfc7159#section-6) for double precision numbers. Default is _false_. -* output numeric data types as string. Default is _false_. +* `numeric-data-types-as-string`: use string for numeric data types. JSON specification does not recognize `Infinity` and `NaN` as valid numeric values. There might be [potential interoperability problems](https://datatracker.ietf.org/doc/html/rfc7159#section-6) for double precision numbers. Default is _false_. * `pretty-print`: add spaces and indentation to JSON structures. Default is _false_. * `write-in-chunks`: write after every change instead of every changeset. Only used when `format-version` is `1`. Default is _false_. * `include-lsn`: add _nextlsn_ to each changeset. Default is _false_. diff --git a/expected/numeric_data_types_as_string.out b/expected/numeric_data_types_as_string.out index b83844d..cad032e 100644 --- a/expected/numeric_data_types_as_string.out +++ b/expected/numeric_data_types_as_string.out @@ -2,19 +2,8 @@ -- predictability SET synchronous_commit = on; SET extra_float_digits = 0; -CREATE TABLE table_integer (a smallserial, b smallint, c int, d bigint); +CREATE TABLE table_integer (a smallserial, b smallint, c int, d bigint); CREATE TABLE table_decimal (a real, b double precision, c numeric); -CREATE TABLE table_others ( -a char(10), -b varchar(30), -c text, -d bit varying(20), -e timestamp, -f date, -g boolean, -h json, -i tsvector -); SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'wal2json'); ?column? ---------- @@ -29,84 +18,140 @@ INSERT INTO table_decimal (a, b) VALUES('-Infinity', '-Infinity'); INSERT INTO table_decimal (a, b, c) VALUES('NaN', 'NaN', 'NaN'); INSERT INTO table_decimal (a, b, c) VALUES(123.456, 123456789.012345, 1234567890987654321.1234567890987654321); INSERT INTO table_decimal (a, b, c) VALUES(-123.456, -123456789.012345, -1234567890987654321.1234567890987654321); -INSERT INTO table_others (a, b, c, d, e, f, g, h, i) VALUES('teste', 'testando', 'um texto longo', B'001110010101010', '2013-11-02 17:30:52', '2013-02-04', true, '{ "a": 123 }', 'Old Old Parr'::tsvector); COMMIT; SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '1', 'pretty-print', '1', 'numeric-data-types-as-string', '1'); - data -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - { + - "change": [ + - { + - "kind": "insert", + - "schema": "public", + - "table": "table_integer", + - "columnnames": ["a", "b", "c", "d"], + - "columntypes": ["smallint", "smallint", "integer", "bigint"], + - "columnvalues": ["1", "32767", "2147483647", "9223372036854775807"] + - } + - ,{ + - "kind": "insert", + - "schema": "public", + - "table": "table_integer", + - "columnnames": ["a", "b", "c", "d"], + - "columntypes": ["smallint", "smallint", "integer", "bigint"], + - "columnvalues": ["2", "-32768", "-2147483648", "-9223372036854775808"] + - } + - ,{ + - "kind": "insert", + - "schema": "public", + - "table": "table_decimal", + - "columnnames": ["a", "b", "c"], + - "columntypes": ["real", "double precision", "numeric"], + - "columnvalues": ["Infinity", "Infinity", null] + - } + - ,{ + - "kind": "insert", + - "schema": "public", + - "table": "table_decimal", + - "columnnames": ["a", "b", "c"], + - "columntypes": ["real", "double precision", "numeric"], + - "columnvalues": ["-Infinity", "-Infinity", null] + - } + - ,{ + - "kind": "insert", + - "schema": "public", + - "table": "table_decimal", + - "columnnames": ["a", "b", "c"], + - "columntypes": ["real", "double precision", "numeric"], + - "columnvalues": ["NaN", "NaN", "NaN"] + - } + - ,{ + - "kind": "insert", + - "schema": "public", + - "table": "table_decimal", + - "columnnames": ["a", "b", "c"], + - "columntypes": ["real", "double precision", "numeric"], + - "columnvalues": ["123.456", "123456789.012345", "1234567890987654321.1234567890987654321"] + - } + - ,{ + - "kind": "insert", + - "schema": "public", + - "table": "table_decimal", + - "columnnames": ["a", "b", "c"], + - "columntypes": ["real", "double precision", "numeric"], + - "columnvalues": ["-123.456", "-123456789.012345", "-1234567890987654321.1234567890987654321"] + - } + - ,{ + - "kind": "insert", + - "schema": "public", + - "table": "table_others", + - "columnnames": ["a", "b", "c", "d", "e", "f", "g", "h", "i"], + - "columntypes": ["character(10)", "character varying(30)", "text", "bit varying(20)", "timestamp without time zone", "date", "boolean", "json", "tsvector"], + - "columnvalues": ["teste ", "testando", "um texto longo", "001110010101010", "Sat Nov 02 17:30:52 2013", "02-04-2013", true, "{ \"a\": 123 }", "'Old' 'Parr'"]+ - } + - ] + + data +----------------------------------------------------------------------------------------------------------------------- + { + + "change": [ + + { + + "kind": "insert", + + "schema": "public", + + "table": "table_integer", + + "columnnames": ["a", "b", "c", "d"], + + "columntypes": ["smallint", "smallint", "integer", "bigint"], + + "columnvalues": ["1", "32767", "2147483647", "9223372036854775807"] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_integer", + + "columnnames": ["a", "b", "c", "d"], + + "columntypes": ["smallint", "smallint", "integer", "bigint"], + + "columnvalues": ["2", "-32768", "-2147483648", "-9223372036854775808"] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": ["Infinity", "Infinity", null] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": ["-Infinity", "-Infinity", null] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": ["NaN", "NaN", "NaN"] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": ["123.456", "123456789.012345", "1234567890987654321.1234567890987654321"] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": ["-123.456", "-123456789.012345", "-1234567890987654321.1234567890987654321"]+ + } + + ] + } (1 row) -SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '2', 'pretty-print', '1', 'numeric-data-types-as-string', '1'); - data ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '1', 'pretty-print', '1'); + data +----------------------------------------------------------------------------------------------------------------- + { + + "change": [ + + { + + "kind": "insert", + + "schema": "public", + + "table": "table_integer", + + "columnnames": ["a", "b", "c", "d"], + + "columntypes": ["smallint", "smallint", "integer", "bigint"], + + "columnvalues": [1, 32767, 2147483647, 9223372036854775807] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_integer", + + "columnnames": ["a", "b", "c", "d"], + + "columntypes": ["smallint", "smallint", "integer", "bigint"], + + "columnvalues": [2, -32768, -2147483648, -9223372036854775808] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": [null, null, null] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": [null, null, null] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": [null, null, null] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": [123.456, 123456789.012345, 1234567890987654321.1234567890987654321] + + } + + ,{ + + "kind": "insert", + + "schema": "public", + + "table": "table_decimal", + + "columnnames": ["a", "b", "c"], + + "columntypes": ["real", "double precision", "numeric"], + + "columnvalues": [-123.456, -123456789.012345, -1234567890987654321.1234567890987654321]+ + } + + ] + + } +(1 row) + +SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '2', 'numeric-data-types-as-string', '1'); + data +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- {"action":"B"} {"action":"I","schema":"public","table":"table_integer","columns":[{"name":"a","type":"smallint","value":"1"},{"name":"b","type":"smallint","value":"32767"},{"name":"c","type":"integer","value":"2147483647"},{"name":"d","type":"bigint","value":"9223372036854775807"}]} {"action":"I","schema":"public","table":"table_integer","columns":[{"name":"a","type":"smallint","value":"2"},{"name":"b","type":"smallint","value":"-32768"},{"name":"c","type":"integer","value":"-2147483648"},{"name":"d","type":"bigint","value":"-9223372036854775808"}]} @@ -115,9 +160,22 @@ SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'fo {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":"NaN"},{"name":"b","type":"double precision","value":"NaN"},{"name":"c","type":"numeric","value":"NaN"}]} {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":"123.456"},{"name":"b","type":"double precision","value":"123456789.012345"},{"name":"c","type":"numeric","value":"1234567890987654321.1234567890987654321"}]} {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":"-123.456"},{"name":"b","type":"double precision","value":"-123456789.012345"},{"name":"c","type":"numeric","value":"-1234567890987654321.1234567890987654321"}]} - {"action":"I","schema":"public","table":"table_others","columns":[{"name":"a","type":"character(10)","value":"teste "},{"name":"b","type":"character varying(30)","value":"testando"},{"name":"c","type":"text","value":"um texto longo"},{"name":"d","type":"bit varying(20)","value":"001110010101010"},{"name":"e","type":"timestamp without time zone","value":"Sat Nov 02 17:30:52 2013"},{"name":"f","type":"date","value":"02-04-2013"},{"name":"g","type":"boolean","value":true},{"name":"h","type":"json","value":"{ \"a\": 123 }"},{"name":"i","type":"tsvector","value":"'Old' 'Parr'"}]} {"action":"C"} -(10 rows) +(9 rows) + +SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '2'); + data +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"action":"B"} + {"action":"I","schema":"public","table":"table_integer","columns":[{"name":"a","type":"smallint","value":1},{"name":"b","type":"smallint","value":32767},{"name":"c","type":"integer","value":2147483647},{"name":"d","type":"bigint","value":9223372036854775807}]} + {"action":"I","schema":"public","table":"table_integer","columns":[{"name":"a","type":"smallint","value":2},{"name":"b","type":"smallint","value":-32768},{"name":"c","type":"integer","value":-2147483648},{"name":"d","type":"bigint","value":-9223372036854775808}]} + {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":null},{"name":"b","type":"double precision","value":null},{"name":"c","type":"numeric","value":null}]} + {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":null},{"name":"b","type":"double precision","value":null},{"name":"c","type":"numeric","value":null}]} + {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":null},{"name":"b","type":"double precision","value":null},{"name":"c","type":"numeric","value":null}]} + {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":123.456},{"name":"b","type":"double precision","value":123456789.012345},{"name":"c","type":"numeric","value":1234567890987654321.1234567890987654321}]} + {"action":"I","schema":"public","table":"table_decimal","columns":[{"name":"a","type":"real","value":-123.456},{"name":"b","type":"double precision","value":-123456789.012345},{"name":"c","type":"numeric","value":-1234567890987654321.1234567890987654321}]} + {"action":"C"} +(9 rows) SELECT 'stop' FROM pg_drop_replication_slot('regression_slot'); ?column? @@ -125,3 +183,5 @@ SELECT 'stop' FROM pg_drop_replication_slot('regression_slot'); stop (1 row) +DROP TABLE table_integer; +DROP TABLE table_decimal; diff --git a/sql/numeric_data_types_as_string.sql b/sql/numeric_data_types_as_string.sql index 73624f5..9a06cad 100644 --- a/sql/numeric_data_types_as_string.sql +++ b/sql/numeric_data_types_as_string.sql @@ -6,19 +6,8 @@ SET extra_float_digits = 0; -CREATE TABLE table_integer (a smallserial, b smallint, c int, d bigint); +CREATE TABLE table_integer (a smallserial, b smallint, c int, d bigint); CREATE TABLE table_decimal (a real, b double precision, c numeric); -CREATE TABLE table_others ( -a char(10), -b varchar(30), -c text, -d bit varying(20), -e timestamp, -f date, -g boolean, -h json, -i tsvector -); SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot', 'wal2json'); @@ -31,11 +20,14 @@ INSERT INTO table_decimal (a, b) VALUES('-Infinity', '-Infinity'); INSERT INTO table_decimal (a, b, c) VALUES('NaN', 'NaN', 'NaN'); INSERT INTO table_decimal (a, b, c) VALUES(123.456, 123456789.012345, 1234567890987654321.1234567890987654321); INSERT INTO table_decimal (a, b, c) VALUES(-123.456, -123456789.012345, -1234567890987654321.1234567890987654321); - -INSERT INTO table_others (a, b, c, d, e, f, g, h, i) VALUES('teste', 'testando', 'um texto longo', B'001110010101010', '2013-11-02 17:30:52', '2013-02-04', true, '{ "a": 123 }', 'Old Old Parr'::tsvector); COMMIT; SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '1', 'pretty-print', '1', 'numeric-data-types-as-string', '1'); -SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '2', 'pretty-print', '1', 'numeric-data-types-as-string', '1'); +SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '1', 'pretty-print', '1'); +SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '2', 'numeric-data-types-as-string', '1'); +SELECT data FROM pg_logical_slot_peek_changes('regression_slot', NULL, NULL, 'format-version', '2'); SELECT 'stop' FROM pg_drop_replication_slot('regression_slot'); + +DROP TABLE table_integer; +DROP TABLE table_decimal; diff --git a/wal2json.c b/wal2json.c index ae143f9..98cfd1d 100644 --- a/wal2json.c +++ b/wal2json.c @@ -1272,9 +1272,12 @@ tuple_to_stringinfo(LogicalDecodingContext *ctx, TupleDesc tupdesc, HeapTuple tu * Data types are printed with quotes unless they are number, true, * false, null, an array or an object. * - * The NaN and Infinity are not valid JSON numeric values. Hence, - * regardless of sign they are represented as null when - * data->numeric_data_types_as_string is not used. + * The NaN and Infinity are not valid JSON symbols. Hence, + * regardless of sign they are represented as the string null. + * + * Exception to this is when data->numeric_data_types_as_string is + * true. In this case, numbers (including NaN and Infinity values) + * are printed with quotes. */ switch (typid) { @@ -1873,10 +1876,10 @@ pg_decode_change_v1(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, static void pg_decode_write_value(LogicalDecodingContext *ctx, Datum value, bool isnull, Oid typid) { - JsonDecodingData *data; - Oid typoutfunc; - bool isvarlena; - char *outstr; + JsonDecodingData *data; + Oid typoutfunc; + bool isvarlena; + char *outstr; data = ctx->output_plugin_private; @@ -1913,9 +1916,12 @@ pg_decode_write_value(LogicalDecodingContext *ctx, Datum value, bool isnull, Oid * Data types are printed with quotes unless they are number, true, false, * null, an array or an object. * - * The NaN and Infinity are not valid JSON numeric values. Hence, - * regardless of sign they are represented as null when - * data->numeric_data_types_as_string is not used. + * The NaN an Infinity are not valid JSON symbols. Hence, regardless of + * sign they are represented as the string null. + * + * Exception to this is when data->numeric_data_types_as_string is + * true. In this case, numbers (including NaN and Infinity values) + * are printed with quotes. */ switch (typid) {