From 340c8bc40b66c00f730bf8eb8fd9973d06aeed46 Mon Sep 17 00:00:00 2001 From: Yijun Zhao Date: Mon, 5 Feb 2024 23:34:38 +0800 Subject: [PATCH] support geometry type for create table --- Cargo.lock | 129 +++++++++--- Cargo.toml | 1 + src/common/io/src/geometry.rs | 15 +- src/query/ast/src/ast/expr.rs | 4 + src/query/ast/src/parser/expr.rs | 2 + src/query/ast/src/parser/token.rs | 2 + .../ast/tests/it/testdata/expr-error.txt | 2 +- .../ast/tests/it/testdata/statement-error.txt | 6 +- .../formats/src/field_decoder/fast_values.rs | 2 +- .../formats/src/field_decoder/json_ast.rs | 2 +- src/query/formats/src/field_decoder/nested.rs | 2 +- .../src/field_decoder/separated_text.rs | 2 +- src/query/formats/src/field_encoder/values.rs | 13 +- src/query/functions/Cargo.toml | 2 + src/query/functions/src/scalars/geometry.rs | 183 +++++++++++++++++- .../functions/tests/it/scalars/geometry.rs | 85 +++++++- .../functions/tests/it/scalars/parser.rs | 1 + .../it/scalars/testdata/function_list.txt | 19 +- .../tests/it/scalars/testdata/geometry.txt | 159 +++++++++++++-- .../mysql/writers/query_result_writer.rs | 1 + src/query/settings/src/settings_default.rs | 7 + .../settings/src/settings_getter_setter.rs | 8 + src/query/sql/src/planner/binder/ddl/table.rs | 16 ++ .../sql/src/planner/semantic/type_check.rs | 1 + src/tests/sqlsmith/src/sql_gen/ddl.rs | 4 + src/tests/sqlsmith/src/sql_gen/expr.rs | 1 + .../02_0060_function_geometry.test | 34 +++- 27 files changed, 628 insertions(+), 75 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5a9b003d0d7e6..1cd9ae0b15ea6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1027,6 +1027,29 @@ dependencies = [ "syn 2.0.46", ] +[[package]] +name = "bindgen" +version = "0.68.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "726e4313eb6ec35d2730258ad4e15b547ee75d6afaa1361a922e78e59b7d8078" +dependencies = [ + "bitflags 2.4.0", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "log", + "peeking_take_while", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.46", + "which", +] + [[package]] name = "binstring" version = "0.1.1" @@ -1652,9 +1675,9 @@ checksum = "b8191fa7302e03607ff0e237d4246cc043ff5b3cb9409d995172ba3bea16b807" [[package]] name = "cmake" -version = "0.1.49" +version = "0.1.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db34956e100b30725f2eb215f90d4871051239535632f84fea3bc92722c66b7c" +checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" dependencies = [ "cc", ] @@ -2763,9 +2786,11 @@ dependencies = [ "num-traits", "once_cell", "ordered-float 4.2.0", + "proj", "rand 0.8.5", "regex", "roaring", + "scroll 0.12.0", "sha1", "sha2", "simdutf8", @@ -5047,23 +5072,12 @@ dependencies = [ [[package]] name = "errno" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" -dependencies = [ - "errno-dragonfly", - "libc", - "windows-sys 0.48.0", -] - -[[package]] -name = "errno-dragonfly" -version = "0.1.2" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" dependencies = [ - "cc", "libc", + "windows-sys 0.52.0", ] [[package]] @@ -5689,7 +5703,7 @@ dependencies = [ "geo-types", "geojson", "log", - "scroll", + "scroll 0.11.0", "serde_json", "thiserror", "wkt", @@ -6641,7 +6655,7 @@ dependencies = [ "gix-command", "gix-config-value 0.12.5", "parking_lot 0.12.1", - "rustix 0.38.11", + "rustix 0.38.28", "thiserror", ] @@ -6654,7 +6668,7 @@ dependencies = [ "gix-command", "gix-config-value 0.13.0", "parking_lot 0.12.1", - "rustix 0.38.11", + "rustix 0.38.28", "thiserror", ] @@ -8241,9 +8255,9 @@ checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" [[package]] name = "linux-raw-sys" -version = "0.4.5" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" [[package]] name = "lock_api" @@ -8852,7 +8866,7 @@ checksum = "06f19e4cfa0ab5a76b627cec2d81331c49b034988eaf302c3bafeada684eadef" dependencies = [ "base64 0.21.0", "bigdecimal", - "bindgen", + "bindgen 0.68.1", "bitflags 2.4.0", "bitvec", "btoi", @@ -10271,6 +10285,30 @@ dependencies = [ "human_format", ] +[[package]] +name = "proj" +version = "0.27.2" +source = "git+https://github.com/ariesdevil/proj?rev=51e1c60#51e1c605d1e91bcd9b13a19722685f891a31d8b6" +dependencies = [ + "geo-types", + "libc", + "num-traits", + "proj-sys", + "thiserror", +] + +[[package]] +name = "proj-sys" +version = "0.23.2" +source = "git+https://github.com/ariesdevil/proj?rev=51e1c60#51e1c605d1e91bcd9b13a19722685f891a31d8b6" +dependencies = [ + "bindgen 0.68.1", + "cmake", + "flate2", + "pkg-config", + "tar", +] + [[package]] name = "prometheus" version = "0.13.3" @@ -11306,15 +11344,15 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.11" +version = "0.38.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0c3dde1fc030af041adc40e79c0e7fbcf431dd24870053d187d7c66e4b87453" +checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316" dependencies = [ "bitflags 2.4.0", "errno", "libc", - "linux-raw-sys 0.4.5", - "windows-sys 0.48.0", + "linux-raw-sys 0.4.13", + "windows-sys 0.52.0", ] [[package]] @@ -11453,6 +11491,12 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04c565b551bafbef4157586fa379538366e4385d42082f255bfd96e4fe8519da" +[[package]] +name = "scroll" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ab8598aa408498679922eff7fa985c25d58a90771bd6be794434c5277eab1a6" + [[package]] name = "sct" version = "0.7.0" @@ -12471,6 +12515,17 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "tar" +version = "0.4.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b16afcea1f22891c49a00c751c7b63b2233284064f11a200fc624137c51e2ddb" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "target-lexicon" version = "0.12.7" @@ -13712,6 +13767,15 @@ dependencies = [ "windows-targets 0.48.0", ] +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", +] + [[package]] name = "windows-targets" version = "0.42.2" @@ -13954,6 +14018,17 @@ dependencies = [ "tap", ] +[[package]] +name = "xattr" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "914566e6413e7fa959cc394fb30e563ba80f3541fbd40816d4c05a0fc3f2a0f1" +dependencies = [ + "libc", + "linux-raw-sys 0.4.13", + "rustix 0.38.28", +] + [[package]] name = "xml-rs" version = "0.8.14" @@ -14023,7 +14098,7 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7cf70fdbc0de3f42b404f49b0d4686a82562254ea29ff0a155eef2f5430f4b0" dependencies = [ - "bindgen", + "bindgen 0.66.1", "cmake", ] diff --git a/Cargo.toml b/Cargo.toml index 43c18b6ed1a9e..0f8159c296405 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -261,3 +261,4 @@ sentry = { git = "https://github.com/getsentry/sentry-rust", rev = "6ef6d97" } micromarshal = { git = "https://github.com/ariesdevil/opensrv", rev = "6c96813" } async-backtrace = { git = "https://github.com/zhang2014/async-backtrace.git", rev = "dea4553" } geozero = { git = "https://github.com/georust/geozero", rev = "1d78b36" } +proj = { git = "https://github.com/ariesdevil/proj", rev = "51e1c60" } diff --git a/src/common/io/src/geometry.rs b/src/common/io/src/geometry.rs index 929226e95dede..c1ac894630bd6 100644 --- a/src/common/io/src/geometry.rs +++ b/src/common/io/src/geometry.rs @@ -19,22 +19,25 @@ use geozero::CoordDimensions; use geozero::ToWkb; use wkt::TryFromWkt; -pub fn parse_to_ewkb(buf: &[u8]) -> Result> { +pub fn parse_to_ewkb(buf: &[u8], srid: Option) -> Result> { let wkt = std::str::from_utf8(buf).map_err(|e| ErrorCode::GeometryError(e.to_string()))?; - let mut srid: Option = None; let input_wkt = wkt.trim().to_ascii_uppercase(); let parts: Vec<&str> = input_wkt.split(';').collect(); - if input_wkt.starts_with("SRID=") && parts.len() == 2 { - srid = Some(parts[0].replace("SRID=", "").parse()?); - } + let parsed_srid: Option = srid.or_else(|| { + if input_wkt.starts_with("SRID=") && parts.len() == 2 { + parts[0].replace("SRID=", "").parse().ok() + } else { + None + } + }); let geo_part = if parts.len() == 2 { parts[1] } else { parts[0] }; let geom: Geometry = Geometry::try_from_wkt_str(geo_part) .map_err(|e| ErrorCode::GeometryError(e.to_string()))?; - geom.to_ewkb(CoordDimensions::xy(), srid) + geom.to_ewkb(CoordDimensions::xy(), parsed_srid) .map_err(ErrorCode::from) } diff --git a/src/query/ast/src/ast/expr.rs b/src/query/ast/src/ast/expr.rs index 58826447a10cc..189c197d8cb59 100644 --- a/src/query/ast/src/ast/expr.rs +++ b/src/query/ast/src/ast/expr.rs @@ -323,6 +323,7 @@ pub enum TypeName { fields_type: Vec, }, Variant, + Geometry, Nullable(Box), NotNull(Box), } @@ -900,6 +901,9 @@ impl Display for TypeName { TypeName::Variant => { write!(f, "VARIANT")?; } + TypeName::Geometry => { + write!(f, "GEOMETRY")?; + } TypeName::Nullable(ty) => { write!(f, "{} NULL", ty)?; } diff --git a/src/query/ast/src/parser/expr.rs b/src/query/ast/src/parser/expr.rs index 1e26b781326fb..69521ee7ba81d 100644 --- a/src/query/ast/src/parser/expr.rs +++ b/src/query/ast/src/parser/expr.rs @@ -1586,6 +1586,7 @@ pub fn type_name(i: Input) -> IResult { rule! { ( STRING | VARCHAR | CHAR | CHARACTER | TEXT ) ~ ( "(" ~ ^#literal_u64 ~ ^")" )? }, ); let ty_variant = value(TypeName::Variant, rule! { VARIANT | JSON }); + let ty_geometry = value(TypeName::Geometry, rule! { GEOMETRY }); map_res( alt(( rule! { @@ -1614,6 +1615,7 @@ pub fn type_name(i: Input) -> IResult { | #ty_binary | #ty_string | #ty_variant + | #ty_geometry | #ty_nullable ) ~ #nullable? : "type name" }, )), diff --git a/src/query/ast/src/parser/token.rs b/src/query/ast/src/parser/token.rs index 49a0928ac09f6..43e4fc945a58d 100644 --- a/src/query/ast/src/parser/token.rs +++ b/src/query/ast/src/parser/token.rs @@ -597,6 +597,8 @@ pub enum TokenKind { FUSE, #[token("GENERATED", ignore(ascii_case))] GENERATED, + #[token("GEOMETRY", ignore(ascii_case))] + GEOMETRY, #[token("GLOBAL", ignore(ascii_case))] GLOBAL, #[token("GRAPH", ignore(ascii_case))] diff --git a/src/query/ast/tests/it/testdata/expr-error.txt b/src/query/ast/tests/it/testdata/expr-error.txt index f34d1499703a7..3b895619304e8 100644 --- a/src/query/ast/tests/it/testdata/expr-error.txt +++ b/src/query/ast/tests/it/testdata/expr-error.txt @@ -30,7 +30,7 @@ error: --> SQL:1:14 | 1 | CAST(col1 AS foo) - | ---- ^^^ unexpected `foo`, expecting `BOOL`, `FLOAT`, `BOOLEAN`, `FLOAT32`, `FLOAT64`, `BLOB`, `JSON`, `DOUBLE`, `LONGBLOB`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `MEDIUMBLOB`, `TINYBLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, or `NULLABLE` + | ---- ^^^ unexpected `foo`, expecting `BOOL`, `FLOAT`, `BOOLEAN`, `FLOAT32`, `FLOAT64`, `BLOB`, `JSON`, `DOUBLE`, `LONGBLOB`, `GEOMETRY`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `MEDIUMBLOB`, `TINYBLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, or `NULLABLE` | | | while parsing `CAST(... AS ...)` | while parsing expression diff --git a/src/query/ast/tests/it/testdata/statement-error.txt b/src/query/ast/tests/it/testdata/statement-error.txt index 164bb61aaaf35..61d2e28e9e556 100644 --- a/src/query/ast/tests/it/testdata/statement-error.txt +++ b/src/query/ast/tests/it/testdata/statement-error.txt @@ -29,7 +29,7 @@ error: --> SQL:1:19 | 1 | create table a (c varch) - | ------ - ^^^^^ unexpected `varch`, expecting `VARCHAR`, `CHAR`, `VARIANT`, `CHARACTER`, `VARBINARY`, `ARRAY`, `BINARY`, `MAP`, `DATE`, `STRING`, `FLOAT32`, `FLOAT64`, `DECIMAL`, `SMALLINT`, `DATETIME`, `NULLABLE`, `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT`, `DOUBLE`, `BITMAP`, `TUPLE`, `TIMESTAMP`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `TEXT`, or `JSON` + | ------ - ^^^^^ unexpected `varch`, expecting `VARCHAR`, `CHAR`, `VARIANT`, `CHARACTER`, `VARBINARY`, `ARRAY`, `BINARY`, `MAP`, `DATE`, `STRING`, `FLOAT32`, `FLOAT64`, `DECIMAL`, `SMALLINT`, `DATETIME`, `NULLABLE`, `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT`, `DOUBLE`, `BITMAP`, `TUPLE`, `TIMESTAMP`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `TEXT`, `JSON`, or `GEOMETRY` | | | | | while parsing ` [DEFAULT ] [AS () VIRTUAL] [AS () STORED] [COMMENT '']` | while parsing `CREATE [OR REPLACE] TABLE [IF NOT EXISTS] [.] [] []` @@ -42,7 +42,7 @@ error: --> SQL:1:25 | 1 | create table a (c tuple()) - | ------ - ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, `NULLABLE`, , or + | ------ - ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, `GEOMETRY`, `NULLABLE`, , or | | | | | | | while parsing type name | | while parsing ` [DEFAULT ] [AS () VIRTUAL] [AS () STORED] [COMMENT '']` @@ -70,7 +70,7 @@ error: --> SQL:1:38 | 1 | create table a (b tuple(c int, uint64)); - | ------ - ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, or `NULLABLE` + | ------ - ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, `GEOMETRY`, or `NULLABLE` | | | | | | | while parsing TUPLE( , ...) | | | while parsing type name diff --git a/src/query/formats/src/field_decoder/fast_values.rs b/src/query/formats/src/field_decoder/fast_values.rs index f6178af3dadf4..a53457d5afccb 100644 --- a/src/query/formats/src/field_decoder/fast_values.rs +++ b/src/query/formats/src/field_decoder/fast_values.rs @@ -489,7 +489,7 @@ impl FastFieldDecoderValues { ) -> Result<()> { let mut buf = Vec::new(); self.read_string_inner(reader, &mut buf, positions)?; - let geom = parse_to_ewkb(&buf)?; + let geom = parse_to_ewkb(&buf, None)?; column.put_slice(geom.as_bytes()); column.commit_row(); Ok(()) diff --git a/src/query/formats/src/field_decoder/json_ast.rs b/src/query/formats/src/field_decoder/json_ast.rs index a291973956c7b..dff24aa27cc8e 100644 --- a/src/query/formats/src/field_decoder/json_ast.rs +++ b/src/query/formats/src/field_decoder/json_ast.rs @@ -330,7 +330,7 @@ impl FieldJsonAstDecoder { fn read_geometry(&self, column: &mut BinaryColumnBuilder, value: &Value) -> Result<()> { match value { Value::String(v) => { - let geom = parse_to_ewkb(v.as_bytes())?; + let geom = parse_to_ewkb(v.as_bytes(), None)?; column.put_slice(&geom); column.commit_row(); Ok(()) diff --git a/src/query/formats/src/field_decoder/nested.rs b/src/query/formats/src/field_decoder/nested.rs index 98dbdb50fe2d1..271f81283177d 100644 --- a/src/query/formats/src/field_decoder/nested.rs +++ b/src/query/formats/src/field_decoder/nested.rs @@ -328,7 +328,7 @@ impl NestedValues { ) -> Result<()> { let mut buf = Vec::new(); self.read_string_inner(reader, &mut buf)?; - let geom = parse_to_ewkb(&buf)?; + let geom = parse_to_ewkb(&buf, None)?; column.put_slice(geom.as_bytes()); column.commit_row(); Ok(()) diff --git a/src/query/formats/src/field_decoder/separated_text.rs b/src/query/formats/src/field_decoder/separated_text.rs index b63c238c73e65..11825883c4337 100644 --- a/src/query/formats/src/field_decoder/separated_text.rs +++ b/src/query/formats/src/field_decoder/separated_text.rs @@ -332,7 +332,7 @@ impl SeparatedTextDecoder { } fn read_geometry(&self, column: &mut BinaryColumnBuilder, data: &[u8]) -> Result<()> { - let geom = parse_to_ewkb(data)?; + let geom = parse_to_ewkb(data, None)?; column.put_slice(geom.as_bytes()); column.commit_row(); Ok(()) diff --git a/src/query/formats/src/field_encoder/values.rs b/src/query/formats/src/field_encoder/values.rs index cccbef7e9d1f1..a082927d118cd 100644 --- a/src/query/formats/src/field_encoder/values.rs +++ b/src/query/formats/src/field_encoder/values.rs @@ -32,9 +32,9 @@ use databend_common_io::constants::NAN_BYTES_LOWER; use databend_common_io::constants::NAN_BYTES_SNAKE; use databend_common_io::constants::NULL_BYTES_UPPER; use databend_common_io::constants::TRUE_BYTES_NUM; -use geozero::wkb::Ewkb; -use geozero::CoordDimensions; -use geozero::ToWkb; +use geozero::wkb::FromWkb; +use geozero::wkb::WkbDialect; +use geozero::wkt::Ewkt; use lexical_core::ToLexical; use micromarshal::Marshal; use micromarshal::Unmarshal; @@ -296,10 +296,9 @@ impl FieldEncoderValues { in_nested: bool, ) { let v = unsafe { column.index_unchecked(row_index) }; - let s = Ewkb(v.to_vec()) - .to_ewkb(CoordDimensions::xy(), None) - .unwrap(); - self.write_string_inner(&s, out_buf, in_nested); + let mut data_cursor = std::io::Cursor::new(v); + let s = Ewkt::from_wkb(&mut data_cursor, WkbDialect::Ewkb).unwrap(); + self.write_string_inner(s.0.as_bytes(), out_buf, in_nested); } fn write_array( diff --git a/src/query/functions/Cargo.toml b/src/query/functions/Cargo.toml index ea9ef48f69ab3..21ff3875c815d 100644 --- a/src/query/functions/Cargo.toml +++ b/src/query/functions/Cargo.toml @@ -51,9 +51,11 @@ ordered-float = { workspace = true, features = [ "serde", "rand", ] } +proj = { version = "0.27.2", features = ["geo-types", "bundled_proj"] } rand = { workspace = true } regex = { workspace = true } roaring = "0.10.1" +scroll = "0.12.0" sha1 = "0.10.5" sha2 = "0.10.6" simdutf8 = "0.1.4" diff --git a/src/query/functions/src/scalars/geometry.rs b/src/query/functions/src/scalars/geometry.rs index 9a05c1168267d..9609b642e2520 100644 --- a/src/query/functions/src/scalars/geometry.rs +++ b/src/query/functions/src/scalars/geometry.rs @@ -12,26 +12,50 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::io::Read; + +use databend_common_exception::ErrorCode; +use databend_common_exception::Result; use databend_common_expression::types::geometry::GeometryType; +use databend_common_expression::types::Int32Type; use databend_common_expression::types::NumberType; use databend_common_expression::types::StringType; use databend_common_expression::types::F64; use databend_common_expression::vectorize_with_builder_1_arg; use databend_common_expression::vectorize_with_builder_2_arg; +use databend_common_expression::vectorize_with_builder_3_arg; use databend_common_expression::FunctionDomain; use databend_common_expression::FunctionRegistry; +use databend_common_io::parse_to_ewkb; use geo::Geometry; use geo::Point; use geozero::wkb::Ewkb; use geozero::CoordDimensions; +use geozero::GeozeroGeometry; +use geozero::ToGeo; use geozero::ToWkb; use geozero::ToWkt; +use proj::Proj; +use proj::Transform; +use scroll::Endian; +use scroll::IOread; + +const GEO_TYPE_ID_MASK: u32 = 0x2000_0000; pub fn register(registry: &mut FunctionRegistry) { - registry.register_aliases("st_makepoint", &["st_point"]); + // aliases + registry.register_aliases("st_makegeompoint", &["st_geom_point"]); + registry.register_aliases("st_geometryfromwkt", &[ + "st_geomfromwkt", + "st_geometryfromewkt", + "st_geomfromewkt", + "st_geometryfromtext", + "st_geomfromtext", + ]); + // functions registry.register_passthrough_nullable_2_arg::, NumberType, GeometryType, _, _>( - "st_makepoint", + "st_makegeompoint", |_,_, _| FunctionDomain::Full, vectorize_with_builder_2_arg::, NumberType, GeometryType>(|longitude, latitude, builder, ctx| { if let Some(validity) = &ctx.validity { @@ -53,6 +77,44 @@ pub fn register(registry: &mut FunctionRegistry) { }) ); + registry.register_passthrough_nullable_1_arg::( + "st_geometryfromwkt", + |_, _| FunctionDomain::MayThrow, + vectorize_with_builder_1_arg::(|wkt, builder, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(builder.len()) { + builder.commit_row(); + return; + } + } + match parse_to_ewkb(wkt.as_bytes(), None) { + Ok(data) => builder.put_slice(data.as_slice()), + Err(e) => ctx.set_error(builder.len(), e.to_string()), + } + builder.commit_row(); + }), + ); + + registry.register_passthrough_nullable_2_arg::( + "st_geometryfromwkt", + |_, _, _| FunctionDomain::MayThrow, + vectorize_with_builder_2_arg::( + |wkt, srid, builder, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(builder.len()) { + builder.commit_row(); + return; + } + } + match parse_to_ewkb(wkt.as_bytes(), Some(srid)) { + Ok(data) => builder.put_slice(data.as_slice()), + Err(e) => ctx.set_error(builder.len(), e.to_string()), + } + builder.commit_row(); + }, + ), + ); + registry.register_passthrough_nullable_1_arg::( "to_string", |_, _| FunctionDomain::MayThrow, @@ -74,4 +136,121 @@ pub fn register(registry: &mut FunctionRegistry) { builder.commit_row(); }), ); + + registry.register_passthrough_nullable_2_arg::( + "st_transform", + |_, _, _| FunctionDomain::MayThrow, + vectorize_with_builder_2_arg::( + |original, srid, builder, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(builder.len()) { + builder.commit_row(); + return; + } + } + + #[allow(unused_assignments)] + let mut from_srid = 0; + + // All representations of the geo types supported by crates under the GeoRust organization, have not implemented srid(). + // Currently, the srid() of all types returns the default value `None`, so we need to parse it manually here. + match read_ewkb_srid(&mut std::io::Cursor::new(original)) { + Ok(srid) if srid.is_some() => from_srid = srid.unwrap(), + _ => { + ctx.set_error( + builder.len(), + ErrorCode::GeometryError(" input geometry must has the correct SRID") + .to_string(), + ); + builder.commit_row(); + return; + } + } + + let result = { + Ewkb(original).to_geo().map_err(ErrorCode::from).and_then( + |mut geom: Geometry| { + Proj::new_known_crs(&make_crs(from_srid), &make_crs(srid), None) + .map_err(|e| ErrorCode::GeometryError(e.to_string())) + .and_then(|proj| { + geom.transform(&proj) + .map_err(|e| ErrorCode::GeometryError(e.to_string())) + .and_then(|_| { + geom.to_ewkb(geom.dims(), Some(srid)) + .map_err(ErrorCode::from) + }) + }) + }, + ) + }; + + match result { + Ok(data) => { + builder.put_slice(data.as_slice()); + } + Err(e) => { + ctx.set_error(builder.len(), e.to_string()); + } + } + + builder.commit_row(); + }, + ), + ); + + registry.register_passthrough_nullable_3_arg::( + "st_transform", + |_, _, _,_| FunctionDomain::MayThrow, + vectorize_with_builder_3_arg::( + |original, from_srid, to_srid, builder, ctx| { + if let Some(validity) = &ctx.validity { + if !validity.get_bit(builder.len()) { + builder.commit_row(); + return; + } + } + + let result = { + Proj::new_known_crs(&make_crs(from_srid), &make_crs(to_srid), None) + .map_err(|e| ErrorCode::GeometryError(e.to_string())) + .and_then(|proj| { + let old = Ewkb(original.to_vec()); + Ewkb(old.to_ewkb(old.dims(), Some(from_srid)).unwrap()).to_geo().map_err(ErrorCode::from).and_then(|mut geom| { + geom.transform(&proj).map_err(|e|ErrorCode::GeometryError(e.to_string())).and_then(|_| { + geom.to_ewkb(old.dims(), Some(to_srid)).map_err(ErrorCode::from) + }) + }) + }) + }; + match result { + Ok(data) => { + builder.put_slice(data.as_slice()); + } + Err(e) => { + ctx.set_error(builder.len(), e.to_string()); + } + } + + builder.commit_row(); + }, + ), + ); +} + +fn make_crs(srid: i32) -> String { + format!("EPSG:{}", srid) +} + +fn read_ewkb_srid(raw: &mut R) -> Result> { + let byte_order = raw.ioread::()?; + let is_little_endian = byte_order != 0; + let endian = Endian::from(is_little_endian); + let type_id = raw.ioread_with::(endian)?; + let srid = if type_id & GEO_TYPE_ID_MASK == GEO_TYPE_ID_MASK { + Some(raw.ioread_with::(endian)?) + } else { + None + }; + + Ok(srid) } diff --git a/src/query/functions/tests/it/scalars/geometry.rs b/src/query/functions/tests/it/scalars/geometry.rs index b1e48ecb75065..da783ddde5d8d 100644 --- a/src/query/functions/tests/it/scalars/geometry.rs +++ b/src/query/functions/tests/it/scalars/geometry.rs @@ -15,6 +15,8 @@ use std::io::Write; use databend_common_expression::types::Float64Type; +use databend_common_expression::types::Int32Type; +use databend_common_expression::types::StringType; use databend_common_expression::FromData; use goldenfile::Mint; @@ -27,21 +29,94 @@ fn test_geometry() { test_st_makepoint(file); test_to_string(file); + test_st_geometryfromwkt(file); + test_st_transform(file); } fn test_st_makepoint(file: &mut impl Write) { - run_ast(file, "st_makepoint(7.0, 8.0)", &[]); - run_ast(file, "st_makepoint(7.0, -8.0)", &[]); - run_ast(file, "st_makepoint(a, b)", &[ + run_ast(file, "st_makegeompoint(7.0, 8.0)", &[]); + run_ast(file, "st_makegeompoint(7.0, -8.0)", &[]); + run_ast(file, "st_makegeompoint(a, b)", &[ ("a", Float64Type::from_data(vec![1.0, 2.0, 3.0])), ("b", Float64Type::from_data(vec![1.0, 2.0, 3.0])), ]); } fn test_to_string(file: &mut impl Write) { - run_ast(file, "to_string(st_makepoint(7.0, -8.0))", &[]); - run_ast(file, "to_string(st_makepoint(a, b))", &[ + run_ast(file, "to_string(st_makegeompoint(7.0, -8.0))", &[]); + run_ast(file, "to_string(st_makegeompoint(a, b))", &[ ("a", Float64Type::from_data(vec![1.0, 2.0, 3.0])), ("b", Float64Type::from_data(vec![1.0, 2.0, 3.0])), ]); } + +fn test_st_geometryfromwkt(file: &mut impl Write) { + // without srid + run_ast( + file, + "st_geometryfromwkt('POINT(389866.35 5819003.03)')", + &[], + ); + + run_ast(file, "st_geometryfromwkt(a)", &[( + "a", + StringType::from_data(vec![ + "POINT(389866.35 5819003.03)", + "POINT(389866.35 5819003.03)", + "POINT(389866.35 5819003.03)", + ]), + )]); + + // with srid + run_ast( + file, + "st_geometryfromwkt('POINT(389866.35 5819003.03)', 32633)", + &[], + ); + + run_ast(file, "st_geometryfromwkt(a, b)", &[ + ( + "a", + StringType::from_data(vec![ + "POINT(389866.35 5819003.03)", + "POINT(389866.35 5819003.03)", + "POINT(389866.35 5819003.03)", + ]), + ), + ("b", Int32Type::from_data(vec![32633, 4326, 3857])), + ]); +} + +fn test_st_transform(file: &mut impl Write) { + // just to_srid + run_ast( + file, + "st_transform(st_geomfromwkt('POINT(389866.35 5819003.03)', 32633), 3857)", + &[], + ); + + run_ast(file, "st_transform(st_geomfromwkt(a, b), c)", &[ + ( + "a", + StringType::from_data(vec!["POINT(389866.35 5819003.03)"]), + ), + ("b", Int32Type::from_data(vec![32633])), + ("c", Int32Type::from_data(vec![3857])), + ]); + + // from_srid and to_srid + run_ast( + file, + "st_transform(st_geomfromwkt('POINT(4.500212 52.161170)'), 4326, 28992)", + &[], + ); + + run_ast(file, "st_transform(st_geomfromwkt(a), b, c)", &[ + ( + "a", + StringType::from_data(vec!["POINT(4.500212 52.161170)"]), + ), + ("b", Int32Type::from_data(vec![4326])), + ("c", Int32Type::from_data(vec![28992])), + ]); +} diff --git a/src/query/functions/tests/it/scalars/parser.rs b/src/query/functions/tests/it/scalars/parser.rs index db8934468c201..2c64f10c4f586 100644 --- a/src/query/functions/tests/it/scalars/parser.rs +++ b/src/query/functions/tests/it/scalars/parser.rs @@ -552,6 +552,7 @@ fn transform_data_type(target_type: databend_common_ast::ast::TypeName) -> DataT DataType::Nullable(Box::new(transform_data_type(*inner_type))) } databend_common_ast::ast::TypeName::Variant => DataType::Variant, + databend_common_ast::ast::TypeName::Geometry => DataType::Geometry, databend_common_ast::ast::TypeName::NotNull(inner_type) => transform_data_type(*inner_type), } } diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index d619bdab59926..94cc9bc246c57 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -28,7 +28,12 @@ remove_nullable -> assume_not_null rlike -> regexp sha1 -> sha siphash -> siphash64 -st_point -> st_makepoint +st_geom_point -> st_makegeompoint +st_geometryfromewkt -> st_geometryfromwkt +st_geometryfromtext -> st_geometryfromwkt +st_geomfromewkt -> st_geometryfromwkt +st_geomfromtext -> st_geometryfromwkt +st_geomfromwkt -> st_geometryfromwkt str_to_date -> to_date str_to_timestamp -> to_timestamp substr_utf8 -> substr @@ -3163,8 +3168,16 @@ Functions overloads: 17 sqrt(Float32 NULL) :: Float64 NULL 18 sqrt(Float64) :: Float64 19 sqrt(Float64 NULL) :: Float64 NULL -0 st_makepoint(Float64, Float64) :: Geometry -1 st_makepoint(Float64 NULL, Float64 NULL) :: Geometry NULL +0 st_geometryfromwkt(String) :: Geometry +1 st_geometryfromwkt(String NULL) :: Geometry NULL +2 st_geometryfromwkt(String, Int32) :: Geometry +3 st_geometryfromwkt(String NULL, Int32 NULL) :: Geometry NULL +0 st_makegeompoint(Float64, Float64) :: Geometry +1 st_makegeompoint(Float64 NULL, Float64 NULL) :: Geometry NULL +0 st_transform(Geometry, Int32) :: Geometry +1 st_transform(Geometry NULL, Int32 NULL) :: Geometry NULL +2 st_transform(Geometry, Int32, Int32) :: Geometry +3 st_transform(Geometry NULL, Int32 NULL, Int32 NULL) :: Geometry NULL 0 strcmp(String, String) :: Int8 1 strcmp(String NULL, String NULL) :: Int8 NULL 0 string_to_h3(String) :: UInt64 diff --git a/src/query/functions/tests/it/scalars/testdata/geometry.txt b/src/query/functions/tests/it/scalars/testdata/geometry.txt index 0344e83fc17d0..4a9631f5bdd0c 100644 --- a/src/query/functions/tests/it/scalars/testdata/geometry.txt +++ b/src/query/functions/tests/it/scalars/testdata/geometry.txt @@ -1,24 +1,24 @@ -ast : st_makepoint(7.0, 8.0) -raw expr : st_makepoint(7.0, 8.0) -checked expr : st_makepoint(to_float64(7.0_d128(2,1)), to_float64(8.0_d128(2,1))) +ast : st_makegeompoint(7.0, 8.0) +raw expr : st_makegeompoint(7.0, 8.0) +checked expr : st_makegeompoint(to_float64(7.0_d128(2,1)), to_float64(8.0_d128(2,1))) optimized expr : "POINT(7 8)" output type : Geometry output domain : Undefined output : '"POINT(7 8)"' -ast : st_makepoint(7.0, -8.0) -raw expr : st_makepoint(7.0, minus(8.0)) -checked expr : st_makepoint(to_float64(7.0_d128(2,1)), to_float64(minus(8.0_d128(2,1)))) +ast : st_makegeompoint(7.0, -8.0) +raw expr : st_makegeompoint(7.0, minus(8.0)) +checked expr : st_makegeompoint(to_float64(7.0_d128(2,1)), to_float64(minus(8.0_d128(2,1)))) optimized expr : "POINT(7 -8)" output type : Geometry output domain : Undefined output : '"POINT(7 -8)"' -ast : st_makepoint(a, b) -raw expr : st_makepoint(a::Float64, b::Float64) -checked expr : st_makepoint(a, b) +ast : st_makegeompoint(a, b) +raw expr : st_makegeompoint(a::Float64, b::Float64) +checked expr : st_makegeompoint(a, b) evaluation: +--------+---------+---------+----------------+ | | a | b | Output | @@ -39,18 +39,18 @@ evaluation (internal): +--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -ast : to_string(st_makepoint(7.0, -8.0)) -raw expr : to_string(st_makepoint(7.0, minus(8.0))) -checked expr : to_string(st_makepoint(to_float64(7.0_d128(2,1)), to_float64(minus(8.0_d128(2,1))))) +ast : to_string(st_makegeompoint(7.0, -8.0)) +raw expr : to_string(st_makegeompoint(7.0, minus(8.0))) +checked expr : to_string(st_makegeompoint(to_float64(7.0_d128(2,1)), to_float64(minus(8.0_d128(2,1))))) optimized expr : "POINT(7 -8)" output type : String output domain : {"POINT(7 -8)"..="POINT(7 -8)"} output : 'POINT(7 -8)' -ast : to_string(st_makepoint(a, b)) -raw expr : to_string(st_makepoint(a::Float64, b::Float64)) -checked expr : to_string(st_makepoint(a, b)) +ast : to_string(st_makegeompoint(a, b)) +raw expr : to_string(st_makegeompoint(a::Float64, b::Float64)) +checked expr : to_string(st_makegeompoint(a, b)) evaluation: +--------+---------+---------+--------------+ | | a | b | Output | @@ -71,3 +71,132 @@ evaluation (internal): +--------+-----------------------------------------------------------------------------------------------------------------+ +ast : st_geometryfromwkt('POINT(389866.35 5819003.03)') +raw expr : st_geometryfromwkt('POINT(389866.35 5819003.03)') +checked expr : st_geometryfromwkt("POINT(389866.35 5819003.03)") +optimized expr : "POINT(389866.35 5819003.03)" +output type : Geometry +output domain : Undefined +output : '"POINT(389866.35 5819003.03)"' + + +ast : st_geometryfromwkt(a) +raw expr : st_geometryfromwkt(a::String) +checked expr : st_geometryfromwkt(a) +optimized expr : "POINT(389866.35 5819003.03)" +evaluation: ++--------+-----------------------------------------------------------------+---------------------------------+ +| | a | Output | ++--------+-----------------------------------------------------------------+---------------------------------+ +| Type | String | Geometry | +| Domain | {"POINT(389866.35 5819003.03)"..="POINT(389866.35 5819003.03)"} | Undefined | +| Row 0 | 'POINT(389866.35 5819003.03)' | '"POINT(389866.35 5819003.03)"' | +| Row 1 | 'POINT(389866.35 5819003.03)' | '"POINT(389866.35 5819003.03)"' | +| Row 2 | 'POINT(389866.35 5819003.03)' | '"POINT(389866.35 5819003.03)"' | ++--------+-----------------------------------------------------------------+---------------------------------+ +evaluation (internal): ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | StringColumn { data: 0x504f494e54283338393836362e333520353831393030332e303329504f494e54283338393836362e333520353831393030332e303329504f494e54283338393836362e333520353831393030332e303329, offsets: [0, 27, 54, 81] } | +| Output | BinaryColumn { data: 0x010100000066666666a9cb17411f85ebc19e325641010100000066666666a9cb17411f85ebc19e325641010100000066666666a9cb17411f85ebc19e325641, offsets: [0, 21, 42, 63] } | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + +ast : st_geometryfromwkt('POINT(389866.35 5819003.03)', 32633) +raw expr : st_geometryfromwkt('POINT(389866.35 5819003.03)', 32633) +checked expr : st_geometryfromwkt("POINT(389866.35 5819003.03)", to_int32(32633_u16)) +optimized expr : "SRID=32633;POINT(389866.35 5819003.03)" +output type : Geometry +output domain : Undefined +output : '"SRID=32633;POINT(389866.35 5819003.03)"' + + +ast : st_geometryfromwkt(a, b) +raw expr : st_geometryfromwkt(a::String, b::Int32) +checked expr : st_geometryfromwkt(a, b) +optimized expr : st_geometryfromwkt("POINT(389866.35 5819003.03)", b) +evaluation: ++--------+-----------------------------------------------------------------+----------------+--------------------------------------------+ +| | a | b | Output | ++--------+-----------------------------------------------------------------+----------------+--------------------------------------------+ +| Type | String | Int32 | Geometry | +| Domain | {"POINT(389866.35 5819003.03)"..="POINT(389866.35 5819003.03)"} | {3857..=32633} | Unknown | +| Row 0 | 'POINT(389866.35 5819003.03)' | 32633 | '"SRID=32633;POINT(389866.35 5819003.03)"' | +| Row 1 | 'POINT(389866.35 5819003.03)' | 4326 | '"SRID=4326;POINT(389866.35 5819003.03)"' | +| Row 2 | 'POINT(389866.35 5819003.03)' | 3857 | '"SRID=3857;POINT(389866.35 5819003.03)"' | ++--------+-----------------------------------------------------------------+----------------+--------------------------------------------+ +evaluation (internal): ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| a | StringColumn { data: 0x504f494e54283338393836362e333520353831393030332e303329504f494e54283338393836362e333520353831393030332e303329504f494e54283338393836362e333520353831393030332e303329, offsets: [0, 27, 54, 81] } | +| b | Int32([32633, 4326, 3857]) | +| Output | BinaryColumn { data: 0x0101000020797f000066666666a9cb17411f85ebc19e3256410101000020e610000066666666a9cb17411f85ebc19e3256410101000020110f000066666666a9cb17411f85ebc19e325641, offsets: [0, 25, 50, 75] } | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + +ast : st_transform(st_geomfromwkt('POINT(389866.35 5819003.03)', 32633), 3857) +raw expr : st_transform(st_geomfromwkt('POINT(389866.35 5819003.03)', 32633), 3857) +checked expr : st_transform(st_geometryfromwkt("POINT(389866.35 5819003.03)", to_int32(32633_u16)), to_int32(3857_u16)) +optimized expr : "SRID=3857;POINT(1489140.0937656453 6892872.198680114)" +output type : Geometry +output domain : Undefined +output : '"SRID=3857;POINT(1489140.0937656453 6892872.198680114)"' + + +ast : st_transform(st_geomfromwkt(a, b), c) +raw expr : st_transform(st_geomfromwkt(a::String, b::Int32), c::Int32) +checked expr : st_transform(st_geometryfromwkt(a, b), c) +optimized expr : "SRID=3857;POINT(1489140.0937656453 6892872.198680114)" +evaluation: ++--------+-----------------------------------------------------------------+-----------------+---------------+-----------------------------------------------------------+ +| | a | b | c | Output | ++--------+-----------------------------------------------------------------+-----------------+---------------+-----------------------------------------------------------+ +| Type | String | Int32 | Int32 | Geometry | +| Domain | {"POINT(389866.35 5819003.03)"..="POINT(389866.35 5819003.03)"} | {32633..=32633} | {3857..=3857} | Undefined | +| Row 0 | 'POINT(389866.35 5819003.03)' | 32633 | 3857 | '"SRID=3857;POINT(1489140.0937656453 6892872.198680114)"' | ++--------+-----------------------------------------------------------------+-----------------+---------------+-----------------------------------------------------------+ +evaluation (internal): ++--------+---------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+---------------------------------------------------------------------------------------------------+ +| a | StringColumn { data: 0x504f494e54283338393836362e333520353831393030332e303329, offsets: [0, 27] } | +| b | Int32([32633]) | +| c | Int32([3857]) | +| Output | BinaryColumn { data: 0x0101000020110f00007c060118f4b83641cc2cb70c524b5a41, offsets: [0, 25] } | ++--------+---------------------------------------------------------------------------------------------------+ + + +ast : st_transform(st_geomfromwkt('POINT(4.500212 52.161170)'), 4326, 28992) +raw expr : st_transform(st_geomfromwkt('POINT(4.500212 52.161170)'), 4326, 28992) +checked expr : st_transform(st_geometryfromwkt("POINT(4.500212 52.161170)"), to_int32(4326_u16), to_int32(28992_u16)) +optimized expr : "SRID=28992;POINT(94308.67050247335 464038.16865487053)" +output type : Geometry +output domain : Undefined +output : '"SRID=28992;POINT(94308.67050247335 464038.16865487053)"' + + +ast : st_transform(st_geomfromwkt(a), b, c) +raw expr : st_transform(st_geomfromwkt(a::String), b::Int32, c::Int32) +checked expr : st_transform(st_geometryfromwkt(a), b, c) +optimized expr : "SRID=28992;POINT(94308.67050247335 464038.16865487053)" +evaluation: ++--------+-------------------------------------------------------------+---------------+-----------------+------------------------------------------------------------+ +| | a | b | c | Output | ++--------+-------------------------------------------------------------+---------------+-----------------+------------------------------------------------------------+ +| Type | String | Int32 | Int32 | Geometry | +| Domain | {"POINT(4.500212 52.161170)"..="POINT(4.500212 52.161170)"} | {4326..=4326} | {28992..=28992} | Undefined | +| Row 0 | 'POINT(4.500212 52.161170)' | 4326 | 28992 | '"SRID=28992;POINT(94308.67050247335 464038.16865487053)"' | ++--------+-------------------------------------------------------------+---------------+-----------------+------------------------------------------------------------+ +evaluation (internal): ++--------+-----------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------+ +| a | StringColumn { data: 0x504f494e5428342e3530303231322035322e31363131373029, offsets: [0, 25] } | +| b | Int32([4326]) | +| c | Int32([28992]) | +| Output | BinaryColumn { data: 0x0101000020407100002fcd60ba4a06f740c5dcb3ac98521c41, offsets: [0, 25] } | ++--------+-----------------------------------------------------------------------------------------------+ + + diff --git a/src/query/service/src/servers/mysql/writers/query_result_writer.rs b/src/query/service/src/servers/mysql/writers/query_result_writer.rs index cb13a6b49ffd7..81bb17d7afb99 100644 --- a/src/query/service/src/servers/mysql/writers/query_result_writer.rs +++ b/src/query/service/src/servers/mysql/writers/query_result_writer.rs @@ -176,6 +176,7 @@ impl<'a, W: AsyncWrite + Send + Unpin> DFQueryResultWriter<'a, W> { DataType::Bitmap => Ok(ColumnType::MYSQL_TYPE_VARCHAR), DataType::Tuple(_) => Ok(ColumnType::MYSQL_TYPE_VARCHAR), DataType::Variant => Ok(ColumnType::MYSQL_TYPE_VARCHAR), + DataType::Geometry => Ok(ColumnType::MYSQL_TYPE_GEOMETRY), DataType::Decimal(_) => Ok(ColumnType::MYSQL_TYPE_DECIMAL), _ => Err(ErrorCode::Unimplemented(format!( "Unsupported column type:{:?}", diff --git a/src/query/settings/src/settings_default.rs b/src/query/settings/src/settings_default.rs index 91c5c6c656e0f..a88daaa113b89 100644 --- a/src/query/settings/src/settings_default.rs +++ b/src/query/settings/src/settings_default.rs @@ -677,6 +677,13 @@ impl DefaultSettings { desc: "Cost factor of transmit via network for a data row", mode: SettingMode::Both, range: Some(SettingRange::Numeric(0..=u64::MAX)), + }), + // this setting will be removed when geometry type stable. + ("enable_geo_create_table", DefaultSettingValue{ + value: UserSettingValue::UInt64(0), + desc: "Create and alter table with geometry type", + mode:SettingMode::Both, + range: Some(SettingRange::Numeric(0..=1)) }) ]); diff --git a/src/query/settings/src/settings_getter_setter.rs b/src/query/settings/src/settings_getter_setter.rs index d31477d81327f..3308d9da8d232 100644 --- a/src/query/settings/src/settings_getter_setter.rs +++ b/src/query/settings/src/settings_getter_setter.rs @@ -602,4 +602,12 @@ impl Settings { pub fn get_cost_factor_network_per_row(&self) -> Result { self.try_get_u64("cost_factor_network_per_row") } + + pub fn get_enable_geo_create_table(&self) -> Result { + Ok(self.try_get_u64("enable_geo_create_table")? != 0) + } + + pub fn set_enable_geo_create_table(&self, val: bool) -> Result<()> { + self.try_set_u64("enable_geo_create_table", u64::from(val)) + } } diff --git a/src/query/sql/src/planner/binder/ddl/table.rs b/src/query/sql/src/planner/binder/ddl/table.rs index b52eab41ba2ec..d8ef0b00eb7d7 100644 --- a/src/query/sql/src/planner/binder/ddl/table.rs +++ b/src/query/sql/src/planner/binder/ddl/table.rs @@ -44,6 +44,7 @@ use databend_common_ast::ast::ShowTablesStmt; use databend_common_ast::ast::Statement; use databend_common_ast::ast::TableReference; use databend_common_ast::ast::TruncateTableStmt; +use databend_common_ast::ast::TypeName; use databend_common_ast::ast::UndropTableStmt; use databend_common_ast::ast::UriLocation; use databend_common_ast::ast::VacuumDropTableStmt; @@ -468,6 +469,21 @@ impl Binder { options.insert("TRANSIENT".to_owned(), "T".to_owned()); } + // todo(geometry): remove this when geometry stable. + if let Some(CreateTableSource::Columns(cols)) = &source { + if cols + .iter() + .any(|col| matches!(col.data_type, TypeName::Geometry)) + && !self.ctx.get_settings().get_enable_geo_create_table()? + { + return Err(ErrorCode::GeometryError( + "Create table using the geometry type is an experimental feature. \ + You can `set enable_geo_create_table=1` to use this feature. \ + We do not guarantee its compatibility until we doc this feature.", + )); + } + } + // Build table schema let (schema, field_comments) = match (&source, &as_query) { (Some(source), None) => { diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 34c2ff0346372..1bb3e4a631ec1 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -3880,6 +3880,7 @@ pub fn resolve_type_name(type_name: &TypeName, not_null: bool) -> Result TableDataType::Variant, + TypeName::Geometry => TableDataType::Geometry, TypeName::NotNull(inner_type) => { let data_type = resolve_type_name(inner_type, not_null)?; data_type.remove_nullable() diff --git a/src/tests/sqlsmith/src/sql_gen/ddl.rs b/src/tests/sqlsmith/src/sql_gen/ddl.rs index 13b3dc96ca082..7d99be585ab53 100644 --- a/src/tests/sqlsmith/src/sql_gen/ddl.rs +++ b/src/tests/sqlsmith/src/sql_gen/ddl.rs @@ -262,6 +262,10 @@ fn gen_default_expr(type_name: &TypeName) -> Expr { span: None, lit: Literal::String("null".to_string()), }, + TypeName::Geometry => Expr::Literal { + span: None, + lit: Literal::String("POINT(0, 0)".to_string()), + }, TypeName::Nullable(_) => Expr::Literal { span: None, lit: Literal::Null, diff --git a/src/tests/sqlsmith/src/sql_gen/expr.rs b/src/tests/sqlsmith/src/sql_gen/expr.rs index 93c020a0904c4..a4a8eafb6efed 100644 --- a/src/tests/sqlsmith/src/sql_gen/expr.rs +++ b/src/tests/sqlsmith/src/sql_gen/expr.rs @@ -686,6 +686,7 @@ fn convert_to_type_name(ty: &DataType) -> TypeName { DataType::Bitmap => TypeName::Bitmap, DataType::Variant => TypeName::Variant, DataType::Binary => TypeName::Binary, + DataType::Geometry => TypeName::Geometry, DataType::Nullable(box inner_ty) => { TypeName::Nullable(Box::new(convert_to_type_name(inner_ty))) } diff --git a/tests/sqllogictests/suites/query/02_function/02_0060_function_geometry.test b/tests/sqllogictests/suites/query/02_function/02_0060_function_geometry.test index 5412f6e52a088..ffe5d18f01fc4 100644 --- a/tests/sqllogictests/suites/query/02_function/02_0060_function_geometry.test +++ b/tests/sqllogictests/suites/query/02_function/02_0060_function_geometry.test @@ -8,16 +8,46 @@ statement ok INSERT INTO t1 VALUES(55.77922738, 37.63098076), (5.77922738, 7.63098076) query T -SELECT to_string(st_makepoint(lat, lon)) FROM t1 +SELECT to_string(st_makegeompoint(lat, lon)) FROM t1 ---- POINT(55.77922738 37.63098076) POINT(5.77922738 7.63098076) query T -SELECT st_makepoint(lat, lon)::String FROM t1 +SELECT st_makegeompoint(lat, lon)::String FROM t1 ---- POINT(55.77922738 37.63098076) POINT(5.77922738 7.63098076) statement ok DROP TABLE IF EXISTS t1 + +statement error 1801 +CREATE TABLE t1 (a int, g geometry) + +statement ok +SET enable_geo_create_table=1 + +statement ok +CREATE TABLE t1 (a int, g geometry) + +statement ok +INSERT INTO t1 VALUES(1, ST_GEOMFROMWKT('POINT(389866.35 5819003.03)', 32633)), (2, ST_GEOMFROMWKT('POINT(4.500212 52.161170)', 4326)) + +query IT +SELECT a, g FROM t1 +---- +1 SRID=32633;POINT(389866.35 5819003.03) +2 SRID=4326;POINT(4.500212 52.16117) + +query T +SELECT ST_TRANSFORM(g, 3857) AS transformed_geom FROM t1 +---- +SRID=3857;POINT(1489140.0937656453 6892872.198680114) +SRID=3857;POINT(500961.30830177927 6829319.683153116) + +statement ok +SET enable_geo_create_table=0 + +statement ok +DROP TABLE IF EXISTS t1