-
Notifications
You must be signed in to change notification settings - Fork 612
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(frontend): initially introduce table def sql purification #19949
Changes from all commits
0989795
79e6808
b6a4a39
d50570c
c852c43
4280204
8f47274
a688e58
ae6a230
4559392
9dfc0d1
f39063a
154c590
9209bb8
15371b8
5a21eb2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Test definition purification for `CREATE TABLE AS`, mainly focusing on the data types. | ||
# The table below is created without any explicit column definitions: every column name and | ||
# type comes from the `SELECT` list. | ||
statement ok | ||
create table ctas as select | ||
0::int as v0, | ||
1::decimal as v1, | ||
'2022-03-13 01:00:00'::timestamp as v2, | ||
'2022-03-13 01:00:00Z'::timestamptz as v3, | ||
array['foo', 'bar', 'null'] as v4, | ||
(1, (2, 3))::STRUCT<i BIGINT, j STRUCT<a BIGINT, b VARCHAR>> as v5, | ||
hex_to_int256('0x11') as v6, | ||
map{'key1': 1, 'key2': 2, 'key3': 3} as v7 | ||
; | ||
|
||
# The shown definition is expected to list every column with its data type. Note the expected | ||
# spellings below: `decimal` appears as NUMERIC, `VARCHAR` as CHARACTER VARYING, and | ||
# `timestamptz` as TIMESTAMP WITH TIME ZONE. | ||
query TT | ||
show create table ctas; | ||
---- | ||
public.ctas CREATE TABLE ctas (v0 INT, v1 NUMERIC, v2 TIMESTAMP, v3 TIMESTAMP WITH TIME ZONE, v4 CHARACTER VARYING[], v5 STRUCT<i BIGINT, j STRUCT<a BIGINT, b CHARACTER VARYING>>, v6 rw_int256, v7 MAP(CHARACTER VARYING,INT)) | ||
|
||
# Clean up the table created by this test. | ||
statement ok | ||
drop table ctas; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
// Copyright 2025 RisingWave Labs | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
use risingwave_common::bail; | ||
use risingwave_common::catalog::{ColumnCatalog, ColumnId}; | ||
use risingwave_pb::plan_common::column_desc::GeneratedOrDefaultColumn; | ||
use risingwave_sqlparser::ast::*; | ||
|
||
use crate::error::Result; | ||
use crate::utils::data_type::DataTypeToAst as _; | ||
|
||
/// Try to restore missing column definitions and constraints in the persisted table definition, | ||
/// if the schema of the table is derived from external systems (like schema registry) or it's | ||
/// created by `CREATE TABLE AS`. | ||
/// | ||
/// Returns error if restoring failed, or called on non-`TableType::Table`, or the persisted | ||
/// definition is invalid. | ||
pub fn try_purify_table_create_sql_ast( | ||
mut base: Statement, | ||
columns: &[ColumnCatalog], | ||
row_id_index: Option<usize>, | ||
pk_column_ids: &[ColumnId], | ||
) -> Result<Statement> { | ||
let Statement::CreateTable { | ||
columns: column_defs, | ||
constraints, | ||
wildcard_idx, | ||
.. | ||
} = &mut base | ||
else { | ||
bail!("expect `CREATE TABLE` statement, found: `{:?}`", base); | ||
}; | ||
|
||
// Filter out columns that are not defined by users in SQL. | ||
let defined_columns = columns.iter().filter(|c| c.is_user_defined()); | ||
|
||
// If all columns are defined... | ||
// - either the schema is fully specified by the user, | ||
// - the persisted definition is already purified. | ||
// No need to proceed. | ||
if !column_defs.is_empty() && wildcard_idx.is_none() { | ||
let defined_columns_len = defined_columns.count(); | ||
if column_defs.len() != defined_columns_len { | ||
bail /* unlikely */ !( | ||
xxchan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"column count mismatch: defined {} columns, but {} columns in the definition", | ||
defined_columns_len, | ||
column_defs.len() | ||
); | ||
} | ||
|
||
return Ok(base); | ||
} | ||
|
||
// Schema inferred. Now derive the missing columns and constraints. | ||
// First, remove the wildcard from the definition. | ||
*wildcard_idx = None; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We might need tests for wildcard, and generated column, include column ,etc There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
||
// Derive `ColumnDef` from `ColumnCatalog`. | ||
let mut purified_column_defs = Vec::new(); | ||
for column in defined_columns { | ||
// If the column is already defined in the persisted definition, keep it. | ||
if let Some(existing) = column_defs | ||
.iter() | ||
.find(|c| c.name.real_value() == column.name()) | ||
{ | ||
purified_column_defs.push(existing.clone()); | ||
continue; | ||
} | ||
|
||
if let Some(c) = &column.column_desc.generated_or_default_column { | ||
match c { | ||
GeneratedOrDefaultColumn::GeneratedColumn(_) => { | ||
unreachable!("generated column must not be inferred"); | ||
} | ||
GeneratedOrDefaultColumn::DefaultColumn(_) => { | ||
// TODO: convert `ExprNode` back to ast can be a bit tricky. | ||
// Fortunately, this case is rare as inferring default values is not | ||
// widely supported. | ||
bail /* unlikely */ !("purifying default value is not supported yet"); | ||
} | ||
} | ||
} | ||
|
||
let column_def = ColumnDef { | ||
name: column.name().into(), | ||
data_type: Some(column.data_type().to_ast()), | ||
collation: None, | ||
options: Vec::new(), // pk will be specified with table constraints | ||
}; | ||
purified_column_defs.push(column_def); | ||
} | ||
*column_defs = purified_column_defs; | ||
|
||
if row_id_index.is_none() { | ||
// User-defined primary key. | ||
let mut pk_columns = Vec::new(); | ||
|
||
for &id in pk_column_ids { | ||
let column = columns.iter().find(|c| c.column_id() == id).unwrap(); | ||
if !column.is_user_defined() { | ||
bail /* unlikely */ !( | ||
"primary key column \"{}\" is not user-defined", | ||
column.name() | ||
); | ||
} | ||
pk_columns.push(column.name().into()); | ||
} | ||
|
||
let pk_constraint = TableConstraint::Unique { | ||
name: None, | ||
columns: pk_columns, | ||
is_primary: true, | ||
}; | ||
|
||
// We don't support table constraints other than `PRIMARY KEY`, thus simply overwrite. | ||
BugenZhao marked this conversation as resolved.
Show resolved
Hide resolved
|
||
assert!( | ||
constraints.len() <= 1 | ||
&& constraints.iter().all(|c| matches!( | ||
c, | ||
TableConstraint::Unique { | ||
is_primary: true, | ||
.. | ||
} | ||
)), | ||
"unexpected table constraints: {constraints:?}", | ||
); | ||
|
||
*constraints = vec![pk_constraint]; | ||
} | ||
|
||
Ok(base) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,11 +33,14 @@ use risingwave_pb::plan_common::column_desc::GeneratedOrDefaultColumn; | |
use risingwave_pb::plan_common::DefaultColumnDesc; | ||
use risingwave_sqlparser::ast; | ||
use risingwave_sqlparser::parser::Parser; | ||
use thiserror_ext::AsReport as _; | ||
|
||
use super::purify::try_purify_table_create_sql_ast; | ||
use super::{ColumnId, DatabaseId, FragmentId, OwnedByUserCatalog, SchemaId, SinkId}; | ||
use crate::error::{ErrorCode, Result, RwError}; | ||
use crate::expr::ExprImpl; | ||
use crate::optimizer::property::Cardinality; | ||
use crate::session::current::notice_to_user; | ||
use crate::user::UserId; | ||
|
||
/// `TableCatalog` Includes full information about a table. | ||
|
@@ -273,6 +276,50 @@ impl TableVersion { | |
} | ||
} | ||
|
||
impl TableCatalog { | ||
/// Returns the SQL definition when the table was created, purified with best effort | ||
/// if it's a table. | ||
pub fn create_sql_purified(&self) -> String { | ||
self.create_sql_ast_purified() | ||
.map(|stmt| stmt.to_string()) | ||
.unwrap_or_else(|_| self.create_sql()) | ||
} | ||
Comment on lines
+282
to
+286
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Should we still return the result here? Not sure if we can guarantee this for all existing tables created previously. There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. By falling back to |
||
|
||
/// Returns the parsed SQL definition when the table was created, purified with best effort | ||
/// if it's a table. | ||
/// | ||
/// Returns error if it's invalid. | ||
pub fn create_sql_ast_purified(&self) -> Result<ast::Statement> { | ||
// Purification is only applicable to tables. | ||
if let TableType::Table = self.table_type() { | ||
let base = if self.definition.is_empty() { | ||
// Created by `CREATE TABLE AS`, create a skeleton `CREATE TABLE` statement. | ||
let name = ast::ObjectName(vec![self.name.as_str().into()]); | ||
ast::Statement::default_create_table(name) | ||
} else { | ||
self.create_sql_ast()? | ||
}; | ||
|
||
match try_purify_table_create_sql_ast( | ||
base, | ||
self.columns(), | ||
self.row_id_index, | ||
&self.pk_column_ids(), | ||
) { | ||
Ok(stmt) => return Ok(stmt), | ||
Err(e) => notice_to_user(format!( | ||
"error occurred while purifying definition for table \"{}\", \ | ||
results may be inaccurate: {}", | ||
self.name, | ||
e.as_report() | ||
)), | ||
} | ||
} | ||
|
||
self.create_sql_ast() | ||
} | ||
} | ||
|
||
impl TableCatalog { | ||
/// Get a reference to the table catalog's table id. | ||
pub fn id(&self) -> TableId { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After the change, is it possible (and is it meaningful?) to get the "original create sql" for CTAS and schema registry tables, and if so, how? It seems useful for debugging/development purposes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It can be done via inspecting the `definition` field of the metadata (like through the dashboard). However, I suppose it's indeed not that meaningful, as altering a CTAS will cause the purified definition to be persisted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: This is not the current behavior, as the purified definition is only used for `SHOW` but not as the base for schema change replanning.