From e0d66c098a31abebfd6e0a77cc9f84ebe759fef2 Mon Sep 17 00:00:00 2001 From: apstndb <803393+apstndb@users.noreply.github.com> Date: Wed, 18 Dec 2024 17:38:26 +0900 Subject: [PATCH] Support TVF (#209) * Support TVF * Update testdata * Do go test --update * Fix indentation * Rename to update_with_safe_ml_predict.sql * Fix doc comment * Simplify implementation --- ast/ast.go | 56 +++ ast/pos.go | 24 ++ ast/sql.go | 17 + parser.go | 60 +++ .../input/dml/update_with_safe_ml_predict.sql | 12 + .../input/query/select_from_change_stream.sql | 6 + .../query/select_from_ml_predict_hint.sql | 7 + .../query/select_from_ml_predict_simple.sql | 2 + .../select_from_ml_predict_textbison.sql | 15 + .../dml/update_with_safe_ml_predict.sql.txt | 205 ++++++++++ .../query/select_from_change_stream.sql.txt | 86 ++++ .../query/select_from_ml_predict_hint.sql.txt | 175 +++++++++ .../select_from_ml_predict_simple.sql.txt | 80 ++++ .../select_from_ml_predict_textbison.sql.txt | 370 ++++++++++++++++++ .../select_from_change_stream.sql.txt | 86 ++++ .../select_from_ml_predict_hint.sql.txt | 175 +++++++++ .../select_from_ml_predict_simple.sql.txt | 80 ++++ .../select_from_ml_predict_textbison.sql.txt | 370 ++++++++++++++++++ .../update_with_safe_ml_predict.sql.txt | 205 ++++++++++ 19 files changed, 2031 insertions(+) create mode 100644 testdata/input/dml/update_with_safe_ml_predict.sql create mode 100644 testdata/input/query/select_from_change_stream.sql create mode 100644 testdata/input/query/select_from_ml_predict_hint.sql create mode 100644 testdata/input/query/select_from_ml_predict_simple.sql create mode 100644 testdata/input/query/select_from_ml_predict_textbison.sql create mode 100644 testdata/result/dml/update_with_safe_ml_predict.sql.txt create mode 100644 testdata/result/query/select_from_change_stream.sql.txt create mode 100644 testdata/result/query/select_from_ml_predict_hint.sql.txt create mode 100644 testdata/result/query/select_from_ml_predict_simple.sql.txt create mode 100644 
testdata/result/query/select_from_ml_predict_textbison.sql.txt create mode 100644 testdata/result/statement/select_from_change_stream.sql.txt create mode 100644 testdata/result/statement/select_from_ml_predict_hint.sql.txt create mode 100644 testdata/result/statement/select_from_ml_predict_simple.sql.txt create mode 100644 testdata/result/statement/select_from_ml_predict_textbison.sql.txt create mode 100644 testdata/result/statement/update_with_safe_ml_predict.sql.txt diff --git a/ast/ast.go b/ast/ast.go index c731c2c3..2ec8b2a3 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -144,6 +144,7 @@ func (PathTableExpr) isTableExpr() {} func (SubQueryTableExpr) isTableExpr() {} func (ParenTableExpr) isTableExpr() {} func (Join) isTableExpr() {} +func (TVFCallExpr) isTableExpr() {} // JoinCondition represents condition part of JOIN expression. type JoinCondition interface { @@ -221,6 +222,15 @@ func (IntervalArg) isArg() {} func (SequenceArg) isArg() {} func (LambdaArg) isArg() {} +type TVFArg interface { + Node + isTVFArg() +} + +func (ExprArg) isTVFArg() {} +func (ModelArg) isTVFArg() {} +func (TableArg) isTVFArg() {} + // NullHandlingModifier represents IGNORE/RESPECT NULLS of aggregate function calls type NullHandlingModifier interface { Node @@ -1162,6 +1172,28 @@ type CallExpr struct { Hint *Hint // optional } +// TVFCallExpr is table-valued function call expression node. +// +// {{.Name | sql}}( +// {{.Args | sqlJoin ", "}} +// {{if len(.Args) > 0 && len(.NamedArgs) > 0}}, {{end}} +// {{.NamedArgs | sqlJoin ", "}} +// ) +// {{.Hint | sqlOpt}} +// {{.Sample | sqlOpt}} +type TVFCallExpr struct { + // pos = Name.pos + // end = (Sample ?? Hint).end || Rparen + 1 + + Rparen token.Pos // position of ")" + + Name *Path + Args []TVFArg + NamedArgs []*NamedArg + Hint *Hint // optional + Sample *TableSample // optional +} + // ExprArg is argument of the generic function call. 
// // {{.Expr | sql}} @@ -1212,6 +1244,30 @@ type LambdaArg struct { Expr Expr } +// ModelArg is argument of model function call. +// +// MODEL {{.Name | sql}} +type ModelArg struct { + // pos = Model + // end = Name.end + + Model token.Pos // position of "MODEL" keyword + + Name *Path +} + +// TableArg is TABLE table_name argument of table valued function call. +// +// TABLE {{.Name | sql}} +type TableArg struct { + // pos = Table + // end = Name.end + + Table token.Pos // position of "TABLE" keyword + + Name *Path +} + // NamedArg represents a name and value pair in named arguments // // {{.Name | sql}} => {{.Value | sql}} diff --git a/ast/pos.go b/ast/pos.go index bbfeb4ca..ee7bcc10 100644 --- a/ast/pos.go +++ b/ast/pos.go @@ -398,6 +398,14 @@ func (c *CallExpr) End() token.Pos { return posChoice(nodeEnd(wrapNode(c.Hint)), posAdd(c.Rparen, 1)) } +func (t *TVFCallExpr) Pos() token.Pos { + return nodePos(wrapNode(t.Name)) +} + +func (t *TVFCallExpr) End() token.Pos { + return posChoice(nodeEnd(nodeChoice(wrapNode(t.Sample), wrapNode(t.Hint))), posAdd(t.Rparen, 1)) +} + func (e *ExprArg) Pos() token.Pos { return nodePos(wrapNode(e.Expr)) } @@ -430,6 +438,22 @@ func (l *LambdaArg) End() token.Pos { return nodeEnd(wrapNode(l.Expr)) } +func (m *ModelArg) Pos() token.Pos { + return m.Model +} + +func (m *ModelArg) End() token.Pos { + return nodeEnd(wrapNode(m.Name)) +} + +func (t *TableArg) Pos() token.Pos { + return t.Table +} + +func (t *TableArg) End() token.Pos { + return nodeEnd(wrapNode(t.Name)) +} + func (n *NamedArg) Pos() token.Pos { return nodePos(wrapNode(n.Name)) } diff --git a/ast/sql.go b/ast/sql.go index 06e1e52b..2ce684c5 100644 --- a/ast/sql.go +++ b/ast/sql.go @@ -516,6 +516,15 @@ func (l *LambdaArg) SQL() string { l.Expr.SQL() } +func (c *TVFCallExpr) SQL() string { + return c.Name.SQL() + "(" + + sqlJoin(c.Args, ", ") + + strOpt(len(c.Args) > 0 && len(c.NamedArgs) > 0, ", ") + + sqlJoin(c.NamedArgs, ", ") + + ")" + + sqlOpt(" ", c.Hint, "") + sqlOpt(" ", c.Sample, "") +} + 
func (n *NamedArg) SQL() string { return n.Name.SQL() + " => " + n.Value.SQL() } func (i *IgnoreNulls) SQL() string { return "IGNORE NULLS" } @@ -542,6 +551,14 @@ func (s *SequenceArg) SQL() string { return "SEQUENCE " + s.Expr.SQL() } +func (s *ModelArg) SQL() string { + return "MODEL " + s.Name.SQL() +} + +func (s *TableArg) SQL() string { + return "TABLE " + s.Name.SQL() +} + func (*CountStarExpr) SQL() string { return "COUNT(*)" } diff --git a/parser.go b/parser.go index cf3f2235..510c3dc9 100644 --- a/parser.go +++ b/parser.go @@ -817,6 +817,9 @@ func (p *Parser) parseSimpleTableExpr() ast.TableExpr { if p.Token.Kind == token.TokenIdent { ids := p.parseIdentOrPath() + if p.Token.Kind == "(" { + return p.parseTVFCallExpr(ids) + } if len(ids) == 1 { return p.parseTableNameSuffix(ids[0]) } @@ -826,6 +829,63 @@ func (p *Parser) parseSimpleTableExpr() ast.TableExpr { panic(p.errorfAtToken(&p.Token, "expected token: (, UNNEST, , but: %s", p.Token.Kind)) } +func (p *Parser) parseTVFCallExpr(ids []*ast.Ident) *ast.TVFCallExpr { + p.expect("(") + + var args []ast.TVFArg + if p.Token.Kind != ")" { + for !p.lookaheadNamedArg() { + args = append(args, p.parseTVFArg()) + if p.Token.Kind != "," { + break + } + p.nextToken() + } + } + + var namedArgs []*ast.NamedArg + if p.lookaheadNamedArg() { + namedArgs = parseCommaSeparatedList(p, p.parseNamedArg) + } + + rparen := p.expect(")").Pos + hint := p.tryParseHint() + sample := p.tryParseTableSample() + + return &ast.TVFCallExpr{ + Rparen: rparen, + Name: &ast.Path{Idents: ids}, + Args: args, + NamedArgs: namedArgs, + Hint: hint, + Sample: sample, + } +} + +func (p *Parser) parseTVFArg() ast.TVFArg { + pos := p.Token.Pos + switch { + case p.Token.IsKeywordLike("TABLE"): + p.nextToken() + path := p.parsePath() + + return &ast.TableArg{ + Table: pos, + Name: path, + } + case p.Token.IsKeywordLike("MODEL"): + p.nextToken() + path := p.parsePath() + + return &ast.ModelArg{ + Model: pos, + Name: path, + } + default: + return 
p.parseExprArg() + } +} + func (p *Parser) parseIdentOrPath() []*ast.Ident { ids := []*ast.Ident{p.parseIdent()} for p.Token.Kind == "." { diff --git a/testdata/input/dml/update_with_safe_ml_predict.sql b/testdata/input/dml/update_with_safe_ml_predict.sql new file mode 100644 index 00000000..67f25010 --- /dev/null +++ b/testdata/input/dml/update_with_safe_ml_predict.sql @@ -0,0 +1,12 @@ +-- https://cloud.google.com/spanner/docs/backfill-embeddings?hl=en#backfill +UPDATE products +SET + products.desc_embed = ( + SELECT embeddings.values + FROM SAFE.ML.PREDICT( + MODEL gecko_model, + (SELECT products.description AS content) + ) @{remote_udf_max_rows_per_rpc=200} + ), + products.desc_embed_model_version = 3 +WHERE products.desc_embed IS NULL \ No newline at end of file diff --git a/testdata/input/query/select_from_change_stream.sql b/testdata/input/query/select_from_change_stream.sql new file mode 100644 index 00000000..a649601a --- /dev/null +++ b/testdata/input/query/select_from_change_stream.sql @@ -0,0 +1,6 @@ +SELECT ChangeRecord FROM READ_SingersNameStream ( + start_timestamp => "2022-05-01T09:00:00Z", + end_timestamp => NULL, + partition_token => NULL, + heartbeat_milliseconds => 10000 +) \ No newline at end of file diff --git a/testdata/input/query/select_from_ml_predict_hint.sql b/testdata/input/query/select_from_ml_predict_hint.sql new file mode 100644 index 00000000..2d17a97c --- /dev/null +++ b/testdata/input/query/select_from_ml_predict_hint.sql @@ -0,0 +1,7 @@ +-- https://cloud.google.com/spanner/docs/ml-tutorial-generative-ai?hl=en#register_a_generative_ai_model_in_a_schema +SELECT content +FROM ML.PREDICT( + MODEL TextBison, + (SELECT "Is 13 prime?" 
AS prompt), + STRUCT(256 AS maxOutputTokens, 0.2 AS temperature, 40 as topK, 0.95 AS topP) +) @{remote_udf_max_rows_per_rpc=1} \ No newline at end of file diff --git a/testdata/input/query/select_from_ml_predict_simple.sql b/testdata/input/query/select_from_ml_predict_simple.sql new file mode 100644 index 00000000..8220177e --- /dev/null +++ b/testdata/input/query/select_from_ml_predict_simple.sql @@ -0,0 +1,2 @@ +SELECT id, color, value +FROM ML.PREDICT(MODEL DiamondAppraise, TABLE Diamonds) \ No newline at end of file diff --git a/testdata/input/query/select_from_ml_predict_textbison.sql b/testdata/input/query/select_from_ml_predict_textbison.sql new file mode 100644 index 00000000..6352ba90 --- /dev/null +++ b/testdata/input/query/select_from_ml_predict_textbison.sql @@ -0,0 +1,15 @@ +SELECT product_id, product_name, content +FROM ML.PREDICT( + MODEL TextBison, + (SELECT + product.id as product_id, + product.name as product_name, + CONCAT("Is this product safe for infants?", "\n", + "Product Name: ", product.name, "\n", + "Category Name: ", category.name, "\n", + "Product Description:", product.description) AS prompt + FROM + Products AS product JOIN Categories AS category + ON product.category_id = category.id), + STRUCT(100 AS maxOutputTokens) +) @{remote_udf_max_rows_per_rpc=1} \ No newline at end of file diff --git a/testdata/result/dml/update_with_safe_ml_predict.sql.txt b/testdata/result/dml/update_with_safe_ml_predict.sql.txt new file mode 100644 index 00000000..423cf296 --- /dev/null +++ b/testdata/result/dml/update_with_safe_ml_predict.sql.txt @@ -0,0 +1,205 @@ +--- update_with_safe_ml_predict.sql +-- https://cloud.google.com/spanner/docs/backfill-embeddings?hl=en#backfill +UPDATE products +SET + products.desc_embed = ( + SELECT embeddings.values + FROM SAFE.ML.PREDICT( + MODEL gecko_model, + (SELECT products.description AS content) + ) @{remote_udf_max_rows_per_rpc=200} + ), + products.desc_embed_model_version = 3 +WHERE products.desc_embed IS NULL 
+--- AST +&ast.Update{ + Update: 76, + TableName: &ast.Ident{ + NamePos: 83, + NameEnd: 91, + Name: "products", + }, + Updates: []*ast.UpdateItem{ + &ast.UpdateItem{ + Path: []*ast.Ident{ + &ast.Ident{ + NamePos: 100, + NameEnd: 108, + Name: "products", + }, + &ast.Ident{ + NamePos: 109, + NameEnd: 119, + Name: "desc_embed", + }, + }, + DefaultExpr: &ast.DefaultExpr{ + DefaultPos: -1, + Expr: &ast.ScalarSubQuery{ + Lparen: 122, + Rparen: 333, + Query: &ast.Select{ + Select: 132, + Results: []ast.SelectItem{ + &ast.ExprSelectItem{ + Expr: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 139, + NameEnd: 149, + Name: "embeddings", + }, + &ast.Ident{ + NamePos: 150, + NameEnd: 156, + Name: "values", + }, + }, + }, + }, + }, + From: &ast.From{ + From: 165, + Source: &ast.TVFCallExpr{ + Rparen: 292, + Name: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 170, + NameEnd: 174, + Name: "SAFE", + }, + &ast.Ident{ + NamePos: 175, + NameEnd: 177, + Name: "ML", + }, + &ast.Ident{ + NamePos: 178, + NameEnd: 185, + Name: "PREDICT", + }, + }, + }, + Args: []ast.TVFArg{ + &ast.ModelArg{ + Model: 203, + Name: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 209, + NameEnd: 220, + Name: "gecko_model", + }, + }, + }, + }, + &ast.ExprArg{ + Expr: &ast.ScalarSubQuery{ + Lparen: 238, + Rparen: 277, + Query: &ast.Select{ + Select: 239, + Results: []ast.SelectItem{ + &ast.Alias{ + Expr: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 246, + NameEnd: 254, + Name: "products", + }, + &ast.Ident{ + NamePos: 255, + NameEnd: 266, + Name: "description", + }, + }, + }, + As: &ast.AsAlias{ + As: 267, + Alias: &ast.Ident{ + NamePos: 270, + NameEnd: 277, + Name: "content", + }, + }, + }, + }, + }, + }, + }, + }, + Hint: &ast.Hint{ + Atmark: 294, + Rbrace: 327, + Records: []*ast.HintRecord{ + &ast.HintRecord{ + Key: &ast.Ident{ + NamePos: 296, + NameEnd: 323, + Name: "remote_udf_max_rows_per_rpc", + }, + Value: &ast.IntLiteral{ + ValuePos: 324, + ValueEnd: 
327, + Base: 10, + Value: "200", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + &ast.UpdateItem{ + Path: []*ast.Ident{ + &ast.Ident{ + NamePos: 340, + NameEnd: 348, + Name: "products", + }, + &ast.Ident{ + NamePos: 349, + NameEnd: 373, + Name: "desc_embed_model_version", + }, + }, + DefaultExpr: &ast.DefaultExpr{ + DefaultPos: -1, + Expr: &ast.IntLiteral{ + ValuePos: 376, + ValueEnd: 377, + Base: 10, + Value: "3", + }, + }, + }, + }, + Where: &ast.Where{ + Where: 378, + Expr: &ast.IsNullExpr{ + Null: 407, + Left: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 384, + NameEnd: 392, + Name: "products", + }, + &ast.Ident{ + NamePos: 393, + NameEnd: 403, + Name: "desc_embed", + }, + }, + }, + }, + }, +} + +--- SQL +UPDATE products SET products.desc_embed = (SELECT embeddings.values FROM SAFE.ML.PREDICT(MODEL gecko_model, (SELECT products.description AS content)) @{remote_udf_max_rows_per_rpc=200}), products.desc_embed_model_version = 3 WHERE products.desc_embed IS NULL diff --git a/testdata/result/query/select_from_change_stream.sql.txt b/testdata/result/query/select_from_change_stream.sql.txt new file mode 100644 index 00000000..c94185fd --- /dev/null +++ b/testdata/result/query/select_from_change_stream.sql.txt @@ -0,0 +1,86 @@ +--- select_from_change_stream.sql +SELECT ChangeRecord FROM READ_SingersNameStream ( + start_timestamp => "2022-05-01T09:00:00Z", + end_timestamp => NULL, + partition_token => NULL, + heartbeat_milliseconds => 10000 +) +--- AST +&ast.QueryStatement{ + Query: &ast.Select{ + Results: []ast.SelectItem{ + &ast.ExprSelectItem{ + Expr: &ast.Ident{ + NamePos: 7, + NameEnd: 19, + Name: "ChangeRecord", + }, + }, + }, + From: &ast.From{ + From: 20, + Source: &ast.TVFCallExpr{ + Rparen: 181, + Name: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 25, + NameEnd: 47, + Name: "READ_SingersNameStream", + }, + }, + }, + NamedArgs: []*ast.NamedArg{ + &ast.NamedArg{ + Name: &ast.Ident{ + NamePos: 52, + NameEnd: 67, + Name: 
"start_timestamp", + }, + Value: &ast.StringLiteral{ + ValuePos: 71, + ValueEnd: 93, + Value: "2022-05-01T09:00:00Z", + }, + }, + &ast.NamedArg{ + Name: &ast.Ident{ + NamePos: 97, + NameEnd: 110, + Name: "end_timestamp", + }, + Value: &ast.NullLiteral{ + Null: 114, + }, + }, + &ast.NamedArg{ + Name: &ast.Ident{ + NamePos: 122, + NameEnd: 137, + Name: "partition_token", + }, + Value: &ast.NullLiteral{ + Null: 141, + }, + }, + &ast.NamedArg{ + Name: &ast.Ident{ + NamePos: 149, + NameEnd: 171, + Name: "heartbeat_milliseconds", + }, + Value: &ast.IntLiteral{ + ValuePos: 175, + ValueEnd: 180, + Base: 10, + Value: "10000", + }, + }, + }, + }, + }, + }, +} + +--- SQL +SELECT ChangeRecord FROM READ_SingersNameStream(start_timestamp => "2022-05-01T09:00:00Z", end_timestamp => NULL, partition_token => NULL, heartbeat_milliseconds => 10000) diff --git a/testdata/result/query/select_from_ml_predict_hint.sql.txt b/testdata/result/query/select_from_ml_predict_hint.sql.txt new file mode 100644 index 00000000..cc99b9a8 --- /dev/null +++ b/testdata/result/query/select_from_ml_predict_hint.sql.txt @@ -0,0 +1,175 @@ +--- select_from_ml_predict_hint.sql +-- https://cloud.google.com/spanner/docs/ml-tutorial-generative-ai?hl=en#register_a_generative_ai_model_in_a_schema +SELECT content +FROM ML.PREDICT( + MODEL TextBison, + (SELECT "Is 13 prime?" 
AS prompt), + STRUCT(256 AS maxOutputTokens, 0.2 AS temperature, 40 as topK, 0.95 AS topP) +) @{remote_udf_max_rows_per_rpc=1} +--- AST +&ast.QueryStatement{ + Query: &ast.Select{ + Select: 116, + Results: []ast.SelectItem{ + &ast.ExprSelectItem{ + Expr: &ast.Ident{ + NamePos: 123, + NameEnd: 130, + Name: "content", + }, + }, + }, + From: &ast.From{ + From: 131, + Source: &ast.TVFCallExpr{ + Rparen: 289, + Name: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 136, + NameEnd: 138, + Name: "ML", + }, + &ast.Ident{ + NamePos: 139, + NameEnd: 146, + Name: "PREDICT", + }, + }, + }, + Args: []ast.TVFArg{ + &ast.ModelArg{ + Model: 152, + Name: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 158, + NameEnd: 167, + Name: "TextBison", + }, + }, + }, + }, + &ast.ExprArg{ + Expr: &ast.ScalarSubQuery{ + Lparen: 173, + Rparen: 205, + Query: &ast.Select{ + Select: 174, + Results: []ast.SelectItem{ + &ast.Alias{ + Expr: &ast.StringLiteral{ + ValuePos: 181, + ValueEnd: 195, + Value: "Is 13 prime?", + }, + As: &ast.AsAlias{ + As: 196, + Alias: &ast.Ident{ + NamePos: 199, + NameEnd: 205, + Name: "prompt", + }, + }, + }, + }, + }, + }, + }, + &ast.ExprArg{ + Expr: &ast.TypelessStructLiteral{ + Struct: 212, + Rparen: 287, + Values: []ast.TypelessStructLiteralArg{ + &ast.Alias{ + Expr: &ast.IntLiteral{ + ValuePos: 219, + ValueEnd: 222, + Base: 10, + Value: "256", + }, + As: &ast.AsAlias{ + As: 223, + Alias: &ast.Ident{ + NamePos: 226, + NameEnd: 241, + Name: "maxOutputTokens", + }, + }, + }, + &ast.Alias{ + Expr: &ast.FloatLiteral{ + ValuePos: 243, + ValueEnd: 246, + Value: "0.2", + }, + As: &ast.AsAlias{ + As: 247, + Alias: &ast.Ident{ + NamePos: 250, + NameEnd: 261, + Name: "temperature", + }, + }, + }, + &ast.Alias{ + Expr: &ast.IntLiteral{ + ValuePos: 263, + ValueEnd: 265, + Base: 10, + Value: "40", + }, + As: &ast.AsAlias{ + As: 266, + Alias: &ast.Ident{ + NamePos: 269, + NameEnd: 273, + Name: "topK", + }, + }, + }, + &ast.Alias{ + Expr: &ast.FloatLiteral{ + 
ValuePos: 275, + ValueEnd: 279, + Value: "0.95", + }, + As: &ast.AsAlias{ + As: 280, + Alias: &ast.Ident{ + NamePos: 283, + NameEnd: 287, + Name: "topP", + }, + }, + }, + }, + }, + }, + }, + Hint: &ast.Hint{ + Atmark: 291, + Rbrace: 322, + Records: []*ast.HintRecord{ + &ast.HintRecord{ + Key: &ast.Ident{ + NamePos: 293, + NameEnd: 320, + Name: "remote_udf_max_rows_per_rpc", + }, + Value: &ast.IntLiteral{ + ValuePos: 321, + ValueEnd: 322, + Base: 10, + Value: "1", + }, + }, + }, + }, + }, + }, + }, +} + +--- SQL +SELECT content FROM ML.PREDICT(MODEL TextBison, (SELECT "Is 13 prime\?" AS prompt), STRUCT(256 AS maxOutputTokens, 0.2 AS temperature, 40 AS topK, 0.95 AS topP)) @{remote_udf_max_rows_per_rpc=1} diff --git a/testdata/result/query/select_from_ml_predict_simple.sql.txt b/testdata/result/query/select_from_ml_predict_simple.sql.txt new file mode 100644 index 00000000..06c7e7df --- /dev/null +++ b/testdata/result/query/select_from_ml_predict_simple.sql.txt @@ -0,0 +1,80 @@ +--- select_from_ml_predict_simple.sql +SELECT id, color, value +FROM ML.PREDICT(MODEL DiamondAppraise, TABLE Diamonds) +--- AST +&ast.QueryStatement{ + Query: &ast.Select{ + Results: []ast.SelectItem{ + &ast.ExprSelectItem{ + Expr: &ast.Ident{ + NamePos: 7, + NameEnd: 9, + Name: "id", + }, + }, + &ast.ExprSelectItem{ + Expr: &ast.Ident{ + NamePos: 11, + NameEnd: 16, + Name: "color", + }, + }, + &ast.ExprSelectItem{ + Expr: &ast.Ident{ + NamePos: 18, + NameEnd: 23, + Name: "value", + }, + }, + }, + From: &ast.From{ + From: 24, + Source: &ast.TVFCallExpr{ + Rparen: 77, + Name: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 29, + NameEnd: 31, + Name: "ML", + }, + &ast.Ident{ + NamePos: 32, + NameEnd: 39, + Name: "PREDICT", + }, + }, + }, + Args: []ast.TVFArg{ + &ast.ModelArg{ + Model: 40, + Name: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 46, + NameEnd: 61, + Name: "DiamondAppraise", + }, + }, + }, + }, + &ast.TableArg{ + Table: 63, + Name: &ast.Path{ + Idents: 
[]*ast.Ident{ + &ast.Ident{ + NamePos: 69, + NameEnd: 77, + Name: "Diamonds", + }, + }, + }, + }, + }, + }, + }, + }, +} + +--- SQL +SELECT id, color, value FROM ML.PREDICT(MODEL DiamondAppraise, TABLE Diamonds) diff --git a/testdata/result/query/select_from_ml_predict_textbison.sql.txt b/testdata/result/query/select_from_ml_predict_textbison.sql.txt new file mode 100644 index 00000000..1d267c79 --- /dev/null +++ b/testdata/result/query/select_from_ml_predict_textbison.sql.txt @@ -0,0 +1,370 @@ +--- select_from_ml_predict_textbison.sql +SELECT product_id, product_name, content +FROM ML.PREDICT( + MODEL TextBison, + (SELECT + product.id as product_id, + product.name as product_name, + CONCAT("Is this product safe for infants?", "\n", + "Product Name: ", product.name, "\n", + "Category Name: ", category.name, "\n", + "Product Description:", product.description) AS prompt + FROM + Products AS product JOIN Categories AS category + ON product.category_id = category.id), + STRUCT(100 AS maxOutputTokens) +) @{remote_udf_max_rows_per_rpc=1} +--- AST +&ast.QueryStatement{ + Query: &ast.Select{ + Results: []ast.SelectItem{ + &ast.ExprSelectItem{ + Expr: &ast.Ident{ + NamePos: 7, + NameEnd: 17, + Name: "product_id", + }, + }, + &ast.ExprSelectItem{ + Expr: &ast.Ident{ + NamePos: 19, + NameEnd: 31, + Name: "product_name", + }, + }, + &ast.ExprSelectItem{ + Expr: &ast.Ident{ + NamePos: 33, + NameEnd: 40, + Name: "content", + }, + }, + }, + From: &ast.From{ + From: 41, + Source: &ast.TVFCallExpr{ + Rparen: 580, + Name: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 46, + NameEnd: 48, + Name: "ML", + }, + &ast.Ident{ + NamePos: 49, + NameEnd: 56, + Name: "PREDICT", + }, + }, + }, + Args: []ast.TVFArg{ + &ast.ModelArg{ + Model: 62, + Name: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 68, + NameEnd: 77, + Name: "TextBison", + }, + }, + }, + }, + &ast.ExprArg{ + Expr: &ast.ScalarSubQuery{ + Lparen: 83, + Rparen: 542, + Query: &ast.Select{ + Select: 84, + 
Results: []ast.SelectItem{ + &ast.Alias{ + Expr: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 100, + NameEnd: 107, + Name: "product", + }, + &ast.Ident{ + NamePos: 108, + NameEnd: 110, + Name: "id", + }, + }, + }, + As: &ast.AsAlias{ + As: 111, + Alias: &ast.Ident{ + NamePos: 114, + NameEnd: 124, + Name: "product_id", + }, + }, + }, + &ast.Alias{ + Expr: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 135, + NameEnd: 142, + Name: "product", + }, + &ast.Ident{ + NamePos: 143, + NameEnd: 147, + Name: "name", + }, + }, + }, + As: &ast.AsAlias{ + As: 148, + Alias: &ast.Ident{ + NamePos: 151, + NameEnd: 163, + Name: "product_name", + }, + }, + }, + &ast.Alias{ + Expr: &ast.CallExpr{ + Rparen: 393, + Func: &ast.Ident{ + NamePos: 174, + NameEnd: 180, + Name: "CONCAT", + }, + Args: []ast.Arg{ + &ast.ExprArg{ + Expr: &ast.StringLiteral{ + ValuePos: 181, + ValueEnd: 216, + Value: "Is this product safe for infants?", + }, + }, + &ast.ExprArg{ + Expr: &ast.StringLiteral{ + ValuePos: 218, + ValueEnd: 222, + Value: "\n", + }, + }, + &ast.ExprArg{ + Expr: &ast.StringLiteral{ + ValuePos: 240, + ValueEnd: 256, + Value: "Product Name: ", + }, + }, + &ast.ExprArg{ + Expr: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 258, + NameEnd: 265, + Name: "product", + }, + &ast.Ident{ + NamePos: 266, + NameEnd: 270, + Name: "name", + }, + }, + }, + }, + &ast.ExprArg{ + Expr: &ast.StringLiteral{ + ValuePos: 272, + ValueEnd: 276, + Value: "\n", + }, + }, + &ast.ExprArg{ + Expr: &ast.StringLiteral{ + ValuePos: 294, + ValueEnd: 311, + Value: "Category Name: ", + }, + }, + &ast.ExprArg{ + Expr: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 313, + NameEnd: 321, + Name: "category", + }, + &ast.Ident{ + NamePos: 322, + NameEnd: 326, + Name: "name", + }, + }, + }, + }, + &ast.ExprArg{ + Expr: &ast.StringLiteral{ + ValuePos: 328, + ValueEnd: 332, + Value: "\n", + }, + }, + &ast.ExprArg{ + Expr: &ast.StringLiteral{ + ValuePos: 350, + ValueEnd: 372, + 
Value: "Product Description:", + }, + }, + &ast.ExprArg{ + Expr: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 374, + NameEnd: 381, + Name: "product", + }, + &ast.Ident{ + NamePos: 382, + NameEnd: 393, + Name: "description", + }, + }, + }, + }, + }, + }, + As: &ast.AsAlias{ + As: 395, + Alias: &ast.Ident{ + NamePos: 398, + NameEnd: 404, + Name: "prompt", + }, + }, + }, + }, + From: &ast.From{ + From: 410, + Source: &ast.Join{ + Op: "INNER JOIN", + Left: &ast.TableName{ + Table: &ast.Ident{ + NamePos: 424, + NameEnd: 432, + Name: "Products", + }, + As: &ast.AsAlias{ + As: 433, + Alias: &ast.Ident{ + NamePos: 436, + NameEnd: 443, + Name: "product", + }, + }, + }, + Right: &ast.TableName{ + Table: &ast.Ident{ + NamePos: 449, + NameEnd: 459, + Name: "Categories", + }, + As: &ast.AsAlias{ + As: 460, + Alias: &ast.Ident{ + NamePos: 463, + NameEnd: 471, + Name: "category", + }, + }, + }, + Cond: &ast.On{ + On: 506, + Expr: &ast.BinaryExpr{ + Op: "=", + Left: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 509, + NameEnd: 516, + Name: "product", + }, + &ast.Ident{ + NamePos: 517, + NameEnd: 528, + Name: "category_id", + }, + }, + }, + Right: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 531, + NameEnd: 539, + Name: "category", + }, + &ast.Ident{ + NamePos: 540, + NameEnd: 542, + Name: "id", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + &ast.ExprArg{ + Expr: &ast.TypelessStructLiteral{ + Struct: 549, + Rparen: 578, + Values: []ast.TypelessStructLiteralArg{ + &ast.Alias{ + Expr: &ast.IntLiteral{ + ValuePos: 556, + ValueEnd: 559, + Base: 10, + Value: "100", + }, + As: &ast.AsAlias{ + As: 560, + Alias: &ast.Ident{ + NamePos: 563, + NameEnd: 578, + Name: "maxOutputTokens", + }, + }, + }, + }, + }, + }, + }, + Hint: &ast.Hint{ + Atmark: 582, + Rbrace: 613, + Records: []*ast.HintRecord{ + &ast.HintRecord{ + Key: &ast.Ident{ + NamePos: 584, + NameEnd: 611, + Name: "remote_udf_max_rows_per_rpc", + }, + Value: &ast.IntLiteral{ + 
ValuePos: 612, + ValueEnd: 613, + Base: 10, + Value: "1", + }, + }, + }, + }, + }, + }, + }, +} + +--- SQL +SELECT product_id, product_name, content FROM ML.PREDICT(MODEL TextBison, (SELECT product.id AS product_id, product.name AS product_name, CONCAT("Is this product safe for infants\?", "\n", "Product Name: ", product.name, "\n", "Category Name: ", category.name, "\n", "Product Description:", product.description) AS prompt FROM Products AS product INNER JOIN Categories AS category ON product.category_id = category.id), STRUCT(100 AS maxOutputTokens)) @{remote_udf_max_rows_per_rpc=1} diff --git a/testdata/result/statement/select_from_change_stream.sql.txt b/testdata/result/statement/select_from_change_stream.sql.txt new file mode 100644 index 00000000..c94185fd --- /dev/null +++ b/testdata/result/statement/select_from_change_stream.sql.txt @@ -0,0 +1,86 @@ +--- select_from_change_stream.sql +SELECT ChangeRecord FROM READ_SingersNameStream ( + start_timestamp => "2022-05-01T09:00:00Z", + end_timestamp => NULL, + partition_token => NULL, + heartbeat_milliseconds => 10000 +) +--- AST +&ast.QueryStatement{ + Query: &ast.Select{ + Results: []ast.SelectItem{ + &ast.ExprSelectItem{ + Expr: &ast.Ident{ + NamePos: 7, + NameEnd: 19, + Name: "ChangeRecord", + }, + }, + }, + From: &ast.From{ + From: 20, + Source: &ast.TVFCallExpr{ + Rparen: 181, + Name: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 25, + NameEnd: 47, + Name: "READ_SingersNameStream", + }, + }, + }, + NamedArgs: []*ast.NamedArg{ + &ast.NamedArg{ + Name: &ast.Ident{ + NamePos: 52, + NameEnd: 67, + Name: "start_timestamp", + }, + Value: &ast.StringLiteral{ + ValuePos: 71, + ValueEnd: 93, + Value: "2022-05-01T09:00:00Z", + }, + }, + &ast.NamedArg{ + Name: &ast.Ident{ + NamePos: 97, + NameEnd: 110, + Name: "end_timestamp", + }, + Value: &ast.NullLiteral{ + Null: 114, + }, + }, + &ast.NamedArg{ + Name: &ast.Ident{ + NamePos: 122, + NameEnd: 137, + Name: "partition_token", + }, + Value: 
&ast.NullLiteral{ + Null: 141, + }, + }, + &ast.NamedArg{ + Name: &ast.Ident{ + NamePos: 149, + NameEnd: 171, + Name: "heartbeat_milliseconds", + }, + Value: &ast.IntLiteral{ + ValuePos: 175, + ValueEnd: 180, + Base: 10, + Value: "10000", + }, + }, + }, + }, + }, + }, +} + +--- SQL +SELECT ChangeRecord FROM READ_SingersNameStream(start_timestamp => "2022-05-01T09:00:00Z", end_timestamp => NULL, partition_token => NULL, heartbeat_milliseconds => 10000) diff --git a/testdata/result/statement/select_from_ml_predict_hint.sql.txt b/testdata/result/statement/select_from_ml_predict_hint.sql.txt new file mode 100644 index 00000000..cc99b9a8 --- /dev/null +++ b/testdata/result/statement/select_from_ml_predict_hint.sql.txt @@ -0,0 +1,175 @@ +--- select_from_ml_predict_hint.sql +-- https://cloud.google.com/spanner/docs/ml-tutorial-generative-ai?hl=en#register_a_generative_ai_model_in_a_schema +SELECT content +FROM ML.PREDICT( + MODEL TextBison, + (SELECT "Is 13 prime?" AS prompt), + STRUCT(256 AS maxOutputTokens, 0.2 AS temperature, 40 as topK, 0.95 AS topP) +) @{remote_udf_max_rows_per_rpc=1} +--- AST +&ast.QueryStatement{ + Query: &ast.Select{ + Select: 116, + Results: []ast.SelectItem{ + &ast.ExprSelectItem{ + Expr: &ast.Ident{ + NamePos: 123, + NameEnd: 130, + Name: "content", + }, + }, + }, + From: &ast.From{ + From: 131, + Source: &ast.TVFCallExpr{ + Rparen: 289, + Name: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 136, + NameEnd: 138, + Name: "ML", + }, + &ast.Ident{ + NamePos: 139, + NameEnd: 146, + Name: "PREDICT", + }, + }, + }, + Args: []ast.TVFArg{ + &ast.ModelArg{ + Model: 152, + Name: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 158, + NameEnd: 167, + Name: "TextBison", + }, + }, + }, + }, + &ast.ExprArg{ + Expr: &ast.ScalarSubQuery{ + Lparen: 173, + Rparen: 205, + Query: &ast.Select{ + Select: 174, + Results: []ast.SelectItem{ + &ast.Alias{ + Expr: &ast.StringLiteral{ + ValuePos: 181, + ValueEnd: 195, + Value: "Is 13 prime?", + }, 
+ As: &ast.AsAlias{ + As: 196, + Alias: &ast.Ident{ + NamePos: 199, + NameEnd: 205, + Name: "prompt", + }, + }, + }, + }, + }, + }, + }, + &ast.ExprArg{ + Expr: &ast.TypelessStructLiteral{ + Struct: 212, + Rparen: 287, + Values: []ast.TypelessStructLiteralArg{ + &ast.Alias{ + Expr: &ast.IntLiteral{ + ValuePos: 219, + ValueEnd: 222, + Base: 10, + Value: "256", + }, + As: &ast.AsAlias{ + As: 223, + Alias: &ast.Ident{ + NamePos: 226, + NameEnd: 241, + Name: "maxOutputTokens", + }, + }, + }, + &ast.Alias{ + Expr: &ast.FloatLiteral{ + ValuePos: 243, + ValueEnd: 246, + Value: "0.2", + }, + As: &ast.AsAlias{ + As: 247, + Alias: &ast.Ident{ + NamePos: 250, + NameEnd: 261, + Name: "temperature", + }, + }, + }, + &ast.Alias{ + Expr: &ast.IntLiteral{ + ValuePos: 263, + ValueEnd: 265, + Base: 10, + Value: "40", + }, + As: &ast.AsAlias{ + As: 266, + Alias: &ast.Ident{ + NamePos: 269, + NameEnd: 273, + Name: "topK", + }, + }, + }, + &ast.Alias{ + Expr: &ast.FloatLiteral{ + ValuePos: 275, + ValueEnd: 279, + Value: "0.95", + }, + As: &ast.AsAlias{ + As: 280, + Alias: &ast.Ident{ + NamePos: 283, + NameEnd: 287, + Name: "topP", + }, + }, + }, + }, + }, + }, + }, + Hint: &ast.Hint{ + Atmark: 291, + Rbrace: 322, + Records: []*ast.HintRecord{ + &ast.HintRecord{ + Key: &ast.Ident{ + NamePos: 293, + NameEnd: 320, + Name: "remote_udf_max_rows_per_rpc", + }, + Value: &ast.IntLiteral{ + ValuePos: 321, + ValueEnd: 322, + Base: 10, + Value: "1", + }, + }, + }, + }, + }, + }, + }, +} + +--- SQL +SELECT content FROM ML.PREDICT(MODEL TextBison, (SELECT "Is 13 prime\?" 
AS prompt), STRUCT(256 AS maxOutputTokens, 0.2 AS temperature, 40 AS topK, 0.95 AS topP)) @{remote_udf_max_rows_per_rpc=1} diff --git a/testdata/result/statement/select_from_ml_predict_simple.sql.txt b/testdata/result/statement/select_from_ml_predict_simple.sql.txt new file mode 100644 index 00000000..06c7e7df --- /dev/null +++ b/testdata/result/statement/select_from_ml_predict_simple.sql.txt @@ -0,0 +1,80 @@ +--- select_from_ml_predict_simple.sql +SELECT id, color, value +FROM ML.PREDICT(MODEL DiamondAppraise, TABLE Diamonds) +--- AST +&ast.QueryStatement{ + Query: &ast.Select{ + Results: []ast.SelectItem{ + &ast.ExprSelectItem{ + Expr: &ast.Ident{ + NamePos: 7, + NameEnd: 9, + Name: "id", + }, + }, + &ast.ExprSelectItem{ + Expr: &ast.Ident{ + NamePos: 11, + NameEnd: 16, + Name: "color", + }, + }, + &ast.ExprSelectItem{ + Expr: &ast.Ident{ + NamePos: 18, + NameEnd: 23, + Name: "value", + }, + }, + }, + From: &ast.From{ + From: 24, + Source: &ast.TVFCallExpr{ + Rparen: 77, + Name: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 29, + NameEnd: 31, + Name: "ML", + }, + &ast.Ident{ + NamePos: 32, + NameEnd: 39, + Name: "PREDICT", + }, + }, + }, + Args: []ast.TVFArg{ + &ast.ModelArg{ + Model: 40, + Name: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 46, + NameEnd: 61, + Name: "DiamondAppraise", + }, + }, + }, + }, + &ast.TableArg{ + Table: 63, + Name: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 69, + NameEnd: 77, + Name: "Diamonds", + }, + }, + }, + }, + }, + }, + }, + }, +} + +--- SQL +SELECT id, color, value FROM ML.PREDICT(MODEL DiamondAppraise, TABLE Diamonds) diff --git a/testdata/result/statement/select_from_ml_predict_textbison.sql.txt b/testdata/result/statement/select_from_ml_predict_textbison.sql.txt new file mode 100644 index 00000000..1d267c79 --- /dev/null +++ b/testdata/result/statement/select_from_ml_predict_textbison.sql.txt @@ -0,0 +1,370 @@ +--- select_from_ml_predict_textbison.sql +SELECT product_id, 
product_name, content +FROM ML.PREDICT( + MODEL TextBison, + (SELECT + product.id as product_id, + product.name as product_name, + CONCAT("Is this product safe for infants?", "\n", + "Product Name: ", product.name, "\n", + "Category Name: ", category.name, "\n", + "Product Description:", product.description) AS prompt + FROM + Products AS product JOIN Categories AS category + ON product.category_id = category.id), + STRUCT(100 AS maxOutputTokens) +) @{remote_udf_max_rows_per_rpc=1} +--- AST +&ast.QueryStatement{ + Query: &ast.Select{ + Results: []ast.SelectItem{ + &ast.ExprSelectItem{ + Expr: &ast.Ident{ + NamePos: 7, + NameEnd: 17, + Name: "product_id", + }, + }, + &ast.ExprSelectItem{ + Expr: &ast.Ident{ + NamePos: 19, + NameEnd: 31, + Name: "product_name", + }, + }, + &ast.ExprSelectItem{ + Expr: &ast.Ident{ + NamePos: 33, + NameEnd: 40, + Name: "content", + }, + }, + }, + From: &ast.From{ + From: 41, + Source: &ast.TVFCallExpr{ + Rparen: 580, + Name: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 46, + NameEnd: 48, + Name: "ML", + }, + &ast.Ident{ + NamePos: 49, + NameEnd: 56, + Name: "PREDICT", + }, + }, + }, + Args: []ast.TVFArg{ + &ast.ModelArg{ + Model: 62, + Name: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 68, + NameEnd: 77, + Name: "TextBison", + }, + }, + }, + }, + &ast.ExprArg{ + Expr: &ast.ScalarSubQuery{ + Lparen: 83, + Rparen: 542, + Query: &ast.Select{ + Select: 84, + Results: []ast.SelectItem{ + &ast.Alias{ + Expr: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 100, + NameEnd: 107, + Name: "product", + }, + &ast.Ident{ + NamePos: 108, + NameEnd: 110, + Name: "id", + }, + }, + }, + As: &ast.AsAlias{ + As: 111, + Alias: &ast.Ident{ + NamePos: 114, + NameEnd: 124, + Name: "product_id", + }, + }, + }, + &ast.Alias{ + Expr: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 135, + NameEnd: 142, + Name: "product", + }, + &ast.Ident{ + NamePos: 143, + NameEnd: 147, + Name: "name", + }, + }, + }, + As: 
&ast.AsAlias{ + As: 148, + Alias: &ast.Ident{ + NamePos: 151, + NameEnd: 163, + Name: "product_name", + }, + }, + }, + &ast.Alias{ + Expr: &ast.CallExpr{ + Rparen: 393, + Func: &ast.Ident{ + NamePos: 174, + NameEnd: 180, + Name: "CONCAT", + }, + Args: []ast.Arg{ + &ast.ExprArg{ + Expr: &ast.StringLiteral{ + ValuePos: 181, + ValueEnd: 216, + Value: "Is this product safe for infants?", + }, + }, + &ast.ExprArg{ + Expr: &ast.StringLiteral{ + ValuePos: 218, + ValueEnd: 222, + Value: "\n", + }, + }, + &ast.ExprArg{ + Expr: &ast.StringLiteral{ + ValuePos: 240, + ValueEnd: 256, + Value: "Product Name: ", + }, + }, + &ast.ExprArg{ + Expr: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 258, + NameEnd: 265, + Name: "product", + }, + &ast.Ident{ + NamePos: 266, + NameEnd: 270, + Name: "name", + }, + }, + }, + }, + &ast.ExprArg{ + Expr: &ast.StringLiteral{ + ValuePos: 272, + ValueEnd: 276, + Value: "\n", + }, + }, + &ast.ExprArg{ + Expr: &ast.StringLiteral{ + ValuePos: 294, + ValueEnd: 311, + Value: "Category Name: ", + }, + }, + &ast.ExprArg{ + Expr: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 313, + NameEnd: 321, + Name: "category", + }, + &ast.Ident{ + NamePos: 322, + NameEnd: 326, + Name: "name", + }, + }, + }, + }, + &ast.ExprArg{ + Expr: &ast.StringLiteral{ + ValuePos: 328, + ValueEnd: 332, + Value: "\n", + }, + }, + &ast.ExprArg{ + Expr: &ast.StringLiteral{ + ValuePos: 350, + ValueEnd: 372, + Value: "Product Description:", + }, + }, + &ast.ExprArg{ + Expr: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 374, + NameEnd: 381, + Name: "product", + }, + &ast.Ident{ + NamePos: 382, + NameEnd: 393, + Name: "description", + }, + }, + }, + }, + }, + }, + As: &ast.AsAlias{ + As: 395, + Alias: &ast.Ident{ + NamePos: 398, + NameEnd: 404, + Name: "prompt", + }, + }, + }, + }, + From: &ast.From{ + From: 410, + Source: &ast.Join{ + Op: "INNER JOIN", + Left: &ast.TableName{ + Table: &ast.Ident{ + NamePos: 424, + NameEnd: 432, + Name: 
"Products", + }, + As: &ast.AsAlias{ + As: 433, + Alias: &ast.Ident{ + NamePos: 436, + NameEnd: 443, + Name: "product", + }, + }, + }, + Right: &ast.TableName{ + Table: &ast.Ident{ + NamePos: 449, + NameEnd: 459, + Name: "Categories", + }, + As: &ast.AsAlias{ + As: 460, + Alias: &ast.Ident{ + NamePos: 463, + NameEnd: 471, + Name: "category", + }, + }, + }, + Cond: &ast.On{ + On: 506, + Expr: &ast.BinaryExpr{ + Op: "=", + Left: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 509, + NameEnd: 516, + Name: "product", + }, + &ast.Ident{ + NamePos: 517, + NameEnd: 528, + Name: "category_id", + }, + }, + }, + Right: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 531, + NameEnd: 539, + Name: "category", + }, + &ast.Ident{ + NamePos: 540, + NameEnd: 542, + Name: "id", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + &ast.ExprArg{ + Expr: &ast.TypelessStructLiteral{ + Struct: 549, + Rparen: 578, + Values: []ast.TypelessStructLiteralArg{ + &ast.Alias{ + Expr: &ast.IntLiteral{ + ValuePos: 556, + ValueEnd: 559, + Base: 10, + Value: "100", + }, + As: &ast.AsAlias{ + As: 560, + Alias: &ast.Ident{ + NamePos: 563, + NameEnd: 578, + Name: "maxOutputTokens", + }, + }, + }, + }, + }, + }, + }, + Hint: &ast.Hint{ + Atmark: 582, + Rbrace: 613, + Records: []*ast.HintRecord{ + &ast.HintRecord{ + Key: &ast.Ident{ + NamePos: 584, + NameEnd: 611, + Name: "remote_udf_max_rows_per_rpc", + }, + Value: &ast.IntLiteral{ + ValuePos: 612, + ValueEnd: 613, + Base: 10, + Value: "1", + }, + }, + }, + }, + }, + }, + }, +} + +--- SQL +SELECT product_id, product_name, content FROM ML.PREDICT(MODEL TextBison, (SELECT product.id AS product_id, product.name AS product_name, CONCAT("Is this product safe for infants\?", "\n", "Product Name: ", product.name, "\n", "Category Name: ", category.name, "\n", "Product Description:", product.description) AS prompt FROM Products AS product INNER JOIN Categories AS category ON product.category_id = category.id), STRUCT(100 AS 
maxOutputTokens)) @{remote_udf_max_rows_per_rpc=1} diff --git a/testdata/result/statement/update_with_safe_ml_predict.sql.txt b/testdata/result/statement/update_with_safe_ml_predict.sql.txt new file mode 100644 index 00000000..423cf296 --- /dev/null +++ b/testdata/result/statement/update_with_safe_ml_predict.sql.txt @@ -0,0 +1,205 @@ +--- update_with_safe_ml_predict.sql +-- https://cloud.google.com/spanner/docs/backfill-embeddings?hl=en#backfill +UPDATE products +SET + products.desc_embed = ( + SELECT embeddings.values + FROM SAFE.ML.PREDICT( + MODEL gecko_model, + (SELECT products.description AS content) + ) @{remote_udf_max_rows_per_rpc=200} + ), + products.desc_embed_model_version = 3 +WHERE products.desc_embed IS NULL +--- AST +&ast.Update{ + Update: 76, + TableName: &ast.Ident{ + NamePos: 83, + NameEnd: 91, + Name: "products", + }, + Updates: []*ast.UpdateItem{ + &ast.UpdateItem{ + Path: []*ast.Ident{ + &ast.Ident{ + NamePos: 100, + NameEnd: 108, + Name: "products", + }, + &ast.Ident{ + NamePos: 109, + NameEnd: 119, + Name: "desc_embed", + }, + }, + DefaultExpr: &ast.DefaultExpr{ + DefaultPos: -1, + Expr: &ast.ScalarSubQuery{ + Lparen: 122, + Rparen: 333, + Query: &ast.Select{ + Select: 132, + Results: []ast.SelectItem{ + &ast.ExprSelectItem{ + Expr: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 139, + NameEnd: 149, + Name: "embeddings", + }, + &ast.Ident{ + NamePos: 150, + NameEnd: 156, + Name: "values", + }, + }, + }, + }, + }, + From: &ast.From{ + From: 165, + Source: &ast.TVFCallExpr{ + Rparen: 292, + Name: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 170, + NameEnd: 174, + Name: "SAFE", + }, + &ast.Ident{ + NamePos: 175, + NameEnd: 177, + Name: "ML", + }, + &ast.Ident{ + NamePos: 178, + NameEnd: 185, + Name: "PREDICT", + }, + }, + }, + Args: []ast.TVFArg{ + &ast.ModelArg{ + Model: 203, + Name: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 209, + NameEnd: 220, + Name: "gecko_model", + }, + }, + }, + }, + 
&ast.ExprArg{ + Expr: &ast.ScalarSubQuery{ + Lparen: 238, + Rparen: 277, + Query: &ast.Select{ + Select: 239, + Results: []ast.SelectItem{ + &ast.Alias{ + Expr: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 246, + NameEnd: 254, + Name: "products", + }, + &ast.Ident{ + NamePos: 255, + NameEnd: 266, + Name: "description", + }, + }, + }, + As: &ast.AsAlias{ + As: 267, + Alias: &ast.Ident{ + NamePos: 270, + NameEnd: 277, + Name: "content", + }, + }, + }, + }, + }, + }, + }, + }, + Hint: &ast.Hint{ + Atmark: 294, + Rbrace: 327, + Records: []*ast.HintRecord{ + &ast.HintRecord{ + Key: &ast.Ident{ + NamePos: 296, + NameEnd: 323, + Name: "remote_udf_max_rows_per_rpc", + }, + Value: &ast.IntLiteral{ + ValuePos: 324, + ValueEnd: 327, + Base: 10, + Value: "200", + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + &ast.UpdateItem{ + Path: []*ast.Ident{ + &ast.Ident{ + NamePos: 340, + NameEnd: 348, + Name: "products", + }, + &ast.Ident{ + NamePos: 349, + NameEnd: 373, + Name: "desc_embed_model_version", + }, + }, + DefaultExpr: &ast.DefaultExpr{ + DefaultPos: -1, + Expr: &ast.IntLiteral{ + ValuePos: 376, + ValueEnd: 377, + Base: 10, + Value: "3", + }, + }, + }, + }, + Where: &ast.Where{ + Where: 378, + Expr: &ast.IsNullExpr{ + Null: 407, + Left: &ast.Path{ + Idents: []*ast.Ident{ + &ast.Ident{ + NamePos: 384, + NameEnd: 392, + Name: "products", + }, + &ast.Ident{ + NamePos: 393, + NameEnd: 403, + Name: "desc_embed", + }, + }, + }, + }, + }, +} + +--- SQL +UPDATE products SET products.desc_embed = (SELECT embeddings.values FROM SAFE.ML.PREDICT(MODEL gecko_model, (SELECT products.description AS content)) @{remote_udf_max_rows_per_rpc=200}), products.desc_embed_model_version = 3 WHERE products.desc_embed IS NULL