From 23c7e13493f14f1eee6eb759d291e2fe489c3557 Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Wed, 20 Oct 2021 14:00:40 +0200 Subject: [PATCH 1/7] add collation name to the typing we get from the schema tracker Signed-off-by: Andres Taylor --- go/mysql/schema.go | 7 ++++-- go/vt/vtgate/schema/tracker.go | 3 ++- go/vt/vtgate/schema/tracker_test.go | 35 ++++++++++++++++------------- go/vt/vtgate/vindexes/vschema.go | 5 +++-- 4 files changed, 29 insertions(+), 21 deletions(-) diff --git a/go/mysql/schema.go b/go/mysql/schema.go index 218d04d858c..2a09b993db8 100644 --- a/go/mysql/schema.go +++ b/go/mysql/schema.go @@ -97,15 +97,18 @@ select table_schema, table_name, column_name, ordinal_position, character_set_na from information_schema.columns where table_schema = database()` + // fetchColumns are the columns we fetch + fetchColumns = "table_name, column_name, data_type, collation_name" + // FetchUpdatedTables queries fetches all information about updated tables - FetchUpdatedTables = `select table_name, column_name, data_type + FetchUpdatedTables = `select ` + fetchColumns + ` from _vt.schemacopy where table_schema = database() and table_name in ::tableNames order by table_name, ordinal_position` // FetchTables queries fetches all information about tables - FetchTables = `select table_name, column_name, data_type + FetchTables = `select ` + fetchColumns + ` from _vt.schemacopy where table_schema = database() order by table_name, ordinal_position` diff --git a/go/vt/vtgate/schema/tracker.go b/go/vt/vtgate/schema/tracker.go index 82e39ee58d5..cf221a159e8 100644 --- a/go/vt/vtgate/schema/tracker.go +++ b/go/vt/vtgate/schema/tracker.go @@ -196,9 +196,10 @@ func (t *Tracker) updateTables(keyspace string, res *sqltypes.Result) { tbl := row[0].ToString() colName := row[1].ToString() colType := row[2].ToString() + collation := row[3].ToString() cType := sqlparser.ColumnType{Type: colType} - col := vindexes.Column{Name: sqlparser.NewColIdent(colName), Type: cType.SQLType()} + col := vindexes.Column{Name: sqlparser.NewColIdent(colName), Type: cType.SQLType(), CollationName: collation} cols := t.tables.get(keyspace, tbl) t.tables.set(keyspace, tbl, append(cols, col)) diff --git a/go/vt/vtgate/schema/tracker_test.go b/go/vt/vtgate/schema/tracker_test.go index 3d343f2cacf..049270752a2 100644 --- a/go/vt/vtgate/schema/tracker_test.go +++ b/go/vt/vtgate/schema/tracker_test.go @@ -50,7 +50,10 @@ func TestTracking(t *testing.T) { Shard: target.Shard, Type: target.TabletType, } - fields := sqltypes.MakeTestFields("table_name|col_name|col_type", "varchar|varchar|varchar") + fields := sqltypes.MakeTestFields( + "table_name|col_name|col_type|collation_name", + "varchar|varchar|varchar|varchar", + ) type delta struct { result *sqltypes.Result @@ -60,7 +63,7 @@ func TestTracking(t *testing.T) { d0 = delta{ result: sqltypes.MakeTestResult( fields, - "prior|id|int", + "prior|id|int|", ), updTbl: []string{"prior"}, } @@ -68,9 +71,9 @@ func TestTracking(t *testing.T) { d1 = delta{ result: sqltypes.MakeTestResult( fields, - "t1|id|int", - "t1|name|varchar", - "t2|id|varchar", + "t1|id|int|", + "t1|name|varchar|utf8_bin", + "t2|id|varchar|utf8_bin", ), updTbl: []string{"t1", "t2"}, } @@ -78,9 +81,9 @@ func TestTracking(t *testing.T) { d2 = delta{ result: sqltypes.MakeTestResult( fields, - "t2|id|varchar", - "t2|name|varchar", - "t3|id|datetime", + "t2|id|varchar|utf8_bin", + "t2|name|varchar|utf8_bin", + "t3|id|datetime|", ), updTbl: []string{"prior", "t1", "t2", "t3"}, } @@ -88,7 +91,7 @@ func TestTracking(t *testing.T) { d3 = delta{ result: sqltypes.MakeTestResult( fields, - "t4|name|varchar", + "t4|name|varchar|utf8_bin", ), updTbl: []string{"t4"}, } @@ -104,9 +107,9 @@ func TestTracking(t *testing.T) { exp: map[string][]vindexes.Column{ "t1": { {Name: sqlparser.NewColIdent("id"), Type: querypb.Type_INT32}, - {Name: sqlparser.NewColIdent("name"), Type: querypb.Type_VARCHAR}}, + {Name: sqlparser.NewColIdent("name"), Type: querypb.Type_VARCHAR, CollationName: "utf8_bin"}}, "t2": { - {Name: sqlparser.NewColIdent("id"), Type: querypb.Type_VARCHAR}}, + {Name: sqlparser.NewColIdent("id"), Type: querypb.Type_VARCHAR, CollationName: "utf8_bin"}}, "prior": { {Name: sqlparser.NewColIdent("id"), Type: querypb.Type_INT32}}, }, @@ -115,8 +118,8 @@ func TestTracking(t *testing.T) { deltas: []delta{d0, d1, d2}, exp: map[string][]vindexes.Column{ "t2": { - {Name: sqlparser.NewColIdent("id"), Type: querypb.Type_VARCHAR}, - {Name: sqlparser.NewColIdent("name"), Type: querypb.Type_VARCHAR}}, + {Name: sqlparser.NewColIdent("id"), Type: querypb.Type_VARCHAR, CollationName: "utf8_bin"}, + {Name: sqlparser.NewColIdent("name"), Type: querypb.Type_VARCHAR, CollationName: "utf8_bin"}}, "t3": { {Name: sqlparser.NewColIdent("id"), Type: querypb.Type_DATETIME}}, }, @@ -125,12 +128,12 @@ func TestTracking(t *testing.T) { deltas: []delta{d0, d1, d2, d3}, exp: map[string][]vindexes.Column{ "t2": { - {Name: sqlparser.NewColIdent("id"), Type: querypb.Type_VARCHAR}, - {Name: sqlparser.NewColIdent("name"), Type: querypb.Type_VARCHAR}}, + {Name: sqlparser.NewColIdent("id"), Type: querypb.Type_VARCHAR, CollationName: "utf8_bin"}, + {Name: sqlparser.NewColIdent("name"), Type: querypb.Type_VARCHAR, CollationName: "utf8_bin"}}, "t3": { {Name: sqlparser.NewColIdent("id"), Type: querypb.Type_DATETIME}}, "t4": { - {Name: sqlparser.NewColIdent("name"), Type: querypb.Type_VARCHAR}}, + {Name: sqlparser.NewColIdent("name"), Type: querypb.Type_VARCHAR, CollationName: "utf8_bin"}}, }, }, } diff --git a/go/vt/vtgate/vindexes/vschema.go b/go/vt/vtgate/vindexes/vschema.go index 5bd07b8bfc8..d9a5fecab61 100644 --- a/go/vt/vtgate/vindexes/vschema.go +++ b/go/vt/vtgate/vindexes/vschema.go @@ -112,8 +112,9 @@ type ColumnVindex struct { // Column describes a column. type Column struct { - Name sqlparser.ColIdent `json:"name"` - Type querypb.Type `json:"type"` + Name sqlparser.ColIdent `json:"name"` + Type querypb.Type `json:"type"` + CollationName string `json:"collation_name"` } // MarshalJSON returns a JSON representation of Column. From 11f73fc57dc0961d9d49d7ce27d40502f4b7d2af Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Wed, 20 Oct 2021 14:51:22 +0200 Subject: [PATCH 2/7] add collation name to the typer and semantic state Signed-off-by: Andres Taylor --- go/vt/vtgate/semantics/analyzer.go | 5 ++--- go/vt/vtgate/semantics/dependencies.go | 5 ++--- go/vt/vtgate/semantics/real_table.go | 5 ++++- go/vt/vtgate/semantics/semantic_state.go | 15 ++++++++++++--- go/vt/vtgate/semantics/typer.go | 23 ++++++++++++++++------- 5 files changed, 36 insertions(+), 17 deletions(-) diff --git a/go/vt/vtgate/semantics/analyzer.go b/go/vt/vtgate/semantics/analyzer.go index 625ff9f8391..478a72bc266 100644 --- a/go/vt/vtgate/semantics/analyzer.go +++ b/go/vt/vtgate/semantics/analyzer.go @@ -19,7 +19,6 @@ package semantics import ( "vitess.io/vitess/go/vt/vtgate/vindexes" - querypb "vitess.io/vitess/go/vt/proto/query" vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" "vitess.io/vitess/go/vt/sqlparser" "vitess.io/vitess/go/vt/vterrors" @@ -216,10 +215,10 @@ func isParentSelect(cursor *sqlparser.Cursor) bool { type originable interface { tableSetFor(t *sqlparser.AliasedTableExpr) TableSet - depsForExpr(expr sqlparser.Expr) (direct, recursive TableSet, typ *querypb.Type) + depsForExpr(expr sqlparser.Expr) (direct, recursive TableSet, typ *Type) } -func (a *analyzer) depsForExpr(expr sqlparser.Expr) (direct, recursive TableSet, typ *querypb.Type) { +func (a *analyzer) depsForExpr(expr sqlparser.Expr) (direct, recursive TableSet, typ *Type) { recursive = a.binder.recursive.dependencies(expr) direct = a.binder.direct.dependencies(expr) qt, isFound := a.typer.exprTypes[expr] diff --git a/go/vt/vtgate/semantics/dependencies.go b/go/vt/vtgate/semantics/dependencies.go index 537f43ddce0..86d89900833 100644 --- a/go/vt/vtgate/semantics/dependencies.go +++ b/go/vt/vtgate/semantics/dependencies.go @@ -17,7 +17,6 @@ limitations under the License. package semantics import ( - querypb "vitess.io/vitess/go/vt/proto/query" vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" "vitess.io/vitess/go/vt/vterrors" ) @@ -33,7 +32,7 @@ type ( dependency struct { direct TableSet recursive TableSet - typ *querypb.Type + typ *Type } nothing struct{} certain struct { @@ -47,7 +46,7 @@ type ( var ambigousErr = vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "ambiguous") -func createCertain(direct TableSet, recursive TableSet, qt *querypb.Type) *certain { +func createCertain(direct TableSet, recursive TableSet, qt *Type) *certain { return &certain{ dependency: dependency{ direct: direct, diff --git a/go/vt/vtgate/semantics/real_table.go b/go/vt/vtgate/semantics/real_table.go index bfee236251b..ae9f5ea7310 100644 --- a/go/vt/vtgate/semantics/real_table.go +++ b/go/vt/vtgate/semantics/real_table.go @@ -104,7 +104,10 @@ func vindexTableToColumnInfo(tbl *vindexes.Table) []ColumnInfo { for _, col := range tbl.Columns { cols = append(cols, ColumnInfo{ Name: col.Name.String(), - Type: col.Type, + Type: Type{ + typ: col.Type, + collationName: col.CollationName, + }, }) nameMap[col.Name.String()] = nil } diff --git a/go/vt/vtgate/semantics/semantic_state.go b/go/vt/vtgate/semantics/semantic_state.go index c140c3b631f..7c06749e0e0 100644 --- a/go/vt/vtgate/semantics/semantic_state.go +++ b/go/vt/vtgate/semantics/semantic_state.go @@ -59,7 +59,7 @@ type ( // ColumnInfo contains information about columns ColumnInfo struct { Name string - Type querypb.Type + Type Type } // ExprDependencies stores the tables that an expression depends on as a map @@ -81,7 +81,7 @@ type ( // It does not recurse inside derived tables and the like to find the original dependencies Direct ExprDependencies - exprTypes map[sqlparser.Expr]querypb.Type + exprTypes map[sqlparser.Expr]Type selectScope map[*sqlparser.Select]*scope Comments sqlparser.Comments SubqueryMap map[*sqlparser.Select][]*sqlparser.ExtractedSubquery @@ -202,11 +202,20 @@ func (st *SemTable) AddExprs(tbl *sqlparser.AliasedTableExpr, cols sqlparser.Sel func (st *SemTable) TypeFor(e sqlparser.Expr) *querypb.Type { typ, found := st.exprTypes[e] if found { - return &typ + return &typ.typ } return nil } +// CollationFor returns the collation name of expressions in the query +func (st *SemTable) CollationFor(e sqlparser.Expr) string { + typ, found := st.exprTypes[e] + if found { + return typ.collationName + } + return "" +} + // dependencies return the table dependencies of the expression. This method finds table dependencies recursively func (d ExprDependencies) dependencies(expr sqlparser.Expr) (deps TableSet) { if ValidAsMapKey(expr) { diff --git a/go/vt/vtgate/semantics/typer.go b/go/vt/vtgate/semantics/typer.go index f703631ca03..5f86c11237e 100644 --- a/go/vt/vtgate/semantics/typer.go +++ b/go/vt/vtgate/semantics/typer.go @@ -26,38 +26,47 @@ import ( // typer is responsible for setting the type for expressions // it does it's work after visiting the children (up), since the children types is often needed to type a node. type typer struct { - exprTypes map[sqlparser.Expr]querypb.Type + exprTypes map[sqlparser.Expr]Type +} + +// Type is the normal querypb.Type with collation +type Type struct { + typ querypb.Type + collationName string } func newTyper() *typer { return &typer{ - exprTypes: map[sqlparser.Expr]querypb.Type{}, + exprTypes: map[sqlparser.Expr]Type{}, } } +var typeInt32 = Type{typ: sqltypes.Int32} +var decimal = Type{typ: sqltypes.Decimal} + func (t *typer) up(cursor *sqlparser.Cursor) error { switch node := cursor.Node().(type) { case *sqlparser.Literal: switch node.Type { case sqlparser.IntVal: - t.exprTypes[node] = sqltypes.Int32 + t.exprTypes[node] = typeInt32 case sqlparser.StrVal: - t.exprTypes[node] = sqltypes.VarChar + t.exprTypes[node] = Type{typ: sqltypes.VarChar} // TODO - add system default collation name case sqlparser.FloatVal: - t.exprTypes[node] = sqltypes.Decimal + t.exprTypes[node] = decimal } case *sqlparser.FuncExpr: code, ok := engine.SupportedAggregates[node.Name.Lowered()] if ok { typ, ok := engine.OpcodeType[code] if ok { - t.exprTypes[node] = typ + t.exprTypes[node] = Type{typ: typ} } } } return nil } -func (t *typer) setTypeFor(node *sqlparser.ColName, typ querypb.Type) { +func (t *typer) setTypeFor(node *sqlparser.ColName, typ Type) { t.exprTypes[node] = typ } From 7fd9b95746b536b73798da056bac3237042d6504 Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Wed, 20 Oct 2021 15:57:56 +0200 Subject: [PATCH 3/7] use collation ID instead of name Signed-off-by: Andres Taylor --- go/mysql/collations/8bit.go | 10 +++++----- go/mysql/collations/collation.go | 20 +++++++++++++++++--- go/mysql/collations/remote.go | 8 ++++---- go/mysql/collations/uca.go | 10 +++++----- go/mysql/collations/utf8.go | 4 ++-- go/vt/vtgate/semantics/real_table.go | 20 ++++++++++++++++++-- go/vt/vtgate/semantics/semantic_state.go | 9 +++++---- go/vt/vtgate/semantics/typer.go | 13 +++++++------ 8 files changed, 63 insertions(+), 31 deletions(-) diff --git a/go/mysql/collations/8bit.go b/go/mysql/collations/8bit.go index 7b7f765efe3..bd0de162ccd 100644 --- a/go/mysql/collations/8bit.go +++ b/go/mysql/collations/8bit.go @@ -29,7 +29,7 @@ type simpletables struct { } type Collation_8bit_bin struct { - id uint + id ID name string simpletables } @@ -39,7 +39,7 @@ func (c *Collation_8bit_bin) init() {} func (c *Collation_8bit_bin) Name() string { return c.name } -func (c *Collation_8bit_bin) Id() uint { +func (c *Collation_8bit_bin) Id() ID { return c.id } @@ -69,7 +69,7 @@ func (c *Collation_8bit_bin) WeightStringLen(numBytes int) int { } type Collation_8bit_simple_ci struct { - id uint + id ID name string simpletables } @@ -80,7 +80,7 @@ func (c *Collation_8bit_simple_ci) Name() string { return c.name } -func (c *Collation_8bit_simple_ci) Id() uint { +func (c *Collation_8bit_simple_ci) Id() ID { return c.id } @@ -142,7 +142,7 @@ type Collation_binary struct{} func (c *Collation_binary) init() {} -func (c *Collation_binary) Id() uint { +func (c *Collation_binary) Id() ID { return 63 } diff --git a/go/mysql/collations/collation.go b/go/mysql/collations/collation.go index 7b4106f5dbb..577bf79921f 100644 --- a/go/mysql/collations/collation.go +++ b/go/mysql/collations/collation.go @@ -24,6 +24,10 @@ import ( // Generate mysqldata.go from the JSON information dumped from MySQL //go:generate go run ./tools/makemysqldata/ +type ID uint16 + +const Unknown ID = 0 + // Collation implements a MySQL-compatible collation. It defines how to compare // for sorting order and equality two strings with the same encoding. type Collation interface { @@ -33,7 +37,7 @@ type Collation interface { // Id returns the numerical identifier for this collation. This is the same // value that is returned by MySQL in a query's headers to identify the collation // for a given column - Id() uint + Id() ID // Name is the full name of this collation, in the form of "ENCODING_LANG_SENSITIVITY" Name() string @@ -110,7 +114,7 @@ func minInt(i1, i2 int) int { } var collationsByName = make(map[string]Collation) -var collationsById = make(map[uint]Collation) +var collationsById = make(map[ID]Collation) func register(c Collation) { duplicatedCharset := func(old Collation) { @@ -138,9 +142,19 @@ func LookupByName(name string) Collation { return csi } +// LookupIDByName returns the collation ID for the given name +func LookupIDByName(name string) ID { + csi := collationsByName[name] + if csi == nil { + return Unknown + } + + return csi.Id() +} + // LookupById returns the collation with the given numerical identifier. The collation // is initialized if it's the first time being accessed. -func LookupById(id uint) Collation { +func LookupById(id ID) Collation { csi := collationsById[id] if csi != nil { csi.init() diff --git a/go/mysql/collations/remote.go b/go/mysql/collations/remote.go index 139a8d3de4f..c5b9f8bad92 100644 --- a/go/mysql/collations/remote.go +++ b/go/mysql/collations/remote.go @@ -21,7 +21,7 @@ import ( // used as a fallback or as a way to test our native implementations. type RemoteCollation struct { name string - id uint + id ID prefix string suffix string @@ -33,7 +33,7 @@ type RemoteCollation struct { err error } -func makeRemoteCollation(conn *mysql.Conn, collid uint, collname string) *RemoteCollation { +func makeRemoteCollation(conn *mysql.Conn, collid ID, collname string) *RemoteCollation { coll := &RemoteCollation{ name: collname, id: collid, @@ -52,7 +52,7 @@ func makeRemoteCollation(conn *mysql.Conn, collid uint, collname string) *Remote } func RemoteByName(conn *mysql.Conn, collname string) *RemoteCollation { - var collid uint + var collid ID if known, ok := collationsByName[collname]; ok { collid = known.Id() } @@ -67,7 +67,7 @@ func (c *RemoteCollation) LastError() error { func (c *RemoteCollation) init() {} -func (c *RemoteCollation) Id() uint { +func (c *RemoteCollation) Id() ID { return c.id } diff --git a/go/mysql/collations/uca.go b/go/mysql/collations/uca.go index 8fc281f22f1..0d11dde792d 100644 --- a/go/mysql/collations/uca.go +++ b/go/mysql/collations/uca.go @@ -35,7 +35,7 @@ type CollationUCA interface { type Collation_utf8mb4_uca_0900 struct { name string - id uint + id ID weights uca.WeightTable tailoring []uca.WeightPatch @@ -72,7 +72,7 @@ func (c *Collation_utf8mb4_uca_0900) Name() string { return c.name } -func (c *Collation_utf8mb4_uca_0900) Id() uint { +func (c *Collation_utf8mb4_uca_0900) Id() ID { return c.id } @@ -168,7 +168,7 @@ func (c *Collation_utf8mb4_0900_bin) Encoding() encoding.Encoding { return encoding.Encoding_utf8mb4{} } -func (c *Collation_utf8mb4_0900_bin) Id() uint { +func (c *Collation_utf8mb4_0900_bin) Id() ID { return 309 } @@ -197,7 +197,7 @@ func (c *Collation_utf8mb4_0900_bin) WeightStringLen(numBytes int) int { type Collation_uca_legacy struct { name string - id uint + id ID charset encoding.Encoding weights uca.WeightTable @@ -226,7 +226,7 @@ func (c *Collation_uca_legacy) UnicodeWeightsTable() (uca.WeightTable, uca.Table return c.uca.Weights() } -func (c *Collation_uca_legacy) Id() uint { +func (c *Collation_uca_legacy) Id() ID { return c.id } diff --git a/go/mysql/collations/utf8.go b/go/mysql/collations/utf8.go index 6082855d61a..0ca53986131 100644 --- a/go/mysql/collations/utf8.go +++ b/go/mysql/collations/utf8.go @@ -34,7 +34,7 @@ type Collation_utf8mb4_general_ci struct { func (c *Collation_utf8mb4_general_ci) init() {} -func (c *Collation_utf8mb4_general_ci) Id() uint { +func (c *Collation_utf8mb4_general_ci) Id() ID { return 45 } @@ -123,7 +123,7 @@ type Collation_utf8mb4_bin struct{} func (c *Collation_utf8mb4_bin) init() {} -func (c *Collation_utf8mb4_bin) Id() uint { +func (c *Collation_utf8mb4_bin) Id() ID { return 46 } diff --git a/go/vt/vtgate/semantics/real_table.go b/go/vt/vtgate/semantics/real_table.go index ae9f5ea7310..1abdbc87fa9 100644 --- a/go/vt/vtgate/semantics/real_table.go +++ b/go/vt/vtgate/semantics/real_table.go @@ -19,6 +19,10 @@ package semantics import ( "strings" + "vitess.io/vitess/go/mysql/collations" + "vitess.io/vitess/go/sqltypes" + "vitess.io/vitess/go/vt/log" + vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" "vitess.io/vitess/go/vt/sqlparser" "vitess.io/vitess/go/vt/vterrors" @@ -102,11 +106,23 @@ func vindexTableToColumnInfo(tbl *vindexes.Table) []ColumnInfo { nameMap := map[string]interface{}{} cols := make([]ColumnInfo, 0, len(tbl.Columns)) for _, col := range tbl.Columns { + var collation collations.ID + if sqltypes.IsText(col.Type) { + collation = collations.LookupIDByName(col.CollationName) + if collation == collations.Unknown { + log.Warningf( + "Unknown collation name %s on table %s column %s", + col.CollationName, + tbl.Name.String(), + col.Name.String(), + ) + } + } cols = append(cols, ColumnInfo{ Name: col.Name.String(), Type: Type{ - typ: col.Type, - collationName: col.CollationName, + Type: col.Type, + Collation: collation, }, }) nameMap[col.Name.String()] = nil diff --git a/go/vt/vtgate/semantics/semantic_state.go b/go/vt/vtgate/semantics/semantic_state.go index 7c06749e0e0..07ea3841eaa 100644 --- a/go/vt/vtgate/semantics/semantic_state.go +++ b/go/vt/vtgate/semantics/semantic_state.go @@ -17,6 +17,7 @@ limitations under the License. package semantics import ( + "vitess.io/vitess/go/mysql/collations" "vitess.io/vitess/go/vt/key" querypb "vitess.io/vitess/go/vt/proto/query" topodatapb "vitess.io/vitess/go/vt/proto/topodata" @@ -202,18 +203,18 @@ func (st *SemTable) AddExprs(tbl *sqlparser.AliasedTableExpr, cols sqlparser.Sel func (st *SemTable) TypeFor(e sqlparser.Expr) *querypb.Type { typ, found := st.exprTypes[e] if found { - return &typ.typ + return &typ.Type } return nil } // CollationFor returns the collation name of expressions in the query -func (st *SemTable) CollationFor(e sqlparser.Expr) string { +func (st *SemTable) CollationFor(e sqlparser.Expr) collations.ID { typ, found := st.exprTypes[e] if found { - return typ.collationName + return typ.Collation } - return "" + return collations.Unknown } // dependencies return the table dependencies of the expression. This method finds table dependencies recursively diff --git a/go/vt/vtgate/semantics/typer.go b/go/vt/vtgate/semantics/typer.go index 5f86c11237e..5eac77b4d00 100644 --- a/go/vt/vtgate/semantics/typer.go +++ b/go/vt/vtgate/semantics/typer.go @@ -17,6 +17,7 @@ limitations under the License. package semantics import ( + "vitess.io/vitess/go/mysql/collations" "vitess.io/vitess/go/sqltypes" querypb "vitess.io/vitess/go/vt/proto/query" "vitess.io/vitess/go/vt/sqlparser" @@ -31,8 +32,8 @@ type typer struct { // Type is the normal querypb.Type with collation type Type struct { - typ querypb.Type - collationName string + Type querypb.Type + Collation collations.ID } func newTyper() *typer { @@ -41,8 +42,8 @@ func newTyper() *typer { } } -var typeInt32 = Type{typ: sqltypes.Int32} -var decimal = Type{typ: sqltypes.Decimal} +var typeInt32 = Type{Type: sqltypes.Int32} +var decimal = Type{Type: sqltypes.Decimal} func (t *typer) up(cursor *sqlparser.Cursor) error { switch node := cursor.Node().(type) { @@ -51,7 +52,7 @@ func (t *typer) up(cursor *sqlparser.Cursor) error { case sqlparser.IntVal: t.exprTypes[node] = typeInt32 case sqlparser.StrVal: - t.exprTypes[node] = Type{typ: sqltypes.VarChar} // TODO - add system default collation name + t.exprTypes[node] = Type{Type: sqltypes.VarChar} // TODO - add system default collation name case sqlparser.FloatVal: t.exprTypes[node] = decimal } @@ -60,7 +61,7 @@ func (t *typer) up(cursor *sqlparser.Cursor) error { if ok { typ, ok := engine.OpcodeType[code] if ok { - t.exprTypes[node] = Type{typ: typ} + t.exprTypes[node] = Type{Type: typ} } } } From 6f30672f0000be61077dc202999082b35916b27b Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Thu, 21 Oct 2021 13:31:28 +0200 Subject: [PATCH 4/7] Keeping track of collations in ordered aggregate's GroupByParams to enable better NullsafeComparisons Signed-off-by: Florent Poinsard --- go/vt/vtgate/engine/ordered_aggregate.go | 3 + go/vt/vtgate/planbuilder/collations_test.go | 124 +++++++++++++++++++ go/vt/vtgate/planbuilder/horizon_planning.go | 24 ++-- 3 files changed, 139 insertions(+), 12 deletions(-) create mode 100644 go/vt/vtgate/planbuilder/collations_test.go diff --git a/go/vt/vtgate/engine/ordered_aggregate.go b/go/vt/vtgate/engine/ordered_aggregate.go index 08d9c4d9ad4..ebe0e03b350 100644 --- a/go/vt/vtgate/engine/ordered_aggregate.go +++ b/go/vt/vtgate/engine/ordered_aggregate.go @@ -20,6 +20,8 @@ import ( "fmt" "strconv" + "vitess.io/vitess/go/mysql/collations" + "vitess.io/vitess/go/vt/sqlparser" "google.golang.org/protobuf/proto" @@ -66,6 +68,7 @@ type GroupByParams struct { WeightStringCol int Expr sqlparser.Expr FromGroupBy bool + CollationID collations.ID } // String returns a string. Used for plan descriptions diff --git a/go/vt/vtgate/planbuilder/collations_test.go b/go/vt/vtgate/planbuilder/collations_test.go new file mode 100644 index 00000000000..331961b5f12 --- /dev/null +++ b/go/vt/vtgate/planbuilder/collations_test.go @@ -0,0 +1,124 @@ +/* +Copyright 2021 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package planbuilder + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" + + "vitess.io/vitess/go/mysql/collations" + "vitess.io/vitess/go/vt/vtgate/engine" +) + +// collationInTable allows us to set a collation on a column +type collationInTable struct { + ks, table, collationName string + offsetInTable int +} + +type collationTestCase struct { + query string + check func(t *testing.T, colls []collationInTable, primitive engine.Primitive) + collations []collationInTable +} + +func (tc *collationTestCase) run(t *testing.T) { + vschemaWrapper := &vschemaWrapper{ + v: loadSchema(t, "schema_test.json"), + sysVarEnabled: true, + version: Gen4, + } + + tc.addCollationsToSchema(vschemaWrapper) + plan, err := TestBuilder(tc.query, vschemaWrapper, vschemaWrapper.currentDb()) + require.NoError(t, err) + tc.check(t, tc.collations, plan.Instructions) +} + +func (tc *collationTestCase) addCollationsToSchema(vschema *vschemaWrapper) { + for _, collation := range tc.collations { + vschema.v.Keyspaces[collation.ks].Tables[collation.table].Columns[collation.offsetInTable].CollationName = collation.collationName + } +} + +func TestOrderedAggregateCollations(t *testing.T) { + testCases := []collationTestCase{ + { + collations: []collationInTable{{ks: "user", table: "user", collationName: "utf8mb4_bin", offsetInTable: 2}}, + query: "select textcol1 from user group by textcol1", + check: func(t *testing.T, colls []collationInTable, primitive engine.Primitive) { + oa, isOA := primitive.(*engine.OrderedAggregate) + require.True(t, isOA, "should be an OrderedAggregate") + require.Equal(t, collations.LookupByName(colls[0].collationName).Id(), oa.GroupByKeys[0].CollationID) + }, + }, + { + collations: []collationInTable{{ks: "user", table: "user", collationName: "utf8mb4_bin", offsetInTable: 2}}, + query: "select distinct textcol1 from user", + check: func(t *testing.T, colls []collationInTable, primitive engine.Primitive) { + oa, isOA := primitive.(*engine.OrderedAggregate) + require.True(t, isOA, "should be an OrderedAggregate") + require.Equal(t, collations.LookupByName(colls[0].collationName).Id(), oa.GroupByKeys[0].CollationID) + }, + }, + { + collations: []collationInTable{ + {ks: "user", table: "user", collationName: "utf8mb4_bin", offsetInTable: 2}, + {ks: "user", table: "user", collationName: "utf8mb4_bin", offsetInTable: 4}, + }, + query: "select textcol1, textcol2 from user group by textcol1, textcol2", + check: func(t *testing.T, colls []collationInTable, primitive engine.Primitive) { + oa, isOA := primitive.(*engine.OrderedAggregate) + require.True(t, isOA, "should be an OrderedAggregate") + require.Equal(t, collations.LookupByName(colls[0].collationName).Id(), oa.GroupByKeys[0].CollationID) + require.Equal(t, collations.LookupByName(colls[1].collationName).Id(), oa.GroupByKeys[1].CollationID) + }, + }, + { + collations: []collationInTable{ + {ks: "user", table: "user", collationName: "utf8mb4_bin", offsetInTable: 4}, + }, + query: "select count(*), textcol2 from user group by textcol2", + check: func(t *testing.T, colls []collationInTable, primitive engine.Primitive) { + oa, isOA := primitive.(*engine.OrderedAggregate) + require.True(t, isOA, "should be an OrderedAggregate") + require.Equal(t, collations.LookupByName(colls[0].collationName).Id(), oa.GroupByKeys[0].CollationID) + }, + }, + { + collations: []collationInTable{ + {ks: "user", table: "user", collationName: "utf8mb4_bin", offsetInTable: 4}, + }, + query: "select count(*) as c, textcol2 from user group by textcol2 order by c", + check: func(t *testing.T, colls []collationInTable, primitive engine.Primitive) { + memSort, isMemSort := primitive.(*engine.MemorySort) + require.True(t, isMemSort, "should be a MemorySort") + oa, isOA := memSort.Input.(*engine.OrderedAggregate) + require.True(t, isOA, "should be an OrderedAggregate") + require.Equal(t, collations.LookupByName(colls[0].collationName).Id(), oa.GroupByKeys[0].CollationID) + }, + }, + } + + for i, tc := range testCases { + t.Run(fmt.Sprintf("%d %s", i+1, tc.query), func(t *testing.T) { + tc.run(t) + }) + } +} diff --git a/go/vt/vtgate/planbuilder/horizon_planning.go b/go/vt/vtgate/planbuilder/horizon_planning.go index 794c7894bc6..de201ecc403 100644 --- a/go/vt/vtgate/planbuilder/horizon_planning.go +++ b/go/vt/vtgate/planbuilder/horizon_planning.go @@ -529,7 +529,7 @@ func planGroupByGen4(groupExpr abstract.GroupBy, plan logicalPlan, semTable *sem return false, err } if groupExpr.DistinctAggrIndex == 0 { - node.eaggr.GroupByKeys = append(node.eaggr.GroupByKeys, &engine.GroupByParams{KeyCol: keyCol, WeightStringCol: wsOffset, Expr: groupExpr.WeightStrExpr}) + node.eaggr.GroupByKeys = append(node.eaggr.GroupByKeys, &engine.GroupByParams{KeyCol: keyCol, WeightStringCol: wsOffset, Expr: groupExpr.WeightStrExpr, CollationID: semTable.CollationFor(groupExpr.WeightStrExpr)}) } else { if wsOffset != -1 { node.eaggr.Aggregates[groupExpr.DistinctAggrIndex-1].WAssigned = true @@ -867,13 +867,13 @@ func (hp *horizonPlanning) planDistinct(ctx *planningContext, plan logicalPlan) case *joinGen4, *pulloutSubquery: return hp.addDistinct(ctx, plan) case *orderedAggregate: - return hp.planDistinctOA(p) + return hp.planDistinctOA(ctx.semTable, p) default: return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "unknown plan type for DISTINCT %T", plan) } } -func (hp *horizonPlanning) planDistinctOA(currPlan *orderedAggregate) (logicalPlan, error) { +func (hp *horizonPlanning) planDistinctOA(semTable *semantics.SemTable, currPlan *orderedAggregate) (logicalPlan, error) { eaggr := &engine.OrderedAggregate{} oa := &orderedAggregate{ resultsBuilder: resultsBuilder{ @@ -902,7 +902,7 @@ func (hp *horizonPlanning) planDistinctOA(currPlan *orderedAggregate) (logicalPl for _, aggrParam := range currPlan.eaggr.Aggregates { if sqlparser.EqualsExpr(expr, aggrParam.Expr) { found = true - eaggr.GroupByKeys = append(eaggr.GroupByKeys, &engine.GroupByParams{KeyCol: aggrParam.Col, WeightStringCol: -1}) + eaggr.GroupByKeys = append(eaggr.GroupByKeys, &engine.GroupByParams{KeyCol: aggrParam.Col, WeightStringCol: -1, CollationID: semTable.CollationFor(expr)}) break } } @@ -924,7 +924,14 @@ func (hp *horizonPlanning) addDistinct(ctx *planningContext, plan logicalPlan) ( if isAmbiguousOrderBy(index, aliasExpr.As, hp.qp.SelectExprs) { return nil, vterrors.Errorf(vtrpcpb.Code_UNIMPLEMENTED, "generating order by clause: ambiguous symbol reference: %s", sqlparser.String(aliasExpr.As)) } - grpParam := &engine.GroupByParams{KeyCol: index, WeightStringCol: -1} + var inner sqlparser.Expr + if !aliasExpr.As.IsEmpty() { + inner = sqlparser.NewColName(aliasExpr.As.String()) + ctx.semTable.CopyDependencies(aliasExpr.Expr, inner) + } else { + inner = aliasExpr.Expr + } + grpParam := &engine.GroupByParams{KeyCol: index, WeightStringCol: -1, CollationID: ctx.semTable.CollationFor(inner)} _, wOffset, added, err := wrapAndPushExpr(aliasExpr.Expr, aliasExpr.Expr, plan, ctx.semTable) if err != nil { return nil, err @@ -933,13 +940,6 @@ func (hp *horizonPlanning) addDistinct(ctx *planningContext, plan logicalPlan) ( grpParam.WeightStringCol = wOffset eaggr.GroupByKeys = append(eaggr.GroupByKeys, grpParam) - var inner sqlparser.Expr - if !aliasExpr.As.IsEmpty() { - inner = sqlparser.NewColName(aliasExpr.As.String()) - ctx.semTable.CopyDependencies(aliasExpr.Expr, inner) - } else { - inner = aliasExpr.Expr - } orderExprs = append(orderExprs, abstract.OrderBy{ Inner: &sqlparser.Order{Expr: inner}, WeightStrExpr: aliasExpr.Expr}, From 04176dffa7e35785e4ed33166dd012ed4de515a2 Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Thu, 21 Oct 2021 13:56:50 +0200 Subject: [PATCH 5/7] Setting a default value to text columns in plan tests schemas Signed-off-by: Florent Poinsard --- go/vt/vtgate/planbuilder/plan_test.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/go/vt/vtgate/planbuilder/plan_test.go b/go/vt/vtgate/planbuilder/plan_test.go index 1f3953573a6..5a4e5ec8df0 100644 --- a/go/vt/vtgate/planbuilder/plan_test.go +++ b/go/vt/vtgate/planbuilder/plan_test.go @@ -422,6 +422,17 @@ func loadSchema(t testing.TB, filename string) *vindexes.VSchema { if ks.Error != nil { t.Fatal(ks.Error) } + + // setting a default value to all the text columns in the tables of this keyspace + // so that we can "simulate" a real case scenario where the vschema is aware of + // columns' collations. + for _, table := range ks.Tables { + for i, col := range table.Columns { + if sqltypes.IsText(col.Type) { + table.Columns[i].CollationName = "latin1_swedish_ci" + } + } + } } return vschema } From 8afdeed85f16f3d04d3064dcc45305c884c01a3d Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Thu, 21 Oct 2021 14:02:00 +0200 Subject: [PATCH 6/7] Use InnerExpr instead of WeightStringExpr to calculate GroupByParams' collation ID Signed-off-by: Florent Poinsard --- go/vt/vtgate/planbuilder/horizon_planning.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/vt/vtgate/planbuilder/horizon_planning.go b/go/vt/vtgate/planbuilder/horizon_planning.go index de201ecc403..d1bbb84f8a8 100644 --- a/go/vt/vtgate/planbuilder/horizon_planning.go +++ b/go/vt/vtgate/planbuilder/horizon_planning.go @@ -529,7 +529,7 @@ func planGroupByGen4(groupExpr abstract.GroupBy, plan logicalPlan, semTable *sem return false, err } if groupExpr.DistinctAggrIndex == 0 { - node.eaggr.GroupByKeys = append(node.eaggr.GroupByKeys, &engine.GroupByParams{KeyCol: keyCol, WeightStringCol: wsOffset, Expr: groupExpr.WeightStrExpr, CollationID: semTable.CollationFor(groupExpr.WeightStrExpr)}) + node.eaggr.GroupByKeys = append(node.eaggr.GroupByKeys, &engine.GroupByParams{KeyCol: keyCol, WeightStringCol: wsOffset, Expr: groupExpr.WeightStrExpr, CollationID: semTable.CollationFor(groupExpr.Inner)}) } else { if wsOffset != -1 { node.eaggr.Aggregates[groupExpr.DistinctAggrIndex-1].WAssigned = true From 4149a501a136662f65a9930add6fddd9f425ffea Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Fri, 22 Oct 2021 07:31:14 +0200 Subject: [PATCH 7/7] address review comments Signed-off-by: Andres Taylor --- go/vt/vtgate/planbuilder/horizon_planning.go | 9 ++++++--- go/vt/vtgate/semantics/real_table.go | 10 ---------- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/go/vt/vtgate/planbuilder/horizon_planning.go b/go/vt/vtgate/planbuilder/horizon_planning.go index d1bbb84f8a8..a20f9faaefb 100644 --- a/go/vt/vtgate/planbuilder/horizon_planning.go +++ b/go/vt/vtgate/planbuilder/horizon_planning.go @@ -925,11 +925,14 @@ func (hp *horizonPlanning) addDistinct(ctx *planningContext, plan logicalPlan) ( return nil, vterrors.Errorf(vtrpcpb.Code_UNIMPLEMENTED, "generating order by clause: ambiguous symbol reference: %s", sqlparser.String(aliasExpr.As)) } var inner sqlparser.Expr - if !aliasExpr.As.IsEmpty() { + if aliasExpr.As.IsEmpty() { + inner = aliasExpr.Expr + } else { + // If we have an alias, we need to use the alias and not the original expression + // to make sure dependencies work correctly, + // we simply copy the dependencies of the original expression here inner = sqlparser.NewColName(aliasExpr.As.String()) ctx.semTable.CopyDependencies(aliasExpr.Expr, inner) - } else { - inner = aliasExpr.Expr } grpParam := &engine.GroupByParams{KeyCol: index, WeightStringCol: -1, CollationID: ctx.semTable.CollationFor(inner)} _, wOffset, added, err := wrapAndPushExpr(aliasExpr.Expr, aliasExpr.Expr, plan, ctx.semTable) diff --git a/go/vt/vtgate/semantics/real_table.go b/go/vt/vtgate/semantics/real_table.go index 1abdbc87fa9..7305bc216c2 100644 --- a/go/vt/vtgate/semantics/real_table.go +++ b/go/vt/vtgate/semantics/real_table.go @@ -21,8 +21,6 @@ import ( "vitess.io/vitess/go/mysql/collations" "vitess.io/vitess/go/sqltypes" - "vitess.io/vitess/go/vt/log" - vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" "vitess.io/vitess/go/vt/sqlparser" "vitess.io/vitess/go/vt/vterrors" @@ -109,14 +107,6 @@ func vindexTableToColumnInfo(tbl *vindexes.Table) []ColumnInfo { var collation collations.ID if sqltypes.IsText(col.Type) { collation = collations.LookupIDByName(col.CollationName) - if collation == collations.Unknown { - log.Warningf( - "Unknown collation name %s on table %s column %s", - col.CollationName, - tbl.Name.String(), - col.Name.String(), - ) - } } cols = append(cols, ColumnInfo{ Name: col.Name.String(),