Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

collations: Refactor to separate basic collation information from data #13868

Merged
merged 1 commit into from
Aug 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,15 @@ require (
go.etcd.io/etcd/client/pkg/v3 v3.5.8
go.etcd.io/etcd/client/v3 v3.5.8
go.uber.org/mock v0.2.0
golang.org/x/crypto v0.8.0 // indirect
golang.org/x/mod v0.11.0 // indirect
golang.org/x/net v0.9.0
golang.org/x/crypto v0.12.0 // indirect
golang.org/x/mod v0.12.0 // indirect
golang.org/x/net v0.14.0
golang.org/x/oauth2 v0.7.0
golang.org/x/sys v0.8.0 // indirect
golang.org/x/term v0.8.0
golang.org/x/text v0.9.0
golang.org/x/sys v0.11.0 // indirect
golang.org/x/term v0.11.0
golang.org/x/text v0.12.0
golang.org/x/time v0.3.0
golang.org/x/tools v0.8.0
golang.org/x/tools v0.12.1-0.20230815132531-74c255bcf846
google.golang.org/api v0.121.0
google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 // indirect
google.golang.org/grpc v1.55.0-dev
Expand All @@ -107,7 +107,7 @@ require (
github.com/spf13/jwalterweatherman v1.1.0
github.com/xlab/treeprint v1.2.0
go.uber.org/goleak v1.2.1
golang.org/x/sync v0.1.0
golang.org/x/sync v0.3.0
modernc.org/sqlite v1.20.3
)

Expand Down
31 changes: 16 additions & 15 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -667,8 +667,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y
golang.org/x/crypto v0.0.0-20211108221036-ceb1ce70b4fa/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20211117183948-ae814b36b871/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.0.0-20220314234659-1baeb1ce4c0b/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.8.0 h1:pd9TJtTueMTVQXzk8E2XESSMQDj/U7OUu0PqJqPXQjQ=
golang.org/x/crypto v0.8.0/go.mod h1:mRqEX+O9/h5TFCrQhkgjo2yKi0yYA+9ecGkdQoHrywE=
golang.org/x/crypto v0.12.0 h1:tFM/ta59kqch6LlvYnPa0yx5a83cL2nHflFhYKvv9Yk=
golang.org/x/crypto v0.12.0/go.mod h1:NF0Gs7EO5K4qLn+Ylc+fih8BSTeIjAP05siRnAh98yw=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
Expand Down Expand Up @@ -705,8 +705,8 @@ golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.11.0 h1:bUO06HqtnRcc/7l71XBe4WcqTZ+3AH1J59zWDDwLKgU=
golang.org/x/mod v0.11.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
Expand Down Expand Up @@ -759,8 +759,8 @@ golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco=
golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE=
golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM=
golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
golang.org/x/net v0.14.0 h1:BONx9s002vGdD9umnlX1Po8vOZmrgH34qlHcD1MfK14=
golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
Expand All @@ -784,8 +784,9 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
Expand Down Expand Up @@ -856,14 +857,14 @@ golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA=
golang.org/x/term v0.8.0 h1:n5xxQn2i3PC0yLAbjTpNT85q/Kgzcr2gIoX9OrJUols=
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
golang.org/x/term v0.11.0 h1:F9tnn/DA/Im8nCwm+fX+1/eBwi4qFjRT++MhtVC4ZX0=
golang.org/x/term v0.11.0/go.mod h1:zC9APTIj3jG3FdV/Ons+XE1riIZXG4aZ4GTHiPZJPIU=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
Expand All @@ -875,8 +876,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.12.0 h1:k+n5B8goJNdU7hSvEtMUz3d1Q6D/XW4COJSJR6fN0mc=
golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
Expand Down Expand Up @@ -936,8 +937,8 @@ golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ=
golang.org/x/tools v0.8.0 h1:vSDcovVPld282ceKgDimkRSC8kpaH1dgyc9UMzlt84Y=
golang.org/x/tools v0.8.0/go.mod h1:JxBZ99ISMI5ViVkT1tr6tdNmXeTrcpVSD3vZ1RsRdN4=
golang.org/x/tools v0.12.1-0.20230815132531-74c255bcf846 h1:Vve/L0v7CXXuxUmaMGIEK/dEeq7uiqb5qBgQrZzIE7E=
golang.org/x/tools v0.12.1-0.20230815132531-74c255bcf846/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
Expand Down
212 changes: 5 additions & 207 deletions go/mysql/collations/coercion.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ package collations
import (
"fmt"
"unsafe"

"vitess.io/vitess/go/mysql/collations/charset"
)

func init() {
Expand Down Expand Up @@ -95,11 +93,6 @@ const (
RepertoireUnicode
)

// Coercion is a function that will transform either the given argument
// arguments of the function into a specific character set. The `dst` argument
// will be used as the destination of the coerced argument, but it can be nil.
type Coercion func(dst, in []byte) ([]byte, error)

// TypedCollation is the Collation of a SQL expression, including its coercibility
// and repertoire.
type TypedCollation struct {
Expand All @@ -112,208 +105,13 @@ func (tc TypedCollation) Valid() bool {
return tc.Collation != Unknown
}

func checkCompatibleCollations(
left Collation, leftCoercibility Coercibility, leftRepertoire Repertoire,
right Collation, rightCoercibility Coercibility, rightRepertoire Repertoire,
) bool {
leftCS := left.Charset()
rightCS := right.Charset()

switch leftCS.(type) {
case charset.Charset_utf8mb4:
if leftCoercibility <= rightCoercibility {
return true
}

case charset.Charset_utf32:
switch {
case leftCoercibility < rightCoercibility:
return true
case leftCoercibility == rightCoercibility:
if !charset.IsUnicode(rightCS) {
return true
}
if !left.IsBinary() {
return true
}
}

case charset.Charset_utf8mb3, charset.Charset_ucs2, charset.Charset_utf16, charset.Charset_utf16le:
switch {
case leftCoercibility < rightCoercibility:
return true
case leftCoercibility == rightCoercibility:
if !charset.IsUnicode(rightCS) {
return true
}
}
}

if rightRepertoire == RepertoireASCII {
switch {
case leftCoercibility < rightCoercibility:
return true
case leftCoercibility == rightCoercibility:
if leftRepertoire == RepertoireUnicode {
return true
}
}
}

return false
}

// CoercionOptions is used to configure how aggressive the algorithm can be
// when merging two different collations by transcoding them.
type CoercionOptions struct {
// ConvertToSuperset allows merging two different collations as long
// as the charset of one of them is a strict superset of the other. In
// order to operate on the two expressions, one of them will need to
// be transcoded. This transcoding will always be safe because the string
// with the smallest repertoire will be transcoded to its superset, which
// cannot fail.
ConvertToSuperset bool

// ConvertWithCoercion allows merging two different collations by forcing
// a coercion as long as the coercibility of the two sides is lax enough.
// This will force a transcoding of one of the expressions even if their
// respective charsets are not a strict superset, so the resulting transcoding
// CAN fail depending on the content of their strings.
ConvertWithCoercion bool
}

// MergeCollations returns a Coercion function for a pair of TypedCollation based
// on their coercibility.
//
// The function takes the typed collations for the two sides of a text operation
// (namely, a comparison or concatenation of two textual expressions). These typed
// collations includes the actual collation for the expression on each size, their
// coercibility values (see: Coercibility) and their respective repertoires,
// and returns the target collation (i.e. the collation into which the two expressions
// must be coerced, and a Coercion function. The Coercion function can be called repeatedly
// with the different values for the two expressions and will transcode either
// the left-hand or right-hand value to the appropriate charset so it can be
// collated against the other value.
//
// If the collations for both sides of the expressions are the same, the returned
// Coercion function will be a no-op. Likewise, if the two collations are not the same,
// but they are compatible and have the same charset, the Coercion function will also
// be a no-op.
//
// If the collations for both sides of the expression are not compatible, an error
// will be returned and the returned TypedCollation and Coercion will be nil.
func (env *Environment) MergeCollations(left, right TypedCollation, opt CoercionOptions) (TypedCollation, Coercion, Coercion, error) {
leftColl := left.Collation.Get()
rightColl := right.Collation.Get()
if leftColl == nil || rightColl == nil {
return TypedCollation{}, nil, nil, fmt.Errorf("unsupported TypeCollationID: %v / %v", left.Collation, right.Collation)
}

leftCS := leftColl.Charset()
rightCS := rightColl.Charset()

if left.Coercibility == CoerceExplicit && right.Coercibility == CoerceExplicit {
if left.Collation != right.Collation {
goto cannotCoerce
}
}

if leftCS.Name() == rightCS.Name() {
switch {
case left.Coercibility < right.Coercibility:
left.Repertoire |= right.Repertoire
return left, nil, nil, nil

case left.Coercibility > right.Coercibility:
right.Repertoire |= left.Repertoire
return right, nil, nil, nil

case left.Collation == right.Collation:
left.Repertoire |= right.Repertoire
return left, nil, nil, nil
}

if left.Coercibility == CoerceExplicit {
goto cannotCoerce
}

leftCsBin := leftColl.IsBinary()
rightCsBin := rightColl.IsBinary()

switch {
case leftCsBin && rightCsBin:
left.Coercibility = CoerceNone
return left, nil, nil, nil

case leftCsBin:
return left, nil, nil, nil

case rightCsBin:
return right, nil, nil, nil
}

defaults := env.byCharset[leftCS.Name()]
return TypedCollation{
Collation: defaults.Binary.ID(),
Coercibility: CoerceNone,
Repertoire: left.Repertoire | right.Repertoire,
}, nil, nil, nil
}

if _, leftIsBinary := leftColl.(*Collation_binary); leftIsBinary {
if left.Coercibility <= right.Coercibility {
return left, nil, nil, nil
}
goto coerceToRight
}
if _, rightIsBinary := rightColl.(*Collation_binary); rightIsBinary {
if left.Coercibility >= right.Coercibility {
return right, nil, nil, nil
}
goto coerceToLeft
}

if opt.ConvertToSuperset {
if checkCompatibleCollations(leftColl, left.Coercibility, left.Repertoire, rightColl, right.Coercibility, right.Repertoire) {
goto coerceToLeft
}
if checkCompatibleCollations(rightColl, right.Coercibility, right.Repertoire, leftColl, left.Coercibility, left.Repertoire) {
goto coerceToRight
}
}

if opt.ConvertWithCoercion {
if left.Coercibility < right.Coercibility && right.Coercibility > CoerceImplicit {
goto coerceToLeft
}
if right.Coercibility < left.Coercibility && left.Coercibility > CoerceImplicit {
goto coerceToRight
}
}

cannotCoerce:
return TypedCollation{}, nil, nil, fmt.Errorf("Illegal mix of collations (%s,%s) and (%s,%s)",
leftColl.Name(), left.Coercibility, rightColl.Name(), right.Coercibility)

coerceToLeft:
return left, nil,
func(dst, in []byte) ([]byte, error) {
return charset.Convert(dst, leftCS, in, rightCS)
}, nil

coerceToRight:
return right,
func(dst, in []byte) ([]byte, error) {
return charset.Convert(dst, rightCS, in, leftCS)
}, nil, nil
}

func (env *Environment) EnsureCollate(fromID, toID ID) error {
// these two lookups should never fail
from := fromID.Get()
to := toID.Get()
if from.Charset().Name() != to.Charset().Name() {
return fmt.Errorf("COLLATION '%s' is not valid for CHARACTER SET '%s'", to.Name(), from.Charset().Name())
fromCharsetName := env.LookupCharsetName(fromID)
toCharsetName := env.LookupCharsetName(toID)
if fromCharsetName != toCharsetName {
toCollName := env.LookupName(toID)
return fmt.Errorf("COLLATION '%s' is not valid for CHARACTER SET '%s'", toCollName, fromCharsetName)
}
return nil
}
Loading