From 4a1a6f76c79b2e78a913b032e46b1a4d21d13a45 Mon Sep 17 00:00:00 2001 From: Solon Gordon Date: Tue, 13 Oct 2020 07:48:50 -0400 Subject: [PATCH] sql: anonymize enum values ENUM values were improperly being represented as strings in the AST, which meant that they were not anonymized in telemetry. I created a new AST node type for them which respects anonymization. Fixes #55385 Release note: None --- pkg/sql/alter_type.go | 4 +-- pkg/sql/catalog/typedesc/type_desc.go | 4 +-- pkg/sql/crdb_internal.go | 4 +-- pkg/sql/create_type.go | 4 +-- pkg/sql/parser/sql.y | 25 ++++++++++-------- pkg/sql/rowenc/testutils.go | 4 +-- pkg/sql/sem/tree/alter_type.go | 18 ++++++------- pkg/sql/sem/tree/create.go | 37 +++++++++++++++++++++------ pkg/sql/sem/tree/format_test.go | 8 ++++++ 9 files changed, 69 insertions(+), 39 deletions(-) diff --git a/pkg/sql/alter_type.go b/pkg/sql/alter_type.go index fcf0387aee2e..663eba4a7786 100644 --- a/pkg/sql/alter_type.go +++ b/pkg/sql/alter_type.go @@ -71,7 +71,7 @@ func (n *alterTypeNode) startExec(params runParams) error { case *tree.AlterTypeAddValue: err = params.p.addEnumValue(params.ctx, n, t) case *tree.AlterTypeRenameValue: - err = params.p.renameTypeValue(params.ctx, n, t.OldVal, t.NewVal) + err = params.p.renameTypeValue(params.ctx, n, string(t.OldVal), string(t.NewVal)) case *tree.AlterTypeRename: err = params.p.renameType(params.ctx, n, string(t.NewName)) case *tree.AlterTypeSetSchema: @@ -114,7 +114,7 @@ func (p *planner) addEnumValue( } // See if the value already exists in the enum or not. for _, member := range n.desc.EnumMembers { - if member.LogicalRepresentation == node.NewVal { + if member.LogicalRepresentation == string(node.NewVal) { if node.IfNotExists { p.BufferClientNotice( ctx, diff --git a/pkg/sql/catalog/typedesc/type_desc.go b/pkg/sql/catalog/typedesc/type_desc.go index b61f4a61949e..0a9fc09b4db4 100644 --- a/pkg/sql/catalog/typedesc/type_desc.go +++ b/pkg/sql/catalog/typedesc/type_desc.go @@ -301,7 +301,7 @@ func (desc *Mutable) AddEnumValue(node *tree.AlterTypeAddValue) error { // If the value was requested to be added before or after an existing // value, then find the index of where it should be inserted. foundIndex := -1 - existing := node.Placement.ExistingVal + existing := string(node.Placement.ExistingVal) for i, member := range desc.EnumMembers { if member.LogicalRepresentation == existing { foundIndex = i @@ -324,7 +324,7 @@ func (desc *Mutable) AddEnumValue(node *tree.AlterTypeAddValue) error { // how to decode the physical representation. newPhysicalRep := enum.GenByteStringBetween(getPhysicalRep(pos), getPhysicalRep(pos+1), enum.SpreadSpacing) newMember := descpb.TypeDescriptor_EnumMember{ - LogicalRepresentation: node.NewVal, + LogicalRepresentation: string(node.NewVal), PhysicalRepresentation: newPhysicalRep, Capability: descpb.TypeDescriptor_EnumMember_READ_ONLY, } diff --git a/pkg/sql/crdb_internal.go b/pkg/sql/crdb_internal.go index 81c5fadc2d05..8f6261af5caf 100644 --- a/pkg/sql/crdb_internal.go +++ b/pkg/sql/crdb_internal.go @@ -1668,11 +1668,11 @@ CREATE TABLE crdb_internal.create_type_statements ( return forEachTypeDesc(ctx, p, db, func(db *dbdesc.Immutable, sc string, typeDesc *typedesc.Immutable) error { switch typeDesc.Kind { case descpb.TypeDescriptor_ENUM: - var enumLabels []string + var enumLabels tree.EnumValueList enumLabelsDatum := tree.NewDArray(types.String) for i := range typeDesc.EnumMembers { rep := typeDesc.EnumMembers[i].LogicalRepresentation - enumLabels = append(enumLabels, rep) + enumLabels = append(enumLabels, tree.EnumValue(rep)) if err := enumLabelsDatum.Append(tree.NewDString(rep)); err != nil { return err } diff --git a/pkg/sql/create_type.go b/pkg/sql/create_type.go index 514ed66565da..790ea16a74e0 100644 --- a/pkg/sql/create_type.go +++ b/pkg/sql/create_type.go @@ -238,7 +238,7 @@ func (p *planner) createEnum(params runParams, n *tree.CreateType) error { sqltelemetry.IncrementEnumCounter(sqltelemetry.EnumCreate) // Ensure there are no duplicates in the input enum values. - seenVals := make(map[string]struct{}) + seenVals := make(map[tree.EnumValue]struct{}) for _, value := range n.EnumLabels { _, ok := seenVals[value] if ok { @@ -265,7 +265,7 @@ func (p *planner) createEnum(params runParams, n *tree.CreateType) error { physReps := enum.GenerateNEvenlySpacedBytes(len(n.EnumLabels)) for i := range n.EnumLabels { members[i] = descpb.TypeDescriptor_EnumMember{ - LogicalRepresentation: n.EnumLabels[i], + LogicalRepresentation: string(n.EnumLabels[i]), PhysicalRepresentation: physReps[i], Capability: descpb.TypeDescriptor_EnumMember_ALL, } diff --git a/pkg/sql/parser/sql.y b/pkg/sql/parser/sql.y index ffe29713ef4d..5f574dbbb4ea 100644 --- a/pkg/sql/parser/sql.y +++ b/pkg/sql/parser/sql.y @@ -156,6 +156,9 @@ func (u *sqlSymUnion) shardedIndexDef() *tree.ShardedIndexDef { func (u *sqlSymUnion) nameList() tree.NameList { return u.val.(tree.NameList) } +func (u *sqlSymUnion) enumValueList() tree.EnumValueList { + return u.val.(tree.EnumValueList) +} func (u *sqlSymUnion) unresolvedName() *tree.UnresolvedName { return u.val.(*tree.UnresolvedName) } @@ -2011,7 +2014,7 @@ alter_type_stmt: $$.val = &tree.AlterType{ Type: $3.unresolvedObjectName(), Cmd: &tree.AlterTypeAddValue{ - NewVal: $6, + NewVal: tree.EnumValue($6), IfNotExists: false, Placement: $7.alterTypeAddValuePlacement(), }, @@ -2022,7 +2025,7 @@ alter_type_stmt: $$.val = &tree.AlterType{ Type: $3.unresolvedObjectName(), Cmd: &tree.AlterTypeAddValue{ - NewVal: $9, + NewVal: tree.EnumValue($9), IfNotExists: true, Placement: $10.alterTypeAddValuePlacement(), }, @@ -2033,8 +2036,8 @@ alter_type_stmt: $$.val = &tree.AlterType{ Type: $3.unresolvedObjectName(), Cmd: &tree.AlterTypeRenameValue{ - OldVal: $6, - NewVal: $8, + OldVal: tree.EnumValue($6), + NewVal: tree.EnumValue($8), }, } } @@ -2080,14 +2083,14 @@ opt_add_val_placement: { $$.val = &tree.AlterTypeAddValuePlacement{ Before: true, - ExistingVal: $2, + ExistingVal: tree.EnumValue($2), } } | AFTER SCONST { $$.val = &tree.AlterTypeAddValuePlacement{ Before: false, - ExistingVal: $2, + ExistingVal: tree.EnumValue($2), } } | /* EMPTY */ @@ -6610,7 +6613,7 @@ create_type_stmt: $$.val = &tree.CreateType{ TypeName: $3.unresolvedObjectName(), Variety: tree.Enum, - EnumLabels: $7.strs(), + EnumLabels: $7.enumValueList(), } } | CREATE TYPE error // SHOW HELP: CREATE TYPE @@ -6628,21 +6631,21 @@ create_type_stmt: opt_enum_val_list: enum_val_list { - $$.val = $1.strs() + $$.val = $1.enumValueList() } | /* EMPTY */ { - $$.val = []string(nil) + $$.val = tree.EnumValueList(nil) } enum_val_list: SCONST { - $$.val = []string{$1} + $$.val = tree.EnumValueList{tree.EnumValue($1)} } | enum_val_list ',' SCONST { - $$.val = append($1.strs(), $3) + $$.val = append($1.enumValueList(), tree.EnumValue($3)) } // %Help: CREATE INDEX - create a new index diff --git a/pkg/sql/rowenc/testutils.go b/pkg/sql/rowenc/testutils.go index e5084b4379e9..aa18b59539b8 100644 --- a/pkg/sql/rowenc/testutils.go +++ b/pkg/sql/rowenc/testutils.go @@ -1700,13 +1700,13 @@ func RandString(rng *rand.Rand, length int, alphabet string) string { // be random strings generated from alphabet. func RandCreateType(rng *rand.Rand, name, alphabet string) tree.Statement { numLabels := rng.Intn(6) + 1 - labels := make([]string, numLabels) + labels := make(tree.EnumValueList, numLabels) labelsMap := make(map[string]struct{}) i := 0 for i < numLabels { s := RandString(rng, rng.Intn(6)+1, alphabet) if _, ok := labelsMap[s]; !ok { - labels[i] = s + labels[i] = tree.EnumValue(s) labelsMap[s] = struct{}{} i++ } diff --git a/pkg/sql/sem/tree/alter_type.go b/pkg/sql/sem/tree/alter_type.go index 8cfecff1cb47..c022843cb0c3 100644 --- a/pkg/sql/sem/tree/alter_type.go +++ b/pkg/sql/sem/tree/alter_type.go @@ -12,7 +12,6 @@ package tree import ( "github.com/cockroachdb/cockroach/pkg/server/telemetry" - "github.com/cockroachdb/cockroach/pkg/sql/lex" "github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry" ) @@ -52,7 +51,7 @@ var _ AlterTypeCmd = &AlterTypeOwner{} // AlterTypeAddValue represents an ALTER TYPE ADD VALUE command. type AlterTypeAddValue struct { - NewVal string + NewVal EnumValue IfNotExists bool Placement *AlterTypeAddValuePlacement } @@ -63,14 +62,14 @@ func (node *AlterTypeAddValue) Format(ctx *FmtCtx) { if node.IfNotExists { ctx.WriteString("IF NOT EXISTS ") } - lex.EncodeSQLString(&ctx.Buffer, node.NewVal) + ctx.FormatNode(&node.NewVal) if node.Placement != nil { if node.Placement.Before { ctx.WriteString(" BEFORE ") } else { ctx.WriteString(" AFTER ") } - lex.EncodeSQLString(&ctx.Buffer, node.Placement.ExistingVal) + ctx.FormatNode(&node.Placement.ExistingVal) } } @@ -83,22 +82,21 @@ func (node *AlterTypeAddValue) TelemetryCounter() telemetry.Counter { // TYPE ADD VALUE command ([BEFORE | AFTER] value). type AlterTypeAddValuePlacement struct { Before bool - ExistingVal string + ExistingVal EnumValue } // AlterTypeRenameValue represents an ALTER TYPE RENAME VALUE command. type AlterTypeRenameValue struct { - // TODO? - OldVal string - NewVal string + OldVal EnumValue + NewVal EnumValue } // Format implements the NodeFormatter interface. func (node *AlterTypeRenameValue) Format(ctx *FmtCtx) { ctx.WriteString(" RENAME VALUE ") - lex.EncodeSQLString(&ctx.Buffer, node.OldVal) + ctx.FormatNode(&node.OldVal) ctx.WriteString(" TO ") - lex.EncodeSQLString(&ctx.Buffer, node.NewVal) + ctx.FormatNode(&node.NewVal) } // TelemetryCounter implements the AlterTypeCmd interface. diff --git a/pkg/sql/sem/tree/create.go b/pkg/sql/sem/tree/create.go index 7d423443148f..ac11be23e0fe 100644 --- a/pkg/sql/sem/tree/create.go +++ b/pkg/sql/sem/tree/create.go @@ -261,12 +261,38 @@ const ( Domain ) +// EnumValue represents a single enum value. +type EnumValue string + +// Format implements the NodeFormatter interface. +func (n *EnumValue) Format(ctx *FmtCtx) { + f := ctx.flags + if f.HasFlags(FmtAnonymize) { + ctx.WriteByte('_') + } else { + lex.EncodeSQLString(&ctx.Buffer, string(*n)) + } +} + +// EnumValueList represents a list of enum values. +type EnumValueList []EnumValue + +// Format implements the NodeFormatter interface. +func (l *EnumValueList) Format(ctx *FmtCtx) { + for i := range *l { + if i > 0 { + ctx.WriteString(", ") + } + ctx.FormatNode(&(*l)[i]) + } +} + // CreateType represents a CREATE TYPE statement. type CreateType struct { TypeName *UnresolvedObjectName Variety CreateTypeVariety // EnumLabels is set when this represents a CREATE TYPE ... AS ENUM statement. - EnumLabels []string + EnumLabels EnumValueList } var _ Statement = &CreateType{} @@ -274,17 +300,12 @@ var _ Statement = &CreateType{} // Format implements the NodeFormatter interface. func (node *CreateType) Format(ctx *FmtCtx) { ctx.WriteString("CREATE TYPE ") - ctx.WriteString(node.TypeName.String()) + ctx.FormatNode(node.TypeName) ctx.WriteString(" ") switch node.Variety { case Enum: ctx.WriteString("AS ENUM (") - for i := range node.EnumLabels { - if i > 0 { - ctx.WriteString(", ") - } - lex.EncodeSQLString(&ctx.Buffer, node.EnumLabels[i]) - } + ctx.FormatNode(&node.EnumLabels) ctx.WriteString(")") } } diff --git a/pkg/sql/sem/tree/format_test.go b/pkg/sql/sem/tree/format_test.go index b8dcf9a18a39..2f2ffc06f3d3 100644 --- a/pkg/sql/sem/tree/format_test.go +++ b/pkg/sql/sem/tree/format_test.go @@ -116,6 +116,14 @@ func TestFormatStatement(t *testing.T) { `ALTER DATABASE _ OWNER TO _`}, {`ALTER SCHEMA s OWNER TO o`, tree.FmtAnonymize, `ALTER SCHEMA _ OWNER TO _`}, + + // Test ENUM anonymization. + {`CREATE TYPE a AS ENUM ('a', 'b', 'c')`, tree.FmtAnonymize, + `CREATE TYPE _ AS ENUM (_, _, _)`}, + {`ALTER TYPE a ADD VALUE 'hi' BEFORE 'hello'`, tree.FmtAnonymize, + `ALTER TYPE _ ADD VALUE _ BEFORE _`}, + {`ALTER TYPE a RENAME VALUE 'value1' TO 'value2'`, tree.FmtAnonymize, + `ALTER TYPE _ RENAME VALUE _ TO _`}, } for i, test := range testData {