diff --git a/pkg/sql/lex/encode.go b/pkg/sql/lex/encode.go index 27c8e7060d31..8d956ea74ea3 100644 --- a/pkg/sql/lex/encode.go +++ b/pkg/sql/lex/encode.go @@ -28,7 +28,6 @@ import ( "encoding/base64" "encoding/hex" "fmt" - "unicode" "unicode/utf8" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" @@ -130,24 +129,6 @@ func EncodeSQLStringWithFlags(buf *bytes.Buffer, in string, flags EncodeFlags) { } } -// EncodeSQLStringInsideArray writes a string literal to buf using the "string -// within array" formatting. -func EncodeSQLStringInsideArray(buf *bytes.Buffer, in string) { - buf.WriteByte('"') - // Loop through each unicode code point. - for i, r := range in { - ch := byte(r) - if unicode.IsPrint(r) && !stringencoding.NeedEscape(ch) && ch != '"' { - // Character is printable doesn't need escaping - just print it out. - buf.WriteRune(r) - } else { - stringencoding.EncodeEscapedChar(buf, in, r, ch, i, '"') - } - } - - buf.WriteByte('"') -} - // EncodeUnrestrictedSQLIdent writes the identifier in s to buf. // The identifier is only quoted if the flags don't tell otherwise and // the identifier contains special characters. 
diff --git a/pkg/sql/lex/encode_test.go b/pkg/sql/lex/encode_test.go index 9764dc36efd9..d06f543b8f7c 100644 --- a/pkg/sql/lex/encode_test.go +++ b/pkg/sql/lex/encode_test.go @@ -86,16 +86,9 @@ func testEncodeString(t *testing.T, input []byte, encode func(*bytes.Buffer, str func BenchmarkEncodeSQLString(b *testing.B) { str := strings.Repeat("foo", 10000) - b.Run("old version", func(b *testing.B) { - for i := 0; i < b.N; i++ { - lex.EncodeSQLStringWithFlags(bytes.NewBuffer(nil), str, lex.EncBareStrings) - } - }) - b.Run("new version", func(b *testing.B) { - for i := 0; i < b.N; i++ { - lex.EncodeSQLStringInsideArray(bytes.NewBuffer(nil), str) - } - }) + for i := 0; i < b.N; i++ { + lex.EncodeSQLStringWithFlags(bytes.NewBuffer(nil), str, lex.EncBareStrings) + } } func TestEncodeRestrictedSQLIdent(t *testing.T) { diff --git a/pkg/sql/logictest/testdata/logic_test/array b/pkg/sql/logictest/testdata/logic_test/array index 4270b725ac22..b331419c1171 100644 --- a/pkg/sql/logictest/testdata/logic_test/array +++ b/pkg/sql/logictest/testdata/logic_test/array @@ -76,7 +76,7 @@ SELECT ARRAY['one', 'two', 'fünf'] query T SELECT ARRAY[e'\n', e'g\x10h'] ---- -{"\n","g\x10h"} +{"\x0a","g\x10h"} query T SELECT ARRAY['foo', 'bar'] @@ -582,10 +582,10 @@ query T rowsort SELECT b FROM a ---- {} -{true} -{false} -{true,true} -{false,true} +{t} +{f} +{t,t} +{f,t} statement ok DROP TABLE a diff --git a/pkg/sql/logictest/testdata/logic_test/orms b/pkg/sql/logictest/testdata/logic_test/orms index 4c8af8849c45..8f6fa2758823 100644 --- a/pkg/sql/logictest/testdata/logic_test/orms +++ b/pkg/sql/logictest/testdata/logic_test/orms @@ -68,9 +68,9 @@ GROUP BY i.relname, ix.indkey ORDER BY i.relname ---- -name primary unique indkey column_indexes column_names definition -customers_id_idx false false 2 {1,2} {"name","id"} CREATE INDEX customers_id_idx ON test.public.customers (id ASC) -primary true true 1 {1,2} {"name","id"} CREATE UNIQUE INDEX "primary" ON test.public.customers (name ASC) +name 
primary unique indkey column_indexes column_names definition +customers_id_idx false false 2 {1,2} {name,id} CREATE INDEX customers_id_idx ON test.public.customers (id ASC) +primary true true 1 {1,2} {name,id} CREATE UNIQUE INDEX "primary" ON test.public.customers (name ASC) query TT colnames diff --git a/pkg/sql/logictest/testdata/logic_test/srfs b/pkg/sql/logictest/testdata/logic_test/srfs index 9dd0a87cd8fb..89400c076326 100644 --- a/pkg/sql/logictest/testdata/logic_test/srfs +++ b/pkg/sql/logictest/testdata/logic_test/srfs @@ -284,13 +284,13 @@ query TTT colnames SELECT 'a' AS a, pg_get_keywords(), 'c' AS c LIMIT 1 ---- a pg_get_keywords c -a ('abort','U','unreserved') c +a ("abort","U","unreserved") c query TTT colnames SELECT 'a' AS a, pg_get_keywords() AS b, 'c' AS c LIMIT 1 ---- a b c -a ('abort','U','unreserved') c +a ("abort","U","unreserved") c subtest unary_table @@ -387,7 +387,7 @@ query T colnames SELECT information_schema._pg_expandarray(ARRAY['a']) ---- information_schema._pg_expandarray -('a',1) +("a",1) query TI colnames SELECT * FROM information_schema._pg_expandarray(ARRAY['a']) @@ -399,8 +399,8 @@ query T colnames SELECT information_schema._pg_expandarray(ARRAY['b', 'a']) ---- information_schema._pg_expandarray -('b',1) -('a',2) +("b",1) +("a",2) query TI colnames SELECT * FROM information_schema._pg_expandarray(ARRAY['b', 'a']) @@ -413,9 +413,9 @@ query T colnames SELECT information_schema._pg_expandarray(ARRAY['c', 'b', 'a']) ---- information_schema._pg_expandarray -('c',1) -('b',2) -('a',3) +("c",1) +("b",2) +("a",3) query TI colnames SELECT * FROM information_schema._pg_expandarray(ARRAY['c', 'b', 'a']) @@ -540,9 +540,9 @@ x n query T SELECT ((i.keys).*, 123) FROM (SELECT information_schema._pg_expandarray(ARRAY[3,2,1]) AS keys) AS i ---- -((3, 1),123) -((2, 2),123) -((1, 3),123) +("(3,1)",123) +("(2,2)",123) +("(1,3)",123) subtest generate_subscripts diff --git a/pkg/sql/logictest/testdata/logic_test/tuple 
b/pkg/sql/logictest/testdata/logic_test/tuple index d85a91010991..b8da05829267 100644 --- a/pkg/sql/logictest/testdata/logic_test/tuple +++ b/pkg/sql/logictest/testdata/logic_test/tuple @@ -7,7 +7,7 @@ query TT colnames SELECT (1, 2, 'hello', NULL, NULL) AS t, (true, NULL, (false, 6.6, false)) AS u ---- t u -(1,2,'hello',,) (true,,(false, 6.6, false)) +(1,2,"hello",,) (t,,"(f,6.6,f)") query BBBBBBBBB colnames SELECT @@ -634,7 +634,7 @@ SELECT ((1, 2, 'hello', NULL, NULL) AS a1, b2, c3, d4, e5) AS r, ((true, NULL, (false, 6.6, false)) AS a1, b2, c3) AS s ---- r s -(1,2,'hello',,) (true,,(false, 6.6, false)) +(1,2,"hello",,) (t,,"(f,6.6,f)") # Comparing tuples query BBB colnames @@ -709,7 +709,7 @@ query T colnames SELECT ((((((1, '2', 3) AS a, b, c), ((4,'5') AS a, b), (ROW(6) AS a)) AS a, b, c), ((7, 8) AS a, b), (ROW('9') AS a)) AS a, b, c) AS r ---- r -(((1, '2', 3), (4, '5'), (6)),(7, 8),('9')) +("(""(1,""""2"""",3)"",""(4,""""5"""")"",""(6)"")","(7,8)","(""9"")") subtest labeled_tuple_column_access @@ -767,7 +767,7 @@ a b c query T SELECT (((ROW(1,'2',true) AS a,b,c)).*, 456) ---- -((1, '2', true),456) +("(1,""2"",t)",456) query I colnames SELECT ((ROW(1) AS a)).* diff --git a/pkg/sql/pgwire/types.go b/pkg/sql/pgwire/types.go index a176b4000e23..18f7804727db 100644 --- a/pkg/sql/pgwire/types.go +++ b/pkg/sql/pgwire/types.go @@ -79,12 +79,8 @@ func (b *writeBuffer) writeTextDatum( } switch v := tree.UnwrapDatum(nil, d).(type) { case *tree.DBool: - b.putInt32(1) - if *v { - b.writeByte('t') - } else { - b.writeByte('f') - } + b.textFormatter.FormatNode(v) + b.writeLengthPrefixedVariablePutbuf() case *tree.DInt: // Start at offset 4 because `putInt32` clobbers the first 4 bytes. @@ -157,40 +153,24 @@ func (b *writeBuffer) writeTextDatum( b.writeLengthPrefixedString(v.JSON.String()) case *tree.DTuple: - b.variablePutbuf.WriteString("(") - for i, d := range v.D { - if i > 0 { - b.variablePutbuf.WriteString(",") - } - if d == tree.DNull { - // Emit nothing on NULL. 
- continue - } - b.simpleFormatter.FormatNode(d) - } - b.variablePutbuf.WriteString(")") + b.textFormatter.FormatNode(v) b.writeLengthPrefixedVariablePutbuf() case *tree.DArray: - // Arrays are serialized as a string of comma-separated values, surrounded - // by braces. - begin, sep, end := "{", ",", "}" - switch d.ResolvedType().Oid() { case oid.T_int2vector, oid.T_oidvector: // vectors are serialized as a string of space-separated values. - begin, sep, end = "", " ", "" - } - - b.variablePutbuf.WriteString(begin) - for i, d := range v.Array { - if i > 0 { + sep := "" + // TODO(justin): add a test for nested arrays. + for _, d := range v.Array { b.variablePutbuf.WriteString(sep) + b.textFormatter.FormatNode(d) + sep = " " } - // TODO(justin): add a test for nested arrays. - b.arrayFormatter.FormatNode(d) + default: + // Uses the default pgwire text format for arrays. + b.textFormatter.FormatNode(v) } - b.variablePutbuf.WriteString(end) b.writeLengthPrefixedVariablePutbuf() case *tree.DOid: diff --git a/pkg/sql/pgwire/write_buffer.go b/pkg/sql/pgwire/write_buffer.go index 4ed814d3d23e..b2a88675880a 100644 --- a/pkg/sql/pgwire/write_buffer.go +++ b/pkg/sql/pgwire/write_buffer.go @@ -41,10 +41,9 @@ type writeBuffer struct { // We keep both of these because there are operations that are only possible to // perform (efficiently) with one or the other, such as strconv.AppendInt with // putbuf or Datum.Format with variablePutbuf. - putbuf [64]byte - variablePutbuf bytes.Buffer - simpleFormatter tree.FmtCtx - arrayFormatter tree.FmtCtx + putbuf [64]byte + variablePutbuf bytes.Buffer + textFormatter tree.FmtCtx // bytecount counts the number of bytes written across all pgwire connections, not just this // buffer. 
This is passed in so that finishMsg can track all messages we've sent to a network @@ -56,8 +55,7 @@ func newWriteBuffer(bytecount *metric.Counter) *writeBuffer { b := &writeBuffer{ bytecount: bytecount, } - b.simpleFormatter = tree.MakeFmtCtx(&b.variablePutbuf, tree.FmtSimple) - b.arrayFormatter = tree.MakeFmtCtx(&b.variablePutbuf, tree.FmtArrays) + b.textFormatter = tree.MakeFmtCtx(&b.variablePutbuf, tree.FmtPgwireText) return b } diff --git a/pkg/sql/sem/tree/datum.go b/pkg/sql/sem/tree/datum.go index 651a7ba6672b..9335d3895612 100644 --- a/pkg/sql/sem/tree/datum.go +++ b/pkg/sql/sem/tree/datum.go @@ -409,6 +409,14 @@ func (*DBool) AmbiguousFormat() bool { return false } // Format implements the NodeFormatter interface. func (d *DBool) Format(ctx *FmtCtx) { + if ctx.HasFlags(fmtPgwireFormat) { + if bool(*d) { + ctx.WriteByte('t') + } else { + ctx.WriteByte('f') + } + return + } ctx.WriteString(strconv.FormatBool(bool(*d))) } @@ -958,8 +966,6 @@ func (d *DString) Format(ctx *FmtCtx) { buf, f := ctx.Buffer, ctx.flags if f.HasFlags(fmtUnicodeStrings) { buf.WriteString(string(*d)) - } else if f.HasFlags(fmtWithinArray) { - lex.EncodeSQLStringInsideArray(buf, string(*d)) } else { lex.EncodeSQLStringWithFlags(buf, string(*d), f.EncodeFlags()) } @@ -1026,14 +1032,9 @@ func (*DCollatedString) AmbiguousFormat() bool { return false } // Format implements the NodeFormatter interface. func (d *DCollatedString) Format(ctx *FmtCtx) { - buf, f := ctx.Buffer, ctx.flags - if f.HasFlags(fmtWithinArray) { - lex.EncodeSQLStringInsideArray(buf, d.Contents) - } else { - lex.EncodeSQLString(buf, d.Contents) - ctx.WriteString(" COLLATE ") - lex.EncodeUnrestrictedSQLIdent(buf, d.Locale, lex.EncNoFlags) - } + lex.EncodeSQLString(ctx.Buffer, d.Contents) + ctx.WriteString(" COLLATE ") + lex.EncodeUnrestrictedSQLIdent(ctx.Buffer, d.Locale, lex.EncNoFlags) } // ResolvedType implements the TypedExpr interface. 
@@ -1192,7 +1193,7 @@ func writeAsHexString(ctx *FmtCtx, d *DBytes) { // Format implements the NodeFormatter interface. func (d *DBytes) Format(ctx *FmtCtx) { f := ctx.flags - if f.HasFlags(fmtWithinArray) { + if f.HasFlags(fmtPgwireFormat) { ctx.WriteString(`"\\x`) writeAsHexString(ctx, d) ctx.WriteString(`"`) @@ -2718,14 +2719,24 @@ func (d *DTuple) IsMin(ctx *EvalContext) bool { func (*DTuple) AmbiguousFormat() bool { return false } // Format implements the NodeFormatter interface. -// TODO(bram): We don't format tuples in the same way as postgres. See #25522. // TODO(knz): this is broken if the tuple is labeled. See #26624. func (d *DTuple) Format(ctx *FmtCtx) { - if ctx.HasFlags(FmtParsable) && (len(d.D) == 0) { - ctx.WriteString("ROW()") + if ctx.HasFlags(fmtPgwireFormat) { + d.pgwireFormat(ctx) return } - ctx.FormatNode(&d.D) + + if ctx.HasFlags(FmtParsable) && (len(d.D) == 0) { + ctx.WriteString("ROW") + } + ctx.WriteByte('(') + comma := "" + for _, v := range d.D { + ctx.WriteString(comma) + ctx.FormatNode(v) + comma = ", " + } + ctx.WriteByte(')') } // Sorted returns true if the tuple is known to be sorted (and contains no @@ -2879,6 +2890,11 @@ func (dNull) AmbiguousFormat() bool { return false } // Format implements the NodeFormatter interface. func (dNull) Format(ctx *FmtCtx) { + if ctx.HasFlags(fmtPgwireFormat) { + // NULL sub-expressions in pgwire text values are represented with + // the empty string. + return + } ctx.WriteString("NULL") } @@ -3002,12 +3018,17 @@ func (d *DArray) AmbiguousFormat() bool { // Format implements the NodeFormatter interface. 
func (d *DArray) Format(ctx *FmtCtx) { + if ctx.HasFlags(fmtPgwireFormat) { + d.pgwireFormat(ctx) + return + } + ctx.WriteString("ARRAY[") - for i, v := range d.Array { - if i > 0 { - ctx.WriteString(",") - } + comma := "" + for _, v := range d.Array { + ctx.WriteString(comma) ctx.FormatNode(v) + comma = "," } ctx.WriteByte(']') } diff --git a/pkg/sql/sem/tree/format.go b/pkg/sql/sem/tree/format.go index 184f15412128..33abb480fbb2 100644 --- a/pkg/sql/sem/tree/format.go +++ b/pkg/sql/sem/tree/format.go @@ -82,9 +82,10 @@ const ( // using numeric notation (@S123). FmtSymbolicSubqueries - // If set, strings will be formatted for being contents of ARRAYs. - // Used internally in combination with FmtArrays defined below. - fmtWithinArray + // If set, strings will be formatted using the postgres datum-to-text + // conversion. See comments in pgwire_encode.go. + // Used internally in combination with FmtPgwireText defined below. + fmtPgwireFormat // If set, datums and placeholders will have type annotations (like // :::interval) as necessary to disambiguate between possible type @@ -113,9 +114,10 @@ const ( // identifiers without wrapping quotes in any case. FmtBareIdentifiers FmtFlags = FmtFlags(lex.EncBareIdentifiers) - // FmtArrays instructs the pretty-printer to print strings without - // wrapping quotes, if the string contains no special characters. - FmtArrays FmtFlags = fmtWithinArray | FmtFlags(lex.EncBareStrings) + // FmtPgwireText instructs the pretty-printer to use + // a pg-compatible conversion to strings. See comments + // in pgwire_encode.go. 
+ FmtPgwireText FmtFlags = fmtPgwireFormat | FmtFlags(lex.EncBareStrings) // FmtParsable instructs the pretty-printer to produce a representation that // can be parsed into an equivalent expression (useful for serialization of diff --git a/pkg/sql/sem/tree/format_test.go b/pkg/sql/sem/tree/format_test.go index 2d9465d00e81..7547cede9645 100644 --- a/pkg/sql/sem/tree/format_test.go +++ b/pkg/sql/sem/tree/format_test.go @@ -302,6 +302,74 @@ func TestFormatExpr2(t *testing.T) { } } +func TestFormatPgwireText(t *testing.T) { + testData := []struct { + expr string + expected string + }{ + {`true`, `t`}, + {`false`, `f`}, + {`ROW(1)`, `(1)`}, + {`ROW(1, NULL)`, `(1,)`}, + {`ROW(1, true, 3)`, `(1,t,3)`}, + {`ROW(1, (2, 3))`, `(1,"(2,3)")`}, + {`ROW(1, (2, 'a b'))`, `(1,"(2,""a b"")")`}, + {`ROW(1, (2, 'a"b'))`, `(1,"(2,""a""""b"")")`}, + {`ROW(1, 2, ARRAY[1,2,3])`, `(1,2,"{1,2,3}")`}, + {`ROW(1, 2, ARRAY[1,NULL,3])`, `(1,2,"{1,NULL,3}")`}, + {`ROW(1, 2, ARRAY['a','b','c'])`, `(1,2,"{""a"",""b"",""c""}")`}, + {`ROW(1, 2, ARRAY[true,false,true])`, `(1,2,"{t,f,t}")`}, + {`ARRAY[(1,2),(3,4)]`, `{"(1,2)","(3,4)"}`}, + {`ARRAY[(false,'a'),(true,'b')]`, `{"(f,\"a\")","(t,\"b\")"}`}, + {`ARRAY[(1,ARRAY[2,NULL])]`, `{"(1,\"{2,NULL}\")"}`}, + {`ARRAY[(1,(1,2)),(2,(3,4))]`, `{"(1,\"(1,2)\")","(2,\"(3,4)\")"}`}, + + {`(((1, 'a b', 3), (4, 'c d'), ROW(6)), (7, 8), ROW('e f'))`, + `("(""(1,""""a b"""",3)"",""(4,""""c d"""")"",""(6)"")","(7,8)","(""e f"")")`}, + + {`(((1, '2', 3), (4, '5'), ROW(6)), (7, 8), ROW('9'))`, + // TODO(knz): if/when we change the sub-string formatter + // to omit double quotes when not needed, the reference results + // needs to become: + // ("(""(1,2,3)"",""(4,5)"",""(6)"")","(7,8)","(9)") + `("(""(1,""""2"""",3)"",""(4,""""5"""")"",""(6)"")","(7,8)","(""9"")")`}, + + {`ARRAY[('a b',ARRAY['c d','e f']), ('g h',ARRAY['i j','k l'])]`, + `{"(\"a b\",\"{\"\"c d\"\",\"\"e f\"\"}\")","(\"g h\",\"{\"\"i j\"\",\"\"k l\"\"}\")"}`}, + + {`ARRAY[('1',ARRAY['2','3']), 
('4',ARRAY['5','6'])]`, + // TODO(knz): if/when we change the sub-string formatter + // to omit double quotes when not needed, the reference results + // needs to become: + // {"(1,\"{2,3}\")","(4,\"{5,6}\")"} + `{"(\"1\",\"{\"\"2\"\",\"\"3\"\"}\")","(\"4\",\"{\"\"5\"\",\"\"6\"\"}\")"}`}, + + {`ARRAY[e'\U00002001☃']`, `{" ☃"}`}, + } + var evalCtx tree.EvalContext + for i, test := range testData { + t.Run(fmt.Sprintf("%d %s", i, test.expr), func(t *testing.T) { + expr, err := parser.ParseExpr(test.expr) + if err != nil { + t.Fatal(err) + } + ctx := tree.MakeSemaContext(false) + typeChecked, err := tree.TypeCheck(expr, &ctx, types.Any) + if err != nil { + t.Fatal(err) + } + typeChecked, err = evalCtx.NormalizeExpr(typeChecked) + if err != nil { + t.Fatal(err) + } + exprStr := tree.AsStringWithFlags(typeChecked, tree.FmtPgwireText) + if exprStr != test.expected { + t.Fatalf("expected %s, got %s", test.expected, exprStr) + } + }) + } +} + // BenchmarkFormatRandomStatements measures the time needed to format // 1000 random statements. func BenchmarkFormatRandomStatements(b *testing.B) { diff --git a/pkg/sql/sem/tree/pgwire_encode.go b/pkg/sql/sem/tree/pgwire_encode.go new file mode 100644 index 000000000000..6a7a67d1af41 --- /dev/null +++ b/pkg/sql/sem/tree/pgwire_encode.go @@ -0,0 +1,128 @@ +// Copyright 2018 The Cockroach Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. 
+ +package tree + +import ( + "bytes" + "unicode" + + "github.com/cockroachdb/cockroach/pkg/util/stringencoding" +) + +func (d *DTuple) pgwireFormat(ctx *FmtCtx) { + // When converting a tuple to text in "postgres mode" there is + // special behavior: values are printed in "postgres mode" then the + // result string itself is rendered in "postgres mode". + // Immediate NULL tuple elements are printed as the empty string. + // + // In this last conversion, for *tuples* the special double quote + // and backslash characters are *doubled* (not escaped). Other + // special characters from C like \t \n etc are not escaped and + // instead printed as-is. Only non-valid characters get escaped to + // hex. So we delegate this formatting to a tuple-specific + // string printer called pgwireFormatStringInTuple(). + ctx.WriteByte('(') + comma := "" + for _, v := range d.D { + ctx.WriteString(comma) + switch dv := v.(type) { + case *DTuple, *DArray: + s := AsStringWithFlags(v, ctx.flags) + pgwireFormatStringInTuple(ctx.Buffer, s) + case *DString: + pgwireFormatStringInTuple(ctx.Buffer, string(*dv)) + case *DCollatedString: + pgwireFormatStringInTuple(ctx.Buffer, dv.Contents) + default: + ctx.FormatNode(v) + } + comma = "," + } + ctx.WriteByte(')') +} + +func pgwireFormatStringInTuple(buf *bytes.Buffer, in string) { + // TODO(knz): to be fully pg-compliant, this function should avoid + // enclosing the string in double quotes if there is no special + // character inside. + buf.WriteByte('"') + // Loop through each unicode code point. + for i, r := range in { + if r == '"' || r == '\\' { + // Strings in tuples double " and \. 
+ buf.WriteByte(byte(r)) + buf.WriteByte(byte(r)) + } else if unicode.IsGraphic(r) { + buf.WriteRune(r) + } else { + stringencoding.EncodeChar(buf, in, r, i) + } + } + buf.WriteByte('"') +} + +func (d *DArray) pgwireFormat(ctx *FmtCtx) { + // When converting an array to text in "postgres mode" there is + // special behavior: values are printed in "postgres mode" then the + // result string itself is rendered in "postgres mode". + // Immediate NULL array elements are printed as "NULL". + // + // In this last conversion, for *arrays* the special double quote + // and backslash characters are *escaped* (not doubled). Other + // special characters from C like \t \n etc are not escaped and + // instead printed as-is. Only non-valid characters get escaped to + // hex. So we delegate this formatting to an array-specific + // string printer called pgwireFormatStringInArray(). + ctx.WriteByte('{') + comma := "" + for _, v := range d.Array { + ctx.WriteString(comma) + switch dv := v.(type) { + case dNull: + ctx.WriteString("NULL") + case *DTuple, *DArray: + s := AsStringWithFlags(v, ctx.flags) + pgwireFormatStringInArray(ctx.Buffer, s) + case *DString: + pgwireFormatStringInArray(ctx.Buffer, string(*dv)) + case *DCollatedString: + pgwireFormatStringInArray(ctx.Buffer, dv.Contents) + default: + ctx.FormatNode(v) + } + comma = "," + } + ctx.WriteByte('}') +} + +func pgwireFormatStringInArray(buf *bytes.Buffer, in string) { + // TODO(knz): to be fully pg-compliant, this function should avoid + // enclosing the string in double quotes if there is no special + // character inside. + buf.WriteByte('"') + // Loop through each unicode code point. + for i, r := range in { + if r == '"' || r == '\\' { + // Strings in arrays escape " and \. 
+ buf.WriteByte('\\') + buf.WriteByte(byte(r)) + } else if unicode.IsGraphic(r) { + buf.WriteRune(r) + } else { + stringencoding.EncodeChar(buf, in, r, i) + } + } + buf.WriteByte('"') +} diff --git a/pkg/util/stringencoding/string_encoding.go b/pkg/util/stringencoding/string_encoding.go index 9374729914eb..fe93799e6a5d 100644 --- a/pkg/util/stringencoding/string_encoding.go +++ b/pkg/util/stringencoding/string_encoding.go @@ -81,6 +81,28 @@ func init() { } } +// EncodeChar writes the escaped form of currentRune, located at byte +// offset currentIdx of entireString, to buf. +func EncodeChar(buf *bytes.Buffer, entireString string, currentRune rune, currentIdx int) { + ln := utf8.RuneLen(currentRune) + if currentRune == utf8.RuneError { + // Errors are due to invalid unicode points, so escape the bytes. + // ln is 3 for RuneError; never read past the end of entireString. + buf.Write(HexMap[entireString[currentIdx]]) + for ri := 1; ri < ln && currentIdx+ri < len(entireString); ri++ { + buf.Write(HexMap[entireString[currentIdx+ri]]) + } + } else if ln == 1 { + // Escape non-printable characters. + buf.Write(HexMap[byte(currentRune)]) + } else if ln == 2 { + // For multi-byte runes, print them based on their width. + fmt.Fprintf(buf, `\u%04X`, currentRune) + } else { + fmt.Fprintf(buf, `\U%08X`, currentRune) + } +} + // EncodeEscapedChar is used internally to write out a character from a larger // string that needs to be escaped to a buffer. func EncodeEscapedChar(