diff --git a/pkg/sql/copy.go b/pkg/sql/copy.go index b4e80d967652..8446076899fb 100644 --- a/pkg/sql/copy.go +++ b/pkg/sql/copy.go @@ -514,15 +514,42 @@ func (c *copyMachine) readCSVData(ctx context.Context, final bool) (brk bool, er return false, err } } - // At this point, we know fullLine ends in '\n'. Keep track of the total - // number of QUOTE chars in fullLine -- if it is even, then it means that - // the quotes are balanced and '\n' is not in a quoted field. - // Currently, the QUOTE char and ESCAPE char are both always equal to '"' - // and are not configurable. As per the COPY spec, any appearance of the - // QUOTE or ESCAPE characters in an actual value must be preceded by an - // ESCAPE character. This means that an escaped '"' also results in an even - // number of '"' characters. - quoteCharsSeen += bytes.Count(line, []byte{'"'}) + + // Now we need to calculate if we have reached the end of the quote. + // If so, break out. + if c.csvEscape == 0 { + // CSV escape is not specified and hence defaults to '"'. + // At this point, we know fullLine ends in '\n'. Keep track of the total + // number of QUOTE chars in fullLine -- if it is even, then it means that + // the quotes are balanced and '\n' is not in a quoted field. + // Currently, the QUOTE char and ESCAPE char are both always equal to '"' + // and are not configurable. As per the COPY spec, any appearance of the + // QUOTE or ESCAPE characters in an actual value must be preceded by an + // ESCAPE character. This means that an escaped '"' also results in an even + // number of '"' characters. + // This branch is kept in the interests of "backporting safely" - this + // was the old code. Users who use COPY ... ESCAPE will be the only + // ones hitting the new code below. + quoteCharsSeen += bytes.Count(line, []byte{'"'}) + } else { + // Otherwise, we have to do a manual count of double quotes and + // ignore any escape characters preceding quotes for counting. 
+ // For example, if the escape character is '\', we should ignore + // the intermediate quotes in a string such as `"start"\"\"end"`. + skipNextChar := false + for _, ch := range line { + if skipNextChar { + skipNextChar = false + continue + } + if ch == '"' { + quoteCharsSeen++ + } + if rune(ch) == c.csvEscape { + skipNextChar = true + } + } + } if quoteCharsSeen%2 == 0 { break } diff --git a/pkg/sql/pgwire/testdata/pgtest/copy b/pkg/sql/pgwire/testdata/pgtest/copy index 2a811f627a6e..51e2ee03784a 100644 --- a/pkg/sql/pgwire/testdata/pgtest/copy +++ b/pkg/sql/pgwire/testdata/pgtest/copy @@ -473,7 +473,8 @@ send Query {"String": "DELETE FROM t"} Query {"String": "COPY t FROM STDIN CSV ESCAPE 'x'"} CopyData {"Data": "1,\"x\"\"\n"} -CopyData {"Data": "1,\"xxx\",xx\"\n"} +CopyData {"Data": "2,\"xxx\",\"\n"} +CopyData {"Data": "3,\"xxx\",xx\"\n"} CopyData {"Data": "\\.\n"} CopyDone Query {"String": "SELECT * FROM t ORDER BY i"} @@ -487,11 +488,12 @@ ReadyForQuery {"Type":"CommandComplete","CommandTag":"DELETE 3"} {"Type":"ReadyForQuery","TxStatus":"I"} {"Type":"CopyInResponse","ColumnFormatCodes":[0,0]} -{"Type":"CommandComplete","CommandTag":"COPY 2"} +{"Type":"CommandComplete","CommandTag":"COPY 3"} {"Type":"ReadyForQuery","TxStatus":"I"} {"Type":"DataRow","Values":[{"text":"1"},{"text":"\""}]} -{"Type":"DataRow","Values":[{"text":"1"},{"text":"x\",x"}]} -{"Type":"CommandComplete","CommandTag":"SELECT 2"} +{"Type":"DataRow","Values":[{"text":"2"},{"text":"x\","}]} +{"Type":"DataRow","Values":[{"text":"3"},{"text":"x\",x"}]} +{"Type":"CommandComplete","CommandTag":"SELECT 3"} {"Type":"ReadyForQuery","TxStatus":"I"} send @@ -522,7 +524,7 @@ ReadyForQuery ReadyForQuery ReadyForQuery ---- -{"Type":"CommandComplete","CommandTag":"DELETE 2"} +{"Type":"CommandComplete","CommandTag":"DELETE 3"} {"Type":"ReadyForQuery","TxStatus":"I"} {"Type":"CopyInResponse","ColumnFormatCodes":[0,0]} {"Type":"CommandComplete","CommandTag":"COPY 3"}