Skip to content

Commit

Permalink
Merge #86929
Browse files Browse the repository at this point in the history
86929: sql: also handle ESCAPE characters properly in COPY machine r=otan a=otan

Release justification: critical bug fix to existing functionality

Release note (bug fix): Previously, escaping a double quote (`"`) with
`COPY` in `CSV` mode could ignore all subsequent lines in the same COPY
if an `ESCAPE` clause were specified. This is now resolved.

Co-authored-by: Oliver Tan <[email protected]>
  • Loading branch information
craig[bot] and otan committed Aug 27, 2022
2 parents 4e84ce7 + 5b491f0 commit 770ff3c
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 14 deletions.
45 changes: 36 additions & 9 deletions pkg/sql/copy.go
Original file line number Diff line number Diff line change
Expand Up @@ -514,15 +514,42 @@ func (c *copyMachine) readCSVData(ctx context.Context, final bool) (brk bool, er
return false, err
}
}
// At this point, we know fullLine ends in '\n'. Keep track of the total
// number of QUOTE chars in fullLine -- if it is even, then it means that
// the quotes are balanced and '\n' is not in a quoted field.
// Currently, the QUOTE char and ESCAPE char are both always equal to '"'
// and are not configurable. As per the COPY spec, any appearance of the
// QUOTE or ESCAPE characters in an actual value must be preceded by an
// ESCAPE character. This means that an escaped '"' also results in an even
// number of '"' characters.
quoteCharsSeen += bytes.Count(line, []byte{'"'})

// Now we need to calculate if we are have reached the end of the quote.
// If so, break out.
if c.csvEscape == 0 {
// CSV escape is not specified and hence defaults to '"'.¥
// At this point, we know fullLine ends in '\n'. Keep track of the total
// number of QUOTE chars in fullLine -- if it is even, then it means that
// the quotes are balanced and '\n' is not in a quoted field.
// Currently, the QUOTE char and ESCAPE char are both always equal to '"'
// and are not configurable. As per the COPY spec, any appearance of the
// QUOTE or ESCAPE characters in an actual value must be preceded by an
// ESCAPE character. This means that an escaped '"' also results in an even
// number of '"' characters.
// This branch is kept in the interests of "backporting safely" - this
// was the old code. Users who use COPY ... ESCAPE will be the only
// ones hitting the new code below.
quoteCharsSeen += bytes.Count(line, []byte{'"'})
} else {
// Otherwise, we have to do a manual count of double quotes and
// ignore any escape characters preceding quotes for counting.
// For example, if the escape character is '\', we should ignore
// the intermediate quotes in a string such as `"start"\"\"end"`.
skipNextChar := false
for _, ch := range line {
if skipNextChar {
skipNextChar = false
continue
}
if ch == '"' {
quoteCharsSeen++
}
if rune(ch) == c.csvEscape {
skipNextChar = true
}
}
}
if quoteCharsSeen%2 == 0 {
break
}
Expand Down
12 changes: 7 additions & 5 deletions pkg/sql/pgwire/testdata/pgtest/copy
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,8 @@ send
Query {"String": "DELETE FROM t"}
Query {"String": "COPY t FROM STDIN CSV ESCAPE 'x'"}
CopyData {"Data": "1,\"x\"\"\n"}
CopyData {"Data": "1,\"xxx\",xx\"\n"}
CopyData {"Data": "2,\"xxx\",\"\n"}
CopyData {"Data": "3,\"xxx\",xx\"\n"}
CopyData {"Data": "\\.\n"}
CopyDone
Query {"String": "SELECT * FROM t ORDER BY i"}
Expand All @@ -487,11 +488,12 @@ ReadyForQuery
{"Type":"CommandComplete","CommandTag":"DELETE 3"}
{"Type":"ReadyForQuery","TxStatus":"I"}
{"Type":"CopyInResponse","ColumnFormatCodes":[0,0]}
{"Type":"CommandComplete","CommandTag":"COPY 2"}
{"Type":"CommandComplete","CommandTag":"COPY 3"}
{"Type":"ReadyForQuery","TxStatus":"I"}
{"Type":"DataRow","Values":[{"text":"1"},{"text":"\""}]}
{"Type":"DataRow","Values":[{"text":"1"},{"text":"x\",x"}]}
{"Type":"CommandComplete","CommandTag":"SELECT 2"}
{"Type":"DataRow","Values":[{"text":"2"},{"text":"x\","}]}
{"Type":"DataRow","Values":[{"text":"3"},{"text":"x\",x"}]}
{"Type":"CommandComplete","CommandTag":"SELECT 3"}
{"Type":"ReadyForQuery","TxStatus":"I"}

send
Expand Down Expand Up @@ -522,7 +524,7 @@ ReadyForQuery
ReadyForQuery
ReadyForQuery
----
{"Type":"CommandComplete","CommandTag":"DELETE 2"}
{"Type":"CommandComplete","CommandTag":"DELETE 3"}
{"Type":"ReadyForQuery","TxStatus":"I"}
{"Type":"CopyInResponse","ColumnFormatCodes":[0,0]}
{"Type":"CommandComplete","CommandTag":"COPY 3"}
Expand Down

0 comments on commit 770ff3c

Please sign in to comment.