-
Notifications
You must be signed in to change notification settings - Fork 5.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
expression: add check for utf8 charset when decode to avoid invalid character #29765
Changes from all commits
52766df
8a8d4c6
98ed2dc
59b3fc4
9e96ec4
2d64254
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ import ( | |
"reflect" | ||
"strings" | ||
"unicode" | ||
"unicode/utf8" | ||
"unsafe" | ||
|
||
"github.com/cznic/mathutil" | ||
|
@@ -140,8 +141,40 @@ func (e *Encoding) EncodeInternal(dest, src []byte) []byte { | |
return dest | ||
} | ||
|
||
// validUTF8 checks whether there are illegal utf8 characters in []byte. | ||
func (e *Encoding) validUTF8(src []byte) ([]byte, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Implement this func by ourself is for compatible |
||
resultBytes := src | ||
for len(src) > 0 { | ||
r, size := utf8.DecodeRune(src) | ||
if r == utf8.RuneError && size == 1 { | ||
return resultBytes, e.generateErr(src, characterLengthUTF8(src)) | ||
} | ||
src = src[size:] | ||
} | ||
|
||
return resultBytes, nil | ||
} | ||
|
||
// validUTF8String checks whether there are illegal utf8 characters in string. | ||
func (e *Encoding) validUTF8String(src string) (string, error) { | ||
resultStr := src | ||
for len(src) > 0 { | ||
r, size := utf8.DecodeRuneInString(src) | ||
if r == utf8.RuneError && size == 1 { | ||
srcBytes := []byte(src) | ||
return resultStr, e.generateErr(srcBytes, characterLengthUTF8(srcBytes)) | ||
} | ||
src = src[size:] | ||
} | ||
|
||
return resultStr, nil | ||
} | ||
|
||
// Decode converts bytes from a specific charset to utf-8 charset. | ||
func (e *Encoding) Decode(dest, src []byte) ([]byte, error) { | ||
if e.name == encodings[CharsetUTF8].name { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @tangenta PTAL, Do we need to check the utf8 here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The reason is #29685 |
||
return e.validUTF8(src) | ||
} | ||
if !e.enabled() { | ||
return src, nil | ||
} | ||
|
@@ -150,6 +183,9 @@ func (e *Encoding) Decode(dest, src []byte) ([]byte, error) { | |
|
||
// DecodeString converts a string from a specific charset to utf-8 charset. | ||
func (e *Encoding) DecodeString(src string) (string, error) { | ||
if e.name == encodings[CharsetUTF8].name { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Will an extra check cause a performance regression? There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. DecodeString will be called by the lexer. There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. @tangenta @Defined2014 Do you still want to go on with this issue, or leave it with #30288? There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Sorry, I will close it. |
||
return e.validUTF8String(src) | ||
} | ||
if !e.enabled() { | ||
return src, nil | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it ok to return err directly?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it's fine. But not sure. PTAL @xiongjiwei