From b5bdfd2e5a4896d8ba615ed793c8cdbbb29f8d72 Mon Sep 17 00:00:00 2001
From: GuptaManan100
Date: Tue, 2 Mar 2021 14:32:06 +0530
Subject: [PATCH] added peek functionality and used it to correct the comment
 handling

Signed-off-by: GuptaManan100
---
 go/vt/sqlparser/parse_next_test.go |  2 +-
 go/vt/sqlparser/parse_test.go      |  5 +++-
 go/vt/sqlparser/token.go           | 37 +++++++++++-------------------
 3 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/go/vt/sqlparser/parse_next_test.go b/go/vt/sqlparser/parse_next_test.go
index 9f4e9c486d9..493afa4a698 100644
--- a/go/vt/sqlparser/parse_next_test.go
+++ b/go/vt/sqlparser/parse_next_test.go
@@ -32,7 +32,7 @@ func TestParseNextValid(t *testing.T) {
 		sql.WriteRune(';')
 	}
 
-	tokens := NewTokenizer(&sql)
+	tokens := NewStringTokenizer(sql.String())
 	for i, tcase := range validSQL {
 		input := tcase.input + ";"
 		want := tcase.output
diff --git a/go/vt/sqlparser/parse_test.go b/go/vt/sqlparser/parse_test.go
index 1266bad3a92..f4028547062 100644
--- a/go/vt/sqlparser/parse_test.go
+++ b/go/vt/sqlparser/parse_test.go
@@ -77,7 +77,7 @@ var (
 		input:  "select 1 from t # aa\n",
 		output: "select 1 from t",
 	}, {
-		input:  "select 1 --aa\nfrom t",
+		input:  "select 1 -- aa\nfrom t",
 		output: "select 1 from t",
 	}, {
 		input:  "select 1 #aa\nfrom t",
@@ -840,6 +840,9 @@ var (
 	}, {
 		input:  "set character set 'utf8'",
 		output: "set charset 'utf8'",
+	}, {
+		input:  "set s = 1--4",
+		output: "set s = 1 - -4",
 	}, {
 		input:  "set character set \"utf8\"",
 		output: "set charset 'utf8'",
diff --git a/go/vt/sqlparser/token.go b/go/vt/sqlparser/token.go
index 46382591429..42ba28a2623 100644
--- a/go/vt/sqlparser/token.go
+++ b/go/vt/sqlparser/token.go
@@ -19,7 +19,6 @@ package sqlparser
 import (
 	"bytes"
 	"fmt"
-	"io"
 	"strings"
 
 	"vitess.io/vitess/go/bytes2"
@@ -27,14 +26,12 @@ import (
 )
 
 const (
-	defaultBufSize = 4096
-	eofChar        = 0x100
+	eofChar = 0x100
 )
 
 // Tokenizer is the struct used to generate SQL
 // tokens for the parser.
 type Tokenizer struct {
-	InStream            io.Reader
 	AllowComments       bool
 	SkipSpecialComments bool
 	SkipToEnd           bool
@@ -64,15 +61,6 @@ func NewStringTokenizer(sql string) *Tokenizer {
 	}
 }
 
-// NewTokenizer creates a new Tokenizer reading a sql
-// string from the io.Reader.
-func NewTokenizer(r io.Reader) *Tokenizer {
-	return &Tokenizer{
-		InStream: r,
-		buf:      make([]byte, defaultBufSize),
-	}
-}
-
 // keywords is a map of mysql keywords that fall into two categories:
 // 1) keywords considered reserved by MySQL
 // 2) keywords for us to handle specially in sql.y
@@ -691,8 +679,11 @@ func (tkn *Tokenizer) Scan() (int, []byte) {
 	case '-':
 		switch tkn.lastChar {
 		case '-':
-			tkn.next()
-			return tkn.scanCommentType1("--")
+			nextChar := tkn.peek(0)
+			if nextChar == ' ' || nextChar == '\n' || nextChar == '\t' || nextChar == '\r' || nextChar == eofChar {
+				tkn.next()
+				return tkn.scanCommentType1("--")
+			}
 		case '>':
 			tkn.next()
 			if tkn.lastChar == '>' {
@@ -1052,15 +1043,6 @@ func (tkn *Tokenizer) consumeNext(buffer *bytes2.Buffer) {
 }
 
 func (tkn *Tokenizer) next() {
-	if tkn.bufPos >= tkn.bufSize && tkn.InStream != nil {
-		// Try and refill the buffer
-		var err error
-		tkn.bufPos = 0
-		if tkn.bufSize, err = tkn.InStream.Read(tkn.buf); err != io.EOF && err != nil {
-			tkn.LastError = err
-		}
-	}
-
 	if tkn.bufPos >= tkn.bufSize {
 		if tkn.lastChar != eofChar {
 			tkn.Position++
@@ -1073,6 +1055,13 @@ func (tkn *Tokenizer) next() {
 	}
 }
 
+func (tkn *Tokenizer) peek(dist int) uint16 {
+	if tkn.bufPos+dist >= tkn.bufSize {
+		return eofChar
+	}
+	return uint16(tkn.buf[tkn.bufPos+dist])
+}
+
// reset clears any internal state.
 func (tkn *Tokenizer) reset() {
 	tkn.ParseTree = nil
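
A note on the behavior (not part of the patch): MySQL treats "--" as a
comment opener only when it is followed by whitespace or end of input;
otherwise it is two minus signs. The sketch below shows how the fix
surfaces through the public API, assuming the sqlparser.Parse and
sqlparser.String helpers available at this commit; the expected outputs
mirror the test cases added above.

	package main

	import (
		"fmt"

		"vitess.io/vitess/go/vt/sqlparser"
	)

	func main() {
		// "--" followed by whitespace opens a comment, so the rest
		// of that line is skipped by the tokenizer.
		stmt, err := sqlparser.Parse("select 1 -- aa\nfrom t")
		if err != nil {
			panic(err)
		}
		fmt.Println(sqlparser.String(stmt)) // select 1 from t

		// "--" not followed by whitespace is two minus signs,
		// so "1--4" parses as 1 - (-4), matching MySQL.
		stmt, err = sqlparser.Parse("set s = 1--4")
		if err != nil {
			panic(err)
		}
		fmt.Println(sqlparser.String(stmt)) // set s = 1 - -4
	}

The new peek(dist) helper is what makes the check possible: it reads
buf[bufPos+dist] without consuming it (returning eofChar when out of
range), so Scan can inspect the character after "--" before committing
to scanCommentType1.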