diff --git a/gqlerrors/syntax.go b/gqlerrors/syntax.go index e3a3cc5a..abad6ade 100644 --- a/gqlerrors/syntax.go +++ b/gqlerrors/syntax.go @@ -3,7 +3,6 @@ package gqlerrors import ( "fmt" "regexp" - "strings" "github.com/graphql-go/graphql/language/ast" diff --git a/language/lexer/lexer.go b/language/lexer/lexer.go index 806f3546..865c9d6e 100644 --- a/language/lexer/lexer.go +++ b/language/lexer/lexer.go @@ -83,10 +83,6 @@ type Token struct { Value string } -func (t *Token) String() string { - return fmt.Sprintf("%s", tokenDescription[t.Kind]) -} - type Lexer func(resetPosition int) (Token, error) func Lex(s *source.Source) Lexer { @@ -106,24 +102,28 @@ func Lex(s *source.Source) Lexer { // Reads an alphanumeric + underscore name from the source. // [_A-Za-z][_0-9A-Za-z]* -func readName(source *source.Source, position int) Token { +// position: Points to the byte position in the byte array +// runePosition: Points to the rune position in the byte array +func readName(source *source.Source, position, runePosition int) Token { body := source.Body bodyLength := len(body) - end := position + 1 + endByte := position + 1 + endRune := runePosition + 1 for { - code, n := runeAt(body, end) - if (end != bodyLength) && + code, _ := runeAt(body, endByte) + if (endByte != bodyLength) && (code == '_' || // _ code >= '0' && code <= '9' || // 0-9 code >= 'A' && code <= 'Z' || // A-Z code >= 'a' && code <= 'z') { // a-z - end += n + endByte++ + endRune++ continue } else { break } } - return makeToken(TokenKind[NAME], position, end, string(body[position:end])) + return makeToken(TokenKind[NAME], runePosition, endRune, string(body[position:endByte])) } // Reads a number token from the source file, either a float @@ -212,6 +212,7 @@ func readDigits(s *source.Source, start int, firstCode rune, codeLength int) (in func readString(s *source.Source, start int) (Token, error) { body := s.Body position := start + 1 + runePosition := start + 1 chunkStart := position var code rune var n int @@ -226,9 +227,10 @@ func readString(s *source.Source, start int) (Token, error) { // SourceCharacter if code < 0x0020 && code != 0x0009 { - return Token{}, gqlerrors.NewSyntaxError(s, position, fmt.Sprintf(`Invalid character within String: %v.`, printCharCode(code))) + return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf(`Invalid character within String: %v.`, printCharCode(code))) } position += n + runePosition++ if code == '\\' { // \ valueBuffer.Write(body[chunkStart : position-1]) code, n = runeAt(body, position) @@ -260,9 +262,9 @@ func readString(s *source.Source, start int) (Token, error) { case 'u': // Check if there are at least 4 bytes available if len(body) <= position+4 { - return Token{}, gqlerrors.NewSyntaxError(s, position, + return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf("Invalid character escape sequence: "+ - "\\u%v", body[position+1:])) + "\\u%v", string(body[position+1:]))) } charCode := uniCharCode( rune(body[position+1]), @@ -271,18 +273,20 @@ func readString(s *source.Source, start int) (Token, error) { rune(body[position+4]), ) if charCode < 0 { - return Token{}, gqlerrors.NewSyntaxError(s, position, + return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf("Invalid character escape sequence: "+ - "\\u%v", body[position+1:position+5])) + "\\u%v", string(body[position+1:position+5]))) } valueBuffer.WriteRune(charCode) position += 4 + runePosition += 4 break default: - return Token{}, gqlerrors.NewSyntaxError(s, position, + return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf(`Invalid character escape sequence: \\%c.`, code)) } position += n + runePosition++ chunkStart = position } continue @@ -291,7 +295,7 @@ func readString(s *source.Source, start int) (Token, error) { } } if code != '"' { // quote (") - return Token{}, gqlerrors.NewSyntaxError(s, position, "Unterminated string.") + return Token{}, gqlerrors.NewSyntaxError(s, runePosition, "Unterminated string.") } stringContent := body[chunkStart:position] valueBuffer.Write(stringContent) @@ -346,7 +350,7 @@ func printCharCode(code rune) string { func readToken(s *source.Source, fromPosition int) (Token, error) { body := s.Body bodyLength := len(body) - position := positionAfterWhitespace(body, fromPosition) + position, runePosition := positionAfterWhitespace(body, fromPosition) if position >= bodyLength { return makeToken(TokenKind[EOF], position, position, ""), nil } @@ -354,7 +358,7 @@ func readToken(s *source.Source, fromPosition int) (Token, error) { // SourceCharacter if code < 0x0020 && code != 0x0009 && code != 0x000A && code != 0x000D { - return Token{}, gqlerrors.NewSyntaxError(s, position, fmt.Sprintf(`Invalid character %v`, printCharCode(code))) + return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf(`Invalid character %v`, printCharCode(code))) } switch code { @@ -405,12 +409,12 @@ func readToken(s *source.Source, fromPosition int) (Token, error) { // A-Z case 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z': - return readName(s, position), nil + return readName(s, position, runePosition), nil // _ // a-z case '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z': - return readName(s, position), nil + return readName(s, position, runePosition), nil // - // 0-9 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': @@ -428,12 +432,14 @@ func readToken(s *source.Source, fromPosition int) (Token, error) { return token, nil } description := fmt.Sprintf("Unexpected character %v.", printCharCode(code)) - return Token{}, gqlerrors.NewSyntaxError(s, position, description) + return Token{}, gqlerrors.NewSyntaxError(s, runePosition, description) } +// Gets the rune from the byte array at given byte position and it's width in bytes func runeAt(body []byte, position int) (code rune, charWidth int) { if len(body) <= position { - return 0, utf8.RuneError + // + return -1, utf8.RuneError } c := body[position] @@ -448,9 +454,11 @@ func runeAt(body []byte, position int) (code rune, charWidth int) { // Reads from body starting at startPosition until it finds a non-whitespace // or commented character, then returns the position of that character for lexing. // lexing. -func positionAfterWhitespace(body []byte, startPosition int) int { +// Returns both byte positions and rune position +func positionAfterWhitespace(body []byte, startPosition int) (position int, runePosition int) { bodyLength := len(body) - position := startPosition + position = startPosition + runePosition = startPosition for { if position < bodyLength { code, n := runeAt(body, position) @@ -466,8 +474,10 @@ func positionAfterWhitespace(body []byte, startPosition int) int { // Comma code == 0x002C { position += n + runePosition++ } else if code == 35 { // # position += n + runePosition++ for { code, n := runeAt(body, position) if position < bodyLength && @@ -475,6 +485,7 @@ func positionAfterWhitespace(body []byte, startPosition int) int { // SourceCharacter but not LineTerminator (code > 0x001F || code == 0x0009) && code != 0x000A && code != 0x000D { position += n + runePosition++ continue } else { break @@ -488,7 +499,7 @@ func positionAfterWhitespace(body []byte, startPosition int) int { break } } - return position + return position, runePosition } func GetTokenDesc(token Token) string { diff --git a/language/lexer/lexer_test.go b/language/lexer/lexer_test.go index f18a8b66..ac59c846 100644 --- a/language/lexer/lexer_test.go +++ b/language/lexer/lexer_test.go @@ -13,10 +13,57 @@ type Test struct { } func createSource(body string) *source.Source { - return source.NewSource(&source.Source{Body: body}) + return source.NewSource(&source.Source{Body: []byte(body)}) } -func TestDisallowsUncommonControlCharacters(t *testing.T) { +func TestLexer_GetTokenDesc(t *testing.T) { + expected := `Name "foo"` + tokenDescription := GetTokenDesc(Token{ + Kind: TokenKind[NAME], + Start: 2, + End: 5, + Value: "foo", + }) + if expected != tokenDescription { + t.Errorf("Expected %v, got %v", expected, tokenDescription) + } + + expected = `Name` + tokenDescription = GetTokenDesc(Token{ + Kind: TokenKind[NAME], + Start: 0, + End: 0, + Value: "", + }) + if expected != tokenDescription { + t.Errorf("Expected %v, got %v", expected, tokenDescription) + } + + expected = `String "foo"` + tokenDescription = GetTokenDesc(Token{ + Kind: TokenKind[STRING], + Start: 2, + End: 5, + Value: "foo", + }) + if expected != tokenDescription { + t.Errorf("Expected %v, got %v", expected, tokenDescription) + } + + expected = `String` + tokenDescription = GetTokenDesc(Token{ + Kind: TokenKind[STRING], + Start: 0, + End: 0, + Value: "", + }) + if expected != tokenDescription { + t.Errorf("Expected %v, got %v", expected, tokenDescription) + } + +} + +func TestLexer_DisallowsUncommonControlCharacters(t *testing.T) { tests := []Test{ { Body: "\u0007", @@ -30,15 +77,15 @@ func TestDisallowsUncommonControlCharacters(t *testing.T) { for _, test := range tests { _, err := Lex(createSource(test.Body))(0) if err == nil { - t.Fatalf("unexpected nil error\nexpected:\n%v\n\ngot:\n%v", test.Expected, err) + t.Errorf("unexpected nil error\nexpected:\n%v\n\ngot:\n%v", test.Expected, err) } if err.Error() != test.Expected { - t.Fatalf("unexpected error.\nexpected:\n%v\n\ngot:\n%v", test.Expected, err.Error()) + t.Errorf("unexpected error.\nexpected:\n%v\n\ngot:\n%v", test.Expected, err.Error()) } } } -func TestAcceptsBOMHeader(t *testing.T) { +func TestLexer_AcceptsBOMHeader(t *testing.T) { tests := []Test{ { Body: "\uFEFF foo", @@ -51,17 +98,17 @@ func TestAcceptsBOMHeader(t *testing.T) { }, } for _, test := range tests { - token, err := Lex(&source.Source{Body: test.Body})(0) + token, err := Lex(&source.Source{Body: []byte(test.Body)})(0) if err != nil { - t.Fatalf("unexpected error: %v", err) + t.Errorf("unexpected error: %v", err) } if !reflect.DeepEqual(token, test.Expected) { - t.Fatalf("unexpected token, expected: %v, got: %v", test.Expected, token) + t.Errorf("unexpected token, expected: %v, got: %v", test.Expected, token) } } } -func TestSkipsWhiteSpace(t *testing.T) { +func TestLexer_SkipsWhiteSpace(t *testing.T) { tests := []Test{ { Body: ` @@ -97,19 +144,28 @@ func TestSkipsWhiteSpace(t *testing.T) { Value: "foo", }, }, + { + Body: ``, + Expected: Token{ + Kind: TokenKind[EOF], + Start: 0, + End: 0, + Value: "", + }, + }, } for _, test := range tests { - token, err := Lex(&source.Source{Body: test.Body})(0) + token, err := Lex(&source.Source{Body: []byte(test.Body)})(0) if err != nil { - t.Fatalf("unexpected error: %v", err) + t.Errorf("unexpected error: %v", err) } if !reflect.DeepEqual(token, test.Expected) { - t.Fatalf("unexpected token, expected: %v, got: %v, body: %s", test.Expected, token, test.Body) + t.Errorf("unexpected token, expected: %v, got: %v, body: %s", test.Expected, token, test.Body) } } } -func TestErrorsRespectWhitespace(t *testing.T) { +func TestLexer_ErrorsRespectWhitespace(t *testing.T) { body := ` ? @@ -125,7 +181,39 @@ func TestErrorsRespectWhitespace(t *testing.T) { } } -func TestLexesStrings(t *testing.T) { +func TestLexer_LexesNames(t *testing.T) { + tests := []Test{ + { + Body: "simple", + Expected: Token{ + Kind: TokenKind[NAME], + Start: 0, + End: 6, + Value: "simple", + }, + }, + { + Body: "Capital", + Expected: Token{ + Kind: TokenKind[NAME], + Start: 0, + End: 7, + Value: "Capital", + }, + }, + } + for _, test := range tests { + token, err := Lex(&source.Source{Body: []byte(test.Body)})(0) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if !reflect.DeepEqual(token, test.Expected) { + t.Errorf("unexpected token, expected: %v, got: %v", test.Expected, token) + } + } +} + +func TestLexer_LexesStrings(t *testing.T) { tests := []Test{ { Body: "\"simple\"", @@ -169,7 +257,7 @@ func TestLexesStrings(t *testing.T) { Kind: TokenKind[STRING], Start: 0, End: 15, - Value: "slashes \\ \\/", + Value: "slashes \\ /", }, }, { @@ -181,19 +269,46 @@ func TestLexesStrings(t *testing.T) { Value: "unicode \u1234\u5678\u90AB\uCDEF", }, }, + { + Body: "\"unicode фы世界\"", + Expected: Token{ + Kind: TokenKind[STRING], + Start: 0, + End: 20, + Value: "unicode фы世界", + }, + }, + { + Body: "\"фы世界\"", + Expected: Token{ + Kind: TokenKind[STRING], + Start: 0, + End: 12, + Value: "фы世界", + }, + }, + { + Body: "\"Has a фы世界 multi-byte character.\"", + Expected: Token{ + Kind: TokenKind[STRING], + Start: 0, + End: 40, + Value: "Has a фы世界 multi-byte character.", + }, + }, } for _, test := range tests { - token, err := Lex(&source.Source{Body: test.Body})(0) + token, err := Lex(&source.Source{Body: []byte(test.Body)})(0) if err != nil { - t.Fatalf("unexpected error: %v", err) + t.Errorf("unexpected error: %v", err) } if !reflect.DeepEqual(token, test.Expected) { - t.Fatalf("unexpected token, expected: %v, got: %v", test.Expected, token) + t.Errorf("unexpected token, expected: %v, got: %v", test.Expected, token) } } } -func TestLexReportsUsefulStringErrors(t *testing.T) { +func TestLexer_ReportsUsefulStringErrors(t *testing.T) { tests := []Test{ { Body: "\"", @@ -299,21 +414,40 @@ func TestLexReportsUsefulStringErrors(t *testing.T) { 1: "bad \uXXXF esc" ^ +`, + }, + { + Body: "\"bad \\u123", + Expected: `Syntax Error GraphQL (1:7) Invalid character escape sequence: \u123 + +1: "bad \u123 + ^ +`, + }, + { + // some unicode chars take more than one column of text + // current implementation does not handle this + Body: "\"bфы世ыы𠱸d \\uXXXF esc\"", + Expected: `Syntax Error GraphQL (1:12) Invalid character escape sequence: \uXXXF + +1: "bфы世ыы𠱸d \uXXXF esc" + ^ `, }, } for _, test := range tests { _, err := Lex(createSource(test.Body))(0) if err == nil { - t.Fatalf("unexpected nil error\nexpected:\n%v\n\ngot:\n%v", test.Expected, err) + t.Errorf("unexpected nil error\nexpected:\n%v\n\ngot:\n%v", test.Expected, err) } + if err.Error() != test.Expected { - t.Fatalf("unexpected error.\nexpected:\n%v\n\ngot:\n%v", test.Expected, err.Error()) + t.Errorf("unexpected error.\nexpected:\n%v\n\ngot:\n%v", test.Expected, err.Error()) } } } -func TestLexesNumbers(t *testing.T) { +func TestLexer_LexesNumbers(t *testing.T) { tests := []Test{ { Body: "4", @@ -463,15 +597,15 @@ func TestLexesNumbers(t *testing.T) { for _, test := range tests { token, err := Lex(createSource(test.Body))(0) if err != nil { - t.Fatalf("unexpected error: %v, test: %s", err, test) + t.Errorf("unexpected error: %v, test: %s", err, test) } if !reflect.DeepEqual(token, test.Expected) { - t.Fatalf("unexpected token, expected: %v, got: %v, test: %v", test.Expected, token, test) + t.Errorf("unexpected token, expected: %v, got: %v, test: %v", test.Expected, token, test) } } } -func TestLexReportsUsefulNumbeErrors(t *testing.T) { +func TestLexer_ReportsUsefulNumberErrors(t *testing.T) { tests := []Test{ { Body: "00", @@ -542,15 +676,15 @@ func TestLexReportsUsefulNumbeErrors(t *testing.T) { for _, test := range tests { _, err := Lex(createSource(test.Body))(0) if err == nil { - t.Fatalf("unexpected nil error\nexpected:\n%v\n\ngot:\n%v", test.Expected, err) + t.Errorf("unexpected nil error\nexpected:\n%v\n\ngot:\n%v", test.Expected, err) } if err.Error() != test.Expected { - t.Fatalf("unexpected error.\nexpected:\n%v\n\ngot:\n%v", test.Expected, err.Error()) + t.Errorf("unexpected error.\nexpected:\n%v\n\ngot:\n%v", test.Expected, err.Error()) } } } -func TestLexesPunctuation(t *testing.T) { +func TestLexer_LexesPunctuation(t *testing.T) { tests := []Test{ { Body: "!", @@ -673,15 +807,15 @@ func TestLexesPunctuation(t *testing.T) { for _, test := range tests { token, err := Lex(createSource(test.Body))(0) if err != nil { - t.Fatalf("unexpected error :%v, test: %v", err, test) + t.Errorf("unexpected error :%v, test: %v", err, test) } if !reflect.DeepEqual(token, test.Expected) { - t.Fatalf("unexpected token, expected: %v, got: %v, test: %v", test.Expected, token, test) + t.Errorf("unexpected token, expected: %v, got: %v, test: %v", test.Expected, token, test) } } } -func TestLexReportsUsefulUnknownCharacterError(t *testing.T) { +func TestLexer_ReportsUsefulUnknownCharacterError(t *testing.T) { tests := []Test{ { Body: "..", @@ -713,21 +847,29 @@ func TestLexReportsUsefulUnknownCharacterError(t *testing.T) { 1: ※ ^ +`, + }, + { + Body: "ф", + Expected: `Syntax Error GraphQL (1:1) Unexpected character "\\u0444". + +1: ф + ^ `, }, } for _, test := range tests { _, err := Lex(createSource(test.Body))(0) if err == nil { - t.Fatalf("unexpected nil error\nexpected:\n%v\n\ngot:\n%v", test.Expected, err) + t.Errorf("unexpected nil error\nexpected:\n%v\n\ngot:\n%v", test.Expected, err) } if err.Error() != test.Expected { - t.Fatalf("unexpected error.\nexpected:\n%v\n\ngot:\n%v", test.Expected, err.Error()) + t.Errorf("unexpected error.\nexpected:\n%v\n\ngot:\n%v", test.Expected, err.Error()) } } } -func TestLexRerportsUsefulInformationForDashesInNames(t *testing.T) { +func TestLexer_ReportsUsefulInformationForDashesInNames(t *testing.T) { q := "a-b" lexer := Lex(createSource(q)) firstToken, err := lexer(0) diff --git a/language/parser/parser_test.go b/language/parser/parser_test.go index f8697310..e2198f22 100644 --- a/language/parser/parser_test.go +++ b/language/parser/parser_test.go @@ -17,7 +17,7 @@ import ( func TestBadToken(t *testing.T) { _, err := Parse(ParseParams{ Source: &source.Source{ - Body: "query _ {\n me {\n id`\n }\n}", + Body: []byte("query _ {\n me {\n id`\n }\n}"), Name: "GraphQL", }, }) @@ -137,7 +137,10 @@ fragment MissingOn Type func TestParseProvidesUsefulErrorsWhenUsingSource(t *testing.T) { test := errorMessageTest{ - source.NewSource(&source.Source{Body: "query", Name: "MyQuery.graphql"}), + source.NewSource(&source.Source{ + Body: []byte("query"), + Name: "MyQuery.graphql", + }), `Syntax Error MyQuery.graphql (1:6) Expected {, found EOF`, false, } @@ -189,7 +192,7 @@ func TestDoesNotAllowNullAsValue(t *testing.T) { testErrorMessage(t, test) } -func TestParsesMultiByteCharacters(t *testing.T) { +func TestParsesMultiByteCharacters_Unicode(t *testing.T) { doc := ` # This comment has a \u0A0A multi-byte character. @@ -266,6 +269,83 @@ func TestParsesMultiByteCharacters(t *testing.T) { } } +func TestParsesMultiByteCharacters_UnicodeText(t *testing.T) { + + doc := ` + # This comment has a фы世界 multi-byte character. + { field(arg: "Has a фы世界 multi-byte character.") } + ` + astDoc := parse(t, doc) + + expectedASTDoc := ast.NewDocument(&ast.Document{ + Loc: ast.NewLocation(&ast.Location{ + Start: 67, + End: 121, + }), + Definitions: []ast.Node{ + ast.NewOperationDefinition(&ast.OperationDefinition{ + Loc: ast.NewLocation(&ast.Location{ + Start: 67, + End: 119, + }), + Operation: "query", + SelectionSet: ast.NewSelectionSet(&ast.SelectionSet{ + Loc: ast.NewLocation(&ast.Location{ + Start: 67, + End: 119, + }), + Selections: []ast.Selection{ + ast.NewField(&ast.Field{ + Loc: ast.NewLocation(&ast.Location{ + Start: 67, + End: 117, + }), + Name: ast.NewName(&ast.Name{ + Loc: ast.NewLocation(&ast.Location{ + Start: 69, + End: 74, + }), + Value: "field", + }), + Arguments: []*ast.Argument{ + ast.NewArgument(&ast.Argument{ + Loc: ast.NewLocation(&ast.Location{ + Start: 75, + End: 116, + }), + Name: ast.NewName(&ast.Name{ + + Loc: ast.NewLocation(&ast.Location{ + Start: 75, + End: 78, + }), + Value: "arg", + }), + Value: ast.NewStringValue(&ast.StringValue{ + + Loc: ast.NewLocation(&ast.Location{ + Start: 80, + End: 116, + }), + Value: "Has a фы世界 multi-byte character.", + }), + }), + }, + }), + }, + }), + }), + }, + }) + + astDocQuery := printer.Print(astDoc) + expectedASTDocQuery := printer.Print(expectedASTDoc) + + if !reflect.DeepEqual(astDocQuery, expectedASTDocQuery) { + t.Fatalf("unexpected document, expected: %v, got: %v", astDocQuery, expectedASTDocQuery) + } +} + func TestParsesKitchenSink(t *testing.T) { b, err := ioutil.ReadFile("../../kitchen-sink.graphql") if err != nil { @@ -309,18 +389,17 @@ func TestAllowsNonKeywordsAnywhereNameIsAllowed(t *testing.T) { } } -// -//func TestParsesExperimentalSubscriptionFeature(t *testing.T) { -// source := ` -// subscription Foo { -// subscriptionField -// } -// ` -// _, err := Parse(ParseParams{Source: source}) -// if err != nil { -// t.Fatalf("unexpected error: %v", err) -// } -//} +func TestParsesExperimentalSubscriptionFeature(t *testing.T) { + source := ` + subscription Foo { + subscriptionField + } + ` + _, err := Parse(ParseParams{Source: source}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } +} func TestParsesAnonymousMutationOperations(t *testing.T) { source := ` @@ -378,7 +457,9 @@ func TestParseCreatesAst(t *testing.T) { } } ` - source := source.NewSource(&source.Source{Body: body}) + source := source.NewSource(&source.Source{ + Body: []byte(body), + }) document, err := Parse( ParseParams{ Source: source, diff --git a/language/parser/schema_parser_test.go b/language/parser/schema_parser_test.go index ce6e552c..510c7d26 100644 --- a/language/parser/schema_parser_test.go +++ b/language/parser/schema_parser_test.go @@ -739,10 +739,10 @@ input Hello { `, Nodes: []ast.Node{}, Source: &source.Source{ - Body: ` + Body: []byte(` input Hello { world(foo: Int): String -}`, +}`), Name: "GraphQL", }, Positions: []int{22},