diff --git a/src/GraphQLParser.Tests/Validation/LexerValidationTests.cs b/src/GraphQLParser.Tests/Validation/LexerValidationTests.cs index 98ea11a2..1c684524 100644 --- a/src/GraphQLParser.Tests/Validation/LexerValidationTests.cs +++ b/src/GraphQLParser.Tests/Validation/LexerValidationTests.cs @@ -370,10 +370,10 @@ public void Lex_UnescapedControlChar_Blockstring_ThrowsExceptionWithCorrectMessa var exception = Should.Throw(() => "\"\"\"contains unescaped \u0007 control char".Lex()); exception.Message.ShouldBe( - "Syntax Error GraphQL (1:23) Invalid character within BlockString: \\u0007.\n" + + "Syntax Error GraphQL (1:23) Invalid character within block string: \\u0007.\n" + "1: \"\"\"contains unescaped \\u0007 control char\n" + " ^\n"); - exception.Description.ShouldBe("Invalid character within BlockString: \\u0007."); + exception.Description.ShouldBe("Invalid character within block string: \\u0007."); exception.Line.ShouldBe(1); exception.Column.ShouldBe(23); } @@ -412,10 +412,10 @@ public void Lex_UnterminatedBlockString_ThrowsExceptionWithCorrectMessage() var exception = Should.Throw(() => "\"\"\"".Lex()); exception.Message.ShouldBe( - "Syntax Error GraphQL (1:4) Unterminated string.\n" + + "Syntax Error GraphQL (1:4) Unterminated block string.\n" + "1: \"\"\"\n" + " ^\n"); - exception.Description.ShouldBe("Unterminated string."); + exception.Description.ShouldBe("Unterminated block string."); exception.Line.ShouldBe(1); exception.Column.ShouldBe(4); } @@ -426,10 +426,10 @@ public void Lex_UnterminatedBlockStringWithText_ThrowsExceptionWithCorrectMessag var exception = Should.Throw(() => "\"\"\"no end triple-quote\"\"".Lex()); exception.Message.ShouldBe( - "Syntax Error GraphQL (1:25) Unterminated string.\n" + + "Syntax Error GraphQL (1:25) Unterminated block string.\n" + "1: \"\"\"no end triple-quote\"\"\n" + " ^\n"); - exception.Description.ShouldBe("Unterminated string."); + exception.Description.ShouldBe("Unterminated block string."); 
exception.Line.ShouldBe(1); exception.Column.ShouldBe(25); } diff --git a/src/GraphQLParser/LexerContext.cs b/src/GraphQLParser/LexerContext.cs index 7e42a343..9b2b6640 100644 --- a/src/GraphQLParser/LexerContext.cs +++ b/src/GraphQLParser/LexerContext.cs @@ -48,14 +48,9 @@ public Token GetToken() if (code == '"') { - if (_currentIndex + 2 < _source.Length && _source.Span[_currentIndex + 1] == '"' && _source.Span[_currentIndex + 2] == '"') - { - return ReadBlockString(); - } - else - { - return ReadString(); - } + return _currentIndex + 2 < _source.Length && _source.Span[_currentIndex + 1] == '"' && _source.Span[_currentIndex + 2] == '"' + ? ReadBlockString() + : ReadString(); } return Throw_From_GetToken2(code); @@ -133,7 +128,11 @@ private Token ReadComment() int start = _currentIndex; char code = NextCode(); - Span buffer = stackalloc char[4096]; + // The buffer on the stack makes it possible to avoid intermediate heap allocations if the string + // 1) is not too long + // or + // 2) does not contain escape sequences. + Span buffer = stackalloc char[Math.Min(_source.Length - _currentIndex + 32, 4096)]; StringBuilder? sb = null; int index = 0; @@ -149,8 +148,7 @@ private Token ReadComment() } catch (IndexOutOfRangeException) // fallback to StringBuilder in case of buffer overflow { - if (sb == null) - sb = new StringBuilder(buffer.Length * 2); + sb ??= new StringBuilder(buffer.Length * 2); for (int i = 0; i < buffer.Length; ++i) sb.Append(buffer[i]); @@ -181,16 +179,25 @@ private Token ReadComment() ); } + // TODO: this method can still be optimized to not allocate at all if the block string: + // + // 1) is not too long + // 2) has no escape sequences + // 3) has no '\r' characters + // 4) has no initial whitespace on each line, ignoring the first line (or, has no '\n' characters) + // + // In this case, ROM for the returned token represents an unmodified part of the source ROM, + // so it can be just sliced from '_source' as you can see in the simpler ReadString method. 
private Token ReadBlockString() { - int start = _currentIndex += 2; + int start = _currentIndex += 2; // skip "" char code = NextCode(); - Span buffer = stackalloc char[4096]; + Span buffer = stackalloc char[Math.Min(_source.Length - _currentIndex + 32, 4096)]; StringBuilder? sb = null; int index = 0; - bool escape = false; //when the last character was \ + bool escape = false; // when the last character was \ bool lastWasCr = false; while (_currentIndex < _source.Length) @@ -200,30 +207,30 @@ private Token ReadBlockString() Throw_From_ReadBlockString1(code); } - //check for """ + // check for """ if (code == '"' && _currentIndex + 2 < _source.Length && _source.Span[_currentIndex + 1] == '"' && _source.Span[_currentIndex + 2] == '"') { - //if last character was \ then go ahead and write out the """, skipping the \ + // if last character was \ then go ahead and write out the """, skipping the \ if (escape) { escape = false; } else { - //end of blockstring + // end of block string break; } } else if (escape) { - //last character was \ so write the \ and then retry this character with escaped = false + // last character was \ so write the \ and then retry this character with escaped = false code = '\\'; _currentIndex--; escape = false; } else if (code == '\\') { - //this character is a \ so don't write anything yet, but check the next character + // this character is a \ so don't write anything yet, but check the next character escape = true; code = NextCode(); lastWasCr = false; @@ -237,15 +244,14 @@ private Token ReadBlockString() if (!(lastWasCr && code == '\n')) { - //write code + // write code if (index < buffer.Length) { buffer[index++] = code == '\r' ? 
'\n' : code; } else // fallback to StringBuilder in case of buffer overflow { - if (sb == null) - sb = new StringBuilder(buffer.Length * 2); + sb ??= new StringBuilder(buffer.Length * 2); for (int i = 0; i < buffer.Length; ++i) sb.Append(buffer[i]); @@ -262,9 +268,9 @@ private Token ReadBlockString() if (_currentIndex >= _source.Length) { - Throw_From_ReadString2(); + Throw_From_ReadBlockString2(); } - _currentIndex += 2; + _currentIndex += 2; // skip "" if (sb != null) { @@ -272,8 +278,8 @@ private Token ReadBlockString() sb.Append(buffer[i]); } - //at this point, if sb != null, then sb has the whole string, otherwise buffer (of length index) has the whole string - //also, all line termination combinations have been replaced with LF + // at this point, if sb != null, then sb has the whole string, otherwise buffer (of length index) has the whole string + // also, all line termination combinations have been replaced with LF ROM value; if (sb != null) @@ -297,11 +303,11 @@ private Token ReadBlockString() static ROM ProcessBuffer(Span buffer) { - //scan string to determine maximum valid commonIndent value, - //number of initial blank lines, and number of trailing blank lines + // scan string to determine maximum valid commonIndent value, + // number of initial blank lines, and number of trailing blank lines int commonIndent = int.MaxValue; int initialBlankLines = 1; - int skipLinesAfter; //skip all text after line ###, as determined by the number of trailing blank lines + int skipLinesAfter; // skip all text after line ###, as determined by the number of trailing blank lines { int trailingBlankLines = 0; int line = 0; @@ -347,8 +353,8 @@ static ROM ProcessBuffer(Span buffer) skipLinesAfter = lines - trailingBlankLines; } - //step through the input, skipping the initial blank lines and the trailing blank lines, - //and skipping the initial blank characters from the start of each line + // step through the input, skipping the initial blank lines and the trailing blank 
lines, + // and skipping the initial blank characters from the start of each line Span output = buffer.Length <= 4096 ? stackalloc char[buffer.Length] : new char[buffer.Length]; int outputIndex = 0; { @@ -373,7 +379,7 @@ static ROM ProcessBuffer(Span buffer) } } - //return the string value from the output buffer + // return the string value from the output buffer return output.Slice(0, outputIndex).ToString(); } } @@ -383,7 +389,7 @@ private Token ReadString() int start = _currentIndex; char code = NextCode(); - Span buffer = stackalloc char[4096]; + Span buffer = stackalloc char[Math.Min(_source.Length - _currentIndex + 32, 4096)]; StringBuilder? sb = null; int index = 0; @@ -404,8 +410,7 @@ private Token ReadString() } catch (IndexOutOfRangeException) // fallback to StringBuilder in case of buffer overflow { - if (sb == null) - sb = new StringBuilder(buffer.Length * 2); + sb ??= new StringBuilder(buffer.Length * 2); for (int i = 0; i < buffer.Length; ++i) sb.Append(buffer[i]); @@ -453,7 +458,12 @@ private void Throw_From_ReadString2() private void Throw_From_ReadBlockString1(char code) { - throw new GraphQLSyntaxErrorException($"Invalid character within BlockString: \\u{(int)code:D4}.", _source, _currentIndex); + throw new GraphQLSyntaxErrorException($"Invalid character within block string: \\u{(int)code:D4}.", _source, _currentIndex); + } + + private void Throw_From_ReadBlockString2() + { + throw new GraphQLSyntaxErrorException("Unterminated block string.", _source, _currentIndex); } // sets escaped only to true