From 1e8a1899e5dad43a52278d95db9a11f3e6c7e4f7 Mon Sep 17 00:00:00 2001 From: Atif Aziz Date: Thu, 28 Mar 2019 08:07:44 +0100 Subject: [PATCH] Stricter pre-processor directive recognition (#2) Make pre-processor directive recognition stricter Recognise a pre-processor directive either at start of a line or when preceded only by horizontal whitespace. --- src/CSharpMinifier/Scanner.cs | 60 +++++++++++++++++++++++++---------- tests/ScannerTests.cs | 28 ++++++++++++++++ 2 files changed, 71 insertions(+), 17 deletions(-) diff --git a/src/CSharpMinifier/Scanner.cs b/src/CSharpMinifier/Scanner.cs index 9c090f1..548e4c8 100644 --- a/src/CSharpMinifier/Scanner.cs +++ b/src/CSharpMinifier/Scanner.cs @@ -29,6 +29,8 @@ public static IEnumerable Scan(string source) enum State { + NewLine, + LeadingWhiteSpace, Text, WhiteSpace, Cr, @@ -61,14 +63,13 @@ enum State static IEnumerable ScanImpl(string source) { - var state = State.Text; + var state = State.NewLine; var si = 0; var pos = (Line: 1, Col: 0); var spos = (Line: 1, Col: 1); var ppdtwssi = -1; var ppdtwscol = 0; int i; - var lastTokenKind = (TokenKind?)null; var interpolated = new Stack<(bool Verbatim, int Parens)>(); bool Interpolated() => interpolated.Count > 0; @@ -87,12 +88,9 @@ T TransitReturn(State newState, int offset, T token) return token; } - Token CreateToken(TokenKind kind, int offset = 0) - { - lastTokenKind = kind; - return new Token(kind, new Position(si, spos.Line, spos.Col), - new Position(i + offset, pos.Line, pos.Col + offset)); - } + Token CreateToken(TokenKind kind, int offset = 0) => + new Token(kind, new Position(si, spos.Line, spos.Col), + new Position(i + offset, pos.Line, pos.Col + offset)); Token? TextTransit(State newState, int offset = 0) => TransitReturn(newState, offset, @@ -132,6 +130,39 @@ Exception SyntaxError(string message) => restart: switch (state) { + case State.NewLine: + { + switch (ch) + { + case ' ': + case '\t': + state = State.LeadingWhiteSpace; + break; + case '#': + state = State.PreprocessorDirective; + break; + default: + state = State.Text; + goto restart; + } + break; + } + case State.LeadingWhiteSpace: + { + switch (ch) + { + case ' ': + case '\t': + break; + case '#': + yield return Transit(TokenKind.WhiteSpace, State.PreprocessorDirective); + break; + default: + yield return Transit(TokenKind.WhiteSpace, State.Text); + goto restart; + } + break; + } case State.Text: { switch (ch) @@ -175,11 +206,6 @@ Exception SyntaxError(string message) => throw SyntaxError("Parentheses mismatch in interpolated string expression."); break; } - case '#' when lastTokenKind is null // BOF - || lastTokenKind is TokenKind k - && (k == TokenKind.WhiteSpace || k == TokenKind.NewLine): - state = State.PreprocessorDirective; - goto restart; case ' ': case '\t': { @@ -198,7 +224,7 @@ Exception SyntaxError(string message) => if (TextTransit(State.Text) is Token text) yield return text; pos = (pos.Line + 1, 0); - yield return Transit(TokenKind.NewLine, State.Text, 1); + yield return Transit(TokenKind.NewLine, State.NewLine, 1); break; } } @@ -227,11 +253,11 @@ Exception SyntaxError(string message) => break; case '\n': pos = (pos.Line + 1, 0); - yield return Transit(TokenKind.NewLine, State.Text, 1); + yield return Transit(TokenKind.NewLine, State.NewLine, 1); break; default: pos = (pos.Line + 1, 1); - yield return Transit(TokenKind.NewLine, State.Text); + yield return Transit(TokenKind.NewLine, State.NewLine); goto restart; } break; @@ -612,7 +638,7 @@ Exception SyntaxError(string message) => { var token = state == State.SingleLineComment ? TokenKind.SingleLineComment - : state == State.WhiteSpace ? TokenKind.WhiteSpace + : state == State.WhiteSpace || state == State.LeadingWhiteSpace ? TokenKind.WhiteSpace : state == State.Cr ? TokenKind.NewLine : state == State.PreprocessorDirective || state == State.PreprocessorDirectiveSlash ? TokenKind.PreprocessorDirective : state == State.VerbatimStringQuote ? TokenKind.VerbatimStringLiteral diff --git a/tests/ScannerTests.cs b/tests/ScannerTests.cs index d9b777d..cbef850 100644 --- a/tests/ScannerTests.cs +++ b/tests/ScannerTests.cs @@ -150,6 +150,17 @@ public void SyntaxError(string source) @"NewLine 1 1 =1 ""\r""", @"Text 2 0 2 ""42""")] + // A pre-processing directive always occupies a separate line of + // source code and always begins with a # character and a + // pre-processing directive name. + + [TestCase("foo #bar baz", // not technically valid C# + @"Text 3 0 3 ""foo""", + @"WhiteSpace 1 0 1 "" """, + @"Text 4 0 4 ""#bar""", + @"WhiteSpace 1 0 1 "" """, + @"Text 3 0 3 ""baz""")] + [TestCase("#line 42" , @"PreprocessorDirective 8 0 8 ""#line 42""")] [TestCase("#line 42 / / comment", @"PreprocessorDirective 20 0 20 ""#line 42 / / comment""")] [TestCase("#line 42/ /comment" , @"PreprocessorDirective 18 0 18 ""#line 42/ /comment""")] @@ -207,6 +218,23 @@ public void SyntaxError(string source) @"PreprocessorDirective 11 0 11 ""#error 42 /""", @"NewLine 1 1 -11 ""\n""")] + // White space may occur before + // the # character and between the # character and the directive + // name. + + [TestCase("# error 42", + @"PreprocessorDirective 10 0 10 ""# error 42""")] + + // Delimited comments (the /* */ style of comments) are not permitted + // on source lines containing pre-processing directives. + + [TestCase("/* foo */ #bar /* baz */", + @"MultiLineComment 9 0 9 ""/* foo */""", + @"WhiteSpace 1 0 1 "" """, + @"Text 4 0 4 ""#bar""", + @"WhiteSpace 1 0 1 "" """, + @"MultiLineComment 9 0 9 ""/* baz */""")] + [TestCase("@\"\"", @"VerbatimString 3 0 3 ""@\""\""""")]