lexer: allow TABS for indentation #2392

Open · wants to merge 1 commit into master
8 changes: 7 additions & 1 deletion doc/manual.asciidoc
@@ -861,7 +861,13 @@ one_word_with_no_space = foo$
Other whitespace is only significant if it's at the beginning of a
line. If a line is indented more than the previous one, it's
considered part of its parent's scope; if it is indented less than the
previous one, it closes the previous scope.
previous one, it closes the previous scope. Note that a physical line that is
part of a line continuation is not considered a separate line for the
purposes of scope determination.

_Available since Ninja 1.13._
Tab characters may be used for indentation. Earlier Ninja versions allowed
only the space character for this purpose.

[[ref_toplevel]]
Top-level variables
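To make the two new manual statements concrete (tabs now count as indentation, and a continued physical line does not open or close a scope), here is a hedged sketch in the style of the output test added below. It is illustrative only, not part of the diff: the test name is made up, and it assumes the run() helper and the <TAB>-replacement convention used in misc/output_test.py.

    # Sketch only: the tab indents the rule variable, and the "$" continuation
    # keeps "<TAB>foo" inside the same build statement rather than starting a
    # new (differently scoped) line.
    def test_tabs_manual_example(self):
        content = '''
rule exec
<TAB>command = $cmd

build foo: exec
<TAB>cmd = touch foo

build bar: exec $
<TAB>foo
<TAB>cmd = touch bar
'''.replace('<TAB>', '\t')
        run(content)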
24 changes: 24 additions & 0 deletions misc/output_test.py
@@ -191,6 +191,30 @@ def test_tool_inputs(self) -> None:
out2
''')

    def test_tabs_indent(self):
        content = '''
Review comment (Contributor):

nit: It is really difficult to see the tabs in this input. I suggest you use explicit <TAB> markers and str.replace() to replace them with real tabs, as in:

    # For clarity, tabs are written as <TAB> in the literal string below, then
    # replaced with a real "\t" value.
    content = '''
    rule exec
    <TAB>command = $cmd
    ....
    """.replace("<TAB>", "\t")

rule exec
<TAB>command = $cmd

var_hello = hell$
<TAB>o

build foo: exec
<TAB>cmd = touch foo

build bar: exec $
<TAB>foo
<TAB>cmd = touch bar

build $var_hello: exec
<TAB>cmd = touch $var_hello

build baz: exec $
<TAB>bar $var_hello
<TAB>cmd = touch baz
'''.replace('<TAB>', '\t')
        run(content)


if __name__ == '__main__':
    unittest.main()
70 changes: 36 additions & 34 deletions src/lexer.cc
@@ -105,12 +105,9 @@ const char* Lexer::TokenErrorHint(Token expected) {

string Lexer::DescribeLastError() {
if (last_token_) {
switch (last_token_[0]) {
case '\t':
return "tabs are not allowed, use spaces";
}
return "lexing error <last token:"+string(last_token_)+">";
}
return "lexing error";
return "lexing error (EOF?)";
}

void Lexer::UnreadToken() {
@@ -130,7 +127,7 @@ Lexer::Token Lexer::ReadToken() {
unsigned int yyaccept = 0;
static const unsigned char yybm[] = {
0, 128, 128, 128, 128, 128, 128, 128,
128, 128, 0, 128, 128, 128, 128, 128,
128, 160, 0, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
160, 128, 128, 128, 128, 128, 128, 128,
@@ -164,16 +161,17 @@
};
yych = *p;
if (yybm[0+yych] & 32) {
goto yy9;
goto yy6;
}
if (yych <= '^') {
if (yych <= ',') {
if (yych <= '\f') {
if (yych <= 0x00) goto yy2;
if (yych == '\n') goto yy6;
if (yych <= 0x08) goto yy4;
if (yych <= '\n') goto yy9;
goto yy4;
} else {
if (yych <= '\r') goto yy8;
if (yych <= '\r') goto yy11;
if (yych == '#') goto yy12;
goto yy4;
}
@@ -228,31 +226,32 @@ Lexer::Token Lexer::ReadToken() {
yy5:
{ token = ERROR; break; }
yy6:
++p;
{ token = NEWLINE; break; }
yy8:
yych = *++p;
if (yych == '\n') goto yy28;
goto yy5;
yy9:
yyaccept = 0;
yych = *(q = ++p);
if (yybm[0+yych] & 32) {
goto yy9;
goto yy6;
}
if (yych <= '\f') {
if (yych == '\n') goto yy6;
if (yych <= 0x08) goto yy8;
if (yych <= '\n') goto yy9;
} else {
if (yych <= '\r') goto yy30;
if (yych == '#') goto yy32;
if (yych <= '\r') goto yy28;
if (yych == '#') goto yy30;
}
yy11:
yy8:
{ token = INDENT; break; }
yy9:
++p;
{ token = NEWLINE; break; }
yy11:
yych = *++p;
if (yych == '\n') goto yy32;
goto yy5;
yy12:
yyaccept = 1;
yych = *(q = ++p);
if (yych <= 0x00) goto yy5;
goto yy33;
goto yy31;
yy13:
yych = *++p;
yy14:
@@ -296,25 +295,27 @@
if (yych == '|') goto yy44;
{ token = PIPE; break; }
yy28:
++p;
{ token = NEWLINE; break; }
yy30:
yych = *++p;
if (yych == '\n') goto yy28;
yy31:
if (yych == '\n') goto yy32;
yy29:
p = q;
if (yyaccept == 0) {
goto yy11;
goto yy8;
} else {
goto yy5;
}
yy32:
yy30:
yych = *++p;
yy33:
yy31:
if (yybm[0+yych] & 128) {
goto yy32;
goto yy30;
}
if (yych <= 0x00) goto yy31;
if (yych <= 0x00) goto yy29;
goto yy34;
yy32:
++p;
{ token = NEWLINE; break; }
yy34:
++p;
{ continue; }
yy36:
@@ -478,7 +479,7 @@ void Lexer::EatWhitespace() {
unsigned char yych;
static const unsigned char yybm[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 128, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
128, 0, 0, 0, 0, 0, 0, 0,
@@ -631,7 +632,7 @@ bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
unsigned char yych;
static const unsigned char yybm[] = {
0, 16, 16, 16, 16, 16, 16, 16,
16, 16, 0, 16, 16, 0, 16, 16,
16, 48, 0, 16, 16, 0, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16,
32, 16, 16, 16, 0, 16, 16, 16,
@@ -797,6 +798,7 @@ bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
goto yy113;
yy128:
yych = *++p;
if (yych == '\t') goto yy128;
if (yych == ' ') goto yy128;
{
continue;
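The src/lexer.cc changes above are re2c-generated output; the hand-written rules are in src/lexer.in.cc below. The edits to the yybm arrays are character-class bitmaps: each entry is a byte of class-membership bits that the scanner tests with expressions such as yybm[0+yych] & 32. The Python fragment below is only an illustration of that mechanism, using the values from ReadToken's table, where ' ' is 160 and the '\t' entry changes from 128 to 160.

# Illustration of the re2c bitmap test, not code from this PR.
INDENT_BIT = 0x20  # the mask behind "if (yybm[0+yych] & 32)" in ReadToken()

# Entries for ' ' (0x20) and '\t' (0x09) in ReadToken's yybm table.
yybm_before = {' ': 160, '\t': 128}  # before: only space carries the 0x20 bit
yybm_after  = {' ': 160, '\t': 160}  # after: tab carries it too

def in_leading_whitespace_class(ch, yybm):
    # True if ch is in the class checked at the start of ReadToken(),
    # i.e. it can begin or extend an INDENT token.
    return bool(yybm[ch] & INDENT_BIT)

assert in_leading_whitespace_class(' ', yybm_before)
assert not in_leading_whitespace_class('\t', yybm_before)
assert in_leading_whitespace_class('\t', yybm_after)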
21 changes: 9 additions & 12 deletions src/lexer.in.cc
@@ -104,12 +104,9 @@ const char* Lexer::TokenErrorHint(Token expected) {

string Lexer::DescribeLastError() {
if (last_token_) {
switch (last_token_[0]) {
case '\t':
return "tabs are not allowed, use spaces";
}
return "lexing error <last token:"+string(last_token_)+">";
}
return "lexing error";
return "lexing error (EOF?)";
}

void Lexer::UnreadToken() {
@@ -133,10 +130,10 @@ Lexer::Token Lexer::ReadToken() {
simple_varname = [a-zA-Z0-9_-]+;
varname = [a-zA-Z0-9_.-]+;

[ ]*"#"[^\000\n]*"\n" { continue; }
[ ]*"\r\n" { token = NEWLINE; break; }
[ ]*"\n" { token = NEWLINE; break; }
[ ]+ { token = INDENT; break; }
[ \t]*"#"[^\000\n]*"\n" { continue; }
[ \t]*"\r\n" { token = NEWLINE; break; }
[ \t]*"\n" { token = NEWLINE; break; }
[ \t]+ { token = INDENT; break; }
"build" { token = BUILD; break; }
"pool" { token = POOL; break; }
"rule" { token = RULE; break; }
@@ -175,7 +172,7 @@ void Lexer::EatWhitespace() {
for (;;) {
ofs_ = p;
/*!re2c
[ ]+ { continue; }
[ \t]+ { continue; }
"$\r\n" { continue; }
"$\n" { continue; }
nul { break; }
@@ -241,10 +238,10 @@ bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
eval->AddText(StringPiece(" ", 1));
continue;
}
"$\r\n"[ ]* {
"$\r\n"[ \t]* {
continue;
}
"$\n"[ ]* {
"$\n"[ \t]* {
continue;
}
"${"varname"}" {
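The rule changes above amount to widening each leading-whitespace character class from [ ] to [ \t]. As a rough, hedged approximation (not Ninja's actual lexer, which re2c generates with longest-match semantics), the line-start behavior after this change looks like the ordered regexes below; the continuation rules ("$\n"[ \t]* and "$\r\n"[ \t]*) similarly swallow tabs after a line break.

# Rough approximation of the leading-whitespace handling in ReadToken()
# after this change; illustrative only.
import re

RULES = [
    (re.compile(r'[ \t]*#[^\n]*\n'), None),       # comment line: skipped
    (re.compile(r'[ \t]*\r?\n'),     'NEWLINE'),  # (possibly blank) end of line
    (re.compile(r'[ \t]+'),          'INDENT'),   # leading spaces and/or tabs
]

def lex_line_start(text, pos=0):
    """Return (token, new_pos) for the whitespace at the start of a line."""
    for pattern, token in RULES:
        m = pattern.match(text, pos)
        if m:
            return token, m.end()
    return 'OTHER', pos

print(lex_line_start('\tcommand = touch foo\n'))  # ('INDENT', 1): tab indents
print(lex_line_start(' \t # a comment\nbuild'))   # (None, 15): comment skipped
print(lex_line_start('  \r\nbuild'))              # ('NEWLINE', 4)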
52 changes: 36 additions & 16 deletions src/lexer_test.cc
@@ -17,12 +17,20 @@
#include "eval_env.h"
#include "test.h"

using namespace std;
std::string tok(Lexer::Token t) {
const char *str = Lexer::TokenName(t);
if (!str)
return "TokenOutOfRange: " + std::to_string(t);
return str;
}

#define EXPECT_EQ_TOK(t1, t2) \
EXPECT_EQ(tok(t1), tok(t2))

TEST(Lexer, ReadVarValue) {
Lexer lexer("plain text $var $VaR ${x}\n");
EvalString eval;
string err;
std::string err;
EXPECT_TRUE(lexer.ReadVarValue(&eval, &err));
EXPECT_EQ("", err);
EXPECT_EQ("[plain text ][$var][ ][$VaR][ ][$x]",
@@ -32,7 +40,7 @@ TEST(Lexer, ReadVarValue) {
TEST(Lexer, ReadEvalStringEscapes) {
Lexer lexer("$ $$ab c$: $\ncde\n");
EvalString eval;
string err;
std::string err;
EXPECT_TRUE(lexer.ReadVarValue(&eval, &err));
EXPECT_EQ("", err);
EXPECT_EQ("[ $ab c: cde]",
@@ -41,7 +49,7 @@ TEST(Lexer, ReadEvalStringEscapes) {

TEST(Lexer, ReadIdent) {
Lexer lexer("foo baR baz_123 foo-bar");
string ident;
std::string ident;
EXPECT_TRUE(lexer.ReadIdent(&ident));
EXPECT_EQ("foo", ident);
EXPECT_TRUE(lexer.ReadIdent(&ident));
@@ -56,12 +64,12 @@ TEST(Lexer, ReadIdentCurlies) {
// Verify that ReadIdent includes dots in the name,
// but in an expansion $bar.dots stops at the dot.
Lexer lexer("foo.dots $bar.dots ${bar.dots}\n");
string ident;
std::string ident;
EXPECT_TRUE(lexer.ReadIdent(&ident));
EXPECT_EQ("foo.dots", ident);

EvalString eval;
string err;
std::string err;
EXPECT_TRUE(lexer.ReadVarValue(&eval, &err));
EXPECT_EQ("", err);
EXPECT_EQ("[$bar][.dots ][$bar.dots]",
@@ -71,7 +79,7 @@ TEST(Lexer, ReadIdentCurlies) {
TEST(Lexer, Error) {
Lexer lexer("foo$\nbad $");
EvalString eval;
string err;
std::string err;
ASSERT_FALSE(lexer.ReadVarValue(&eval, &err));
EXPECT_EQ("input:2: bad $-escape (literal $ must be written as $$)\n"
"bad $\n"
@@ -83,16 +91,28 @@ TEST(Lexer, CommentEOF) {
// Verify we don't run off the end of the string when the EOF is
// mid-comment.
Lexer lexer("# foo");
Lexer::Token token = lexer.ReadToken();
EXPECT_EQ(Lexer::ERROR, token);
EXPECT_EQ_TOK(Lexer::ERROR, lexer.ReadToken());
}

TEST(Lexer, Tabs) {
// Verify we print a useful error on a disallowed character.
Lexer lexer(" \tfoobar");
Lexer::Token token = lexer.ReadToken();
EXPECT_EQ(Lexer::INDENT, token);
token = lexer.ReadToken();
EXPECT_EQ(Lexer::ERROR, token);
EXPECT_EQ("tabs are not allowed, use spaces", lexer.DescribeLastError());
Review comment from @digit-google (Contributor), Mar 14, 2024:

Please add tests to also verify that:

  • line continuations with tabs actually work. E.g.:
build foo: rule_name $
<tab>input1 $
<tab>input2
  • Tabs that appear after indentation are still not allowed, except when they are in comments. E.g.
rule correct_rule
<tab>command = foo bar

rule incorrect_rule
<tab>command = foo<tab>bar

build incorrect_build: rule_name<tab>input1<tab>input2

The most important reason to avoid tabs is to avoid using them in command = definitions because this will be equally confusing to users.

Finally, please update manual.doc appropriately to document the new behavior.

Reply from @pkitszel (Author), Mar 15, 2024:

Thank you, I did indeed need to tweak the indentation handling after a line break.

However, even without my changes command = foo<tab>bar is fine to use.

Reply (Contributor):

Ack, no need to test for this then. Thank you for double-checking.

Lexer lexer("rule foo\n"
"\tcommand = foobin $in");

EXPECT_EQ_TOK(Lexer::RULE, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::IDENT, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::NEWLINE, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::INDENT, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::IDENT, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::EQUALS, lexer.ReadToken());
}

TEST(Lexer, TabsInVars) {
Lexer lexer("cflags =\n"
"\t-std=c11");

EXPECT_EQ_TOK(Lexer::IDENT, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::EQUALS, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::NEWLINE, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::INDENT, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::IDENT, lexer.ReadToken());
}