lexer: allow TABS for indentation #2392

Open · wants to merge 1 commit into master
8 changes: 7 additions & 1 deletion doc/manual.asciidoc
@@ -861,7 +861,13 @@ one_word_with_no_space = foo$
Other whitespace is only significant if it's at the beginning of a
line. If a line is indented more than the previous one, it's
considered part of its parent's scope; if it is indented less than the
previous one, it closes the previous scope.
previous one, it closes the previous scope. Note that a physical line that is
part of a line continuation is not considered a separate line for the
purposes of scope determination.

_Available since Ninja 1.13._
Tab characters may be used for indentation. Earlier Ninja versions allowed
only the space character for this purpose.

[[ref_toplevel]]
Top-level variables
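To make the two new manual statements concrete (tabs now count as indentation, and a continued physical line does not open or close a scope), here is a hedged sketch in the style of the output test added below. It is illustrative only, not part of the diff: the test name is made up, and it assumes the run() helper and the <TAB>-replacement convention used in misc/output_test.py.

    # Sketch only: the tab indents the rule variable, and the "$" continuation
    # keeps "<TAB>foo" inside the same build statement rather than starting a
    # new (differently scoped) line.
    def test_tabs_manual_example(self):
        content = '''
rule exec
<TAB>command = $cmd

build foo: exec
<TAB>cmd = touch foo

build bar: exec $
<TAB>foo
<TAB>cmd = touch bar
'''.replace('<TAB>', '\t')
        run(content)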
24 changes: 24 additions & 0 deletions misc/output_test.py
@@ -191,6 +191,30 @@ def test_tool_inputs(self) -> None:
out2
''')

    def test_tabs_indent(self):
        content = '''
Review comment (Contributor):

nit: It is really difficult to see the tabs in this input. I suggest you use explicit <TAB> markers and str.replace() to replace them with real tabs, as in:

    # For clarity, tabs are written as <TAB> in the literal string below, then
    # replaced with a real "\t" value.
    content = '''
    rule exec
    <TAB>command = $cmd
    ....
    """.replace("<TAB>", "\t")

rule exec
<TAB>command = $cmd

var_hello = hell$
<TAB>o

build foo: exec
<TAB>cmd = touch foo

build bar: exec $
<TAB>foo
<TAB>cmd = touch bar

build $var_hello: exec
<TAB>cmd = touch $var_hello

build baz: exec $
<TAB>bar $var_hello
<TAB>cmd = touch baz
'''.replace('<TAB>', '\t')
        run(content)


if __name__ == '__main__':
    unittest.main()
70 changes: 36 additions & 34 deletions src/lexer.cc
@@ -105,12 +105,9 @@ const char* Lexer::TokenErrorHint(Token expected) {

string Lexer::DescribeLastError() {
if (last_token_) {
switch (last_token_[0]) {
case '\t':
return "tabs are not allowed, use spaces";
}
return "lexing error <last token:"+string(last_token_)+">";
}
return "lexing error";
return "lexing error (EOF?)";
}

void Lexer::UnreadToken() {
@@ -130,7 +127,7 @@ Lexer::Token Lexer::ReadToken() {
unsigned int yyaccept = 0;
static const unsigned char yybm[] = {
0, 128, 128, 128, 128, 128, 128, 128,
128, 128, 0, 128, 128, 128, 128, 128,
128, 160, 0, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128,
160, 128, 128, 128, 128, 128, 128, 128,
@@ -164,16 +161,17 @@
};
yych = *p;
if (yybm[0+yych] & 32) {
goto yy9;
goto yy6;
}
if (yych <= '^') {
if (yych <= ',') {
if (yych <= '\f') {
if (yych <= 0x00) goto yy2;
if (yych == '\n') goto yy6;
if (yych <= 0x08) goto yy4;
if (yych <= '\n') goto yy9;
goto yy4;
} else {
if (yych <= '\r') goto yy8;
if (yych <= '\r') goto yy11;
if (yych == '#') goto yy12;
goto yy4;
}
@@ -228,31 +226,32 @@ Lexer::Token Lexer::ReadToken() {
yy5:
{ token = ERROR; break; }
yy6:
++p;
{ token = NEWLINE; break; }
yy8:
yych = *++p;
if (yych == '\n') goto yy28;
goto yy5;
yy9:
yyaccept = 0;
yych = *(q = ++p);
if (yybm[0+yych] & 32) {
goto yy9;
goto yy6;
}
if (yych <= '\f') {
if (yych == '\n') goto yy6;
if (yych <= 0x08) goto yy8;
if (yych <= '\n') goto yy9;
} else {
if (yych <= '\r') goto yy30;
if (yych == '#') goto yy32;
if (yych <= '\r') goto yy28;
if (yych == '#') goto yy30;
}
yy11:
yy8:
{ token = INDENT; break; }
yy9:
++p;
{ token = NEWLINE; break; }
yy11:
yych = *++p;
if (yych == '\n') goto yy32;
goto yy5;
yy12:
yyaccept = 1;
yych = *(q = ++p);
if (yych <= 0x00) goto yy5;
goto yy33;
goto yy31;
yy13:
yych = *++p;
yy14:
@@ -296,25 +295,27 @@
if (yych == '|') goto yy44;
{ token = PIPE; break; }
yy28:
++p;
{ token = NEWLINE; break; }
yy30:
yych = *++p;
if (yych == '\n') goto yy28;
yy31:
if (yych == '\n') goto yy32;
yy29:
p = q;
if (yyaccept == 0) {
goto yy11;
goto yy8;
} else {
goto yy5;
}
yy32:
yy30:
yych = *++p;
yy33:
yy31:
if (yybm[0+yych] & 128) {
goto yy32;
goto yy30;
}
if (yych <= 0x00) goto yy31;
if (yych <= 0x00) goto yy29;
goto yy34;
yy32:
++p;
{ token = NEWLINE; break; }
yy34:
++p;
{ continue; }
yy36:
@@ -478,7 +479,7 @@ void Lexer::EatWhitespace() {
unsigned char yych;
static const unsigned char yybm[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 128, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
128, 0, 0, 0, 0, 0, 0, 0,
@@ -631,7 +632,7 @@ bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
unsigned char yych;
static const unsigned char yybm[] = {
0, 16, 16, 16, 16, 16, 16, 16,
16, 16, 0, 16, 16, 0, 16, 16,
16, 48, 0, 16, 16, 0, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16,
32, 16, 16, 16, 0, 16, 16, 16,
@@ -797,6 +798,7 @@ bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
goto yy113;
yy128:
yych = *++p;
if (yych == '\t') goto yy128;
if (yych == ' ') goto yy128;
{
continue;
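The src/lexer.cc changes above are re2c-generated output; the hand-written rules are in src/lexer.in.cc below. The edits to the yybm arrays are character-class bitmaps: each entry is a byte of class-membership bits that the scanner tests with expressions such as yybm[0+yych] & 32. The Python fragment below is only an illustration of that mechanism, using the values from ReadToken's table, where ' ' is 160 and the '\t' entry changes from 128 to 160.

# Illustration of the re2c bitmap test, not code from this PR.
INDENT_BIT = 0x20  # the mask behind "if (yybm[0+yych] & 32)" in ReadToken()

# Entries for ' ' (0x20) and '\t' (0x09) in ReadToken's yybm table.
yybm_before = {' ': 160, '\t': 128}  # before: only space carries the 0x20 bit
yybm_after  = {' ': 160, '\t': 160}  # after: tab carries it too

def in_leading_whitespace_class(ch, yybm):
    # True if ch is in the class checked at the start of ReadToken(),
    # i.e. it can begin or extend an INDENT token.
    return bool(yybm[ch] & INDENT_BIT)

assert in_leading_whitespace_class(' ', yybm_before)
assert not in_leading_whitespace_class('\t', yybm_before)
assert in_leading_whitespace_class('\t', yybm_after)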
21 changes: 9 additions & 12 deletions src/lexer.in.cc
@@ -104,12 +104,9 @@ const char* Lexer::TokenErrorHint(Token expected) {

string Lexer::DescribeLastError() {
if (last_token_) {
switch (last_token_[0]) {
case '\t':
return "tabs are not allowed, use spaces";
}
return "lexing error <last token:"+string(last_token_)+">";
}
return "lexing error";
return "lexing error (EOF?)";
}

void Lexer::UnreadToken() {
@@ -133,10 +130,10 @@ Lexer::Token Lexer::ReadToken() {
simple_varname = [a-zA-Z0-9_-]+;
varname = [a-zA-Z0-9_.-]+;

[ ]*"#"[^\000\n]*"\n" { continue; }
[ ]*"\r\n" { token = NEWLINE; break; }
[ ]*"\n" { token = NEWLINE; break; }
[ ]+ { token = INDENT; break; }
[ \t]*"#"[^\000\n]*"\n" { continue; }
[ \t]*"\r\n" { token = NEWLINE; break; }
[ \t]*"\n" { token = NEWLINE; break; }
[ \t]+ { token = INDENT; break; }
"build" { token = BUILD; break; }
"pool" { token = POOL; break; }
"rule" { token = RULE; break; }
@@ -175,7 +172,7 @@ void Lexer::EatWhitespace() {
for (;;) {
ofs_ = p;
/*!re2c
[ ]+ { continue; }
[ \t]+ { continue; }
"$\r\n" { continue; }
"$\n" { continue; }
nul { break; }
@@ -241,10 +238,10 @@ bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
eval->AddText(StringPiece(" ", 1));
continue;
}
"$\r\n"[ ]* {
"$\r\n"[ \t]* {
continue;
}
"$\n"[ ]* {
"$\n"[ \t]* {
continue;
}
"${"varname"}" {
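The rule changes above amount to widening each leading-whitespace character class from [ ] to [ \t]. As a rough, hedged approximation (not Ninja's actual lexer, which re2c generates with longest-match semantics), the line-start behavior after this change looks like the ordered regexes below; the continuation rules ("$\n"[ \t]* and "$\r\n"[ \t]*) similarly swallow tabs after a line break.

# Rough approximation of the leading-whitespace handling in ReadToken()
# after this change; illustrative only.
import re

RULES = [
    (re.compile(r'[ \t]*#[^\n]*\n'), None),       # comment line: skipped
    (re.compile(r'[ \t]*\r?\n'),     'NEWLINE'),  # (possibly blank) end of line
    (re.compile(r'[ \t]+'),          'INDENT'),   # leading spaces and/or tabs
]

def lex_line_start(text, pos=0):
    """Return (token, new_pos) for the whitespace at the start of a line."""
    for pattern, token in RULES:
        m = pattern.match(text, pos)
        if m:
            return token, m.end()
    return 'OTHER', pos

print(lex_line_start('\tcommand = touch foo\n'))  # ('INDENT', 1): tab indents
print(lex_line_start(' \t # a comment\nbuild'))   # (None, 15): comment skipped
print(lex_line_start('  \r\nbuild'))              # ('NEWLINE', 4)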
52 changes: 36 additions & 16 deletions src/lexer_test.cc
@@ -17,12 +17,20 @@
#include "eval_env.h"
#include "test.h"

using namespace std;
std::string tok(Lexer::Token t) {
const char *str = Lexer::TokenName(t);
if (!str)
return "TokenOutOfRange: " + std::to_string(t);
return str;
}

#define EXPECT_EQ_TOK(t1, t2) \
EXPECT_EQ(tok(t1), tok(t2))

TEST(Lexer, ReadVarValue) {
Lexer lexer("plain text $var $VaR ${x}\n");
EvalString eval;
string err;
std::string err;
EXPECT_TRUE(lexer.ReadVarValue(&eval, &err));
EXPECT_EQ("", err);
EXPECT_EQ("[plain text ][$var][ ][$VaR][ ][$x]",
@@ -32,7 +40,7 @@ TEST(Lexer, ReadVarValue) {
TEST(Lexer, ReadEvalStringEscapes) {
Lexer lexer("$ $$ab c$: $\ncde\n");
EvalString eval;
string err;
std::string err;
EXPECT_TRUE(lexer.ReadVarValue(&eval, &err));
EXPECT_EQ("", err);
EXPECT_EQ("[ $ab c: cde]",
@@ -41,7 +49,7 @@ TEST(Lexer, ReadEvalStringEscapes) {

TEST(Lexer, ReadIdent) {
Lexer lexer("foo baR baz_123 foo-bar");
string ident;
std::string ident;
EXPECT_TRUE(lexer.ReadIdent(&ident));
EXPECT_EQ("foo", ident);
EXPECT_TRUE(lexer.ReadIdent(&ident));
@@ -56,12 +64,12 @@ TEST(Lexer, ReadIdentCurlies) {
// Verify that ReadIdent includes dots in the name,
// but in an expansion $bar.dots stops at the dot.
Lexer lexer("foo.dots $bar.dots ${bar.dots}\n");
string ident;
std::string ident;
EXPECT_TRUE(lexer.ReadIdent(&ident));
EXPECT_EQ("foo.dots", ident);

EvalString eval;
string err;
std::string err;
EXPECT_TRUE(lexer.ReadVarValue(&eval, &err));
EXPECT_EQ("", err);
EXPECT_EQ("[$bar][.dots ][$bar.dots]",
@@ -71,7 +79,7 @@ TEST(Lexer, ReadIdentCurlies) {
TEST(Lexer, Error) {
Lexer lexer("foo$\nbad $");
EvalString eval;
string err;
std::string err;
ASSERT_FALSE(lexer.ReadVarValue(&eval, &err));
EXPECT_EQ("input:2: bad $-escape (literal $ must be written as $$)\n"
"bad $\n"
@@ -83,16 +91,28 @@ TEST(Lexer, CommentEOF) {
// Verify we don't run off the end of the string when the EOF is
// mid-comment.
Lexer lexer("# foo");
Lexer::Token token = lexer.ReadToken();
EXPECT_EQ(Lexer::ERROR, token);
EXPECT_EQ_TOK(Lexer::ERROR, lexer.ReadToken());
}

TEST(Lexer, Tabs) {
// Verify we print a useful error on a disallowed character.
Lexer lexer(" \tfoobar");
Lexer::Token token = lexer.ReadToken();
EXPECT_EQ(Lexer::INDENT, token);
token = lexer.ReadToken();
EXPECT_EQ(Lexer::ERROR, token);
EXPECT_EQ("tabs are not allowed, use spaces", lexer.DescribeLastError());
Review comment from @digit-google (Contributor), Mar 14, 2024:

Please add tests to also verify that:

  • line continuations with tabs actually work. E.g.:
build foo: rule_name $
<tab>input1 $
<tab>input2
  • Tabs that appear after indentation are still not allowed, except when they are in comments. E.g.
rule correct_rule
<tab>command = foo bar

rule incorrect_rule
<tab>command = foo<tab>bar

build incorrect_build: rule_name<tab>input1<tab>input2

The most important reason to avoid tabs is to avoid using them in command = definitions because this will be equally confusing to users.

Finally, please update manual.doc appropriately to document the new behavior.

Reply from @pkitszel (Author), Mar 15, 2024:

Thank you, I did indeed need to tweak the indentation handling after a line break.

However, even without my changes command = foo<tab>bar is fine to use.

Reply (Contributor):

Ack, no need to test for this then. Thank you for double-checking.

Lexer lexer("rule foo\n"
"\tcommand = foobin $in");

EXPECT_EQ_TOK(Lexer::RULE, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::IDENT, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::NEWLINE, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::INDENT, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::IDENT, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::EQUALS, lexer.ReadToken());
}

TEST(Lexer, TabsInVars) {
Lexer lexer("cflags =\n"
"\t-std=c11");

EXPECT_EQ_TOK(Lexer::IDENT, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::EQUALS, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::NEWLINE, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::INDENT, lexer.ReadToken());
EXPECT_EQ_TOK(Lexer::IDENT, lexer.ReadToken());
}