From de6cf7b6aa72a989df5b67704eee6297cf801292 Mon Sep 17 00:00:00 2001 From: Anantha Kumaran Date: Sun, 16 May 2021 10:29:32 +0530 Subject: [PATCH 1/3] add unused_identifier and special_identifier --- grammar.js | 23 +++++++++++++++-------- queries/highlights.scm | 12 +++--------- src/scanner.cc | 20 ++++++++++++++++++++ test/corpus/call.txt | 8 ++++---- test/corpus/case.txt | 2 +- test/corpus/cond.txt | 2 +- test/corpus/function.txt | 2 +- test/corpus/struct.txt | 4 ++-- test/corpus/try.txt | 2 +- test/highlight/sandbox.ex | 22 ++++++++++++++++++++++ 10 files changed, 70 insertions(+), 27 deletions(-) diff --git a/grammar.js b/grammar.js index e55b657..d7721ea 100644 --- a/grammar.js +++ b/grammar.js @@ -131,6 +131,8 @@ module.exports = grammar({ $.string_content, $.string_end, $.identifier, + $.unused_identifier, + $.special_identifier, $.keyword_literal, $.atom_literal, $.atom_start, @@ -166,6 +168,8 @@ module.exports = grammar({ [$.block, $.stab_expression] ], + inline: $ => [$._identifier], + word: $ => $.identifier, rules: { @@ -201,9 +205,12 @@ module.exports = grammar({ $.tuple, $._literal, $.char, - $.identifier + $._identifier ), + _identifier: $ => + choice($.identifier, $.unused_identifier, $.special_identifier), + block: $ => seq( "(", @@ -224,7 +231,7 @@ module.exports = grammar({ paren_call: $ => seq( - field("function", alias($.identifier, $.function_identifier)), + field("function", alias($._identifier, $.function_identifier)), $.arguments ), @@ -236,7 +243,7 @@ module.exports = grammar({ field( "function", choice( - alias($.identifier, $.function_identifier), + alias($._identifier, $.function_identifier), $.dot_call, alias($.paren_call, $.call) ) @@ -248,7 +255,7 @@ module.exports = grammar({ field( "function", choice( - alias($.identifier, $.function_identifier), + alias($._identifier, $.function_identifier), $.dot_call, alias($.paren_call, $.call) ) @@ -358,7 +365,7 @@ module.exports = grammar({ choice( ...aliases( [ - $.identifier, + $._identifier, $.true, $.false, $.nil, @@ -395,7 +402,7 @@ module.exports = grammar({ "remote", choice( $.module, - alias($.identifier, $.remote_identifier), + $._identifier, $.atom, alias($._simple_dot_call, $.dot_call), alias($.paren_call, $.call), @@ -507,11 +514,11 @@ module.exports = grammar({ "%", choice( $.module, - $.identifier, + $._identifier, $.atom, alias($._simple_dot_call, $.dot_call), alias($.paren_call, $.call), - seq("^", $.identifier) + seq("^", $._identifier) ), "{", optional($._terminator), diff --git a/queries/highlights.scm b/queries/highlights.scm index b9ec021..c97301c 100644 --- a/queries/highlights.scm +++ b/queries/highlights.scm @@ -17,7 +17,8 @@ (atom_content) (atom_end)] @tag -(comment) @comment +[(comment) + (unused_identifier)] @comment (escape_sequence) @escape @@ -134,13 +135,6 @@ ">>" ] @punctuation.bracket -[(identifier) @function.special - (#match? @function.special "^__.+__$")] - -[(remote_identifier) @function.special - (#match? @function.special "^__.+__$")] - -[(identifier) @comment - (#match? @comment "^_")] +(special_identifier) @function.special (ERROR) @warning diff --git a/src/scanner.cc b/src/scanner.cc index 55b14c0..7296587 100644 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -28,6 +28,8 @@ enum TokenType { STRING_END, IDENTIFIER, + UNUSED_IDENTIFIER, + SPECIAL_IDENTIFIER, KEYWORD_LITERAL, ATOM_LITERAL, @@ -146,6 +148,14 @@ struct Scanner { return !is_identifier_body(c) && c != '?' && c != '!' && c != ':'; } + bool starts_with(std::string s, std::string needle) { + return s.rfind(needle, 0) == 0; + } + + bool ends_with(std::string s, std::string needle) { + return s.length() >= needle.length() && + (0 == s.compare(s.length() - needle.length(), needle.length(), needle)); + } int32_t sigil_terminator(int32_t c) { switch (c) { @@ -667,6 +677,12 @@ struct Scanner { } } else if (!is_identifier_body(lexer->lookahead)) { lexer->mark_end(lexer); + if (starts_with(token, std::string("__")) && ends_with(token, std::string("__"))) { + return is_identifier && is_valid(lexer, valid_symbols, SPECIAL_IDENTIFIER); + } + if (starts_with(token, std::string("_"))) { + return is_identifier && is_valid(lexer, valid_symbols, UNUSED_IDENTIFIER); + } if (token == std::string("true")) { return is_valid(lexer, valid_symbols, TRUE); } @@ -1340,6 +1356,8 @@ struct Scanner { valid_symbols[SIGIL_START] || valid_symbols[KEYWORD_LITERAL] || valid_symbols[IDENTIFIER] || + valid_symbols[UNUSED_IDENTIFIER] || + valid_symbols[SPECIAL_IDENTIFIER] || valid_symbols[ATOM_LITERAL] || valid_symbols[ATOM_START] || valid_symbols[LINE_BREAK] || @@ -1386,6 +1404,8 @@ struct Scanner { } if ((valid_symbols[IDENTIFIER] || + valid_symbols[UNUSED_IDENTIFIER] || + valid_symbols[SPECIAL_IDENTIFIER] || valid_symbols[KEYWORD_LITERAL] || valid_symbols[TRUE] || valid_symbols[FALSE] || diff --git a/test/corpus/call.txt b/test/corpus/call.txt index 58f8a40..c31ecce 100644 --- a/test/corpus/call.txt +++ b/test/corpus/call.txt @@ -82,7 +82,7 @@ a.()() (program (call (dot_call - (remote_identifier) + (identifier) (arguments)) (arguments))) @@ -112,7 +112,7 @@ a.and (program (dot_call - (remote_identifier) + (identifier) (function_identifier))) ================================================================================ @@ -157,7 +157,7 @@ end (call (function_identifier) (arguments - (identifier))) + (special_identifier))) (function_identifier) (arguments (anonymous_function @@ -607,7 +607,7 @@ inspect(&__MODULE__."weirdly named/fun-"/0) (unary_op (binary_op (dot_call - (remote_identifier) + (special_identifier) (string (string_start) (string_content) diff --git a/test/corpus/case.txt b/test/corpus/case.txt index 6217a11..862dc41 100644 --- a/test/corpus/case.txt +++ b/test/corpus/case.txt @@ -82,7 +82,7 @@ end (bare_arguments (integer)) (dot_call - (remote_identifier) + (identifier) (function_identifier) (arguments)) (integer))))) diff --git a/test/corpus/cond.txt b/test/corpus/cond.txt index c817dcf..3195767 100644 --- a/test/corpus/cond.txt +++ b/test/corpus/cond.txt @@ -79,7 +79,7 @@ end (bare_arguments (integer)) (dot_call - (remote_identifier) + (identifier) (function_identifier) (arguments)) (integer))))) diff --git a/test/corpus/function.txt b/test/corpus/function.txt index 2208731..e58cce8 100644 --- a/test/corpus/function.txt +++ b/test/corpus/function.txt @@ -31,7 +31,7 @@ end (identifier))) (identifier) (identifier) - (identifier))))))) + (special_identifier))))))) ================================================================================ multine def diff --git a/test/corpus/struct.txt b/test/corpus/struct.txt index 3f8f410..e9fb231 100644 --- a/test/corpus/struct.txt +++ b/test/corpus/struct.txt @@ -20,7 +20,7 @@ identifier (program (struct - (identifier))) + (unused_identifier))) ================================================================================ with fields @@ -55,5 +55,5 @@ with dot call (program (struct (dot_call - (remote_identifier) + (special_identifier) (module)))) diff --git a/test/corpus/try.txt b/test/corpus/try.txt index 4f5486c..50c6134 100644 --- a/test/corpus/try.txt +++ b/test/corpus/try.txt @@ -200,7 +200,7 @@ end (atom_literal))) (stab_expression (bare_arguments - (identifier)) + (unused_identifier)) (atom (atom_literal))))))) diff --git a/test/highlight/sandbox.ex b/test/highlight/sandbox.ex index efe5dab..6ad315c 100644 --- a/test/highlight/sandbox.ex +++ b/test/highlight/sandbox.ex @@ -1088,6 +1088,28 @@ defprotocol Useless do # ^ punctuation.bracket def func3(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9) +# ^ keyword +# ^ function +# ^ punctuation.bracket +# ^ variable.parameter +# ^ punctuation.delimiter +# ^ variable.parameter +# ^ punctuation.delimiter +# ^ variable.parameter +# ^ punctuation.delimiter +# ^ variable.parameter +# ^ punctuation.delimiter +# ^ variable.parameter +# ^ punctuation.delimiter +# ^ variable.parameter +# ^ punctuation.delimiter +# ^ variable.parameter +# ^ punctuation.delimiter +# ^ variable.parameter +# ^ punctuation.delimiter +# ^ variable.parameter +# ^ punctuation.bracket + end defimpl Useless, for: Atom do From 851cfbd8cc8d0baa49d641e92771256be859d8c9 Mon Sep 17 00:00:00 2001 From: Anantha Kumaran Date: Sun, 16 May 2021 16:21:03 +0530 Subject: [PATCH 2/3] handle identifier kinds for all cases --- src/scanner.cc | 24 +++++++++++++++--------- test/highlight/sandbox.ex | 19 +++++++++++++++++++ 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/src/scanner.cc b/src/scanner.cc index 7296587..8e1a50e 100644 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -590,6 +590,16 @@ struct Scanner { return false; } + bool is_valid_identifier(TSLexer *lexer, const bool *valid_symbols, std::string token) { + if (starts_with(token, std::string("__")) && ends_with(token, std::string("__"))) { + return is_valid(lexer, valid_symbols, SPECIAL_IDENTIFIER, false); + } + if (starts_with(token, std::string("_"))) { + return is_valid(lexer, valid_symbols, UNUSED_IDENTIFIER, false); + } + return is_valid(lexer, valid_symbols, IDENTIFIER, false); + } + bool scan_identifier_or_keyword(TSLexer *lexer, const bool *valid_symbols) { std::string token= ""; @@ -640,6 +650,7 @@ struct Scanner { return false; } + // ... return is_valid(lexer, valid_symbols, IDENTIFIER, false); } @@ -652,6 +663,7 @@ struct Scanner { if (lexer->lookahead == '?' || lexer->lookahead == '!') { + token.push_back(lexer->lookahead); advance(lexer); lexer->mark_end(lexer); @@ -663,7 +675,7 @@ struct Scanner { } } - return is_identifier && is_valid(lexer, valid_symbols, IDENTIFIER, false); + return is_identifier && is_valid_identifier(lexer, valid_symbols, token); } else if (lexer->lookahead == '@') { is_identifier = false; } else if (lexer->lookahead == ':') { @@ -673,16 +685,10 @@ struct Scanner { is_whitespace(lexer->lookahead)) { return is_valid(lexer, valid_symbols, KEYWORD_LITERAL); } else { - return is_identifier && is_valid(lexer, valid_symbols, IDENTIFIER, false); + return is_identifier && is_valid_identifier(lexer, valid_symbols, token); } } else if (!is_identifier_body(lexer->lookahead)) { lexer->mark_end(lexer); - if (starts_with(token, std::string("__")) && ends_with(token, std::string("__"))) { - return is_identifier && is_valid(lexer, valid_symbols, SPECIAL_IDENTIFIER); - } - if (starts_with(token, std::string("_"))) { - return is_identifier && is_valid(lexer, valid_symbols, UNUSED_IDENTIFIER); - } if (token == std::string("true")) { return is_valid(lexer, valid_symbols, TRUE); } @@ -733,7 +739,7 @@ struct Scanner { return is_valid(lexer, valid_symbols, ELSE); } - return is_identifier && is_valid(lexer, valid_symbols, IDENTIFIER); + return is_identifier && is_valid_identifier(lexer, valid_symbols, token); } } } diff --git a/test/highlight/sandbox.ex b/test/highlight/sandbox.ex index 6ad315c..248b41d 100644 --- a/test/highlight/sandbox.ex +++ b/test/highlight/sandbox.ex @@ -563,6 +563,25 @@ defmodule Long.Module.Name do # ^ tag # ^ tag + def func(_foo?, _bar!, <<_baz::binary>>), do: :ok +# ^ keyword +# ^ function +# ^ punctuation.bracket +# ^ comment +# ^ comment +# ^ punctuation.delimiter +# ^ comment +# ^ punctuation.delimiter +# ^ punctuation.bracket +# ^ comment +# ^ operator +# ^ variable.parameter +# ^ punctuation.bracket +# ^ punctuation.bracket +# ^ punctuation.delimiter +# ^ tag +# ^ tag + # Function def f(x), do: x # ^ keyword From 62869eddfb83fd8db19db57f98cd90e49a8d7975 Mon Sep 17 00:00:00 2001 From: Anantha Kumaran Date: Sun, 16 May 2021 19:55:15 +0530 Subject: [PATCH 3/3] remove (#match? @variable.parameter "^[^_]") --- queries/highlights.scm | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/queries/highlights.scm b/queries/highlights.scm index c97301c..6bf93a2 100644 --- a/queries/highlights.scm +++ b/queries/highlights.scm @@ -51,8 +51,7 @@ left: (identifier) @variable.parameter operator: _ @function right: (identifier) @variable.parameter)] - (#match? @keyword "^(defp|def|defmacrop|defmacro|defguardp|defguard|defdelegate)$") - (#match? @variable.parameter "^[^_]")) + (#match? @keyword "^(defp|def|defmacrop|defmacro|defguardp|defguard|defdelegate)$")) (call (function_identifier) @keyword [(call @@ -74,8 +73,7 @@ (_ (_ (identifier) @variable.parameter)) (_ (_ (_ (identifier) @variable.parameter))) (_ (_ (_ (_ (identifier) @variable.parameter)))) - (_ (_ (_ (_ (_ (identifier) @variable.parameter)))))])) - (#match? @variable.parameter "^[^_]")) + (_ (_ (_ (_ (_ (identifier) @variable.parameter)))))]))) (unary_op operator: "@"