[Erlang] Rewrite Syntax #1878

deathaxe · 2019-02-10T20:09:59Z

According to #481 this PR provides an Oniguruma free Erlang syntax definition written from scratch.

General Notes

The syntax definition is based on the Erlang Specification and includes all sematic features of Erlang/OTP 21.2.

It follows Sublime Text Syntax Guidelines and includes concepts from sublimehq/Packages

Nearly every aspect and detail of the syntax definition was designed from ground up making use of the latest features of ST's syntax engine and with the intent to apply the most recent best practice examples found in other recently updated syntax definitions.

Changes

Add support for Erlang ShellScript (escript)
Add support for Erlang Type Language (-type, -spec directive)
Add lots of detailed test cases
Add some invalid illegal matching where appropriate
Add builtin constants and macros highlighting
Improve the list of builtin functions (BIFs)
Improve indention settings
Improve shebang/editorconfig detection in first_line_match
Improve support for Goto Definition and Goto Reference
Improve parsing performance by round about 50%
Fix quoted atom highlighting (function names, module names, field names, ...)
Fix bitstring data type highlighting
Fix string escape highlighting
Fix string placeholders highlighting
Fix identifier break handling (@ is no word-break)
Fix lots of context pop offs
...

Code Examples

This chapter tries to give an impression of the changes in pictures. Please note that some of the scope naming changes may look some kind of broken compared to the old syntax and others may not bee too obvious anyway.

Preprocessor

%% Erlang test module
-module('my-test-module').
-compile(export_all).

-ifndef(HAVE_DEBUG).
- define(HAVE_DEBUG, 1).
-endif.

-include_lib("common_test/include/ct.hrl").

-export([parse_result/2]).

-on_load(run_loader/0).

-type my_data() :: >=10 | millisecond | integer() | other.

-record('rec-name', {
    status   = undefined  :: integer(),
    name     = "foo"      :: string() | atom() | foo,
    code     = []         :: list(),
    callback = none       :: fun((arg1 :: binary(), atom()) -> int())
    }).

Functions

-spec parse_result(binary(), resulthandler(T)) -> resulthandler_result(T).

parse_result(<<ResultCode:32/little-unsigned, Rest/binary>>, ResultHandler) ->

  % anonymous functions
  Fun = fun
      % first clause
      (Arg1,Arg2) when Arg1 > 0, is_binary(Arg1); is_reference(Arg2) ->
          % nested anonyomous functions
          Fun1 = fun ?CUSTOM:name/?ARI,
          Fun3 = fun Fact(1) -> 1; Fact(X) when X > 1 -> X * Fact(X - 1) end,

          case Arg1 of
            0 -> ResultHandler(Arg2);
            1 when is_integer(Arg1); Arg2==0 -> no_magic;
            _ -> erlang:throw()
          end;
      % second clause
      (Arg1,Arg2) ->
        error
  end,
  % call anonymous function
  Fun(ResultCode, Rest),

Binaries

  Bin  = << << (X*2) >> || <<X>> <= << 1,2,3 >> >>.

Placeholders

  Fmt1 = "bin: ~B ~.16B ~4.16B ~4.16.#B ~# ~.10# ~+ ~.10+ ~X ~.16X ~-5.16X ~-5.16. X",
  Fmt2 = "str: ~p ~tp ~lp ~tlp ~ltp ~15p ~15lp ~15tp ~15tlp ~15ltp",
  Fmt3 = "flt: ~-*.*.*f",

Records

  Expr#record{field1="val1", Field2=3, field3, 'Field-4'={}, _=atom},
  N0n = N2#nrec2.nrec1#nrec1.nrec0#nrec0{name = "nested0a"},

Screenshots

ST 3189

New Syntax

Benchmarks

The whole design and developement process included benchmarking of different solutions to find the best possible performance for the different sets of rules. The following examples provide an impression of the performance improvements achieved with the new design.

10k binary

  Bin  = << << (X*2) >> || <<X>> <= << 1,2,3 >> >>.

10k maps

  Expr#{name=>"adam",{age,24}:=fct(),4.0=>{july,29}}

10k records

  #record{field1="val1", Field2=3, field3, 'Field-4'={}, _=atom}

10k fun types

  -type fun() :: fun(() -> int()).

10k spec

  -spec is_foo(Pid) -> boolean() when Pid :: pid() | if(); (Pid) -> ok.

ErlangOTP files

lib\wx\include\gl.hrl

lib\stdlib\test\re_testoutput1_split_test.erl

lib\xmerl\test\xmerl_sax_std_SUITE.erl

lib\crypto\test\crypto_SUITE.erl

Results

Type	before	after	difference
10k binaries	303ms	140ms	-53.8%
10k maps	326ms	156ms	-52.1%
10k records	399ms	130ms	-67.4%
10k fun type	n.a.	79ms
10k spec	n.a.	176ms
gl.hrl	47ms	24ms	-48.9%
re_testoutput1_split_test.erl	448ms	305ms	-31.9%
xmerl_sax_std_SUITE.erl	389ms	127ms	-67.4%
crypto_SUITE.erl	79ms	27ms	-65.8%

Fixes sublimehq#1609 Fixes sublimehq#1724 According to sublimehq#481 this PR provides an Oniguruma free Erlang syntax definition written from scratch. Outline ======= The syntax definition is based on the http://erlang.org/doc and includes all sematic features of Erlang/OTP 21.2 which are required for highlighting. It follows https://www.sublimetext.com/docs/3/syntax.html and includes concepts from https://github.com/sublimehq/Packages. Nearly every aspect and detail of the syntax definition was designed from ground up making use of the latest features of ST's syntax engine and with the intent to apply the most recent best practice examples found in other recently updated syntax definitions. Changes ======= * Add support for Erlang ShellScript (escript) * Add support for Erlang Type Language (`-type`, `-spec` directive) * Add lots of detailed test cases * Add some invalid illegal matching where appropriate * Add builtin constants and macros highlighting * Improve the list of builtin functions ([BIFs](http://erlang.org/doc/man/erlang.html#exports)) * Improve indention settings * Improve shebang/editorconfig detection in `first_line_match` * Improve support for _Goto Definition_ and _Goto Reference_ * Improve parsing performance by round about 50% * Fix quoted atom highlighting (function names, module names, field names, ...) * Fix bitstring data type highlighting * Fix string escape highlighting * Fix string placeholders highlighting * Fix identifier break handling (`@` is no word-break) * Fix lots of context pop offs * ... Benchmarks ========== The whole design and development process included benchmarking of different solutions to find the best possible performance for the different sets of rules. The following examples provide an impression of the performance improvements achieved with the new design. **10k binary** Bin = << << (X*2) >> || <<X>> <= << 1,2,3 >> >>. **10k maps** Expr#{name=>"adam",{age,24}:=fct(),4.0=>{july,29}} **10k records** #record{field1="val1", Field2=3, field3, 'Field-4'={}, _=atom} **10k fun types** -type fun() :: fun(() -> int()). **10k spec** -spec is_foo(Pid) -> boolean() when Pid :: pid() | if(); (Pid) -> ok. **ErlangOTP files** https://github.com/erlang/otp/blob/master/lib/wx/include/gl.hrl https://github.com/erlang/otp/blob/master/lib/stdlib/test/re_testoutput1_split_test.erl https://github.com/erlang/otp/blob/master/lib/xmerl/test/xmerl_sax_std_SUITE.erl https://github.com/erlang/otp/blob/master/lib/crypto/test/crypto_SUITE.erl Results ======= Type | before | after | difference -------------------------------|------------|-------------|------------ 10k binaries | 303ms | 140ms | -53.8% 10k maps | 326ms | 156ms | -52.1% 10k records | 399ms | 130ms | -67.4% 10k fun type | n.a. | 79ms | 10k spec | n.a. | 176ms | gl.hrl | 47ms | 24ms | -48.9% re_testoutput1_split_test.erl | 448ms | 305ms | -31.9% xmerl_sax_std_SUITE.erl | 389ms | 127ms | -67.4% crypto_SUITE.erl | 79ms | 27ms | -65.8%

deathaxe · 2019-02-10T20:20:55Z

@StevieSong, @tiaanvz: This PR is dedicated to all our network backbone engineers and may need some review from those experienced with Erlang.

wbond · 2019-02-10T20:23:24Z

@mroach I think I've noticed you doing a bit with Erlang recently. Perhaps you'd be interested in trying this out?

mroach · 2019-02-11T07:08:53Z

@wbond I've been working with Elixir, but I've had cause to read Erlang packages used by Elixir. So I'll install this and give it a go.

This commit applies numeric constant scope names with regards to the updated guideline which is a result of a proposal at sublimehq#1877.

eproxus · 2019-05-07T08:48:09Z

I'll start reviewing this right away. Thanks @deathaxe!

eproxus · 2019-05-07T09:19:33Z

@deathaxe I have a couple of issues:

Variables change color in different situations:
1. Variables are colored differently when they're arguments:
2. Variables are colored differently when they're called:
3. Variable are colored differently when they're assigned different values:
Records and fields are colored differently when they're nested:
Macros are colored differently when they're defined and when they're used:

Basically, my problems boil down to not being able to use color to identify the type of entity I'm looking at.

Not sure how many of these are because of the color scheme. Realistically, could they be fixed by a color scheme? Not every scheme wants language specific adaptations. How to deal with this in Sublime Text in general?

eproxus · 2019-05-07T09:22:00Z

The specific problem with 1.iii seems to be that the Fun variable in the first line doesn't even get the scope variable.other.erlang at all, thus the color scheme doesn't know it's a variable at all.

eproxus · 2019-05-07T09:24:52Z

Indexing seems to be problematic as well. It can't find function definitions in the module at all.

FichteFoll · 2019-05-07T15:43:46Z

Identifiers being scoped (and usually colored) differently when they are defined and used is the intended behavior and its details are thoroughly discussed in #1842, if you are curious.

And yes, everything color is specified by the color scheme. Syntax definitions only tag names onto tokens with as much detail as possible so that the color scheme has enough power to express the desired coloring.

eproxus · 2019-05-07T16:18:03Z

@FichteFoll Are you referring to 1.iii? The variable (which it is, in the language) gets the entity.name.function.erlang scope which should not have in my opinion. In Erlang a variable is never a function name (it can be bound to one though).

deathaxe · 2019-05-07T18:16:22Z

Thanks for taking the time to help us.

Not every scheme wants language specific adaptations.

This is actually one of the main goals of all the recent changes made to the builtin syntaxes of ST. The community tries to make all syntaxes share as much common scopes for compareable expressions/statements as possible to enable the same user experience for all syntaxes.

Due to heavy conceptual differences of all the syntaxes this effort requires some "abstractions", which might look inaccurate in some situations though.

Example:

What Erlang calls a module is known as package in Perl and maybe compared to a namespace in C. These kinds of comparisons are required to find common scope names resulting in common colors without syntax-specific color-scheme-extensions.

Variables change color in different situations

To be able to drive the Goto Definition vs. Goto Reference feature of ST, we need to distinguish between the definition of a variable and the usage. The most common example is how function names are scoped. ST uses entity.name.function to name the definition while variable.function is used to scope the usage (function-call). Both may be applied different colors to.

This concept is tried to be applied to other situations as well.

i. Variables are colored differently when they're arguments:

The formal argumets are scoped as variable.parameter, while the actual arguments are scoped variable.other. This is how most other default syntaxes scope them like in order to distiush definition and usage.

Notes:

With regards to functions variable.parameter would probably need to be replaced by entity.name.parameter.
If a color-scheme would address variable only, both would be colored the same way.

iii. Variable are colored differently when they're assigned different values:

This is a good point, we might discuss about. I scoped the Fun as entity.name.function as the documentation calls such statements an "anonymous function declaration". I intended to include them into the Goto Definition feature, but found them to bring too much noise in the end.

I am fine to turn it into an ordinary variable.other.

Records and fields are colored differently when they're nested:

I am not an Erlang expert, so please excuse the stupid question(s):

According 10.2 of http://erlang.org/doc/reference_manual/records.html #record{field_name=value} describes the createion (local definition?) of a record? Hence the last #nrec0 is the new name of a record with one field named field_name initialized with the value?

Therefore the last #nrec0 is assigned the entity.name.record so far, while the rest in front is understood as existing set of records defining the path?

If this assumption is incorrect and the last #nrec0 can be scoped as normal variable.other.record, things would even get simpler.

Macros are colored differently when they're defined and when they're used:

Basically the same here:

In -ifndef(MACRO) the MACRO is "used". Hence it is scoped constant.other.macro.
In -define(MACRO, "something") MACRO is defined. Hence scoped entity.name.constant.macro.

... to be able to distinguish them.

Conclusion

I also noticed the different colors, which were equl before, but tried to follow some of the contecpts discussed in some RFCs or found in other recently updated syntaxes. It's still hard to find a "best practice". This is why I opened some RFC issues like #1861 and #1842. But the discussions just proove how hard it is so far.

Maybe we don't even matter about definition/usage distinction in times of evolving language-server-protocols which are superior to ST's indexing capabilities by nature?

Not sure.

wbond · 2019-05-07T18:24:14Z

Maybe we don't even matter about definition/usage distinction in times of evolving language-server-protocols which are superior to ST's indexing capabilities by nature?

In general, we at Sublime HQ do care about this. The user experience for language servers tends to only work for people who are looking to invest significant time into configuring their environment, and even then the performance seems sub-par. Obviously we don't have a built-in LSP host, and there are some rough edges that will need to be polished to make it possible for anyone to write LSP host without some annoying edge cases.

We obviously won't ever get to 100% of the accuracy of a parser for a specific language, but for the most part I believe we can hit the 99% use case for our users.

I foresee the information found via the indexer to continue be useful to build general-purpose features off of, and foresee the .sublime-syntax definitions to only get more powerful over time.

eproxus · 2019-05-08T08:29:10Z

@deathaxe

Thanks for taking the time to help us.

Thanks for replying. Have been wanting to make a new syntax for Erlang for long time now but never had the time. 😄

i. Variables are colored differently when they're arguments:

The formal argumets are scoped as variable.parameter, while the actual arguments are scoped variable.other. This is how most other default syntaxes scope them like in order to distiush definition and usage.

Ah, I see. That would be up to the color scheme to differentiate or not.

iii. Variable are colored differently when they're assigned different values:

This is a good point, we might discuss about. I scoped the Fun as entity.name.function as the documentation calls such statements an "anonymous function declaration". I intended to include them into the Goto Definition feature, but found them to bring too much noise in the end.

I am fine to turn it into an ordinary variable.other.

In Erlang, functions created using fun are anonymous functions. They don't have names. Thus, any concept of goto definition can't be applied to them. Once the function is created and bound to a variable, that's the only handle you have. And it is a dynamic handle, not a static one. Consider the following code:

f() ->
    Var = fun() -> ok end,
    Foo = Var,
    Foo. % What happens when you run goto definition here?

x() ->
    Var = 1,
    Var. % What happens when you run goto definition here?

Including anonymous functions Goto Definition makes no sense, I think. 🙂

Another problem I have with the current scope is that the variable isn't even scoped as a variable. Thus any plug-in or tool that uses scopes to work with variables would break.

Records and fields are colored differently when they're nested:

I am not an Erlang expert, so please excuse the stupid question(s):

According 10.2 of http://erlang.org/doc/reference_manual/records.html #record{field_name=value} describes the createion (local definition?) of a record? Hence the last #nrec0 is the new name of a record with one field named field_name initialized with the value?

Therefore the last #nrec0 is assigned the entity.name.record so far, while the rest in front is understood as existing set of records defining the path?

If this assumption is incorrect and the last #nrec0 can be scoped as normal variable.other.record, things would even get simpler.

I think it is incorrect. Records are only defined via the -record(name, {field}). preprocessor directive. Then they are either created or referred to.

-record(foo, {bar}).

f() ->
    NewFoo = #foo{bar = 1}, % created
    NewFoo#foo.bar. % referred to

Macros are colored differently when they're defined and when they're used:

Basically the same here:

In -ifndef(MACRO) the MACRO is "used". Hence it is scoped constant.other.macro.
In -define(MACRO, "something") MACRO is defined. Hence scoped entity.name.constant.macro.

... to be able to distinguish them.

That's also fine, I think. I understand the difference better now. So for definitions having the scope entity.name.constant.macro.erlang and usage having the scope constant.language.macro.erlang one could still target the scope macro and get both?

Is there any concern that things that are two things only get assigned one thing? 😛 What I mean is, that e.g. record field names in Erlang are both record field names and atoms. But in the current syntax proposal there's no way to see that they are atoms (they only have the scope entity.name.field.erlang).

I also don't see a difference in scope where a record field is defined (-record(foo, {my_field}). and where it is used (#foo{my_field = 1}). Is that an issue?

Another question I have is, did you settle on the syntax in Goto Reference for Erlang specifically or is there a standard here? I mean this:

@
< parse_result(...)
:: my_data(...)
#'rec_name'{...}
:: parse_result(...)

I have a custom modification of the old Erlang plug-in which gives this as reference list:

?HAVE_DEBUG
-export([parse_result/…])
my_data() ::
#'rec_name'{…}
parse_result(…) ->

I found it mapped better to what was in the actual file and easier to read.

@wbond

a built-in LSP host

😍 Would love for that to exist at some point!

This commit applies suggestions from the review process. Including anonymous function definitions into goto definition features makes not much sense on the one hand and brings too much noise on the other. The anonymous function maybe assigned to a variable, but this is a dynamic reference to the function then, which can be redefined at any point. Hence scoping the l-value of an anonymous function definition as `entity.name.function` is removed. It's scoped as ordinary `variable.other` instead.

This commit applies suggestions from the review process. A record is defined only by `-define(record_name, {...})` preprocessor. Everything else can be considered usage. This simplifies the `record` context a little bit.

deathaxe · 2019-05-08T18:40:17Z

Is there any concern that things that are two things only get assigned one thing?

Basically yes and it was actually one reason for the already mensioned RFCs.

Macros for instance: A macro can be used as identifier for a function to replace the actual name. So the question arose, whether to scope it as constant.other.macro or variable.function. How to express that the macro is used in this way?

Example:

    - define(FUNMACRO, theRealFun);

    ?FUNMACRO(X);

    FunVar = ?FUNMACRO;
    FunVar(Y);

Erlang currently solves that issue by scoping it meta.function-call.name constant.other.macro.

But in the current syntax proposal there's no way to see that they are atoms (they only have the scope entity.name.field.erlang).

I actually removed meta.atom.erlang scopes as I was not sure whether it is useful to have them. Basically any "smallest word/identifier" in any language is an atom.

The punctuations for quoted identifiers are actually still scoped as punctuation.definition.atom. Would be easy to re-add the meta.atom scopes.

I also don't see a difference in scope where a record field is defined (-record(foo, {my_field}). and where it is used (#foo{my_field = 1}). Is that an issue?

both braced blocks share the same context at the moment as I interpreted the creation of a record as local definition of it. Hence the #foo{my_field = 1} was interpreted as definition of a record with a new my_field. But as this expression just assignes a different value to the field, we can also change the scopes to normal "usage" as I did with the nested records in the recent commit.

Another question I have is, did you settle on the syntax in Goto Reference for Erlang specifically or is there a standard here?

You mean the prefixes / symbol transformation?

Only the entity.name. ... is to be indexed and therefore is displayed in the goto definition quick panel, but not complete expressions like -export([parse_result/…]). As a result each function in the export list is displayed separately with the prefix <. The character is just a personal choice.
The syntax tries to distinguish the different kinds of objects in the Goto Definition quick panel by prefix to efficiently filter by type and to be able to clearly see what it is. Unfortunatelly the goto definition panel doesn't provide the right handed descriptions to do so.

The current suggestion:

:: type or function specification
< export
> import
# record
Ξ function
Ξ ?macro function   -> -define(?fun(), ...

? macro    -> ooops, is actually missing

This commit adds macro definitions to the symbol list.

As the local creation/copy of records is no longer scoped as definition the logical result is to handle the field access within the creation expression as normal field-write-access instead of a field definition as well. To keep the field definition within preprocessor expressions intact, the `record-body` is now assigned to record definitions only. A copy and modified `record-fields` context is used within record creation expressions. Example: Expr#rec{field=1} ^^^^^ variable.other.field Note: We might probably think about more general scopes for fields and records? Maybe: `variable.other.field` -> `variable.other.member` `variable.other.record` -> `variable.other.struct` or `variable.qualifier`??

eproxus · 2019-05-10T09:43:29Z

Hmm, I think I have a problem with the base types not being assigned first hand and some secondary analyzed "usage" being assigned instead. I think (and I think I think this for all syntax modes) that first and foremost the base tokens should be assigned proper standard contexts (like variable, function, constant, symbol etc.). Then, and only then, can some overlying meaning be assigned to them if and only if it can be properly deduced in all cases.

What in my opinion cannot be properly done on a syntax parsing level is deducing things such as what a variable is going to be used for (as we saw with the fun assignment). In the same way, you could never know what a macro is going to be used for so assigning meaning such as variable.function is just not feasible. Macros in Erlang are parameterized string substations, they can be use for anything really. No syntax parsing could realistically deduce what the programmer intended.

The current suggestion:

:: type or function specification
< export
> import
# record
Ξ function
Ξ ?macro function   -> -define(?fun(), ...

? macro    -> ooops, is actually missing

Is the user intended to type e.g. Ξ to show functions? Depending on keyboard layout, that could become problematic. As I said above, I would prefer entries that look as similar as possible to the code in the file itself. At least, I find that much more logical.

deathaxe · 2019-05-10T15:33:31Z

Hmm, I think I have a problem with the base types not being assigned first hand and some secondary analyzed "usage" being assigned instead.

Not really sure about what you mean. Is it about the scope names?

The scope names comply with the original TextMate scoping guidelines, which are the base for Sublime Text's scope naming guideline. Hence you won't find top-level scopes like function.

What in my opinion cannot be properly done on a syntax parsing level is deducing things such as what a variable is going to be used for

Agree.

you could never know what a macro is going to be used for so assigning meaning such as variable.function is just not feasible

Agree. This is actually the reason for scoping function-calls using macros as identifier as meta.function-call.name constant.other.macro

  ?FUNNAME(arguments)
% ^^^^^^^^ meta.function-call.name constant.other.macro

It was scoped variable.function.macro in the old Erlang syntax, which I find wrong.

The distinction between macro constants and macro functions in the symbol index is handled by the style of definition.

A macro constant is -define(name, "value") while a macro function is -define(name(args), {args})

Is the user intended to type e.g. Ξ to show functions?

Was just looking for a symbol to be used for functions.

I would prefer a solution without any additions to the name, but with a description text like we have it in completions. The description could be defined in the *.tmPreference which is used to define the index.

 ?macro          (Constant)
 func1           (Function)
 func2           (Function)
 ...

I would prefer entries that look as similar as possible to the code in the file itself

Basically aggree.

IMHO encapsulating imports/exports into -import([ ]), -export([]) might cause too much noise?
Instead of using (...) to suffix imports/exports, we could use the arity. < fun_name/4.
The usage of records always starts with #. Why not use it to identifiy records in the symbol list?
Macros start with ? in the code, which they do in the symbol list as well.
We can argue about removing the Ξ from functions though.

deathaxe · 2019-05-10T15:39:07Z

As a side note: You may need to remove the builtin Erlang.sublime-package from ST's install dir as it removes the functions from the symbol list and may cause some quirks with this new syntax.

This commit fixes an issue with clearing too many scopes in the -spec() and -type() preprocessor expressions, which caused the main scope `source.erlang` to be removed by accident.

As anonymous function definitions were removed recently, the corresponding and therefore obsolete symbol list definition can be removed as well.

This commit... 1. removes `entity.name.record` from the references this scope was recently removed from all "usage" use-cases. 2. cleans the indexed symbol list a bit.

This commit adds the `meta.atom.erlang` scope to all atoms. Note: It's handled optional and therefore no tests are added.

This commit fixes an issue with clearing too many scopes in `fun()` expressions.

This commit extends the test cases of `-type` expressions to verify not to clear too many scopes in any situation.

eproxus · 2019-05-10T18:08:00Z

Hmm, I think I have a problem with the base types not being assigned first hand and some secondary analyzed "usage" being assigned instead.

Not really sure about what you mean. Is it about the scope names?

I was mainly referring to the meta.function-call.name scope.

A macro constant is -define(name, "value") while a macro function is -define(name(args), {args})

Macros in Erlang are just macros. There no such thing as a constant (the macro contents is actually inlined everywhere at compile time) and there are no macro "functions". A macro without arguments would be equivalent to ?MACRO() (although the syntax is not valid). Functions have arguments and can be called in runtime, macros are string substitution performed at compile time. A macro doesn't even have to contain valid Erlang syntax.

Yeah, maybe you are right
Agree
Agree
Then it's alright
It's just that it isn't easy to type on a normal keyboard. I think I'd prefer just funcname(…)

eproxus · 2019-06-20T16:52:56Z

I've might have found a possible bug. The last parenthesis is incorrectly highlighted in red, whereas the code compiles without problems:

-define(CALL(Rec, Func, Args),
    erlang:apply(Rec.mod, Func, Args ++ [Rec.mod_state])
). % <-- Error highlighted here

deathaxe · 2019-06-20T18:52:01Z

The culprits are Rec.mod and Rec.mod_state. Erlang docs (http://erlang.org/doc/reference_manual/records.html) dont tell anything about Rec.mod to be valid rexpressions, but as Rec is a macro parameter, I guess it is a special case here?

eproxus · 2019-06-21T06:38:49Z

It might be a tricky edge case. Macros in Erlang can contain invalid syntax, since it's just pure string replacement.

I later use the macro as ?CALL(State#state, func, [Arg1, Arg2]) where the full record expression gets expanded.

wbond · 2019-07-29T19:25:42Z

It seems there has been a lot of concern over making the syntax definition fit 100% (or very close to) with Erlang implementation semantics. While we obviously want to try and do a reasonable job, we don't want perfect to be the enemy of progress.

Sublime syntax definitions aren't ever going to be 100% for a very large number of languages. That said, I do think it is worth adding things we have a pretty high confidence in to the definition. The syntax definitions help the user to understand the code better, and help provide tools to navigate and manipulate the code. Since we aren't a language-specific IDE, we are not going to be able to cover everything, but it would be a shame to toss out things that work 80-90% of the time because there are a few edge cases that don't work. For instance, see the C syntax definition and the preprocessor. 🤦‍♂ There is no way to handle all kinds of things with the preprocessor, however we can use heuristics to do the right thing most of the time, and thus provide fairly sane Goto Definition and symbol lists, etc. It would be a huge shame to just punt on a bunch of that since we can't always get it right.

That's just a high-level comment, not specifically directed at any specific decision, but just from reading over the review and comments so far.

In terms of the state of this, @deathaxe do we need to come up with an alternative way to deal with macros since it is "free-form"?

deathaxe · 2019-07-30T06:34:51Z

The macros in Erlang suffer from exactly the same issues in manner of syntax highlighting as the C/C++ ones - they can contain litterly everything - a token, incomplete expression, ... .

The most critical section is the -define() statement as it is impossible to reliably detect the meaning/content macro parameters. They look like normal variables but can contain any (incomplete) expression.

I guess, it was possible to add some more heuristics to try to catch some things, but this would be the 1% of the already 99% of the syntax definition, I think. (Erlang is a great syntax in manner of highlighting and many other asspects!)

I double checked the syntax definition against Erlang's standard library. Even found some bugs/syntax errors with its help ;-) but I did not see (m)any of such macros. Can't say anything about user projects.

The syntax definition should be robust enough to keep possible syntax highlighting issues limited to the -define() statement where they are most likely. Several bailouts were added to ensure to limit possible issues to an as small as possible junk of code.

Trying to fix all edge cases with heuristics would probably mean to add another bunch of code just for the single -define() statement.

As I already doubled the code size just to support the -spec and -type annotations, I hesitate to do the same for catching macro edge cases as the syntax is otherwise quite complete.

I'd suggest to have a look at such edge cases in case we get related feedback from users.

eproxus · 2019-08-22T13:45:12Z

Just wanted to chime in that I've been using this syntax since I found out about the PR and it's been working really well in my daily work. In my opinion, it's ready to replace the older Erlang syntax!

eproxus · 2019-09-11T10:03:08Z

Found one more oddity:

#{foo => bar}.
#{foo := bar}. % <- 'foo' is colored like a variable here

I would expect atoms to always be colored like atoms in all situations.

This commit fixes an issue reported during the review process, which caused the `:=` key-value separator not to be scoped correctly.

eproxus · 2019-10-08T13:35:07Z

Not sure why it happened, but with build 3211 Goto Symbol (⌘+r) only lists exports and not functions. I've symlinked the Erlang folder from the fork into ~/Library/Application Support/Sublime Text 3/Packages (and tested without the symlink, where I get no symbols at all).

Does the syntax need to be updated somehow?

deathaxe · 2019-10-08T16:19:11Z

You just need to delete the "Erlang.sublime-package" which is shipped with ST. It contains some preferences, which prevent functions from being added to the symbol list.

eproxus · 2019-10-16T08:56:25Z

@deathaxe Found another tiny oddity. The atom erlang is colored differently than other atoms.

deathaxe · 2019-10-16T15:41:09Z

As far as I understood the documentation, the erlang atom has a special meaning as it describes the core namespace of the language. Therefore I decided to always scope it as support.namespace even if it is used as normal atom (not followed by ::).

Therefore in both of the following examples erlang is scoped the same way:

  Var = erlang

  Var = erlang::whatever

This is a special case for the erlang keyword only.

I have no strong opinion about it. If you find it confusing, we can remove it.

wbond · 2019-10-16T19:08:50Z

Thank you very much for all of your work on this @deathaxe and your feedback @eproxus! This will be part of the next dev cycle.

eproxus · 2019-10-17T08:38:10Z

Thanks @deathaxe, stellar work!

eproxus · 2019-10-17T08:41:07Z

@deathaxe It is the "namespace" (or rather, module) from which several built-in functions are imported (such as spawn or monitor etc.). But the atom itself doesn't hold any specific meaning in Erlang. You could for example do [{language, erlang}] and here erlang would be colored differently even though it's just a normal atom.

I found it looked weird when doing roc:call(Node, erlang, spawn, [fun() -> ok end]) because I would have expected the module and function atoms to be the same color.

This commit removes the `variable-erlang` context, which was used to scope all `erlang` tokens as `variable.namespace`. As a result the `erlang` token is scoped as ordinary atom if not followed by an accessor. For the reason of change, please refer to: sublimehq#1878 (comment)

This commit removes the `variable-erlang` context, which was used to scope all `erlang` tokens as `variable.namespace`. As a result the `erlang` token is scoped as ordinary atom if not followed by an accessor. For the reason of change, please refer to: #1878 (comment)

eproxus · 2019-10-28T14:43:17Z

@deathaxe Found another tiny bug 😄

-define(_get_stacktrace_(),
        try exit('$get_stacktrace') catch exit:'$get_stacktrace':Stacktrace -> Stacktrace end).
%                ^ This is colored like a variable
%                                   ^ catch and exit are not highlighted properly

wbond · 2019-10-28T14:44:18Z

@eproxus Since this has now been merged, any bug reports should be created as a new issue. Otherwise they are liable to be lost in the shuffle.

Fixes sublimehq#1878 (comment) This commit improves the quoted identifier pattern as the old one caused the namespace lookahead to be triggered in the wrong place. The following token was matched by the (?={{ident}}\s*:[^:=]) pattern in the namespace context by accident: '$get_stacktrace') catch exit:'$get_stacktrace': The new pattern makes sure to stop matching at the first unescaped single quote. Any number of escaped backslashes is allowed in front of it.

Fixes #1878 (comment) This commit improves the quoted identifier pattern as the old one caused the namespace lookahead to be triggered in the wrong place. The following token was matched by the (?={{ident}}\s*:[^:=]) pattern in the namespace context by accident: '$get_stacktrace') catch exit:'$get_stacktrace': The new pattern makes sure to stop matching at the first unescaped single quote. Any number of escaped backslashes is allowed in front of it.

eproxus · 2019-10-30T16:48:27Z

@wbond Reported #2160

* [Erlang] Rewrite Syntax Fixes sublimehq#1609 Fixes sublimehq#1724 According to sublimehq#481 this PR provides an Oniguruma free Erlang syntax definition written from scratch. Outline ======= The syntax definition is based on the http://erlang.org/doc and includes all sematic features of Erlang/OTP 21.2 which are required for highlighting. It follows https://www.sublimetext.com/docs/3/syntax.html and includes concepts from https://github.com/sublimehq/Packages. Nearly every aspect and detail of the syntax definition was designed from ground up making use of the latest features of ST's syntax engine and with the intent to apply the most recent best practice examples found in other recently updated syntax definitions. Changes ======= * Add support for Erlang ShellScript (escript) * Add support for Erlang Type Language (`-type`, `-spec` directive) * Add lots of detailed test cases * Add some invalid illegal matching where appropriate * Add builtin constants and macros highlighting * Improve the list of builtin functions ([BIFs](http://erlang.org/doc/man/erlang.html#exports)) * Improve indention settings * Improve shebang/editorconfig detection in `first_line_match` * Improve support for _Goto Definition_ and _Goto Reference_ * Improve parsing performance by round about 50% * Fix quoted atom highlighting (function names, module names, field names, ...) * Fix bitstring data type highlighting * Fix string escape highlighting * Fix string placeholders highlighting * Fix identifier break handling (`@` is no word-break) * Fix lots of context pop offs * ... Benchmarks ========== The whole design and development process included benchmarking of different solutions to find the best possible performance for the different sets of rules. The following examples provide an impression of the performance improvements achieved with the new design. **10k binary** Bin = << << (X*2) >> || <<X>> <= << 1,2,3 >> >>. **10k maps** Expr#{name=>"adam",{age,24}:=fct(),4.0=>{july,29}} **10k records** #record{field1="val1", Field2=3, field3, 'Field-4'={}, _=atom} **10k fun types** -type fun() :: fun(() -> int()). **10k spec** -spec is_foo(Pid) -> boolean() when Pid :: pid() | if(); (Pid) -> ok. **ErlangOTP files** https://github.com/erlang/otp/blob/master/lib/wx/include/gl.hrl https://github.com/erlang/otp/blob/master/lib/stdlib/test/re_testoutput1_split_test.erl https://github.com/erlang/otp/blob/master/lib/xmerl/test/xmerl_sax_std_SUITE.erl https://github.com/erlang/otp/blob/master/lib/crypto/test/crypto_SUITE.erl Results ======= Type | before | after | difference -------------------------------|------------|-------------|------------ 10k binaries | 303ms | 140ms | -53.8% 10k maps | 326ms | 156ms | -52.1% 10k records | 399ms | 130ms | -67.4% 10k fun type | n.a. | 79ms | 10k spec | n.a. | 176ms | gl.hrl | 47ms | 24ms | -48.9% re_testoutput1_split_test.erl | 448ms | 305ms | -31.9% xmerl_sax_std_SUITE.erl | 389ms | 127ms | -67.4% crypto_SUITE.erl | 79ms | 27ms | -65.8% * [Erlang] Apply numeric scope guideline This commit applies numeric constant scope names with regards to the updated guideline which is a result of a proposal at sublimehq#1877. * [Erlang] Fix anonymous function definitions This commit applies suggestions from the review process. Including anonymous function definitions into goto definition features makes not much sense on the one hand and brings too much noise on the other. The anonymous function maybe assigned to a variable, but this is a dynamic reference to the function then, which can be redefined at any point. Hence scoping the l-value of an anonymous function definition as `entity.name.function` is removed. It's scoped as ordinary `variable.other` instead. * [Erlang] Fix record scopes This commit applies suggestions from the review process. A record is defined only by `-define(record_name, {...})` preprocessor. Everything else can be considered usage. This simplifies the `record` context a little bit. * [Erlang] Remove anonymous functions from goto definition * [Erlang] Add missing MACRO to symbol list This commit adds macro definitions to the symbol list. * [Erlang] Fix record field scopes As the local creation/copy of records is no longer scoped as definition the logical result is to handle the field access within the creation expression as normal field-write-access instead of a field definition as well. To keep the field definition within preprocessor expressions intact, the `record-body` is now assigned to record definitions only. A copy and modified `record-fields` context is used within record creation expressions. Example: Expr#rec{field=1} ^^^^^ variable.other.field Note: We might probably think about more general scopes for fields and records? Maybe: `variable.other.field` -> `variable.other.member` `variable.other.record` -> `variable.other.struct` or `variable.qualifier`?? * [Erlang] Fix spec/type scopes This commit fixes an issue with clearing too many scopes in the -spec() and -type() preprocessor expressions, which caused the main scope `source.erlang` to be removed by accident. * [Erlang] Remove obsolete symbol list override As anonymous function definitions were removed recently, the corresponding and therefore obsolete symbol list definition can be removed as well. * [Erlang] Update indexed symbol and reference lists This commit... 1. removes `entity.name.record` from the references this scope was recently removed from all "usage" use-cases. 2. cleans the indexed symbol list a bit. * [Erlang] Add meta.atom.erlang This commit adds the `meta.atom.erlang` scope to all atoms. Note: It's handled optional and therefore no tests are added. * [Erlang] Fix fun scopes This commit fixes an issue with clearing too many scopes in `fun()` expressions. * [Erlang] Update type test cases This commit extends the test cases of `-type` expressions to verify not to clear too many scopes in any situation. * [Erlang] Fix macro in indexed symbol list * [Erlang] Tweak symbol list This commit applies review suggestions to the symbol list. 1. As all definitions are made in the preprocessor statements, the corresponding keywords are used as prefix for the symbol list item labels. < -> -export > -> -import # -> -record :: -> -spec, -type Ξ -> removed ... 2. Merges the "Function Macro" list into "Macro". 3. Fixes an issue with exports not being listed. 4. Fixes an issue with function definitions not being listed, which use macros as name (e.g.: `?macro(args) -> ok.`). * [Erlang] Fix some emergency bailouts This commit adds (some) missing emergency bailouts to pop off from scopes not after a clause's end. Effects: - function parameters - function call arguments - type preprocessor statements - illegal parens/brackets/braces * [Erlang] Fix more emergency bailouts This commit adds (some) missing emergency bailouts to pop off from scopes not after a clause's end. Effects: - groups - lists - maps - record fields - tuples It also adds the `illegal-stray` context to scope the last closing parentheses/bracket/brace `invalid.illegal` in order to indicate an syntax error before the end of the clause. The idea behind is any nested unclosed context to with the `.`. * [Erlang] Add bailout from hex numbers This commit adds a bailout to pop off from a hexadecimal number context not after a clause's end. * [Erlang] Add bailout from binary strings This commit adds a bailout to pop off from a binary string context not after a clause's end. * [Erlang] Remove repetitions This commit creates an `clause-end-or-stray` context to avoid including the two `clause-end-pop` and `illegal-stray` one after each other repetitively. * [Erlang] Add bailout from function parameters This commit makes sure to pop off from parameter lists of function/spec/type definitions even with unbalanced parentheses/... . * [Erlang] Add ... in list typespecs The `...` is used in Erlang's typespec language to indicate an arbitrary number of list items, parameters or tuple items. This commit makes sure not to interfere with emergency bailouts. The variable is used in ErlangOTP\erts\preloaded\src\erlang.erl * [Erlang] Remove meta.function from macro definitions Even though macro definitions may look like function definitions, they are not related with them in any manner. The parameter list is just used to create multi-purpose macros. This commit therefore removes the `meta.function` scopes from expressions: -define(MACRO_NAME(ARG1,ARG2) , ...). * [Erlang] Rename _ scope The `_` variable is called "anonymous variable" by Erlang. Hence the scope is renamed accordingly. * [Erlang] Add variable-anonymous-pop context This commit is to reduce duplicated matches. * [Erlang] Add variable-other-pop context This commit is to reduce duplicated matches. * [Erlang] Reorganize function-call context This commit splits the function-call context into smaller pieces and adds a `function-call-pop` for later use. * [Erlang] Reorganize type-call context This commit splits the type-call context into smaller pieces and adds a `type-call-pop` for later use. The identifier context is moved to the other "atom" contexts. * [Erlang] Reorganize type-fun context This commit splits the type-fun context into smaller pieces and adds a `type-fun-pop` for later use. * [Erlang] Merge fun-type into type-call context Basically the fun-type context contains a special type of type-call. Hence those contexts are merged. This does not change the behavior in any way. * [Erlang] Add meta.path This commit applies the scope naming guideline which says: Complete identifiers, including namespace names, should use the following scope. Such identifiers are the fully-qualified forms of variable, function and class names. * [Erlang] Fix fun call identifier The `variable.function` scope is generally used to scope an function identifier in all default syntaxes. A normal function identifier in Erlang is an atom. Means a normal function name must start with lower case or underscore. my_func_name() On the other hand, Erlang supports the definition of closures via fun keyword, which binds a "function" to a real variable. // define the function MyLocalFunc = fun() ... end, // call the funciton MyLocalFunc(), Scoping both, an ordinary function identifier and a closure as `variable.function` is therefore confusing. As we already support constants or macros to be used as identifiers %MY_FUNC_MACRO() it is logical to scope `MyLocalFunc` as `variable.other` because it is a real variable, which holds a function. * [Erlang] Fix fun definition identifier This commit applies the philosophy of the previous commit to `fun` definitions and renames the `entity.name.function` to `variable.other` for local fun identifiers. Fun1 = fun FunName(X) -> X+ FunName(X) end. ^ rename entity -> variable * [Erlang] Fix fun definition identifier part 2 A local fun identifier really needs to be a variable. Invalid: `fun funcName() -> ... end.` Valid: `fun FuncName() -> ... end.` * [Erlang] Fix record field access in macros * [Erlang] Fix mapping separator This commit fixes an issue reported during the review process, which caused the `:=` key-value separator not to be scoped correctly.

This commit removes the `variable-erlang` context, which was used to scope all `erlang` tokens as `variable.namespace`. As a result the `erlang` token is scoped as ordinary atom if not followed by an accessor. For the reason of change, please refer to: sublimehq#1878 (comment)

Fixes sublimehq#1878 (comment) This commit improves the quoted identifier pattern as the old one caused the namespace lookahead to be triggered in the wrong place. The following token was matched by the (?={{ident}}\s*:[^:=]) pattern in the namespace context by accident: '$get_stacktrace') catch exit:'$get_stacktrace': The new pattern makes sure to stop matching at the first unescaped single quote. Any number of escaped backslashes is allowed in front of it.

[Erlang] Apply numeric scope guideline

32bf6a5

This commit applies numeric constant scope names with regards to the updated guideline which is a result of a proposal at sublimehq#1877.

DeathAxe added 2 commits May 8, 2019 18:59

[Erlang] Fix record scopes

db71c01

This commit applies suggestions from the review process. A record is defined only by `-define(record_name, {...})` preprocessor. Everything else can be considered usage. This simplifies the `record` context a little bit.

DeathAxe added 3 commits May 8, 2019 20:42

[Erlang] Remove anonymous functions from goto definition

cea9bb0

[Erlang] Add missing MACRO to symbol list

e72bf19

This commit adds macro definitions to the symbol list.

DeathAxe added 6 commits May 10, 2019 18:24

[Erlang] Fix spec/type scopes

4da3e6e

This commit fixes an issue with clearing too many scopes in the -spec() and -type() preprocessor expressions, which caused the main scope `source.erlang` to be removed by accident.

[Erlang] Remove obsolete symbol list override

de308bb

As anonymous function definitions were removed recently, the corresponding and therefore obsolete symbol list definition can be removed as well.

[Erlang] Update indexed symbol and reference lists

71b7311

This commit... 1. removes `entity.name.record` from the references this scope was recently removed from all "usage" use-cases. 2. cleans the indexed symbol list a bit.

[Erlang] Add meta.atom.erlang

7417a13

This commit adds the `meta.atom.erlang` scope to all atoms. Note: It's handled optional and therefore no tests are added.

[Erlang] Fix fun scopes

7b82a4a

This commit fixes an issue with clearing too many scopes in `fun()` expressions.

[Erlang] Update type test cases

659db3e

This commit extends the test cases of `-type` expressions to verify not to clear too many scopes in any situation.

[Erlang] Fix record field access in macros

63a3a9c

[Erlang] Fix mapping separator

207f357

This commit fixes an issue reported during the review process, which caused the `:=` key-value separator not to be scoped correctly.

wbond merged commit 7068a80 into sublimehq:master Oct 16, 2019

deathaxe mentioned this pull request Oct 17, 2019

[Erlang] Remove special scoping of erlang atoms #2129

Merged

deathaxe deleted the pr/erlang-rewrite branch October 17, 2019 15:30

deathaxe mentioned this pull request Oct 28, 2019

[Erlang] Fix quoted identifier lookahead #2157

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[Erlang] Rewrite Syntax #1878

[Erlang] Rewrite Syntax #1878

deathaxe commented Feb 10, 2019 •

edited

Loading

deathaxe commented Feb 10, 2019

wbond commented Feb 10, 2019

mroach commented Feb 11, 2019

eproxus commented May 7, 2019

eproxus commented May 7, 2019

eproxus commented May 7, 2019

eproxus commented May 7, 2019

FichteFoll commented May 7, 2019 •

edited

Loading

eproxus commented May 7, 2019

deathaxe commented May 7, 2019

wbond commented May 7, 2019

eproxus commented May 8, 2019 •

edited

Loading

deathaxe commented May 8, 2019

eproxus commented May 10, 2019 •

edited

Loading

deathaxe commented May 10, 2019 •

edited

Loading

deathaxe commented May 10, 2019 •

edited

Loading

eproxus commented May 10, 2019

eproxus commented Jun 20, 2019

deathaxe commented Jun 20, 2019

eproxus commented Jun 21, 2019

wbond commented Jul 29, 2019

deathaxe commented Jul 30, 2019

eproxus commented Aug 22, 2019

eproxus commented Sep 11, 2019

eproxus commented Oct 8, 2019 •

edited

Loading

deathaxe commented Oct 8, 2019

eproxus commented Oct 16, 2019

deathaxe commented Oct 16, 2019 •

edited

Loading

wbond commented Oct 16, 2019

eproxus commented Oct 17, 2019

eproxus commented Oct 17, 2019

eproxus commented Oct 28, 2019

wbond commented Oct 28, 2019

eproxus commented Oct 30, 2019

[Erlang] Rewrite Syntax #1878

[Erlang] Rewrite Syntax #1878

Conversation

deathaxe commented Feb 10, 2019 • edited Loading

General Notes

Changes

Code Examples

Preprocessor

Functions

Binaries

Placeholders

Records

Screenshots

ST 3189

New Syntax

Benchmarks

Results

deathaxe commented Feb 10, 2019

wbond commented Feb 10, 2019

mroach commented Feb 11, 2019

eproxus commented May 7, 2019

eproxus commented May 7, 2019

eproxus commented May 7, 2019

eproxus commented May 7, 2019

FichteFoll commented May 7, 2019 • edited Loading

eproxus commented May 7, 2019

deathaxe commented May 7, 2019

wbond commented May 7, 2019

eproxus commented May 8, 2019 • edited Loading

deathaxe commented May 8, 2019

eproxus commented May 10, 2019 • edited Loading

deathaxe commented May 10, 2019 • edited Loading

deathaxe commented May 10, 2019 • edited Loading

eproxus commented May 10, 2019

eproxus commented Jun 20, 2019

deathaxe commented Jun 20, 2019

eproxus commented Jun 21, 2019

wbond commented Jul 29, 2019

deathaxe commented Jul 30, 2019

eproxus commented Aug 22, 2019

eproxus commented Sep 11, 2019

eproxus commented Oct 8, 2019 • edited Loading

deathaxe commented Oct 8, 2019

eproxus commented Oct 16, 2019

deathaxe commented Oct 16, 2019 • edited Loading

wbond commented Oct 16, 2019

eproxus commented Oct 17, 2019

eproxus commented Oct 17, 2019

eproxus commented Oct 28, 2019

wbond commented Oct 28, 2019

eproxus commented Oct 30, 2019

deathaxe commented Feb 10, 2019 •

edited

Loading

FichteFoll commented May 7, 2019 •

edited

Loading

eproxus commented May 8, 2019 •

edited

Loading

eproxus commented May 10, 2019 •

edited

Loading

deathaxe commented May 10, 2019 •

edited

Loading

deathaxe commented May 10, 2019 •

edited

Loading

eproxus commented Oct 8, 2019 •

edited

Loading

deathaxe commented Oct 16, 2019 •

edited

Loading