Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Regex literal syntax #6776

Merged
merged 7 commits into from
Jul 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
- Throws an instance of JavaScript's `new Error()` and adds the extension payload for `cause` option. https://github.com/rescript-lang/rescript-compiler/pull/6611
- Allow free vars in types for type coercion `e :> t`. https://github.com/rescript-lang/rescript-compiler/pull/6828
- Allow `private` in with constraints. https://github.com/rescript-lang/rescript-compiler/pull/6843
- Add regex literals (`/pattern/flags`) as syntax sugar for the `%re("/pattern/flags")` extension. https://github.com/rescript-lang/rescript-compiler/pull/6776

#### :boom: Breaking Change

Expand Down
6 changes: 3 additions & 3 deletions jscomp/others/js_re.res
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ external input: result => string = "input"

/**
Constructs a RegExp object (Js.Re.t) from a `string`.
Regex literals `%re("/.../")` should generally be preferred, but `fromString`
Regex literals `/.../` should generally be preferred, but `fromString`
is useful when you need to dynamically construct a regex using strings,
exactly like when you do so in JavaScript.

Expand Down Expand Up @@ -112,7 +112,7 @@ set.
## Examples

```rescript
let re = %re("/ab*TODO/g")
let re = /ab*TODO/g
let str = "abbcdefabh"

let break = ref(false)
Expand Down Expand Up @@ -166,7 +166,7 @@ Returns `Some(Js.Re.result)` if a match is found, `None` otherwise.
* Ignore case
*/

let re = %re("/quick\s(brown).+?(jumps)/ig")
let re = /quick\s(brown).+?(jumps)/ig
let result = Js.Re.exec_(re, "The Quick Brown Fox Jumps Over The Lazy Dog")
```

Expand Down
22 changes: 11 additions & 11 deletions jscomp/others/js_string.res
Original file line number Diff line number Diff line change
Expand Up @@ -428,11 +428,11 @@ on MDN.
## Examples

```rescript
Js.String.match_(%re("/b[aeiou]t/"), "The better bats") == Some(["bet"])
Js.String.match_(%re("/b[aeiou]t/g"), "The better bats") == Some(["bet", "bat"])
Js.String.match_(%re("/(\d+)-(\d+)-(\d+)/"), "Today is 2018-04-05.") ==
Js.String.match_(/b[aeiou]t/, "The better bats") == Some(["bet"])
Js.String.match_(/b[aeiou]t/g, "The better bats") == Some(["bet", "bat"])
Js.String.match_(/(\d+)-(\d+)-(\d+)/, "Today is 2018-04-05.") ==
Some(["2018-04-05", "2018", "04", "05"])
Js.String.match_(%re("/b[aeiou]g/"), "The large container.") == None
Js.String.match_(/b[aeiou]g/, "The large container.") == None
```
*/
external match_: Js_re.t => option<array<option<t>>> = "match"
Expand Down Expand Up @@ -514,8 +514,8 @@ on MDN.
## Examples

```rescript
Js.String.replaceByRe(%re("/[aeiou]/g"), "x", "vowels be gone") == "vxwxls bx gxnx"
Js.String.replaceByRe(%re("/(\w+) (\w+)/"), "$2, $1", "Juan Fulano") == "Fulano, Juan"
Js.String.replaceByRe(/[aeiou]/g, "x", "vowels be gone") == "vxwxls bx gxnx"
Js.String.replaceByRe(/(\w+) (\w+)/, "$2, $1", "Juan Fulano") == "Fulano, Juan"
```
*/
external replaceByRe: (Js_re.t, t) => t = "replace"
Expand All @@ -534,7 +534,7 @@ on MDN.

```rescript
let str = "beautiful vowels"
let re = %re("/[aeiou]/g")
let re = /[aeiou]/g
let matchFn = (matchPart, _offset, _wholeString) => Js.String.toUpperCase(matchPart)

Js.String.unsafeReplaceBy0(re, matchFn, str) == "bEAUtIfUl vOwEls"
Expand All @@ -557,7 +557,7 @@ on MDN.

```rescript
let str = "Jony is 40"
let re = %re("/(Jony is )\d+/g")
let re = /(Jony is )\d+/g
let matchFn = (_match, part1, _offset, _wholeString) => {
part1 ++ "41"
}
Expand All @@ -582,7 +582,7 @@ on MDN.

```rescript
let str = "7 times 6"
let re = %re("/(\d+) times (\d+)/")
let re = /(\d+) times (\d+)/
let matchFn = (_match, p1, p2, _offset, _wholeString) => {
switch (Belt.Int.fromString(p1), Belt.Int.fromString(p2)) {
| (Some(x), Some(y)) => Belt.Int.toString(x * y)
Expand Down Expand Up @@ -619,8 +619,8 @@ on MDN.
## Examples

```rescript
Js.String.search(%re("/\d+/"), "testing 1 2 3") == 8
Js.String.search(%re("/\d+/"), "no numbers") == -1
Js.String.search(/\d+/, "testing 1 2 3") == 8
Js.String.search(/\d+/, "no numbers") == -1
```
*/
external search: Js_re.t => int = "search"
Expand Down
30 changes: 15 additions & 15 deletions jscomp/others/js_string2.res
Original file line number Diff line number Diff line change
Expand Up @@ -432,11 +432,11 @@ on MDN.
## Examples

```rescript
Js.String2.match_("The better bats", %re("/b[aeiou]t/")) == Some(["bet"])
Js.String2.match_("The better bats", %re("/b[aeiou]t/g")) == Some(["bet", "bat"])
Js.String2.match_("Today is 2018-04-05.", %re("/(\d+)-(\d+)-(\d+)/")) ==
Js.String2.match_("The better bats", /b[aeiou]t/) == Some(["bet"])
Js.String2.match_("The better bats", /b[aeiou]t/g) == Some(["bet", "bat"])
Js.String2.match_("Today is 2018-04-05.", /(\d+)-(\d+)-(\d+)/) ==
Some(["2018-04-05", "2018", "04", "05"])
Js.String2.match_("The large container.", %re("/b[aeiou]g/")) == None
Js.String2.match_("The large container.", /b[aeiou]g/) == None
```
*/
external match_: (t, Js_re.t) => option<array<option<t>>> = "match"
Expand Down Expand Up @@ -516,8 +516,8 @@ on MDN.
## Examples

```rescript
Js.String2.replaceByRe("vowels be gone", %re("/[aeiou]/g"), "x") == "vxwxls bx gxnx"
Js.String2.replaceByRe("Juan Fulano", %re("/(\w+) (\w+)/"), "$2, $1") == "Fulano, Juan"
Js.String2.replaceByRe("vowels be gone", /[aeiou]/g, "x") == "vxwxls bx gxnx"
Js.String2.replaceByRe("Juan Fulano", /(\w+) (\w+)/, "$2, $1") == "Fulano, Juan"
```
*/
external replaceByRe: (t, Js_re.t, t) => t = "replace"
Expand All @@ -536,7 +536,7 @@ on MDN.

```rescript
let str = "beautiful vowels"
let re = %re("/[aeiou]/g")
let re = /[aeiou]/g
let matchFn = (matchPart, _offset, _wholeString) => Js.String2.toUpperCase(matchPart)

Js.String2.unsafeReplaceBy0(str, re, matchFn) == "bEAUtIfUl vOwEls"
Expand All @@ -559,7 +559,7 @@ on MDN.

```rescript
let str = "Jony is 40"
let re = %re("/(Jony is )\d+/g")
let re = /(Jony is )\d+/g
let matchFn = (_match, part1, _offset, _wholeString) => {
part1 ++ "41"
}
Expand All @@ -584,7 +584,7 @@ on MDN.

```rescript
let str = "7 times 6"
let re = %re("/(\d+) times (\d+)/")
let re = /(\d+) times (\d+)/
let matchFn = (_match, p1, p2, _offset, _wholeString) => {
switch (Belt.Int.fromString(p1), Belt.Int.fromString(p2)) {
| (Some(x), Some(y)) => Belt.Int.toString(x * y)
Expand Down Expand Up @@ -621,8 +621,8 @@ on MDN.
## Examples

```rescript
Js.String2.search("testing 1 2 3", %re("/\d+/")) == 8
Js.String2.search("no numbers", %re("/\d+/")) == -1
Js.String2.search("testing 1 2 3", /\d+/) == 8
Js.String2.search("no numbers", /\d+/) == -1
```
*/
external search: (t, Js_re.t) => int = "search"
Expand Down Expand Up @@ -709,7 +709,7 @@ on MDN.
## Examples

```rescript
Js.String2.splitByRe("art; bed , cog ;dad", %re("/\s*[,;]\s*TODO/")) == [
Js.String2.splitByRe("art; bed , cog ;dad", /\s*[,;]\s*TODO/) == [
Some("art"),
Some("bed"),
Some("cog"),
Expand All @@ -732,15 +732,15 @@ on MDN.
## Examples

```rescript
Js.String2.splitByReAtMost("one: two: three: four", %re("/\s*:\s*TODO/"), ~limit=3) == [
Js.String2.splitByReAtMost("one: two: three: four", /\s*:\s*TODO/, ~limit=3) == [
Some("one"),
Some("two"),
Some("three"),
]

Js.String2.splitByReAtMost("one: two: three: four", %re("/\s*:\s*TODO/"), ~limit=0) == []
Js.String2.splitByReAtMost("one: two: three: four", /\s*:\s*TODO/, ~limit=0) == []

Js.String2.splitByReAtMost("one: two: three: four", %re("/\s*:\s*TODO/"), ~limit=8) == [
Js.String2.splitByReAtMost("one: two: three: four", /\s*:\s*TODO/, ~limit=8) == [
Some("one"),
Some("two"),
Some("three"),
Expand Down
28 changes: 28 additions & 0 deletions jscomp/syntax/src/res_core.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1891,6 +1891,22 @@ and parse_constrained_expr_region p =
| _ -> Some expr)
| _ -> None

(* Builds the [%re("/pattern/flags")] extension expression for a regex literal.
 *
 * Expects the parser's current token to be the [Regex] token whose contents
 * are passed in as [pattern] and [flags]; that token is consumed here.
 * [pattern] is the text between the delimiters (the leading "/" is re-added
 * below) and [flags] is the possibly empty flag suffix.
 *
 * The location spans from the start of the regex token to the end of the
 * consumed token. It is attached to the string constant, to the structure
 * item and to the resulting extension expression, so the expression carries
 * a real source location rather than the default one. *)
and parse_regex p pattern flags =
  let start_pos = p.Parser.start_pos in
  Parser.next p;
  let loc = mk_loc start_pos p.prev_end_pos in
  (* The "js" delimiter is only attached when parsing for the type checker. *)
  let delimiter = if p.mode = ParseForTypeChecker then Some "js" else None in
  let literal = "/" ^ pattern ^ "/" ^ flags in
  let payload =
    Parsetree.PStr
      [
        Ast_helper.Str.eval ~loc
          (Ast_helper.Exp.constant ~loc (Pconst_string (literal, delimiter)));
      ]
  in
  (* The extension name "re" is synthetic (it does not appear in the source),
   * hence [mknoloc]; the expression itself gets the literal's location. *)
  Ast_helper.Exp.extension ~loc (Location.mknoloc "re", payload)

(* Atomic expressions represent unambiguous expressions.
* This means that regardless of the context, these expressions
* are always interpreted correctly. *)
Expand Down Expand Up @@ -1960,6 +1976,18 @@ and parse_atomic_expr p =
Parser.err ~start_pos:p.prev_end_pos p
(Diagnostics.unexpected p.Parser.token p.breadcrumbs);
Recover.default_expr ()
| Forwardslash -> (
Parser.next_regex_token p;
match p.token with
| Regex (pattern, flags) -> parse_regex p pattern flags
| _ -> Ast_helper.Exp.extension (Location.mknoloc "re", Parsetree.PStr [])
)
| ForwardslashDot -> (
Parser.next_regex_token p;
match p.token with
| Regex (pattern, flags) -> parse_regex p ("." ^ pattern) flags
| _ -> Ast_helper.Exp.extension (Location.mknoloc "re", Parsetree.PStr [])
)
| token -> (
let err_pos = p.prev_end_pos in
Parser.err ~start_pos:err_pos p
Expand Down
12 changes: 6 additions & 6 deletions jscomp/syntax/src/res_grammar.ml
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ let is_atomic_pattern_start = function
let is_atomic_expr_start = function
| Token.True | False | Int _ | String _ | Float _ | Codepoint _ | Backtick
| Uident _ | Lident _ | Hash | Lparen | List | Lbracket | Lbrace | LessThan
| Module | Percent ->
| Module | Percent | Forwardslash | ForwardslashDot ->
true
| _ -> false

Expand All @@ -151,7 +151,7 @@ let is_expr_start = function
| For | Hash | If | Int _ | Lbrace | Lbracket | LessThan | Lident _ | List
| Lparen | Minus | MinusDot | Module | Percent | Plus | PlusDot | String _
| Switch | True | Try | Uident _ | Underscore (* _ => doThings() *)
| While ->
| While | Forwardslash | ForwardslashDot ->
true
| _ -> false

Expand Down Expand Up @@ -257,10 +257,10 @@ let is_jsx_child_start = is_atomic_expr_start

let is_block_expr_start = function
| Token.Assert | At | Await | Backtick | Bang | Codepoint _ | Exception
| False | Float _ | For | Forwardslash | Hash | If | Int _ | Lbrace | Lbracket
| LessThan | Let | Lident _ | List | Lparen | Minus | MinusDot | Module | Open
| Percent | Plus | PlusDot | String _ | Switch | True | Try | Uident _
| Underscore | While ->
| False | Float _ | For | Forwardslash | ForwardslashDot | Hash | If | Int _
| Lbrace | Lbracket | LessThan | Let | Lident _ | List | Lparen | Minus
| MinusDot | Module | Open | Percent | Plus | PlusDot | String _ | Switch
| True | Try | Uident _ | Underscore | While ->
true
| _ -> false

Expand Down
7 changes: 7 additions & 0 deletions jscomp/syntax/src/res_parser.ml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,13 @@ let next_template_literal_token p =
p.start_pos <- start_pos;
p.end_pos <- end_pos

(* Replaces the parser's current token with the result of scanning a regex
 * literal from the scanner's current position, and updates the parser's
 * position bookkeeping to match the scanned span. *)
let next_regex_token p =
  let regex_start, regex_end, regex_token = Scanner.scan_regex p.scanner in
  (* Remember where the previous token ended before overwriting [end_pos]. *)
  p.prev_end_pos <- p.end_pos;
  p.end_pos <- regex_end;
  p.start_pos <- regex_start;
  p.token <- regex_token

(* Returns [None] when the parser's current [end_pos] is physically the same
 * value as [prev_end_pos] (the comparison is [==], not structural equality),
 * and [Some result] otherwise.
 * NOTE(review): presumably used by callers to detect that no input was
 * consumed since [prev_end_pos] was recorded — confirm against call sites. *)
let check_progress ~prev_end_pos ~result p =
if p.end_pos == prev_end_pos then None else Some result

Expand Down
1 change: 1 addition & 0 deletions jscomp/syntax/src/res_parser.mli
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ val optional : t -> Token.t -> bool
val next : ?prev_end_pos:Lexing.position -> t -> unit
val next_unsafe : t -> unit (* Does not assert on Eof, makes no progress *)
val next_template_literal_token : t -> unit
val next_regex_token : t -> unit
val lookahead : t -> (t -> 'a) -> 'a
val err :
?start_pos:Lexing.position ->
Expand Down
10 changes: 10 additions & 0 deletions jscomp/syntax/src/res_printer.ml
Original file line number Diff line number Diff line change
Expand Up @@ -3075,6 +3075,16 @@ and print_expression ~state (e : Parsetree.expression) cmt_tbl =
Doc.soft_line;
Doc.rbrace;
])
| ( {txt = "re"},
PStr
[
{
pstr_desc =
Pstr_eval
({pexp_desc = Pexp_constant (Pconst_string (expr, _))}, []);
};
] ) ->
Doc.text expr
| extension ->
print_extension ~state ~at_module_lvl:false extension cmt_tbl)
| Pexp_apply (e, [(Nolabel, {pexp_desc = Pexp_array sub_lists})])
Expand Down
58 changes: 58 additions & 0 deletions jscomp/syntax/src/res_scanner.ml
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,64 @@ let scan_escape scanner =
(* TODO: do we know it's \' ? *)
Token.Codepoint {c = codepoint; original = contents}

(* Scans the remainder of a regex literal, starting at the scanner's current
 * offset. The caller is expected to have consumed the opening delimiter
 * already: the returned pattern starts at the current offset and does not
 * include a leading "/".
 *
 * Returns [(start_pos, end_pos, Token.Regex (pattern, flags))] where
 * [pattern] is the source text up to (not including) the closing "/" and
 * [flags] is the run of flag characters (d, g, i, m, s, u, v, y) that
 * directly follows it.
 *
 * Scanning rules:
 * - a backslash escapes the following character, so "\/" does not close the
 *   literal; a newline or end of input after the backslash is NOT skipped,
 *   so the literal is reported as unterminated instead of silently
 *   continuing on the next line;
 * - inside a character class ("[" ... "]") an unescaped "/" is part of the
 *   pattern and does not close the literal, as in JavaScript;
 * - a newline or end of input before the closing "/" reports
 *   "unterminated regex" through [scanner.err] and yields an empty pattern
 *   and empty flags.
 *
 * NOTE(review): class tracking is a single flag, so nested classes (as
 * allowed with the "v" flag) are closed by the first "]" — confirm whether
 * nested classes need support. *)
let scan_regex scanner =
  let start_pos = position scanner in
  let first_char_offset = scanner.offset in

  (* Consumes and returns the flag characters following the closing "/". *)
  let scan_flags () =
    let flags_buf = Buffer.create 0 in
    let rec loop () =
      match scanner.ch with
      | 'd' | 'g' | 'i' | 'm' | 's' | 'u' | 'v' | 'y' ->
        Buffer.add_char flags_buf scanner.ch;
        next scanner;
        loop ()
      | _ -> Buffer.contents flags_buf
    in
    loop ()
  in

  let rec scan ~in_class =
    match scanner.ch with
    | '/' when not in_class ->
      let last_char_offset = scanner.offset in
      next scanner;
      (* The pattern is exactly the source slice between the delimiters. *)
      let pattern =
        (String.sub [@doesNotRaise]) scanner.src first_char_offset
          (last_char_offset - first_char_offset)
      in
      let flags = scan_flags () in
      (pattern, flags)
    | ch when ch == '\n' || ch == hacky_eof_char ->
      let end_pos = position scanner in
      scanner.err ~start_pos ~end_pos (Diagnostics.message "unterminated regex");
      ("", "")
    | '\\' ->
      next scanner;
      (* Skip the escaped character, but leave a newline / end of input in
       * place so the next iteration reports the unterminated literal. *)
      if not (scanner.ch == '\n' || scanner.ch == hacky_eof_char) then
        next scanner;
      scan ~in_class
    | '[' ->
      next scanner;
      scan ~in_class:true
    | ']' ->
      next scanner;
      scan ~in_class:false
    | _ ->
      next scanner;
      scan ~in_class
  in
  let pattern, flags = scan ~in_class:false in
  let end_pos = position scanner in
  (start_pos, end_pos, Token.Regex (pattern, flags))

let scan_single_line_comment scanner =
let start_off = scanner.offset in
let start_pos = position scanner in
Expand Down
2 changes: 2 additions & 0 deletions jscomp/syntax/src/res_scanner.mli
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,5 @@ val reconsider_less_than : t -> Res_token.t

val scan_template_literal_token :
t -> Lexing.position * Lexing.position * Res_token.t

val scan_regex : t -> Lexing.position * Lexing.position * Res_token.t
2 changes: 2 additions & 0 deletions jscomp/syntax/src/res_token.ml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ type t =
| Backslash [@live]
| Forwardslash
| ForwardslashDot
| Regex of string * string
| Asterisk
| AsteriskDot
| Exponentiation
Expand Down Expand Up @@ -153,6 +154,7 @@ let to_string = function
| PlusPlus -> "++"
| PlusEqual -> "+="
| Backslash -> "\\"
| Regex (pattern, flags) -> "regex: /" ^ pattern ^ "/" ^ flags
| Forwardslash -> "/"
| ForwardslashDot -> "/."
| Exception -> "exception"
Expand Down
Loading