Skip to content

Commit

Permalink
Allow unicode flag if all RegExps use it
Browse files Browse the repository at this point in the history
Closes #116.
  • Loading branch information
tjvr committed Feb 25, 2019
1 parent 6d6bfa4 commit 02c064f
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 7 deletions.
19 changes: 17 additions & 2 deletions moo.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
if (obj.global) throw new Error('RegExp /g flag is implied')
if (obj.sticky) throw new Error('RegExp /y flag is implied')
if (obj.multiline) throw new Error('RegExp /m flag is implied')
if (obj.unicode) throw new Error('RegExp /u flag is not allowed')
return obj.source

} else {
Expand Down Expand Up @@ -154,6 +153,7 @@
var errorRule = null
var fast = Object.create(null)
var fastAllowed = true
var unicodeFlag = null
var groups = []
var parts = []

Expand Down Expand Up @@ -210,6 +210,20 @@

groups.push(options)

// Check unicode flag is used everywhere or nowhere
for (var j = 0; j < match.length; j++) {
var obj = match[j]
if (!isRegExp(obj)) {
continue
}

if (unicodeFlag === null) {
unicodeFlag = obj.unicode
} else if (unicodeFlag !== obj.unicode) {
throw new Error("If one rule is /u then all must be")
}
}

// convert to RegExp
var pat = reUnion(match.map(regexpOrLiteral))

Expand Down Expand Up @@ -241,8 +255,9 @@
var fallbackRule = errorRule && errorRule.fallback
var flags = hasSticky && !fallbackRule ? 'ym' : 'gm'
var suffix = hasSticky || fallbackRule ? '' : '|'
var combined = new RegExp(reUnion(parts) + suffix, flags)

if (unicodeFlag === true) flags += "u"
var combined = new RegExp(reUnion(parts) + suffix, flags)
return {regexp: combined, groups: groups, fast: fast, error: errorRule || defaultErrorRule}
}

Expand Down
35 changes: 30 additions & 5 deletions test/test.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

const fs = require('fs')
const vm = require('vm')

Expand Down Expand Up @@ -29,17 +28,14 @@ describe('compiler', () => {
expect(lex4.next()).toMatchObject({type: 'err', text: 'nope!'})
})

test("warns for /g, /y, /i, /m, /u", () => {
test("warns for /g, /y, /i, /m", () => {
expect(() => compile({ word: /foo/ })).not.toThrow()
expect(() => compile({ word: /foo/g })).toThrow('implied')
expect(() => compile({ word: /foo/i })).toThrow('not allowed')
expect(() => compile({ word: /foo/y })).toThrow('implied')
expect(() => compile({ word: /foo/m })).toThrow('implied')
expect(() => compile({ word: /foo/u })).toThrow('not allowed')
})

// TODO warns if no lineBreaks: true

test('warns about missing states', () => {
const rules = [
{match: '=', next: 'missing'},
Expand Down Expand Up @@ -1186,3 +1182,32 @@ describe('include', () => {
])
})
})


// Exercises the RegExp /u (unicode) flag: it is accepted only when every
// RegExp rule carries it, and a /u lexer matches astral characters whole.
describe("unicode flag", () => {

  test("allows all rules to be /u", () => {
    const mixedFlagsMessage = "If one rule is /u then all must be"

    // Both RegExp rules are /u; the plain string rule "quxx" carries no flags.
    const allUnicode = () => compile({ a: /foo/u, b: /bar/u, c: "quxx" })
    expect(allUnicode).not.toThrow()

    // Mixing /u and non-/u RegExps must be rejected whichever comes first.
    const unicodeThenPlain = () => compile({ a: /foo/u, b: /bar/, c: "quxx" })
    expect(unicodeThenPlain).toThrow(mixedFlagsMessage)

    const plainThenUnicode = () => compile({ a: /foo/, b: /bar/u, c: "quxx" })
    expect(plainThenUnicode).toThrow(mixedFlagsMessage)
  })

  test("supports unicode", () => {
    // Case 1: an astral character inside a character class.
    const classLexer = compile({ a: /[𝌆]/u })

    classLexer.reset("𝌆")
    const classToken = classLexer.next()
    expect(classToken).toMatchObject({value: "𝌆"})

    // NOTE(review): charCodeAt(0) returns a number (the first UTF-16 code
    // unit), not a string. Presumably the intent was to feed a lone
    // surrogate to the lexer; confirm before changing.
    classLexer.reset("𝌆".charCodeAt(0))
    const readFromCodeUnit = () => classLexer.next()
    expect(readFromCodeUnit).toThrow()

    // Case 2: the same kind of character written as a \u{...} escape.
    const escapeLexer = compile({ a: /\u{1D356}/u })

    escapeLexer.reset("𝍖")
    const escapeToken = escapeLexer.next()
    expect(escapeToken).toMatchObject({value: "𝍖"})

    // The escape sequence spelled out as literal text must not match.
    escapeLexer.reset("\\u{1D356}")
    const readFromLiteralEscape = () => escapeLexer.next()
    expect(readFromLiteralEscape).toThrow()
  })

})

0 comments on commit 02c064f

Please sign in to comment.