From 45d3ea361ec59e09dec248e81f4447d08b7c987b Mon Sep 17 00:00:00 2001 From: satyr Date: Wed, 23 Oct 2013 16:17:09 +0900 Subject: [PATCH] heregexes now keep escaped whitespaces (jashkenas/coffee-script#3059) --- lib/lexer.js | 15 +++++++++++---- src/lexer.co | 10 +++++++--- test/regex.co | 9 +++++++++ 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/lib/lexer.js b/lib/lexer.js index 411ae9ae8..3e4bdbbe9 100644 --- a/lib/lexer.js +++ b/lib/lexer.js @@ -1,4 +1,4 @@ -var string, TABS, unlines, enlines, enslash, reslash, camelize, character, KEYWORDS_SHARED, KEYWORDS_UNUSED, KEYWORDS, ID, SYMBOL, SPACE, MULTIDENT, SIMPLESTR, BSTOKEN, JSTOKEN, NUMBER, NUMBER_OMIT, REGEX, HEREGEX_OMIT, LASTDENT, INLINEDENT, NONASCII, OPENERS, CLOSERS, INVERSES, CHAIN, ARG, BLOCK_USERS, slice$ = [].slice; +var string, TABS, unlines, enlines, enslash, reslash, camelize, deheregex, character, KEYWORDS_SHARED, KEYWORDS_UNUSED, KEYWORDS, ID, SYMBOL, SPACE, MULTIDENT, SIMPLESTR, BSTOKEN, JSTOKEN, NUMBER, NUMBER_OMIT, REGEX, LASTDENT, INLINEDENT, NONASCII, OPENERS, CLOSERS, INVERSES, CHAIN, ARG, BLOCK_USERS, slice$ = [].slice; exports.lex = function(code, options){ return (clone$(exports)).tokenize(code || '', options || {}); }; @@ -424,7 +424,7 @@ exports.doHeregex = function(code, index){ if (t[0] === 'TOKENS') { tokens.push.apply(tokens, t[1]); } else { - val = t[1].replace(HEREGEX_OMIT, ''); + val = deheregex(t[1]); if (one && !val) { continue; } @@ -443,7 +443,7 @@ exports.doHeregex = function(code, index){ } this.token(flag === '$' ? ')' : ')CALL', ''); } else { - this.regex(reslash(parts[0][1].replace(HEREGEX_OMIT, '')), flag); + this.regex(reslash(deheregex(parts[0][1])), flag); } return delim.length + parts.size + flag.length; }; @@ -1064,6 +1064,14 @@ reslash = replacer(/(\\.)|\//g, function(){ camelize = replacer(/-[a-z]/ig, function(it){ return it.charAt(1).toUpperCase(); }); +deheregex = replacer(/\s+(?:#.*)?|(\\[\s\S])/g, function(arg$, bs){ + bs || (bs = ''); + if ('\n' === bs.charAt(1)) { + return '\\n'; + } else { + return bs; + } +}); function lchomp(it){ return it.slice(1 + it.lastIndexOf('\n', 0)); } @@ -1506,7 +1514,6 @@ JSTOKEN = /(`+)([^`][\s\S]*?)\1|/g; NUMBER = /0x[\dA-Fa-f][\dA-Fa-f_]*|([2-9]|[12]\d|3[0-6])r([\dA-Za-z]\w*)|((\d[\d_]*)(\.\d[\d_]*)?(?:e[+-]?\d[\d_]*)?)[$\w]*|/g; NUMBER_OMIT = /_+/g; REGEX = /\/([^[\/\n\\]*(?:(?:\\.|\[[^\]\n\\]*(?:\\.[^\]\n\\]*)*\])[^[\/\n\\]*)*)\/([gimy]{1,4}|\$?)|/g; -HEREGEX_OMIT = /\s+(?:#.*)?/g; LASTDENT = /\n[^\n\S]*$/; INLINEDENT = /[^\n\S]*[^#\s]|/g; NONASCII = /[\x80-\uFFFF]/; diff --git a/src/lexer.co b/src/lexer.co index 24cabab96..d015a3c00 100644 --- a/src/lexer.co +++ b/src/lexer.co @@ -310,7 +310,7 @@ exports import if t.0 is \TOKENS tokens.push ...t.1 else - val = t.1.replace HEREGEX_OMIT, '' + val = deheregex t.1 continue if one and not val one = tokens.push t <<< [\STRNUM @string \' enslash val] tokens.push [\+- \+ tokens[*-1]2] @@ -319,7 +319,7 @@ exports import @token \, \, if dynaflag then tokens.push ...dynaflag else @token \STRNUM "'#flag'" @token (if flag is \$ then \) else \)CALL), '' - else @regex reslash(parts.0.1.replace HEREGEX_OMIT, ''), flag + else @regex reslash(deheregex parts.0.1), flag delim.length + parts.size + flag.length # Matches a word literal, or ignores a sequence of whitespaces. @@ -748,6 +748,11 @@ reslash = replacer /(\\.)|\//g -> @@1 or \\\/ # Transforms hyphenated-words to camelCase. camelize = replacer /-[a-z]/ig -> it.char-at 1 .to-upper-case! +# ESifies a heregex. +deheregex = replacer do + /\s+(?:#.*)?|(\\[\s\S])/g + (, bs || '') -> if \\n is bs.charAt 1 then \\\n else bs + # Deletes the first character if newline. function lchomp then it.slice 1 + it.lastIndexOf \\n 0 @@ -1069,7 +1074,6 @@ REGEX = // )* )/ ([gimy]{1,4}|\$?) |//g -HEREGEX_OMIT = /\s+(?:#.*)?/g LASTDENT = /\n[^\n\S]*$/ INLINEDENT = /[^\n\S]*[^#\s]|/g diff --git a/test/regex.co b/test/regex.co index 7bce167d4..87a516e60 100644 --- a/test/regex.co +++ b/test/regex.co @@ -68,6 +68,7 @@ eq \01234 // 0 #{ // 1 #{ //2//source } 3 //source } 4 //source +# `this` interpolation let this = \THIS ok //^ \\##@#this $//test //\#THISTHIS//source @@ -82,6 +83,14 @@ ok /:\/[/]goog/.test 'http://google.com' eq /0/['source'], //#{0}//['source'] +# [coffee#3059](https://github.com/jashkenas/coffee-script/pull/3059) +# Keep escaped whitespaces. +ok //^ + a \ b \ c \ + d +$//test 'a b\u3000c\nd' + + ### $ flag eq \string typeof /^$/$ eq \string typeof //^$//$