Skip to content

Commit

Permalink
[ #257 ] support \r and \f in BNFC, Haskell, Java, Ocaml (only \r)
Browse files Browse the repository at this point in the history
  • Loading branch information
andreasabel committed May 21, 2019
1 parent 23dda42 commit c3bda39
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 26 deletions.
8 changes: 6 additions & 2 deletions source/src/BNFC/Backend/Haskell/CFtoAlex3.hs
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,9 @@ restOfAlex _ shareStrings byteStrings cf = [
userDefTokenTypes,
ident,

ifC catString ("\\\" ([$u # [\\\" \\\\ \\n]] | (\\\\ (\\\" | \\\\ | \\' | n | t)))* \\\"" ++
ifC catString ("\\\" ([$u # [\\\" \\\\ \\n]] | (\\\\ (\\\" | \\\\ | \\' | n | t | r | f)))* \\\"" ++
"\n { tok (\\p s -> PT p (TL $ share $ unescapeInitTail s)) }"),
ifC catChar "\\\' ($u # [\\\' \\\\] | \\\\ [\\\\ \\\' n t]) \\'\n { tok (\\p s -> PT p (TC $ share s)) }",
ifC catChar "\\\' ($u # [\\\' \\\\] | \\\\ [\\\\ \\\' n t r f]) \\'\n { tok (\\p s -> PT p (TC $ share s)) }",
ifC catInteger "$d+\n { tok (\\p s -> PT p (TI $ share s)) }",
ifC catDouble "$d+ \\. $d+ (e (\\-)? $d+)?\n { tok (\\p s -> PT p (TD $ share s)) }",
"",
Expand Down Expand Up @@ -172,6 +172,8 @@ restOfAlex _ shareStrings byteStrings cf = [
" '\\\\':c:cs | elem c ['\\\"', '\\\\', '\\\''] -> c : unesc cs",
" '\\\\':'n':cs -> '\\n' : unesc cs",
" '\\\\':'t':cs -> '\\t' : unesc cs",
" '\\\\':'r':cs -> '\\r' : unesc cs",
" '\\\\':'f':cs -> '\\f' : unesc cs",
" '\"':[] -> []",
" c:cs -> c : unesc cs",
" _ -> []",
Expand Down Expand Up @@ -341,6 +343,8 @@ instance Print Char where
prt _ = \case
'\n' -> ["\\n"]
'\t' -> ["\\t"]
'\r' -> ["\\r"]
'\f' -> ["\\f"]
c | isAlphaNum c -> [[c]]
c | isPrint c -> ['\\':[c]]
c -> ['\\':show (ord c)]
Expand Down
4 changes: 2 additions & 2 deletions source/src/BNFC/Backend/Java/CFtoAntlr4Lexer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,10 @@ restOfLexerGrammar cf = vcat
"IDENT : IDENTIFIER_FIRST (IDENTIFIER_FIRST | DIGIT)*;"
]
, "// Whitespace"
, "WS : (' ' | '\\r' | '\\t' | '\\n')+ -> skip;"
, "WS : (' ' | '\\r' | '\\t' | '\\n' | '\\f')+ -> skip;"
, "// Escapable sequences"
, "fragment"
, "Escapable : ('\"' | '\\\\' | 'n' | 't' | 'r');"
, "Escapable : ('\"' | '\\\\' | 'n' | 't' | 'r' | 'f');"
, "ErrorToken : . ;"
, ifString stringmodes
, ifChar charmodes
Expand Down
38 changes: 20 additions & 18 deletions source/src/BNFC/Backend/Java/CFtoJLex15.hs
Original file line number Diff line number Diff line change
Expand Up @@ -50,21 +50,18 @@ import BNFC.Options (JavaLexerParser(..), RecordPositions(..))
import BNFC.Utils (cstring)
import Text.PrettyPrint

--The environment must be returned for the parser to use.
-- | The environment is returned for further use in the parser.
cf2jlex :: JavaLexerParser -> RecordPositions -> String -> CF -> (Doc, SymEnv)
cf2jlex jflex rp packageBase cf = (vcat
[
prelude jflex rp packageBase,
cMacros,
lexSymbols jflex env,
restOfJLex jflex rp cf
], env)
cf2jlex jflex rp packageBase cf = (, env) . vcat $
[ prelude jflex rp packageBase
, cMacros
, lexSymbols jflex env
, restOfJLex jflex rp cf
]
where
env = makeSymEnv (cfgSymbols cf ++ reservedWords cf) (0 :: Int)
makeSymEnv [] _ = []
makeSymEnv (s:symbs) n = (s, "_SYMB_" ++ show n) : makeSymEnv symbs (n+1)
env = zipWith (\ s n -> (s, "_SYMB_" ++ show n)) (cfgSymbols cf ++ reservedWords cf) [(0 :: Int)..]

-- | File prelude
-- | File prelude.
prelude :: JavaLexerParser -> RecordPositions -> String -> Doc
prelude jflex rp packageBase = vcat
[ "// This JLex file was machine-generated by the BNF converter"
Expand Down Expand Up @@ -113,15 +110,16 @@ prelude jflex rp packageBase = vcat
else ""
]
where
positionDeclarations =
positionDeclarations
-- JFlex always defines yyline, yychar, yycolumn, even if unused.
if jflex == JFlexCup then ""
else if rp == RecordPositions then "int yycolumn = unknown - 1;"
else vcat
| jflex == JFlexCup = ""
| rp == RecordPositions = "int yycolumn = unknown - 1;"
| otherwise = vcat
-- subtract one so that one based numbering still ends up with unknown.
[ "int yyline = unknown - 1;"
, "int yycolumn = unknown - 1;"
, "int yychar = unknown;" ]
, "int yychar = unknown;"
]

-- For now, all categories are included.
-- Ideally, only the ones that are used should be generated.
Expand Down Expand Up @@ -203,9 +201,11 @@ restOfJLex jflex rp cf = vcat
then "<STRING><<EOF>> { throw new Error(\"Unterminated string at EOF, beginning at \" + left.getLine() + \":\" + left.getColumn()); }"
else ""
, "<ESCAPED>n { pstring += \"\\n\"; yybegin(STRING); }"
, "<ESCAPED>t { pstring += \"\\t\"; yybegin(STRING); }"
, "<ESCAPED>r { pstring += \"\\r\"; yybegin(STRING); }"
, "<ESCAPED>f { pstring += \"\\f\"; yybegin(STRING); }"
, "<ESCAPED>\\\" { pstring += \"\\\"\"; yybegin(STRING); }"
, "<ESCAPED>\\\\ { pstring += \"\\\\\"; yybegin(STRING); }"
, "<ESCAPED>t { pstring += \"\\t\"; yybegin(STRING); }"
, "<ESCAPED>. { pstring += yytext(); yybegin(STRING); }"
, "<ESCAPED>\\r\\n|\\r|\\n { throw new Error(\"Unterminated string on line \" + left.getLine() " <>
(if jflex == JFlexCup then "+ \" beginning at column \" + left.getColumn()" else "") <> "); }"
Expand All @@ -224,6 +224,8 @@ restOfJLex jflex rp cf = vcat
else ""
, "<CHARESC>n { yybegin(CHAREND); return cf.newSymbol(\"\", sym._CHAR_, left, right_loc(), new Character('\\n')); }"
, "<CHARESC>t { yybegin(CHAREND); return cf.newSymbol(\"\", sym._CHAR_, left, right_loc(), new Character('\\t')); }"
, "<CHARESC>r { yybegin(CHAREND); return cf.newSymbol(\"\", sym._CHAR_, left, right_loc(), new Character('\\r')); }"
, "<CHARESC>f { yybegin(CHAREND); return cf.newSymbol(\"\", sym._CHAR_, left, right_loc(), new Character('\\f')); }"
, "<CHARESC>. { yybegin(CHAREND); return cf.newSymbol(\"\", sym._CHAR_, left, right_loc(), new Character(yytext().charAt(0))); }"
, "<CHARESC>\\r\\n|\\r|\\n { throw new Error(\"Unterminated character literal on line \" + left.getLine() " <>
(if jflex == JFlexCup then "+ \" beginning at column \" + left.getColumn()" else "") <> "); }"
Expand Down
6 changes: 4 additions & 2 deletions source/src/BNFC/Backend/OCaml/CFtoOCamlLex.hs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ header parserMod cf = [
" '\\\\'::c::cs when List.mem c ['\\\"'; '\\\\'; '\\\''] -> c :: unesc cs",
" | '\\\\'::'n'::cs -> '\\n' :: unesc cs",
" | '\\\\'::'t'::cs -> '\\t' :: unesc cs",
" | '\\\\'::'r'::cs -> '\\r' :: unesc cs",
-- " | '\\\\'::'f'::cs -> '\\f' :: unesc cs", -- \f not supported by ocaml
" | '\\\"'::[] -> []",
" | c::cs -> c :: unesc cs",
" | _ -> []",
Expand Down Expand Up @@ -208,10 +210,10 @@ rules cf = mkRule "token" $
, ( "d+ '.' d+ ('e' ('-')? d+)?"
, "let f = lexeme lexbuf in TOK_Double (float_of_string f)" )
-- strings
, ( "'\\\"' ((u # ['\\\"' '\\\\' '\\n']) | ('\\\\' ('\\\"' | '\\\\' | '\\\'' | 'n' | 't')))* '\\\"'"
, ( "'\\\"' ((u # ['\\\"' '\\\\' '\\n']) | ('\\\\' ('\\\"' | '\\\\' | '\\\'' | 'n' | 't' | 'r')))* '\\\"'"
, "let s = lexeme lexbuf in TOK_String (unescapeInitTail s)" )
-- chars
, ( "'\\'' ((u # ['\\\'' '\\\\']) | ('\\\\' ('\\\\' | '\\\'' | 'n' | 't'))) '\\\''"
, ( "'\\'' ((u # ['\\\'' '\\\\']) | ('\\\\' ('\\\\' | '\\\'' | 'n' | 't' | 'r'))) '\\\''"
, "let s = lexeme lexbuf in TOK_Char s.[1]")
-- spaces
, ( "[' ' '\\t']", "token lexbuf")
Expand Down
6 changes: 4 additions & 2 deletions source/src/LexBNF.x
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ $white+ ;

$l $i*
{ tok (\p s -> PT p (eitherResIdent (TV . share) s)) }
\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t)))* \"
\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t | r | f)))* \"
{ tok (\p s -> PT p (TL $ share $ unescapeInitTail s)) }
\' ($u # [\' \\] | \\ [\\ \' n t]) \'
\' ($u # [\' \\] | \\ [\\ \' n t r f]) \'
{ tok (\p s -> PT p (TC $ share s)) }
$d+
{ tok (\p s -> PT p (TI $ share s)) }
Expand Down Expand Up @@ -117,6 +117,8 @@ unescapeInitTail = id . unesc . tail . id where
'\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs
'\\':'n':cs -> '\n' : unesc cs
'\\':'t':cs -> '\t' : unesc cs
'\\':'r':cs -> '\r' : unesc cs
'\\':'f':cs -> '\f' : unesc cs
'"':[] -> []
c:cs -> c : unesc cs
_ -> []
Expand Down

0 comments on commit c3bda39

Please sign in to comment.