From 9bac8a78babf022825a70e5de2a46b948390f98b Mon Sep 17 00:00:00 2001 From: Alfonso Garcia-Caro Date: Wed, 8 Jun 2022 15:23:49 +0900 Subject: [PATCH 1/3] Double dollar for triple quoted interpolation --- src/Compiler/Facilities/prim-lexing.fsi | 3 + src/Compiler/Service/ServiceLexing.fs | 36 ++++--- src/Compiler/SyntaxTree/ParseHelpers.fs | 11 +- src/Compiler/SyntaxTree/ParseHelpers.fsi | 9 +- src/Compiler/lex.fsl | 124 +++++++++++------------ src/Compiler/pars.fsy | 2 +- 6 files changed, 97 insertions(+), 88 deletions(-) diff --git a/src/Compiler/Facilities/prim-lexing.fsi b/src/Compiler/Facilities/prim-lexing.fsi index 290b48b53e8..8eb18f087fc 100644 --- a/src/Compiler/Facilities/prim-lexing.fsi +++ b/src/Compiler/Facilities/prim-lexing.fsi @@ -115,6 +115,9 @@ type internal LexBuffer<'Char> = /// Determine if Lexeme contains a specific character member LexemeContains: 'Char -> bool + /// The length of the lexeme. + member LexemeLength: int + /// Fast helper to turn the matched characters into a string, avoiding an intermediate array. static member LexemeString: LexBuffer -> string diff --git a/src/Compiler/Service/ServiceLexing.fs b/src/Compiler/Service/ServiceLexing.fs index 9ecdbb823a4..34d9210a315 100644 --- a/src/Compiler/Service/ServiceLexing.fs +++ b/src/Compiler/Service/ServiceLexing.fs @@ -605,23 +605,26 @@ module internal LexerStateEncoding = ifdefStackCount <- ifdefStackCount + 1 let stringKindValue = - (if stringKind.IsByteString then 0b100 else 0) + (if stringKind.IsDoubleDollarInterpolated then 0b1000 else 0) + ||| (if stringKind.IsByteString then 0b100 else 0) ||| (if stringKind.IsInterpolated then 0b010 else 0) ||| (if stringKind.IsInterpolatedFirst then 0b001 else 0) let nestingValue = - let tag1, i1, kind1, rest = + let tag1, i1, kind1, b1, rest = match stringNest with - | [] -> false, 0, 0, [] - | (i1, kind1, _) :: rest -> true, i1, encodeStringStyle kind1, rest - - let tag2, i2, kind2 = + | [] -> false, 0, 0, false, [] + | (i1, kind1, b1, _)::rest -> true, i1, encodeStringStyle kind1, b1, rest + + let tag2, i2, kind2, b2 = match rest with - | [] -> false, 0, 0 - | (i2, kind2, _) :: _ -> true, i2, encodeStringStyle kind2 - - (if tag1 then 0b100000000000 else 0) - ||| (if tag2 then 0b010000000000 else 0) + | [] -> false, 0, 0, false + | (i2, kind2, b2, _)::_ -> true, i2, encodeStringStyle kind2, b2 + + (if tag1 then 0b10000000000000 else 0) + ||| (if tag2 then 0b01000000000000 else 0) + ||| (if b1 then 0b100000000000 else 0) + ||| (if b2 then 0b010000000000 else 0) ||| ((i1 <<< 7) &&& 0b001110000000) ||| ((i2 <<< 4) &&& 0b000001110000) ||| ((kind1 <<< 2) &&& 0b000000001100) @@ -665,6 +668,7 @@ module internal LexerStateEncoding = let stringKind: LexerStringKind = { + IsDoubleDollarInterpolated = ((stringKindValue &&& 0b1000) = 0b1000) IsByteString = ((stringKindValue &&& 0b100) = 0b100) IsInterpolated = ((stringKindValue &&& 0b010) = 0b010) IsInterpolatedFirst = ((stringKindValue &&& 0b001) = 0b001) @@ -675,16 +679,18 @@ module internal LexerStateEncoding = let nestingValue = int32 ((bits &&& nestingMask) >>> nestingStart) let stringNest: LexerInterpolatedStringNesting = - let tag1 = ((nestingValue &&& 0b100000000000) = 0b100000000000) - let tag2 = ((nestingValue &&& 0b010000000000) = 0b010000000000) + let tag1 = ((nestingValue &&& 0b10000000000000) = 0b10000000000000) + let tag2 = ((nestingValue &&& 0b01000000000000) = 0b01000000000000) + let b1 = ((nestingValue &&& 0b100000000000) = 0b100000000000) + let b2 = ((nestingValue &&& 0b010000000000) = 0b010000000000) let i1 = ((nestingValue &&& 0b001110000000) >>> 7) let i2 = ((nestingValue &&& 0b000001110000) >>> 4) let kind1 = ((nestingValue &&& 0b000000001100) >>> 2) let kind2 = ((nestingValue &&& 0b000000000011) >>> 0) [ - if tag1 then i1, decodeStringStyle kind1, range0 - if tag2 then i2, decodeStringStyle kind2, range0 + if tag1 then i1, decodeStringStyle kind1, b1, range0 + if tag2 then i2, decodeStringStyle kind2, b2, range0 ] (colorState, ncomments, pos, ifDefs, hardwhite, stringKind, stringNest) diff --git a/src/Compiler/SyntaxTree/ParseHelpers.fs b/src/Compiler/SyntaxTree/ParseHelpers.fs index 2d31001f33f..aab6fce2848 100644 --- a/src/Compiler/SyntaxTree/ParseHelpers.fs +++ b/src/Compiler/SyntaxTree/ParseHelpers.fs @@ -274,6 +274,7 @@ type LexerStringKind = IsByteString: bool IsInterpolated: bool IsInterpolatedFirst: bool + IsDoubleDollarInterpolated: bool } static member String = @@ -281,6 +282,7 @@ type LexerStringKind = IsByteString = false IsInterpolated = false IsInterpolatedFirst = false + IsDoubleDollarInterpolated = false } static member ByteString = @@ -288,25 +290,28 @@ type LexerStringKind = IsByteString = true IsInterpolated = false IsInterpolatedFirst = false + IsDoubleDollarInterpolated = false } - static member InterpolatedStringFirst = + static member InterpolatedStringFirst(isDoubleDollar) = { IsByteString = false IsInterpolated = true IsInterpolatedFirst = true + IsDoubleDollarInterpolated = isDoubleDollar } - static member InterpolatedStringPart = + static member InterpolatedStringPart(isDoubleDollar) = { IsByteString = false IsInterpolated = true IsInterpolatedFirst = false + IsDoubleDollarInterpolated = isDoubleDollar } /// Represents the degree of nesting of '{..}' and the style of the string to continue afterwards, in an interpolation fill. /// Nesting counters and styles of outer interpolating strings are pushed on this stack. -type LexerInterpolatedStringNesting = (int * LexerStringStyle * range) list +type LexerInterpolatedStringNesting = (int * LexerStringStyle * bool * range) list /// The parser defines a number of tokens for whitespace and /// comments eliminated by the lexer. These carry a specification of diff --git a/src/Compiler/SyntaxTree/ParseHelpers.fsi b/src/Compiler/SyntaxTree/ParseHelpers.fsi index 09133a9deb6..80b592b25f6 100644 --- a/src/Compiler/SyntaxTree/ParseHelpers.fsi +++ b/src/Compiler/SyntaxTree/ParseHelpers.fsi @@ -106,17 +106,18 @@ type LexerStringStyle = type LexerStringKind = { IsByteString: bool IsInterpolated: bool - IsInterpolatedFirst: bool } + IsInterpolatedFirst: bool + IsDoubleDollarInterpolated: bool } static member ByteString: LexerStringKind - static member InterpolatedStringFirst: LexerStringKind + static member InterpolatedStringFirst: bool -> LexerStringKind - static member InterpolatedStringPart: LexerStringKind + static member InterpolatedStringPart: bool -> LexerStringKind static member String: LexerStringKind -type LexerInterpolatedStringNesting = (int * LexerStringStyle * range) list +type LexerInterpolatedStringNesting = (int * LexerStringStyle * bool * range) list [] type LexerContinuation = diff --git a/src/Compiler/lex.fsl b/src/Compiler/lex.fsl index 45057efa104..1bfe866d780 100644 --- a/src/Compiler/lex.fsl +++ b/src/Compiler/lex.fsl @@ -600,14 +600,14 @@ rule token args skip = parse // Single quote in triple quote ok, others disallowed match args.stringNest with - | (_, LexerStringStyle.TripleQuote, _) :: _ -> () + | (_, LexerStringStyle.TripleQuote, _, _) :: _ -> () | _ :: _ -> errorR(Error(FSComp.SR.lexSingleQuoteInSingleQuote(), m)) | [] -> () if not skip then STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.SingleQuote, LexerStringKind.String, m)) else singleQuoteString (buf, fin, m, LexerStringKind.String, args) skip lexbuf } - | '$' '"' '"' '"' + | ("$$" | "$") '"' '"' '"' { let buf, fin, m = startString args lexbuf // Single quote in triple quote ok, others disallowed @@ -615,20 +615,21 @@ rule token args skip = parse | _ :: _ -> errorR(Error(FSComp.SR.lexTripleQuoteInTripleQuote(), m)) | [] -> () - if not skip then STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.TripleQuote, LexerStringKind.InterpolatedStringFirst, m)) - else tripleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringFirst, args) skip lexbuf } + let doubleDollar = lexbuf.LexemeLength > 4 + if not skip then STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.TripleQuote, LexerStringKind.InterpolatedStringFirst(doubleDollar), m)) + else tripleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringFirst(doubleDollar), args) skip lexbuf } | '$' '"' { let buf,fin,m = startString args lexbuf // Single quote in triple quote ok, others disallowed match args.stringNest with - | (_, LexerStringStyle.TripleQuote, _) :: _ -> () + | (_, LexerStringStyle.TripleQuote, _, _) :: _ -> () | _ :: _ -> errorR(Error(FSComp.SR.lexSingleQuoteInSingleQuote(), m)) | _ -> () - if not skip then STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.SingleQuote, LexerStringKind.InterpolatedStringFirst, m)) - else singleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringFirst, args) skip lexbuf } + if not skip then STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.SingleQuote, LexerStringKind.InterpolatedStringFirst(false), m)) + else singleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringFirst(false), args) skip lexbuf } | '"' '"' '"' { let buf, fin, m = startString args lexbuf @@ -646,7 +647,7 @@ rule token args skip = parse // Single quote in triple quote ok, others disallowed match args.stringNest with - | (_, LexerStringStyle.TripleQuote, _) :: _ -> () + | (_, LexerStringStyle.TripleQuote, _, _) :: _ -> () | _ :: _ -> errorR(Error(FSComp.SR.lexSingleQuoteInSingleQuote(), m)) | _ -> () @@ -658,12 +659,12 @@ rule token args skip = parse // Single quote in triple quote ok, others disallowed match args.stringNest with - | (_, LexerStringStyle.TripleQuote, _) :: _ -> () + | (_, LexerStringStyle.TripleQuote, _, _) :: _ -> () | _ :: _ -> errorR(Error(FSComp.SR.lexSingleQuoteInSingleQuote(), m)) | _ -> () - if not skip then STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.Verbatim, LexerStringKind.InterpolatedStringFirst, m)) - else verbatimString (buf, fin, m, LexerStringKind.InterpolatedStringFirst, args) skip lexbuf } + if not skip then STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.Verbatim, LexerStringKind.InterpolatedStringFirst(false), m)) + else verbatimString (buf, fin, m, LexerStringKind.InterpolatedStringFirst(false), args) skip lexbuf } | truewhite+ { if skip then token args skip lexbuf @@ -845,14 +846,15 @@ rule token args skip = parse | ">]" { GREATER_RBRACK } - | "{" + | ("{{" | "{") { + let isDouble = lexbuf.LexemeLength > 1 match args.stringNest with - | [] -> () - | (counter, style, m) :: rest -> + | (counter, style, isDoubleDollar, m) :: rest when isDoubleDollar = isDouble -> // Note, we do not update the 'm', any incomplete-interpolation error // will be reported w.r.t. the first '{' - args.stringNest <- (counter + 1, style, m) :: rest + args.stringNest <- (counter + 1, style, isDoubleDollar, m) :: rest + | _ -> () // To continue token-by-token lexing may involve picking up the new args.stringNes let cont = LexCont.Token(args.ifdefStack, args.stringNest) LBRACE cont @@ -860,26 +862,27 @@ rule token args skip = parse | "|" { BAR } - | "}" + | ("}}" | "}") { // We encounter a '}' in the expression token stream. First check if we're in an interpolated string expression // and continue the string if necessary + let isDouble = lexbuf.LexemeLength > 1 match args.stringNest with - | (1, style, _) :: rest -> + | (1, style, isDoubleDollar, _) :: rest when isDoubleDollar = isDouble -> args.stringNest <- rest let buf, fin, m = startString args lexbuf if not skip then - STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, style, LexerStringKind.InterpolatedStringPart, m)) + STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, style, LexerStringKind.InterpolatedStringPart(isDoubleDollar), m)) else match style with - | LexerStringStyle.Verbatim -> verbatimString (buf, fin, m, LexerStringKind.InterpolatedStringPart, args) skip lexbuf - | LexerStringStyle.SingleQuote -> singleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringPart, args) skip lexbuf - | LexerStringStyle.TripleQuote -> tripleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringPart, args) skip lexbuf + | LexerStringStyle.Verbatim -> verbatimString (buf, fin, m, LexerStringKind.InterpolatedStringPart(isDoubleDollar), args) skip lexbuf + | LexerStringStyle.SingleQuote -> singleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringPart(isDoubleDollar), args) skip lexbuf + | LexerStringStyle.TripleQuote -> tripleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringPart(isDoubleDollar), args) skip lexbuf - | (counter, style, m) :: rest -> + | (counter, style, isDoubleDollar, m) :: rest when isDoubleDollar = isDouble -> // Note, we do not update the 'm', any incomplete-interpolation error // will be reported w.r.t. the first '{' - args.stringNest <- (counter - 1, style, m) :: rest + args.stringNest <- (counter - 1, style, isDoubleDollar, m) :: rest let cont = LexCont.Token(args.ifdefStack, args.stringNest) RBRACE cont @@ -1189,36 +1192,33 @@ and singleQuoteString sargs skip = parse fin.Finish buf { kind with IsByteString = true } (enum(0)) cont } - | ("{{" | "}}") - { let (buf, _fin, m, kind, args) = sargs - let s = lexeme lexbuf - addUnicodeString buf (if kind.IsInterpolated then s.[0..0] else s) - if not skip then STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.SingleQuote, kind, m)) - else singleQuoteString sargs skip lexbuf } - - | "{" + | ("{{" | "{") { let (buf, fin, m, kind, args) = sargs - if kind.IsInterpolated then + let isDouble = lexbuf.LexemeLength > 1 + if kind.IsInterpolated && kind.IsDoubleDollarInterpolated = isDouble then // get a new range for where the fill starts let m2 = lexbuf.LexemeRange - args.stringNest <- (1, LexerStringStyle.SingleQuote, m2) :: args.stringNest + args.stringNest <- (1, LexerStringStyle.SingleQuote, kind.IsDoubleDollarInterpolated, m2) :: args.stringNest let cont = LexCont.Token(args.ifdefStack, args.stringNest) fin.Finish buf kind LexerStringFinisherContext.InterpolatedPart cont else - addUnicodeString buf (lexeme lexbuf) + let s = lexeme lexbuf + addUnicodeString buf (if kind.IsInterpolated && not kind.IsDoubleDollarInterpolated then s.[0..0] else s) if not skip then STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.SingleQuote, kind, m)) else singleQuoteString sargs skip lexbuf } - | "}" + | ("}}" | "}") { let (buf, _fin, m, kind, args) = sargs let result() = if not skip then STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.SingleQuote, kind, m)) else singleQuoteString sargs skip lexbuf - if kind.IsInterpolated then + let isDouble = lexbuf.LexemeLength > 1 + if kind.IsInterpolated && kind.IsDoubleDollarInterpolated = isDouble then fail args lexbuf (FSComp.SR.lexRBraceInInterpolatedString()) (result()) else - addUnicodeString buf (lexeme lexbuf) + let s = lexeme lexbuf + addUnicodeString buf (if kind.IsInterpolated && not kind.IsDoubleDollarInterpolated then s.[0..0] else s) (result()) } @@ -1286,36 +1286,33 @@ and verbatimString sargs skip = parse if not skip then STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.Verbatim, kind, m)) else verbatimString sargs skip lexbuf } - | ("{{" | "}}") - { let (buf, _fin, m, kind, args) = sargs - let s = lexeme lexbuf - addUnicodeString buf (if kind.IsInterpolated then s.[0..0] else s) - if not skip then STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.Verbatim, kind, m)) - else verbatimString sargs skip lexbuf } - - | "{" + | ("{{" | "{") { let (buf, fin, m, kind, args) = sargs - if kind.IsInterpolated then + let isDouble = lexbuf.LexemeLength > 1 + if kind.IsInterpolated && kind.IsDoubleDollarInterpolated = isDouble then // get a new range for where the fill starts let m2 = lexbuf.LexemeRange - args.stringNest <- (1, LexerStringStyle.Verbatim, m2) :: args.stringNest + args.stringNest <- (1, LexerStringStyle.Verbatim, kind.IsDoubleDollarInterpolated, m2) :: args.stringNest let cont = LexCont.Token(args.ifdefStack, args.stringNest) fin.Finish buf kind (enum(3)) cont else - addUnicodeString buf (lexeme lexbuf) + let s = lexeme lexbuf + addUnicodeString buf (if kind.IsInterpolated && not kind.IsDoubleDollarInterpolated then s.[0..0] else s) if not skip then STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.Verbatim, kind, m)) else verbatimString sargs skip lexbuf } - | "}" + | ("}}" | "}") { let (buf, _fin, m, kind, args) = sargs let result() = if not skip then STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.Verbatim, kind, m)) else verbatimString sargs skip lexbuf - if kind.IsInterpolated then + let isDouble = lexbuf.LexemeLength > 1 + if kind.IsInterpolated && kind.IsDoubleDollarInterpolated = isDouble then fail args lexbuf (FSComp.SR.lexRBraceInInterpolatedString()) (result()) else - addUnicodeString buf (lexeme lexbuf) + let s = lexeme lexbuf + addUnicodeString buf (if kind.IsInterpolated && not kind.IsDoubleDollarInterpolated then s.[0..0] else s) (result()) } @@ -1382,36 +1379,33 @@ and tripleQuoteString sargs skip = parse if not skip then STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.TripleQuote, kind, m)) else tripleQuoteString sargs skip lexbuf } - | ("{{" | "}}") - { let (buf, _fin, m, kind, args) = sargs - let s = lexeme lexbuf - addUnicodeString buf (if kind.IsInterpolated then s.[0..0] else s) - if not skip then STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.TripleQuote, kind, m)) - else tripleQuoteString sargs skip lexbuf } - - | "{" + | ("{{" | "{") { let (buf, fin, m, kind, args) = sargs - if kind.IsInterpolated then + let isDouble = lexbuf.LexemeLength > 1 + if kind.IsInterpolated && kind.IsDoubleDollarInterpolated = isDouble then // get a new range for where the fill starts let m2 = lexbuf.LexemeRange - args.stringNest <- (1, LexerStringStyle.TripleQuote, m2) :: args.stringNest + args.stringNest <- (1, LexerStringStyle.TripleQuote, kind.IsDoubleDollarInterpolated, m2) :: args.stringNest let cont = LexCont.Token(args.ifdefStack, args.stringNest) fin.Finish buf kind (enum(5)) cont else - addUnicodeString buf (lexeme lexbuf) + let s = lexeme lexbuf + addUnicodeString buf (if kind.IsInterpolated && not kind.IsDoubleDollarInterpolated then s.[0..0] else s) if not skip then STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.TripleQuote, kind, m)) else tripleQuoteString sargs skip lexbuf } - | "}" + | ("}}" | "}") { let (buf, _fin, m, kind, args) = sargs let result() = if not skip then STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.TripleQuote, kind, m)) else tripleQuoteString sargs skip lexbuf - if kind.IsInterpolated then + let isDouble = lexbuf.LexemeLength > 1 + if kind.IsInterpolated && kind.IsDoubleDollarInterpolated = isDouble then fail args lexbuf (FSComp.SR.lexRBraceInInterpolatedString()) (result()) else - addUnicodeString buf (lexeme lexbuf) + let s = lexeme lexbuf + addUnicodeString buf (if kind.IsInterpolated && not kind.IsDoubleDollarInterpolated then s.[0..0] else s) (result()) } diff --git a/src/Compiler/pars.fsy b/src/Compiler/pars.fsy index e0f78e337b0..b5fa1ae25ac 100644 --- a/src/Compiler/pars.fsy +++ b/src/Compiler/pars.fsy @@ -142,7 +142,7 @@ let checkEndOfFileError t = | (_, m) :: _ -> reportParseErrorAt m (FSComp.SR.parsNoHashEndIfFound()) match nesting with | [] -> () - | (_, _, m) :: _ -> reportParseErrorAt m (FSComp.SR.parsEofInInterpolatedStringFill()) + | (_, _, _, m) :: _ -> reportParseErrorAt m (FSComp.SR.parsEofInInterpolatedStringFill()) type BindingSet = BindingSetPreAttrs of range * bool * bool * (SynAttributes -> SynAccess option -> SynAttributes * SynBinding list) * range From 5d4a3d7f5aa1d0859930f716f8486ab472f2a260 Mon Sep 17 00:00:00 2001 From: Alfonso Garcia-Caro Date: Fri, 10 Jun 2022 11:44:40 +0900 Subject: [PATCH 2/3] Move back lex buffer if double brace is not meaningful --- src/Compiler/Facilities/prim-lexing.fsi | 2 +- src/Compiler/lex.fsl | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Compiler/Facilities/prim-lexing.fsi b/src/Compiler/Facilities/prim-lexing.fsi index 8eb18f087fc..fd0a5fcec5b 100644 --- a/src/Compiler/Facilities/prim-lexing.fsi +++ b/src/Compiler/Facilities/prim-lexing.fsi @@ -116,7 +116,7 @@ type internal LexBuffer<'Char> = member LexemeContains: 'Char -> bool /// The length of the lexeme. - member LexemeLength: int + member LexemeLength: int with get, set /// Fast helper to turn the matched characters into a string, avoiding an intermediate array. static member LexemeString: LexBuffer -> string diff --git a/src/Compiler/lex.fsl b/src/Compiler/lex.fsl index 1bfe866d780..b6d0f245e2f 100644 --- a/src/Compiler/lex.fsl +++ b/src/Compiler/lex.fsl @@ -854,6 +854,9 @@ rule token args skip = parse // Note, we do not update the 'm', any incomplete-interpolation error // will be reported w.r.t. the first '{' args.stringNest <- (counter + 1, style, isDoubleDollar, m) :: rest + | _ when isDouble -> + lexbuf.LexemeLength <- 1 + lexbuf.EndPos <- lexbuf.EndPos.ShiftColumnBy -1 | _ -> () // To continue token-by-token lexing may involve picking up the new args.stringNes let cont = LexCont.Token(args.ifdefStack, args.stringNest) @@ -887,6 +890,9 @@ rule token args skip = parse RBRACE cont | _ -> + if isDouble then + lexbuf.LexemeLength <- 1 + lexbuf.EndPos <- lexbuf.EndPos.ShiftColumnBy -1 let cont = LexCont.Token(args.ifdefStack, args.stringNest) RBRACE cont } From c4229585a85b602c2bef0d1dbc7af32f26bd1723 Mon Sep 17 00:00:00 2001 From: Alfonso Garcia-Caro Date: Wed, 15 Jun 2022 12:10:34 +0900 Subject: [PATCH 3/3] Add some tests --- src/Compiler/Service/ServiceLexing.fs | 2 +- .../Compiler/Language/StringInterpolation.fs | 21 +++++++ tests/service/TokenizerTests.fs | 55 ++++++++++++++++++- 3 files changed, 75 insertions(+), 3 deletions(-) diff --git a/src/Compiler/Service/ServiceLexing.fs b/src/Compiler/Service/ServiceLexing.fs index 34d9210a315..532c68f25e3 100644 --- a/src/Compiler/Service/ServiceLexing.fs +++ b/src/Compiler/Service/ServiceLexing.fs @@ -615,7 +615,7 @@ module internal LexerStateEncoding = match stringNest with | [] -> false, 0, 0, false, [] | (i1, kind1, b1, _)::rest -> true, i1, encodeStringStyle kind1, b1, rest - + let tag2, i2, kind2, b2 = match rest with | [] -> false, 0, 0, false diff --git a/tests/fsharp/Compiler/Language/StringInterpolation.fs b/tests/fsharp/Compiler/Language/StringInterpolation.fs index 708d9be5e3b..081b36f114d 100644 --- a/tests/fsharp/Compiler/Language/StringInterpolation.fs +++ b/tests/fsharp/Compiler/Language/StringInterpolation.fs @@ -463,6 +463,16 @@ check "vcewweh228d" "{{" "{{" check "vcewweh229f" "}}" "}}" """ + [] + let ``String interpolation using escaped braces - double dollar`` () = + SimpleCheckTest + """ +check "vcewweh226i" $$\"\"\"{\"\"\" "{" +check "vcewweh226f" $$\"\"\"{}\"\"\" "{}" +check "vcewweh226p" $$\"\"\"{ {{1}} }\"\"\" "{ 1 }" +check "vcewweh227a" $$\"\"\"}\"\"\" "}" + """ + [] let ``String interpolation using verbatim strings`` () = SimpleCheckTest @@ -487,11 +497,19 @@ type R2 = { X : int ; Y: int } // Check record expression (parenthesized) check "vcewweh18" $"abc{({contents=1}.contents)}def" "abc1def" +// Check record expression (parenthesized) - double dollar +check "vcewweh18b" $$\"\"\"abc{{({contents=1}.contents)}}def\"\"\" "abc1def" + // Check record expression (unparenthesized, spaced) check "vcewweh19a" $"abc{ {X=1} }def" "abc{ X = 1 }def" check "vcewweh19b" $"abc{ {X=1} }def" "abc{ X = 1 }def" +// Check record expression (unparenthesized, spaced) - double dollar +check "vcewweh19c" $$"abc{{ {X=1} }}def" "abc{ X = 1 }def" + +check "vcewweh19d" $$\"\"\"abc{{ {X=1} }}def\"\"\" "abc{ X = 1 }def" + // Check record expression (unparenthesized, spaced ending in token brace then string hole brace) check "vcewweh19v" $"abc{ {X=1}}def" "abc{ X = 1 }def" @@ -501,6 +519,9 @@ check "vcewweh20" $"abc{{X=1}}def" "abc{X=1}def" // Check thing that is not really a record expression (braces are escaped) check "vcewweh20b" $"abc{{quack=1}}def" "abc{quack=1}def" +// Check thing that is not really a record expression (double dollar) +check "vcewweh20c" $$\"\"\"abc{X=1}def\"\"\" "abc{X=1}def" + // Check thing that is not really a record expression (braces are escaped) check "vcewweh21" $"abc{{X=1; Y=2}}def" "abc{X=1; Y=2}def" diff --git a/tests/service/TokenizerTests.fs b/tests/service/TokenizerTests.fs index 8b0fc2c8995..28881f32ad9 100644 --- a/tests/service/TokenizerTests.fs +++ b/tests/service/TokenizerTests.fs @@ -129,7 +129,9 @@ let ``Tokenizer test - multiline non-nested string interpolation``() = let tokenizedLines = tokenizeLines [| "let hello1t = $\"\"\"abc {1+" - " 1} def\"\"\"" |] + " 1} def\"\"\"" + "let hello2t = $$\"\"\"abc {{1+" + " 1}} def\"\"\"" |] let actual = [ for lineNo, lineToks in tokenizedLines do @@ -142,6 +144,14 @@ let ``Tokenizer test - multiline non-nested string interpolation``() = ("INTERP_STRING_BEGIN_PART", "{"); ("INT32", "1"); ("PLUS_MINUS_OP", "+")]); (1, [("WHITESPACE", " "); ("INT32", "1"); ("STRING_TEXT", "}"); + ("STRING_TEXT", " "); ("STRING_TEXT", "def"); ("INTERP_STRING_END", "\"\"\"")]) + (2, + [("LET", "let"); ("WHITESPACE", " "); ("IDENT", "hello2t"); + ("WHITESPACE", " "); ("EQUALS", "="); ("WHITESPACE", " "); + ("STRING_TEXT", "$$\"\"\""); ("STRING_TEXT", "abc"); ("STRING_TEXT", " "); + ("INTERP_STRING_BEGIN_PART", "{{"); ("INT32", "1"); ("PLUS_MINUS_OP", "+")]); + (3, + [("WHITESPACE", " "); ("INT32", "1"); ("STRING_TEXT", "}}"); ("STRING_TEXT", " "); ("STRING_TEXT", "def"); ("INTERP_STRING_END", "\"\"\"")])] if actual <> expected then @@ -191,6 +201,48 @@ let ``Tokenizer test - multi-line nested string interpolation``() = printfn "expected = %A" expected Assert.Fail(sprintf "actual and expected did not match,actual =\n%A\nexpected=\n%A\n" actual expected) +[] +// checks nested '{' and nested single-quote strings +let ``Tokenizer test - multi-line nested string interpolation - double dollar``() = + let tokenizedLines = + tokenizeLines + [| "let hello1t = $$\"\"\"abc {{\"a\" + " + " { " + " contents = \"b\" " + " }.contents " + " }} def\"\"\"" |] + + let actual = + [ for lineNo, lineToks in tokenizedLines do + yield lineNo, [ for str, info in lineToks do yield info.TokenName, str ] ] + let expected = + [(0, + [("LET", "let"); ("WHITESPACE", " "); ("IDENT", "hello1t"); + ("WHITESPACE", " "); ("EQUALS", "="); ("WHITESPACE", " "); + ("STRING_TEXT", "$$\"\"\""); ("STRING_TEXT", "abc"); ("STRING_TEXT", " "); + ("INTERP_STRING_BEGIN_PART", "{{"); ("STRING_TEXT", "\""); ("STRING_TEXT", "a"); + ("STRING", "\""); ("WHITESPACE", " "); ("PLUS_MINUS_OP", "+"); + ("WHITESPACE", " ")]); + (1, + [("WHITESPACE", " "); ("LBRACE", "{"); + ("WHITESPACE", " ")]); + (2, + [("WHITESPACE", " "); ("IDENT", "contents"); + ("WHITESPACE", " "); ("EQUALS", "="); ("WHITESPACE", " "); + ("STRING_TEXT", "\""); ("STRING_TEXT", "b"); ("STRING", "\""); + ("WHITESPACE", " ")]); + (3, + [("WHITESPACE", " "); ("RBRACE", "}"); ("DOT", "."); + ("IDENT", "contents"); ("WHITESPACE", " ")]); + (4, + [("WHITESPACE", " "); ("STRING_TEXT", "}}"); + ("STRING_TEXT", " "); ("STRING_TEXT", "def"); ("INTERP_STRING_END", "\"\"\"")])] + + if actual <> expected then + printfn "actual = %A" actual + printfn "expected = %A" expected + Assert.Fail(sprintf "actual and expected did not match,actual =\n%A\nexpected=\n%A\n" actual expected) + [] let ``Tokenizer test - single-line nested string interpolation``() = let tokenizedLines = @@ -213,4 +265,3 @@ let ``Tokenizer test - single-line nested string interpolation``() = printfn "actual = %A" actual printfn "expected = %A" expected Assert.Fail(sprintf "actual and expected did not match,actual =\n%A\nexpected=\n%A\n" actual expected) -