diff --git a/rzk-js/Main.hs b/rzk-js/Main.hs index ea1812162..a604f54a6 100644 --- a/rzk-js/Main.hs +++ b/rzk-js/Main.hs @@ -6,6 +6,7 @@ import qualified GHCJS.Foreign.Callback as GHCJS import GHCJS.Marshal (fromJSVal, toJSVal) import GHCJS.Prim (JSVal) import Data.JSString(JSString, pack) +import qualified Data.Text as T import JavaScript.Object import JavaScript.Object.Internal (Object (..), create) import qualified Rzk.Main as Rzk @@ -23,8 +24,8 @@ main = do input <- maybe (Left "Could not turn JSRef to a String") Right <$> fromJSVal rawInput case Rzk.typecheckString =<< input of - Left err -> setStringProp "status" "error" o >> setStringProp "result" (pack err) o - Right ok -> setStringProp "status" "ok" o >> setStringProp "result" (pack ok) o + Left err -> setStringProp "status" "error" o >> setStringProp "result" (pack (T.unpack err)) o + Right ok -> setStringProp "status" "ok" o >> setStringProp "result" (pack (T.unpack ok)) o set_rzk_typecheck_callback callback diff --git a/rzk-js/rzk-js.cabal b/rzk-js/rzk-js.cabal index 4465e8d86..349551138 100644 --- a/rzk-js/rzk-js.cabal +++ b/rzk-js/rzk-js.cabal @@ -9,7 +9,7 @@ executable rzk-js main-is: Main.hs ghcjs-options: -dedupe - build-depends: base, rzk + build-depends: base, rzk, text >=1.2.3.1 if impl(ghcjs) build-depends: ghcjs-base, ghcjs-prim diff --git a/rzk/Makefile b/rzk/Makefile index 7660ab833..c9c97f699 100644 --- a/rzk/Makefile +++ b/rzk/Makefile @@ -9,7 +9,7 @@ clean: src/Language/Rzk/Syntax/Test: src/Language/Rzk/Syntax.cf cd src/ \ - && bnfc -d Language/Rzk/Syntax.cf -p Language.Rzk --makefile=Language/Rzk/Makefile --generic --functor \ + && bnfc -d Language/Rzk/Syntax.cf -p Language.Rzk --makefile=Language/Rzk/Makefile --generic --functor --text-token \ && make --makefile=Language/Rzk/Makefile \ && rm Language/Rzk/Syntax/Test.hs ; \ cd ../ diff --git a/rzk/Setup.hs b/rzk/Setup.hs index f310944b7..5d6b27605 100644 --- a/rzk/Setup.hs +++ b/rzk/Setup.hs @@ -20,7 +20,7 @@ main = defaultMainWithHooks $ simpleUserHooks { hookedPrograms = [ bnfcProgram ] , postConf = \args flags packageDesc localBuildInfo -> do #ifndef mingw32_HOST_OS - _ <- system "bnfc -d -p Language.Rzk --generic --functor -o src/ grammar/Syntax.cf" + _ <- system "bnfc -d -p Language.Rzk --generic --functor --text-token -o src/ grammar/Syntax.cf" #endif postConf simpleUserHooks args flags packageDesc localBuildInfo } diff --git a/rzk/app/Main.hs b/rzk/app/Main.hs index d2f6c4c40..8a7528328 100644 --- a/rzk/app/Main.hs +++ b/rzk/app/Main.hs @@ -2,18 +2,17 @@ {-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE LambdaCase #-} +{-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TupleSections #-} -{-# LANGUAGE NamedFieldPuns #-} module Main (main) where #ifndef __GHCJS__ -import Main.Utf8 (withUtf8) +import Main.Utf8 (withUtf8) #endif -import Control.Monad (forM, forM_, unless, when, - (>=>)) +import Control.Monad (forM, forM_, unless, when, (>=>)) import Data.Version (showVersion) #ifdef LSP @@ -24,11 +23,13 @@ import Options.Generic import System.Exit (exitFailure, exitSuccess) import Data.Functor (void, (<&>)) +import qualified Data.Text.IO as T + import Paths_rzk (version) import Rzk.Format (formatFile, formatFileWrite, isWellFormattedFile) -import Rzk.TypeCheck import Rzk.Main +import Rzk.TypeCheck data FormatOptions = FormatOptions { check :: Bool @@ -78,7 +79,7 @@ main = do case expandedPaths of [] -> error "No files found" filePaths -> do - when (not check && not write) $ forM_ filePaths (formatFile >=> putStrLn) + when (not check && not write) $ forM_ filePaths (formatFile >=> T.putStrLn) when write $ forM_ filePaths formatFileWrite when check $ do results <- forM filePaths $ \path -> isWellFormattedFile path <&> (path,) diff --git a/rzk/src/Language/Rzk/Free/Syntax.hs b/rzk/src/Language/Rzk/Free/Syntax.hs index 786b17b6b..4eb4a12dd 100644 --- a/rzk/src/Language/Rzk/Free/Syntax.hs +++ b/rzk/src/Language/Rzk/Free/Syntax.hs @@ -19,6 +19,7 @@ import Data.Functor (void) import Data.List (intercalate, nub, (\\)) import Data.Maybe (fromMaybe) import Data.String +import qualified Data.Text as T import Free.Scoped import Free.Scoped.TH @@ -428,18 +429,18 @@ incVarIdentIndex (VarIdent (Rzk.VarIdent loc token)) = -- | Increment the subscript number at the end of the indentifier. -- --- >>> putStrLn $ incIndex "x" +-- >>> putStrLn $ T.unpack $ incIndex "x" -- x₁ --- >>> putStrLn $ incIndex "x₁₉" +-- >>> putStrLn $ T.unpack $ incIndex "x₁₉" -- x₂₀ -incIndex :: String -> String -incIndex s = name <> newIndex +incIndex :: T.Text -> T.Text +incIndex s = T.pack $ name <> newIndex where digitsSub = "₀₁₂₃₄₅₆₇₈₉" :: String isDigitSub = (`elem` digitsSub) digitFromSub c = chr ((ord c - ord '₀') + ord '0') digitToSub c = chr ((ord c - ord '0') + ord '₀') - (name, index) = break isDigitSub s + (name, index) = break isDigitSub (T.unpack s) oldIndexN = read ('0' : map digitFromSub index) -- FIXME: read newIndex = map digitToSub (show (oldIndexN + 1)) @@ -447,9 +448,9 @@ instance Show Term' where show = Rzk.printTree . fromTerm' instance IsString Term' where - fromString = toTerm' . fromRight . Rzk.parseTerm + fromString = toTerm' . fromRight . Rzk.parseTerm . T.pack where - fromRight (Left err) = error ("Parse error: " <> err) + fromRight (Left err) = error (T.unpack $ "Parse error: " <> err) fromRight (Right t) = t instance Show TermT' where diff --git a/rzk/src/Language/Rzk/Syntax.hs b/rzk/src/Language/Rzk/Syntax.hs index 8a93e47f7..da3d8e7fb 100644 --- a/rzk/src/Language/Rzk/Syntax.hs +++ b/rzk/src/Language/Rzk/Syntax.hs @@ -21,48 +21,49 @@ import Control.Exception (Exception (..), SomeException, evaluate, try) import Data.Char (isSpace) -import qualified Data.List as List +import qualified Data.Text as T import Language.Rzk.Syntax.Abs import qualified Language.Rzk.Syntax.Layout as Layout import qualified Language.Rzk.Syntax.Print as Print -import Language.Rzk.Syntax.Lex (tokens, Token) +import Control.Arrow (ArrowChoice (left)) +import GHC.IO (unsafePerformIO) +import Language.Rzk.Syntax.Lex (Token, tokens) import Language.Rzk.Syntax.Par (pModule, pTerm) -import GHC.IO (unsafePerformIO) -tryOrDisplayException :: Either String a -> IO (Either String a) +tryOrDisplayException :: Either T.Text a -> IO (Either T.Text a) tryOrDisplayException = tryOrDisplayExceptionIO . evaluate -tryOrDisplayExceptionIO :: IO (Either String a) -> IO (Either String a) +tryOrDisplayExceptionIO :: IO (Either T.Text a) -> IO (Either T.Text a) tryOrDisplayExceptionIO x = try x >>= \case - Left (ex :: SomeException) -> return (Left (displayException ex)) + Left (ex :: SomeException) -> return (Left (T.pack $ displayException ex)) Right result -> return result -parseModuleSafe :: String -> IO (Either String Module) +parseModuleSafe :: T.Text -> IO (Either T.Text Module) parseModuleSafe = tryOrDisplayException . parseModule -parseModule :: String -> Either String Module -parseModule = pModule . Layout.resolveLayout True . tokens . tryExtractMarkdownCodeBlocks "rzk" +parseModule :: T.Text -> Either T.Text Module +parseModule = left T.pack . pModule . Layout.resolveLayout True . tokens . tryExtractMarkdownCodeBlocks "rzk" -parseModuleRzk :: String -> Either String Module -parseModuleRzk = pModule . Layout.resolveLayout True . tokens +parseModuleRzk :: T.Text -> Either T.Text Module +parseModuleRzk = left T.pack . pModule . Layout.resolveLayout True . tokens -parseModuleFile :: FilePath -> IO (Either String Module) +parseModuleFile :: FilePath -> IO (Either T.Text Module) parseModuleFile path = do source <- readFile path - parseModuleSafe source + parseModuleSafe (T.pack source) -parseTerm :: String -> Either String Term -parseTerm = pTerm . tokens +parseTerm :: T.Text -> Either T.Text Term +parseTerm = left T.pack . pTerm . tokens -tryExtractMarkdownCodeBlocks :: String -> String -> String +tryExtractMarkdownCodeBlocks :: T.Text -> T.Text -> T.Text tryExtractMarkdownCodeBlocks alias input - | ("```" <> alias <> "\n") `List.isInfixOf` input = extractMarkdownCodeBlocks alias input + | ("```" <> alias <> "\n") `T.isInfixOf` input = extractMarkdownCodeBlocks alias input | otherwise = input -data LineType = NonCode | CodeOf String +data LineType = NonCode | CodeOf T.Text -- | Extract code for a given alias (e.g. "rzk" or "haskell") from a Markdown file -- by replacing any lines that do not belong to the code in that language with blank lines. @@ -86,7 +87,7 @@ data LineType = NonCode | CodeOf String -- := U -- ``` -- asda --- >>> putStrLn $ extractMarkdownCodeBlocks "rzk" example +-- >>> putStrLn $ T.unpack $ extractMarkdownCodeBlocks "rzk" example -- -- -- #lang rzk-1 @@ -98,8 +99,8 @@ data LineType = NonCode | CodeOf String -- -- -- -extractMarkdownCodeBlocks :: String -> String -> String -extractMarkdownCodeBlocks alias = unlines . blankNonCode NonCode . map trim . lines +extractMarkdownCodeBlocks :: T.Text -> T.Text -> T.Text +extractMarkdownCodeBlocks alias = T.unlines . blankNonCode NonCode . map trim . T.lines where blankNonCode _prevType [] = [] blankNonCode prevType (line : lines_) = @@ -110,19 +111,20 @@ extractMarkdownCodeBlocks alias = unlines . blankNonCode NonCode . map trim . li | otherwise -> "" : blankNonCode prevType lines_ NonCode -> "" : blankNonCode (identifyCodeBlockStart line) lines_ - trim = List.dropWhileEnd isSpace + trim = T.dropWhileEnd isSpace -identifyCodeBlockStart :: String -> LineType +identifyCodeBlockStart :: T.Text -> LineType identifyCodeBlockStart line | prefix == "```" = - case words suffix of - [] -> CodeOf "text" -- default to text - ('{':'.':lang) : _options -> CodeOf lang -- ``` {.rzk ... - "{" : ('.':lang) : _options -> CodeOf lang -- ``` { .rzk ... - lang : _options -> CodeOf lang -- ```rzk ... + -- TODO: find if there is a better way to pattern match than pack/unpack + case map T.unpack $ T.words suffix of + [] -> CodeOf "text" -- default to text + ('{': '.' : lang) : _options -> CodeOf (T.pack lang) -- ``` {.rzk ... + "{" : ('.':lang) : _options -> CodeOf (T.pack lang) -- ``` { .rzk ... + lang : _options -> CodeOf (T.pack lang) -- ```rzk ... | otherwise = NonCode where - (prefix, suffix) = List.splitAt 3 line + (prefix, suffix) = T.splitAt 3 line -- * Making BNFC resolveLayout safer diff --git a/rzk/src/Language/Rzk/Syntax/Abs.hs b/rzk/src/Language/Rzk/Syntax/Abs.hs index ad4c07fbe..cd11e9bfc 100644 --- a/rzk/src/Language/Rzk/Syntax/Abs.hs +++ b/rzk/src/Language/Rzk/Syntax/Abs.hs @@ -1,12 +1,11 @@ --- File generated by the BNF Converter (bnfc 2.9.5). +-- File generated by the BNF Converter (bnfc 2.9.6). -{-# LANGUAGE DeriveDataTypeable #-} -{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveTraversable #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE PatternSynonyms #-} +{-# LANGUAGE OverloadedStrings #-} -- | The abstract syntax of language Syntax. @@ -20,28 +19,27 @@ import qualified Prelude as C ) import qualified Data.String -import qualified Data.Data as C (Data, Typeable) -import qualified GHC.Generics as C (Generic) +import qualified Data.Text type Module = Module' BNFC'Position data Module' a = Module a (LanguageDecl' a) [Command' a] - deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable, C.Data, C.Typeable, C.Generic) + deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable) type HoleIdent = HoleIdent' BNFC'Position data HoleIdent' a = HoleIdent a HoleIdentToken - deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable, C.Data, C.Typeable, C.Generic) + deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable) type VarIdent = VarIdent' BNFC'Position data VarIdent' a = VarIdent a VarIdentToken - deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable, C.Data, C.Typeable, C.Generic) + deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable) type LanguageDecl = LanguageDecl' BNFC'Position data LanguageDecl' a = LanguageDecl a (Language' a) - deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable, C.Data, C.Typeable, C.Generic) + deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable) type Language = Language' BNFC'Position data Language' a = Rzk1 a - deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable, C.Data, C.Typeable, C.Generic) + deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable) type Command = Command' BNFC'Position data Command' a @@ -56,23 +54,23 @@ data Command' a | CommandSection a (SectionName' a) | CommandSectionEnd a (SectionName' a) | CommandDefine a (VarIdent' a) (DeclUsedVars' a) [Param' a] (Term' a) (Term' a) - deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable, C.Data, C.Typeable, C.Generic) + deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable) type DeclUsedVars = DeclUsedVars' BNFC'Position data DeclUsedVars' a = DeclUsedVars a [VarIdent' a] - deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable, C.Data, C.Typeable, C.Generic) + deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable) type SectionName = SectionName' BNFC'Position data SectionName' a = NoSectionName a | SomeSectionName a (VarIdent' a) - deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable, C.Data, C.Typeable, C.Generic) + deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable) type Pattern = Pattern' BNFC'Position data Pattern' a = PatternUnit a | PatternVar a (VarIdent' a) | PatternPair a (Pattern' a) (Pattern' a) - deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable, C.Data, C.Typeable, C.Generic) + deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable) type Param = Param' BNFC'Position data Param' a @@ -80,7 +78,7 @@ data Param' a | ParamPatternType a [Pattern' a] (Term' a) | ParamPatternShape a [Pattern' a] (Term' a) (Term' a) | ParamPatternShapeDeprecated a (Pattern' a) (Term' a) (Term' a) - deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable, C.Data, C.Typeable, C.Generic) + deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable) type ParamDecl = ParamDecl' BNFC'Position data ParamDecl' a @@ -89,13 +87,13 @@ data ParamDecl' a | ParamTermShape a (Term' a) (Term' a) (Term' a) | ParamTermTypeDeprecated a (Pattern' a) (Term' a) | ParamVarShapeDeprecated a (Pattern' a) (Term' a) (Term' a) - deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable, C.Data, C.Typeable, C.Generic) + deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable) type Restriction = Restriction' BNFC'Position data Restriction' a = Restriction a (Term' a) (Term' a) | ASCII_Restriction a (Term' a) (Term' a) - deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable, C.Data, C.Typeable, C.Generic) + deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable) type Term = Term' BNFC'Position data Term' a @@ -152,7 +150,7 @@ data Term' a | ASCII_TypeExtensionDeprecated a (ParamDecl' a) (Term' a) | ASCII_First a (Term' a) | ASCII_Second a (Term' a) - deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable, C.Data, C.Typeable, C.Generic) + deriving (C.Eq, C.Ord, C.Show, C.Read, C.Functor, C.Foldable, C.Traversable) commandPostulateNoParams :: a -> VarIdent' a -> DeclUsedVars' a -> Term' a -> Command' a commandPostulateNoParams = \ _a x vars ty -> CommandPostulate _a x vars [] ty @@ -184,11 +182,11 @@ ascii_CubeProduct = \ _a l r -> CubeProduct _a l r unicode_TypeSigmaAlt :: a -> Pattern' a -> Term' a -> Term' a -> Term' a unicode_TypeSigmaAlt = \ _a pat fst snd -> TypeSigma _a pat fst snd -newtype VarIdentToken = VarIdentToken String - deriving (C.Eq, C.Ord, C.Show, C.Read, C.Data, C.Typeable, C.Generic, Data.String.IsString) +newtype VarIdentToken = VarIdentToken Data.Text.Text + deriving (C.Eq, C.Ord, C.Show, C.Read, Data.String.IsString) -newtype HoleIdentToken = HoleIdentToken String - deriving (C.Eq, C.Ord, C.Show, C.Read, C.Data, C.Typeable, C.Generic, Data.String.IsString) +newtype HoleIdentToken = HoleIdentToken Data.Text.Text + deriving (C.Eq, C.Ord, C.Show, C.Read, Data.String.IsString) -- | Start position (line, column) of something. diff --git a/rzk/src/Language/Rzk/Syntax/Doc.txt b/rzk/src/Language/Rzk/Syntax/Doc.txt index ea571c2c7..c9fcc98ec 100644 --- a/rzk/src/Language/Rzk/Syntax/Doc.txt +++ b/rzk/src/Language/Rzk/Syntax/Doc.txt @@ -1,186 +1,186 @@ -The Language Syntax -BNF Converter - - -%Process by txt2tags to generate html or latex - - - -This document was automatically generated by the //BNF-Converter//. It was generated together with the lexer, the parser, and the abstract syntax module, which guarantees that the document matches with the implementation of the language (provided no hand-hacking has taken place). - -==The lexical structure of Syntax== - -===Literals=== -String literals //String// have the form -``"``//x//``"``}, where //x// is any sequence of any characters -except ``"`` unless preceded by ``\``. - - - - - -VarIdentToken literals are recognized by the regular expression -`````(char - [" - !"#(),-.;<>?[\]{|}"]) (char - [" - "#(),;<>[\]{|}"])*````` - -HoleIdentToken literals are recognized by the regular expression -`````'?'````` - - -===Reserved words and symbols=== -The set of reserved words is the set of terminals appearing in the grammar. Those reserved words that consist of non-letter characters are called symbols, and they are treated in a different way from those that are similar to identifiers. The lexer follows rules familiar from languages like Haskell, C, and Java, including longest match and spacing conventions. - -The reserved words used in Syntax are the following: - | ``BOT`` | ``CUBE`` | ``Sigma`` | ``TOP`` - | ``TOPE`` | ``U`` | ``Unit`` | ``as`` - | ``first`` | ``idJ`` | ``recBOT`` | ``recOR`` - | ``refl`` | ``second`` | ``unit`` | ``uses`` - | ``Σ`` | ``π₁`` | ``π₂`` | - -The symbols used in Syntax are the following: - | #lang | ; | rzk-1 | #set-option - | = | #unset-option | #check | : - | #compute | #compute-whnf | #compute-nf | #postulate - | #assume | #variable | #variables | #section - | #end | #define | := | #def - | ( | ) | , | | - | { | } | ↦ | 1 - | *₁ | 2 | 0₂ | 1₂ - | × | ⊤ | ⊥ | ≡ - | ≤ | ∧ | ∨ | → - | =_{ | [ | ] | < - | > | \ | refl_{ | * - | *_1 | 0_2 | 1_2 | === - | <= | /\ | \/ | -> - | |-> | ∑ | | - -===Comments=== -Single-line comments begin with --.Multiple-line comments are enclosed with {- and -}. - -==The syntactic structure of Syntax== -Non-terminals are enclosed between < and >. -The symbols -> (production), **|** (union) -and **eps** (empty rule) belong to the BNF notation. -All other symbols are terminals. - - | //Module// | -> | //LanguageDecl// //[Command]// - | //HoleIdent// | -> | //HoleIdentToken// - | //VarIdent// | -> | //VarIdentToken// - | //[VarIdent]// | -> | //VarIdent// - | | **|** | //VarIdent// //[VarIdent]// - | //LanguageDecl// | -> | ``#lang`` //Language// ``;`` - | //Language// | -> | ``rzk-1`` - | //Command// | -> | ``#set-option`` //String// ``=`` //String// - | | **|** | ``#unset-option`` //String// - | | **|** | ``#check`` //Term// ``:`` //Term// - | | **|** | ``#compute`` //Term// - | | **|** | ``#compute-whnf`` //Term// - | | **|** | ``#compute-nf`` //Term// - | | **|** | ``#postulate`` //VarIdent// //DeclUsedVars// //[Param]// ``:`` //Term// - | | **|** | ``#postulate`` //VarIdent// //DeclUsedVars// ``:`` //Term// - | | **|** | ``#assume`` //[VarIdent]// ``:`` //Term// - | | **|** | ``#variable`` //VarIdent// ``:`` //Term// - | | **|** | ``#variables`` //[VarIdent]// ``:`` //Term// - | | **|** | ``#section`` //SectionName// - | | **|** | ``#end`` //SectionName// - | | **|** | ``#define`` //VarIdent// //DeclUsedVars// //[Param]// ``:`` //Term// ``:=`` //Term// - | | **|** | ``#define`` //VarIdent// //DeclUsedVars// ``:`` //Term// ``:=`` //Term// - | | **|** | ``#def`` //VarIdent// //DeclUsedVars// //[Param]// ``:`` //Term// ``:=`` //Term// - | | **|** | ``#def`` //VarIdent// //DeclUsedVars// ``:`` //Term// ``:=`` //Term// - | //[Command]// | -> | **eps** - | | **|** | //Command// ``;`` //[Command]// - | //DeclUsedVars// | -> | ``uses`` ``(`` //[VarIdent]// ``)`` - | | **|** | **eps** - | //SectionName// | -> | **eps** - | | **|** | //VarIdent// - | //Pattern// | -> | ``unit`` - | | **|** | //VarIdent// - | | **|** | ``(`` //Pattern// ``,`` //Pattern// ``)`` - | //[Pattern]// | -> | //Pattern// - | | **|** | //Pattern// //[Pattern]// - | //Param// | -> | //Pattern// - | | **|** | ``(`` //[Pattern]// ``:`` //Term// ``)`` - | | **|** | ``(`` //[Pattern]// ``:`` //Term// ``|`` //Term// ``)`` - | | **|** | ``{`` //Pattern// ``:`` //Term// ``|`` //Term// ``}`` - | //[Param]// | -> | //Param// - | | **|** | //Param// //[Param]// - | //ParamDecl// | -> | //Term6// - | | **|** | ``(`` //Term// ``:`` //Term// ``)`` - | | **|** | ``(`` //Term// ``:`` //Term// ``|`` //Term// ``)`` - | | **|** | ``{`` //Pattern// ``:`` //Term// ``}`` - | | **|** | ``{`` ``(`` //Pattern// ``:`` //Term// ``)`` ``|`` //Term// ``}`` - | | **|** | ``{`` //Pattern// ``:`` //Term// ``|`` //Term// ``}`` - | //Restriction// | -> | //Term// ``↦`` //Term// - | | **|** | //Term// ``|->`` //Term// - | //[Restriction]// | -> | //Restriction// - | | **|** | //Restriction// ``,`` //[Restriction]// - | //Term7// | -> | ``U`` - | | **|** | ``CUBE`` - | | **|** | ``TOPE`` - | | **|** | ``1`` - | | **|** | ``*₁`` - | | **|** | ``2`` - | | **|** | ``0₂`` - | | **|** | ``1₂`` - | | **|** | ``⊤`` - | | **|** | ``⊥`` - | | **|** | ``recBOT`` - | | **|** | ``recOR`` ``(`` //[Restriction]// ``)`` - | | **|** | ``recOR`` ``(`` //Term// ``,`` //Term// ``,`` //Term// ``,`` //Term// ``)`` - | | **|** | ``Unit`` - | | **|** | ``<`` //ParamDecl// ``→`` //Term// ``>`` - | | **|** | ``(`` //Term// ``,`` //Term// ``)`` - | | **|** | ``unit`` - | | **|** | ``refl`` - | | **|** | ``refl_{`` //Term// ``}`` - | | **|** | ``refl_{`` //Term// ``:`` //Term// ``}`` - | | **|** | ``idJ`` ``(`` //Term// ``,`` //Term// ``,`` //Term// ``,`` //Term// ``,`` //Term// ``,`` //Term// ``)`` - | | **|** | //HoleIdent// - | | **|** | //VarIdent// - | | **|** | ``(`` //Term// ``)`` - | | **|** | ``*_1`` - | | **|** | ``0_2`` - | | **|** | ``1_2`` - | | **|** | ``TOP`` - | | **|** | ``BOT`` - | | **|** | ``<`` //ParamDecl// ``->`` //Term// ``>`` - | //Term5// | -> | //Term5// ``×`` //Term6// - | | **|** | //Term6// - | | **|** | //Term5// ``*`` //Term6// - | //Term4// | -> | //Term5// ``≡`` //Term5// - | | **|** | //Term5// ``≤`` //Term5// - | | **|** | //Term5// - | | **|** | //Term5// ``===`` //Term5// - | | **|** | //Term5// ``<=`` //Term5// - | //Term3// | -> | //Term4// ``∧`` //Term3// - | | **|** | //Term4// - | | **|** | //Term4// ``/\`` //Term3// - | //Term2// | -> | //Term3// ``∨`` //Term2// - | | **|** | //Term3// - | | **|** | //Term3// ``\/`` //Term2// - | //Term1// | -> | //ParamDecl// ``→`` //Term1// - | | **|** | ``Σ`` ``(`` //Pattern// ``:`` //Term// ``)`` ``,`` //Term1// - | | **|** | //Term2// ``=_{`` //Term// ``}`` //Term2// - | | **|** | //Term2// ``=`` //Term2// - | | **|** | ``\`` //[Param]// ``→`` //Term1// - | | **|** | //Term2// - | | **|** | //ParamDecl// ``->`` //Term1// - | | **|** | ``Sigma`` ``(`` //Pattern// ``:`` //Term// ``)`` ``,`` //Term1// - | | **|** | ``\`` //[Param]// ``->`` //Term1// - | | **|** | ``∑`` ``(`` //Pattern// ``:`` //Term// ``)`` ``,`` //Term1// - | //Term6// | -> | //Term6// ``[`` //[Restriction]// ``]`` - | | **|** | //Term6// //Term7// - | | **|** | ``π₁`` //Term7// - | | **|** | ``π₂`` //Term7// - | | **|** | //Term7// - | | **|** | ``first`` //Term7// - | | **|** | ``second`` //Term7// - | //Term// | -> | //Term2// ``as`` //Term1// - | | **|** | //Term1// - | //[Term]// | -> | //Term// - | | **|** | //Term// ``,`` //[Term]// - - - -%% File generated by the BNF Converter (bnfc 2.9.5). +The Language Syntax +BNF Converter + + +%Process by txt2tags to generate html or latex + + + +This document was automatically generated by the //BNF-Converter//. It was generated together with the lexer, the parser, and the abstract syntax module, which guarantees that the document matches with the implementation of the language (provided no hand-hacking has taken place). + +==The lexical structure of Syntax== + +===Literals=== +String literals //String// have the form +``"``//x//``"``}, where //x// is any sequence of any characters +except ``"`` unless preceded by ``\``. + + + + + +VarIdentToken literals are recognized by the regular expression +`````(char - [" + !"#(),-.;<>?[\]{|}"]) (char - [" + "#(),;<>[\]{|}"])*````` + +HoleIdentToken literals are recognized by the regular expression +`````'?'````` + + +===Reserved words and symbols=== +The set of reserved words is the set of terminals appearing in the grammar. Those reserved words that consist of non-letter characters are called symbols, and they are treated in a different way from those that are similar to identifiers. The lexer follows rules familiar from languages like Haskell, C, and Java, including longest match and spacing conventions. + +The reserved words used in Syntax are the following: + | ``BOT`` | ``CUBE`` | ``Sigma`` | ``TOP`` + | ``TOPE`` | ``U`` | ``Unit`` | ``as`` + | ``first`` | ``idJ`` | ``recBOT`` | ``recOR`` + | ``refl`` | ``second`` | ``unit`` | ``uses`` + | ``Σ`` | ``π₁`` | ``π₂`` | + +The symbols used in Syntax are the following: + | #lang | ; | rzk-1 | #set-option + | = | #unset-option | #check | : + | #compute | #compute-whnf | #compute-nf | #postulate + | #assume | #variable | #variables | #section + | #end | #define | := | #def + | ( | ) | , | | + | { | } | ↦ | 1 + | *₁ | 2 | 0₂ | 1₂ + | × | ⊤ | ⊥ | ≡ + | ≤ | ∧ | ∨ | → + | =_{ | [ | ] | < + | > | \ | refl_{ | * + | *_1 | 0_2 | 1_2 | === + | <= | /\ | \/ | -> + | |-> | ∑ | | + +===Comments=== +Single-line comments begin with --.Multiple-line comments are enclosed with {- and -}. + +==The syntactic structure of Syntax== +Non-terminals are enclosed between < and >. +The symbols -> (production), **|** (union) +and **eps** (empty rule) belong to the BNF notation. +All other symbols are terminals. + + | //Module// | -> | //LanguageDecl// //[Command]// + | //HoleIdent// | -> | //HoleIdentToken// + | //VarIdent// | -> | //VarIdentToken// + | //[VarIdent]// | -> | //VarIdent// + | | **|** | //VarIdent// //[VarIdent]// + | //LanguageDecl// | -> | ``#lang`` //Language// ``;`` + | //Language// | -> | ``rzk-1`` + | //Command// | -> | ``#set-option`` //String// ``=`` //String// + | | **|** | ``#unset-option`` //String// + | | **|** | ``#check`` //Term// ``:`` //Term// + | | **|** | ``#compute`` //Term// + | | **|** | ``#compute-whnf`` //Term// + | | **|** | ``#compute-nf`` //Term// + | | **|** | ``#postulate`` //VarIdent// //DeclUsedVars// //[Param]// ``:`` //Term// + | | **|** | ``#postulate`` //VarIdent// //DeclUsedVars// ``:`` //Term// + | | **|** | ``#assume`` //[VarIdent]// ``:`` //Term// + | | **|** | ``#variable`` //VarIdent// ``:`` //Term// + | | **|** | ``#variables`` //[VarIdent]// ``:`` //Term// + | | **|** | ``#section`` //SectionName// + | | **|** | ``#end`` //SectionName// + | | **|** | ``#define`` //VarIdent// //DeclUsedVars// //[Param]// ``:`` //Term// ``:=`` //Term// + | | **|** | ``#define`` //VarIdent// //DeclUsedVars// ``:`` //Term// ``:=`` //Term// + | | **|** | ``#def`` //VarIdent// //DeclUsedVars// //[Param]// ``:`` //Term// ``:=`` //Term// + | | **|** | ``#def`` //VarIdent// //DeclUsedVars// ``:`` //Term// ``:=`` //Term// + | //[Command]// | -> | **eps** + | | **|** | //Command// ``;`` //[Command]// + | //DeclUsedVars// | -> | ``uses`` ``(`` //[VarIdent]// ``)`` + | | **|** | **eps** + | //SectionName// | -> | **eps** + | | **|** | //VarIdent// + | //Pattern// | -> | ``unit`` + | | **|** | //VarIdent// + | | **|** | ``(`` //Pattern// ``,`` //Pattern// ``)`` + | //[Pattern]// | -> | //Pattern// + | | **|** | //Pattern// //[Pattern]// + | //Param// | -> | //Pattern// + | | **|** | ``(`` //[Pattern]// ``:`` //Term// ``)`` + | | **|** | ``(`` //[Pattern]// ``:`` //Term// ``|`` //Term// ``)`` + | | **|** | ``{`` //Pattern// ``:`` //Term// ``|`` //Term// ``}`` + | //[Param]// | -> | //Param// + | | **|** | //Param// //[Param]// + | //ParamDecl// | -> | //Term6// + | | **|** | ``(`` //Term// ``:`` //Term// ``)`` + | | **|** | ``(`` //Term// ``:`` //Term// ``|`` //Term// ``)`` + | | **|** | ``{`` //Pattern// ``:`` //Term// ``}`` + | | **|** | ``{`` ``(`` //Pattern// ``:`` //Term// ``)`` ``|`` //Term// ``}`` + | | **|** | ``{`` //Pattern// ``:`` //Term// ``|`` //Term// ``}`` + | //Restriction// | -> | //Term// ``↦`` //Term// + | | **|** | //Term// ``|->`` //Term// + | //[Restriction]// | -> | //Restriction// + | | **|** | //Restriction// ``,`` //[Restriction]// + | //Term7// | -> | ``U`` + | | **|** | ``CUBE`` + | | **|** | ``TOPE`` + | | **|** | ``1`` + | | **|** | ``*₁`` + | | **|** | ``2`` + | | **|** | ``0₂`` + | | **|** | ``1₂`` + | | **|** | ``⊤`` + | | **|** | ``⊥`` + | | **|** | ``recBOT`` + | | **|** | ``recOR`` ``(`` //[Restriction]// ``)`` + | | **|** | ``recOR`` ``(`` //Term// ``,`` //Term// ``,`` //Term// ``,`` //Term// ``)`` + | | **|** | ``Unit`` + | | **|** | ``<`` //ParamDecl// ``→`` //Term// ``>`` + | | **|** | ``(`` //Term// ``,`` //Term// ``)`` + | | **|** | ``unit`` + | | **|** | ``refl`` + | | **|** | ``refl_{`` //Term// ``}`` + | | **|** | ``refl_{`` //Term// ``:`` //Term// ``}`` + | | **|** | ``idJ`` ``(`` //Term// ``,`` //Term// ``,`` //Term// ``,`` //Term// ``,`` //Term// ``,`` //Term// ``)`` + | | **|** | //HoleIdent// + | | **|** | //VarIdent// + | | **|** | ``(`` //Term// ``)`` + | | **|** | ``*_1`` + | | **|** | ``0_2`` + | | **|** | ``1_2`` + | | **|** | ``TOP`` + | | **|** | ``BOT`` + | | **|** | ``<`` //ParamDecl// ``->`` //Term// ``>`` + | //Term5// | -> | //Term5// ``×`` //Term6// + | | **|** | //Term6// + | | **|** | //Term5// ``*`` //Term6// + | //Term4// | -> | //Term5// ``≡`` //Term5// + | | **|** | //Term5// ``≤`` //Term5// + | | **|** | //Term5// + | | **|** | //Term5// ``===`` //Term5// + | | **|** | //Term5// ``<=`` //Term5// + | //Term3// | -> | //Term4// ``∧`` //Term3// + | | **|** | //Term4// + | | **|** | //Term4// ``/\`` //Term3// + | //Term2// | -> | //Term3// ``∨`` //Term2// + | | **|** | //Term3// + | | **|** | //Term3// ``\/`` //Term2// + | //Term1// | -> | //ParamDecl// ``→`` //Term1// + | | **|** | ``Σ`` ``(`` //Pattern// ``:`` //Term// ``)`` ``,`` //Term1// + | | **|** | //Term2// ``=_{`` //Term// ``}`` //Term2// + | | **|** | //Term2// ``=`` //Term2// + | | **|** | ``\`` //[Param]// ``→`` //Term1// + | | **|** | //Term2// + | | **|** | //ParamDecl// ``->`` //Term1// + | | **|** | ``Sigma`` ``(`` //Pattern// ``:`` //Term// ``)`` ``,`` //Term1// + | | **|** | ``\`` //[Param]// ``->`` //Term1// + | | **|** | ``∑`` ``(`` //Pattern// ``:`` //Term// ``)`` ``,`` //Term1// + | //Term6// | -> | //Term6// ``[`` //[Restriction]// ``]`` + | | **|** | //Term6// //Term7// + | | **|** | ``π₁`` //Term7// + | | **|** | ``π₂`` //Term7// + | | **|** | //Term7// + | | **|** | ``first`` //Term7// + | | **|** | ``second`` //Term7// + | //Term// | -> | //Term2// ``as`` //Term1// + | | **|** | //Term1// + | //[Term]// | -> | //Term// + | | **|** | //Term// ``,`` //[Term]// + + + +%% File generated by the BNF Converter (bnfc 2.9.6). diff --git a/rzk/src/Language/Rzk/Syntax/ErrM.hs b/rzk/src/Language/Rzk/Syntax/ErrM.hs index dfdbb62f2..0c5e84b8c 100644 --- a/rzk/src/Language/Rzk/Syntax/ErrM.hs +++ b/rzk/src/Language/Rzk/Syntax/ErrM.hs @@ -1,4 +1,4 @@ --- File generated by the BNF Converter (bnfc 2.9.5). +-- File generated by the BNF Converter (bnfc 2.9.6). {-# LANGUAGE CPP #-} diff --git a/rzk/src/Language/Rzk/Syntax/Layout.hs b/rzk/src/Language/Rzk/Syntax/Layout.hs index a1b7e9f27..fc83e7a40 100644 --- a/rzk/src/Language/Rzk/Syntax/Layout.hs +++ b/rzk/src/Language/Rzk/Syntax/Layout.hs @@ -1,4 +1,4 @@ --- File generated by the BNF Converter (bnfc 2.9.5). +-- File generated by the BNF Converter (bnfc 2.9.6). {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-} diff --git a/rzk/src/Language/Rzk/Syntax/Lex.hs b/rzk/src/Language/Rzk/Syntax/Lex.hs index ebf16f985..8fe55c7a1 100644 --- a/rzk/src/Language/Rzk/Syntax/Lex.hs +++ b/rzk/src/Language/Rzk/Syntax/Lex.hs @@ -11,6 +11,7 @@ module Language.Rzk.Syntax.Lex where import Prelude +import qualified Data.Text import qualified Data.Bits import Data.Char (ord) import Data.Function (on) @@ -228,234 +229,6 @@ alex_actions = array (0 :: Int, 29) , (0,alex_action_3) ] -{-# LINE 64 "Language/Rzk/Syntax/Lex.x" #-} --- | Create a token with position. -tok :: (String -> Tok) -> (Posn -> String -> Token) -tok f p = PT p . f - --- | Token without position. -data Tok - = TK {-# UNPACK #-} !TokSymbol -- ^ Reserved word or symbol. - | TL !String -- ^ String literal. - | TI !String -- ^ Integer literal. - | TV !String -- ^ Identifier. - | TD !String -- ^ Float literal. - | TC !String -- ^ Character literal. - | T_VarIdentToken !String - | T_HoleIdentToken !String - deriving (Eq, Show, Ord) - --- | Smart constructor for 'Tok' for the sake of backwards compatibility. -pattern TS :: String -> Int -> Tok -pattern TS t i = TK (TokSymbol t i) - --- | Keyword or symbol tokens have a unique ID. -data TokSymbol = TokSymbol - { tsText :: String - -- ^ Keyword or symbol text. - , tsID :: !Int - -- ^ Unique ID. - } deriving (Show) - --- | Keyword/symbol equality is determined by the unique ID. -instance Eq TokSymbol where (==) = (==) `on` tsID - --- | Keyword/symbol ordering is determined by the unique ID. -instance Ord TokSymbol where compare = compare `on` tsID - --- | Token with position. -data Token - = PT Posn Tok - | Err Posn - deriving (Eq, Show, Ord) - --- | Pretty print a position. -printPosn :: Posn -> String -printPosn (Pn _ l c) = "line " ++ show l ++ ", column " ++ show c - --- | Pretty print the position of the first token in the list. -tokenPos :: [Token] -> String -tokenPos (t:_) = printPosn (tokenPosn t) -tokenPos [] = "end of file" - --- | Get the position of a token. -tokenPosn :: Token -> Posn -tokenPosn (PT p _) = p -tokenPosn (Err p) = p - --- | Get line and column of a token. -tokenLineCol :: Token -> (Int, Int) -tokenLineCol = posLineCol . tokenPosn - --- | Get line and column of a position. -posLineCol :: Posn -> (Int, Int) -posLineCol (Pn _ l c) = (l,c) - --- | Convert a token into "position token" form. -mkPosToken :: Token -> ((Int, Int), String) -mkPosToken t = (tokenLineCol t, tokenText t) - --- | Convert a token to its text. -tokenText :: Token -> String -tokenText t = case t of - PT _ (TS s _) -> s - PT _ (TL s) -> show s - PT _ (TI s) -> s - PT _ (TV s) -> s - PT _ (TD s) -> s - PT _ (TC s) -> s - Err _ -> "#error" - PT _ (T_VarIdentToken s) -> s - PT _ (T_HoleIdentToken s) -> s - --- | Convert a token to a string. -prToken :: Token -> String -prToken t = tokenText t - --- | Finite map from text to token organized as binary search tree. -data BTree - = N -- ^ Nil (leaf). - | B String Tok BTree BTree - -- ^ Binary node. - deriving (Show) - --- | Convert potential keyword into token or use fallback conversion. -eitherResIdent :: (String -> Tok) -> String -> Tok -eitherResIdent tv s = treeFind resWords - where - treeFind N = tv s - treeFind (B a t left right) = - case compare s a of - LT -> treeFind left - GT -> treeFind right - EQ -> t - --- | The keywords and symbols of the language organized as binary search tree. -resWords :: BTree -resWords = - b "BOT" 39 - (b "*\8321" 20 - (b "#postulate" 10 - (b "#compute-whnf" 5 - (b "#compute" 3 - (b "#check" 2 (b "#assume" 1 N N) N) (b "#compute-nf" 4 N N)) - (b "#end" 8 (b "#define" 7 (b "#def" 6 N N) N) (b "#lang" 9 N N))) - (b "#variables" 15 - (b "#unset-option" 13 - (b "#set-option" 12 (b "#section" 11 N N) N) - (b "#variable" 14 N N)) - (b "*" 18 (b ")" 17 (b "(" 16 N N) N) (b "*_1" 19 N N)))) - (b ":" 30 - (b "0\8322" 25 - (b "/\\" 23 (b "->" 22 (b "," 21 N N) N) (b "0_2" 24 N N)) - (b "1\8322" 28 (b "1_2" 27 (b "1" 26 N N) N) (b "2" 29 N N))) - (b "=" 35 - (b "<" 33 (b ";" 32 (b ":=" 31 N N) N) (b "<=" 34 N N)) - (b "=_{" 37 (b "===" 36 N N) (b ">" 38 N N))))) - (b "unit" 59 - (b "]" 49 - (b "U" 44 - (b "TOP" 42 (b "Sigma" 41 (b "CUBE" 40 N N) N) (b "TOPE" 43 N N)) - (b "\\" 47 (b "[" 46 (b "Unit" 45 N N) N) (b "\\/" 48 N N))) - (b "recOR" 54 - (b "idJ" 52 (b "first" 51 (b "as" 50 N N) N) (b "recBOT" 53 N N)) - (b "rzk-1" 57 - (b "refl_{" 56 (b "refl" 55 N N) N) (b "second" 58 N N)))) - (b "\8594" 69 - (b "}" 64 - (b "|" 62 (b "{" 61 (b "uses" 60 N N) N) (b "|->" 63 N N)) - (b "\960\8321" 67 - (b "\931" 66 (b "\215" 65 N N) N) (b "\960\8322" 68 N N))) - (b "\8801" 74 - (b "\8743" 72 - (b "\8721" 71 (b "\8614" 70 N N) N) (b "\8744" 73 N N)) - (b "\8868" 76 (b "\8804" 75 N N) (b "\8869" 77 N N))))) - where - b s n = B bs (TS bs n) - where - bs = s - --- | Unquote string literal. -unescapeInitTail :: String -> String -unescapeInitTail = id . unesc . tail . id - where - unesc s = case s of - '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs - '\\':'n':cs -> '\n' : unesc cs - '\\':'t':cs -> '\t' : unesc cs - '\\':'r':cs -> '\r' : unesc cs - '\\':'f':cs -> '\f' : unesc cs - '"':[] -> [] - c:cs -> c : unesc cs - _ -> [] - -------------------------------------------------------------------- --- Alex wrapper code. --- A modified "posn" wrapper. -------------------------------------------------------------------- - -data Posn = Pn !Int !Int !Int - deriving (Eq, Show, Ord) - -alexStartPos :: Posn -alexStartPos = Pn 0 1 1 - -alexMove :: Posn -> Char -> Posn -alexMove (Pn a l c) '\t' = Pn (a+1) l (((c+7) `div` 8)*8+1) -alexMove (Pn a l c) '\n' = Pn (a+1) (l+1) 1 -alexMove (Pn a l c) _ = Pn (a+1) l (c+1) - -type Byte = Word8 - -type AlexInput = (Posn, -- current position, - Char, -- previous char - [Byte], -- pending bytes on the current char - String) -- current input string - -tokens :: String -> [Token] -tokens str = go (alexStartPos, '\n', [], str) - where - go :: AlexInput -> [Token] - go inp@(pos, _, _, str) = - case alexScan inp 0 of - AlexEOF -> [] - AlexError (pos, _, _, _) -> [Err pos] - AlexSkip inp' len -> go inp' - AlexToken inp' len act -> act pos (take len str) : (go inp') - -alexGetByte :: AlexInput -> Maybe (Byte,AlexInput) -alexGetByte (p, c, (b:bs), s) = Just (b, (p, c, bs, s)) -alexGetByte (p, _, [], s) = - case s of - [] -> Nothing - (c:s) -> - let p' = alexMove p c - (b:bs) = utf8Encode c - in p' `seq` Just (b, (p', c, bs, s)) - -alexInputPrevChar :: AlexInput -> Char -alexInputPrevChar (p, c, bs, s) = c - --- | Encode a Haskell String to a list of Word8 values, in UTF8 format. -utf8Encode :: Char -> [Word8] -utf8Encode = map fromIntegral . go . ord - where - go oc - | oc <= 0x7f = [oc] - - | oc <= 0x7ff = [ 0xc0 + (oc `Data.Bits.shiftR` 6) - , 0x80 + oc Data.Bits..&. 0x3f - ] - - | oc <= 0xffff = [ 0xe0 + (oc `Data.Bits.shiftR` 12) - , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f) - , 0x80 + oc Data.Bits..&. 0x3f - ] - | otherwise = [ 0xf0 + (oc `Data.Bits.shiftR` 18) - , 0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f) - , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f) - , 0x80 + oc Data.Bits..&. 0x3f - ] alex_action_3 = tok (eitherResIdent TV) alex_action_4 = tok (eitherResIdent T_VarIdentToken) alex_action_5 = tok (eitherResIdent T_HoleIdentToken) @@ -627,9 +400,10 @@ alex_scan_tkn user__ orig_input len input__ s last_acc = let base = alexIndexInt32OffAddr alex_base s offset = PLUS(base,ord_c) - check = alexIndexInt16OffAddr alex_check offset - new_s = if GTE(offset,ILIT(0)) && EQ(check,ord_c) + new_s = if GTE(offset,ILIT(0)) + && let check = alexIndexInt16OffAddr alex_check offset + in EQ(check,ord_c) then alexIndexInt16OffAddr alex_table offset else alexIndexInt16OffAddr alex_deflt s in @@ -702,3 +476,231 @@ alexRightContext IBOX(sc) user__ _ _ input__ = -- match when checking the right context, just -- the first match will do. #endif +{-# LINE 65 ".\Language\Rzk\Syntax\Lex.x" #-} +-- | Create a token with position. +tok :: (Data.Text.Text -> Tok) -> (Posn -> Data.Text.Text -> Token) +tok f p = PT p . f + +-- | Token without position. +data Tok + = TK {-# UNPACK #-} !TokSymbol -- ^ Reserved word or symbol. + | TL !Data.Text.Text -- ^ String literal. + | TI !Data.Text.Text -- ^ Integer literal. + | TV !Data.Text.Text -- ^ Identifier. + | TD !Data.Text.Text -- ^ Float literal. + | TC !Data.Text.Text -- ^ Character literal. + | T_VarIdentToken !Data.Text.Text + | T_HoleIdentToken !Data.Text.Text + deriving (Eq, Show, Ord) + +-- | Smart constructor for 'Tok' for the sake of backwards compatibility. +pattern TS :: Data.Text.Text -> Int -> Tok +pattern TS t i = TK (TokSymbol t i) + +-- | Keyword or symbol tokens have a unique ID. +data TokSymbol = TokSymbol + { tsText :: Data.Text.Text + -- ^ Keyword or symbol text. + , tsID :: !Int + -- ^ Unique ID. + } deriving (Show) + +-- | Keyword/symbol equality is determined by the unique ID. +instance Eq TokSymbol where (==) = (==) `on` tsID + +-- | Keyword/symbol ordering is determined by the unique ID. +instance Ord TokSymbol where compare = compare `on` tsID + +-- | Token with position. +data Token + = PT Posn Tok + | Err Posn + deriving (Eq, Show, Ord) + +-- | Pretty print a position. +printPosn :: Posn -> String +printPosn (Pn _ l c) = "line " ++ show l ++ ", column " ++ show c + +-- | Pretty print the position of the first token in the list. +tokenPos :: [Token] -> String +tokenPos (t:_) = printPosn (tokenPosn t) +tokenPos [] = "end of file" + +-- | Get the position of a token. +tokenPosn :: Token -> Posn +tokenPosn (PT p _) = p +tokenPosn (Err p) = p + +-- | Get line and column of a token. +tokenLineCol :: Token -> (Int, Int) +tokenLineCol = posLineCol . tokenPosn + +-- | Get line and column of a position. +posLineCol :: Posn -> (Int, Int) +posLineCol (Pn _ l c) = (l,c) + +-- | Convert a token into "position token" form. +mkPosToken :: Token -> ((Int, Int), Data.Text.Text) +mkPosToken t = (tokenLineCol t, tokenText t) + +-- | Convert a token to its text. +tokenText :: Token -> Data.Text.Text +tokenText t = case t of + PT _ (TS s _) -> s + PT _ (TL s) -> Data.Text.pack (show s) + PT _ (TI s) -> s + PT _ (TV s) -> s + PT _ (TD s) -> s + PT _ (TC s) -> s + Err _ -> Data.Text.pack "#error" + PT _ (T_VarIdentToken s) -> s + PT _ (T_HoleIdentToken s) -> s + +-- | Convert a token to a string. +prToken :: Token -> String +prToken t = Data.Text.unpack (tokenText t) + +-- | Finite map from text to token organized as binary search tree. +data BTree + = N -- ^ Nil (leaf). + | B Data.Text.Text Tok BTree BTree + -- ^ Binary node. + deriving (Show) + +-- | Convert potential keyword into token or use fallback conversion. +eitherResIdent :: (Data.Text.Text -> Tok) -> Data.Text.Text -> Tok +eitherResIdent tv s = treeFind resWords + where + treeFind N = tv s + treeFind (B a t left right) = + case compare s a of + LT -> treeFind left + GT -> treeFind right + EQ -> t + +-- | The keywords and symbols of the language organized as binary search tree. +resWords :: BTree +resWords = + b "BOT" 39 + (b "*\8321" 20 + (b "#postulate" 10 + (b "#compute-whnf" 5 + (b "#compute" 3 + (b "#check" 2 (b "#assume" 1 N N) N) (b "#compute-nf" 4 N N)) + (b "#end" 8 (b "#define" 7 (b "#def" 6 N N) N) (b "#lang" 9 N N))) + (b "#variables" 15 + (b "#unset-option" 13 + (b "#set-option" 12 (b "#section" 11 N N) N) + (b "#variable" 14 N N)) + (b "*" 18 (b ")" 17 (b "(" 16 N N) N) (b "*_1" 19 N N)))) + (b ":" 30 + (b "0\8322" 25 + (b "/\\" 23 (b "->" 22 (b "," 21 N N) N) (b "0_2" 24 N N)) + (b "1\8322" 28 (b "1_2" 27 (b "1" 26 N N) N) (b "2" 29 N N))) + (b "=" 35 + (b "<" 33 (b ";" 32 (b ":=" 31 N N) N) (b "<=" 34 N N)) + (b "=_{" 37 (b "===" 36 N N) (b ">" 38 N N))))) + (b "unit" 59 + (b "]" 49 + (b "U" 44 + (b "TOP" 42 (b "Sigma" 41 (b "CUBE" 40 N N) N) (b "TOPE" 43 N N)) + (b "\\" 47 (b "[" 46 (b "Unit" 45 N N) N) (b "\\/" 48 N N))) + (b "recOR" 54 + (b "idJ" 52 (b "first" 51 (b "as" 50 N N) N) (b "recBOT" 53 N N)) + (b "rzk-1" 57 + (b "refl_{" 56 (b "refl" 55 N N) N) (b "second" 58 N N)))) + (b "\8594" 69 + (b "}" 64 + (b "|" 62 (b "{" 61 (b "uses" 60 N N) N) (b "|->" 63 N N)) + (b "\960\8321" 67 + (b "\931" 66 (b "\215" 65 N N) N) (b "\960\8322" 68 N N))) + (b "\8801" 74 + (b "\8743" 72 + (b "\8721" 71 (b "\8614" 70 N N) N) (b "\8744" 73 N N)) + (b "\8868" 76 (b "\8804" 75 N N) (b "\8869" 77 N N))))) + where + b s n = B bs (TS bs n) + where + bs = Data.Text.pack s + +-- | Unquote string literal. +unescapeInitTail :: Data.Text.Text -> Data.Text.Text +unescapeInitTail = Data.Text.pack . unesc . tail . Data.Text.unpack + where + unesc s = case s of + '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs + '\\':'n':cs -> '\n' : unesc cs + '\\':'t':cs -> '\t' : unesc cs + '\\':'r':cs -> '\r' : unesc cs + '\\':'f':cs -> '\f' : unesc cs + '"':[] -> [] + c:cs -> c : unesc cs + _ -> [] + +------------------------------------------------------------------- +-- Alex wrapper code. +-- A modified "posn" wrapper. +------------------------------------------------------------------- + +data Posn = Pn !Int !Int !Int + deriving (Eq, Show, Ord) + +alexStartPos :: Posn +alexStartPos = Pn 0 1 1 + +alexMove :: Posn -> Char -> Posn +alexMove (Pn a l c) '\t' = Pn (a+1) l (((c+7) `div` 8)*8+1) +alexMove (Pn a l c) '\n' = Pn (a+1) (l+1) 1 +alexMove (Pn a l c) _ = Pn (a+1) l (c+1) + +type Byte = Word8 + +type AlexInput = (Posn, -- current position, + Char, -- previous char + [Byte], -- pending bytes on the current char + Data.Text.Text) -- current input string + +tokens :: Data.Text.Text -> [Token] +tokens str = go (alexStartPos, '\n', [], str) + where + go :: AlexInput -> [Token] + go inp@(pos, _, _, str) = + case alexScan inp 0 of + AlexEOF -> [] + AlexError (pos, _, _, _) -> [Err pos] + AlexSkip inp' len -> go inp' + AlexToken inp' len act -> act pos (Data.Text.take len str) : (go inp') + +alexGetByte :: AlexInput -> Maybe (Byte,AlexInput) +alexGetByte (p, c, (b:bs), s) = Just (b, (p, c, bs, s)) +alexGetByte (p, _, [], s) = + case Data.Text.uncons s of + Nothing -> Nothing + Just (c,s) -> + let p' = alexMove p c + (b:bs) = utf8Encode c + in p' `seq` Just (b, (p', c, bs, s)) + +alexInputPrevChar :: AlexInput -> Char +alexInputPrevChar (p, c, bs, s) = c + +-- | Encode a Haskell String to a list of Word8 values, in UTF8 format. +utf8Encode :: Char -> [Word8] +utf8Encode = map fromIntegral . go . ord + where + go oc + | oc <= 0x7f = [oc] + + | oc <= 0x7ff = [ 0xc0 + (oc `Data.Bits.shiftR` 6) + , 0x80 + oc Data.Bits..&. 0x3f + ] + + | oc <= 0xffff = [ 0xe0 + (oc `Data.Bits.shiftR` 12) + , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f) + , 0x80 + oc Data.Bits..&. 0x3f + ] + | otherwise = [ 0xf0 + (oc `Data.Bits.shiftR` 18) + , 0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f) + , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f) + , 0x80 + oc Data.Bits..&. 0x3f + ] diff --git a/rzk/src/Language/Rzk/Syntax/Lex.x b/rzk/src/Language/Rzk/Syntax/Lex.x index b8e8c474b..4acf792a4 100644 --- a/rzk/src/Language/Rzk/Syntax/Lex.x +++ b/rzk/src/Language/Rzk/Syntax/Lex.x @@ -1,4 +1,4 @@ --- -*- haskell -*- File generated by the BNF Converter (bnfc 2.9.5). +-- -*- haskell -*- File generated by the BNF Converter (bnfc 2.9.6). -- Lexer definition for use with Alex 3 { @@ -11,6 +11,7 @@ module Language.Rzk.Syntax.Lex where import Prelude +import qualified Data.Text import qualified Data.Bits import Data.Char (ord) import Data.Function (on) @@ -63,28 +64,28 @@ $l $i* { -- | Create a token with position. -tok :: (String -> Tok) -> (Posn -> String -> Token) +tok :: (Data.Text.Text -> Tok) -> (Posn -> Data.Text.Text -> Token) tok f p = PT p . f -- | Token without position. data Tok = TK {-# UNPACK #-} !TokSymbol -- ^ Reserved word or symbol. - | TL !String -- ^ String literal. - | TI !String -- ^ Integer literal. - | TV !String -- ^ Identifier. - | TD !String -- ^ Float literal. - | TC !String -- ^ Character literal. - | T_VarIdentToken !String - | T_HoleIdentToken !String + | TL !Data.Text.Text -- ^ String literal. + | TI !Data.Text.Text -- ^ Integer literal. + | TV !Data.Text.Text -- ^ Identifier. + | TD !Data.Text.Text -- ^ Float literal. + | TC !Data.Text.Text -- ^ Character literal. + | T_VarIdentToken !Data.Text.Text + | T_HoleIdentToken !Data.Text.Text deriving (Eq, Show, Ord) -- | Smart constructor for 'Tok' for the sake of backwards compatibility. -pattern TS :: String -> Int -> Tok +pattern TS :: Data.Text.Text -> Int -> Tok pattern TS t i = TK (TokSymbol t i) -- | Keyword or symbol tokens have a unique ID. data TokSymbol = TokSymbol - { tsText :: String + { tsText :: Data.Text.Text -- ^ Keyword or symbol text. , tsID :: !Int -- ^ Unique ID. @@ -125,35 +126,35 @@ posLineCol :: Posn -> (Int, Int) posLineCol (Pn _ l c) = (l,c) -- | Convert a token into "position token" form. -mkPosToken :: Token -> ((Int, Int), String) +mkPosToken :: Token -> ((Int, Int), Data.Text.Text) mkPosToken t = (tokenLineCol t, tokenText t) -- | Convert a token to its text. -tokenText :: Token -> String +tokenText :: Token -> Data.Text.Text tokenText t = case t of PT _ (TS s _) -> s - PT _ (TL s) -> show s + PT _ (TL s) -> Data.Text.pack (show s) PT _ (TI s) -> s PT _ (TV s) -> s PT _ (TD s) -> s PT _ (TC s) -> s - Err _ -> "#error" + Err _ -> Data.Text.pack "#error" PT _ (T_VarIdentToken s) -> s PT _ (T_HoleIdentToken s) -> s -- | Convert a token to a string. prToken :: Token -> String -prToken t = tokenText t +prToken t = Data.Text.unpack (tokenText t) -- | Finite map from text to token organized as binary search tree. data BTree = N -- ^ Nil (leaf). - | B String Tok BTree BTree + | B Data.Text.Text Tok BTree BTree -- ^ Binary node. deriving (Show) -- | Convert potential keyword into token or use fallback conversion. -eitherResIdent :: (String -> Tok) -> String -> Tok +eitherResIdent :: (Data.Text.Text -> Tok) -> Data.Text.Text -> Tok eitherResIdent tv s = treeFind resWords where treeFind N = tv s @@ -206,11 +207,11 @@ resWords = where b s n = B bs (TS bs n) where - bs = s + bs = Data.Text.pack s -- | Unquote string literal. -unescapeInitTail :: String -> String -unescapeInitTail = id . unesc . tail . id +unescapeInitTail :: Data.Text.Text -> Data.Text.Text +unescapeInitTail = Data.Text.pack . unesc . tail . Data.Text.unpack where unesc s = case s of '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs @@ -243,9 +244,9 @@ type Byte = Word8 type AlexInput = (Posn, -- current position, Char, -- previous char [Byte], -- pending bytes on the current char - String) -- current input string + Data.Text.Text) -- current input string -tokens :: String -> [Token] +tokens :: Data.Text.Text -> [Token] tokens str = go (alexStartPos, '\n', [], str) where go :: AlexInput -> [Token] @@ -254,14 +255,14 @@ tokens str = go (alexStartPos, '\n', [], str) AlexEOF -> [] AlexError (pos, _, _, _) -> [Err pos] AlexSkip inp' len -> go inp' - AlexToken inp' len act -> act pos (take len str) : (go inp') + AlexToken inp' len act -> act pos (Data.Text.take len str) : (go inp') alexGetByte :: AlexInput -> Maybe (Byte,AlexInput) alexGetByte (p, c, (b:bs), s) = Just (b, (p, c, bs, s)) alexGetByte (p, _, [], s) = - case s of - [] -> Nothing - (c:s) -> + case Data.Text.uncons s of + Nothing -> Nothing + Just (c,s) -> let p' = alexMove p c (b:bs) = utf8Encode c in p' `seq` Just (b, (p', c, bs, s)) diff --git a/rzk/src/Language/Rzk/Syntax/Par.hs b/rzk/src/Language/Rzk/Syntax/Par.hs index 2ae42973d..32e538a4b 100644 --- a/rzk/src/Language/Rzk/Syntax/Par.hs +++ b/rzk/src/Language/Rzk/Syntax/Par.hs @@ -41,6 +41,7 @@ import Prelude import qualified Language.Rzk.Syntax.Abs import Language.Rzk.Syntax.Lex +import qualified Data.Text import qualified Data.Array as Happy_Data_Array import qualified Data.Bits as Bits import qualified GHC.Exts as Happy_GHC_Exts @@ -429,7 +430,7 @@ happyReduce_26 = happySpecReduce_1 0# happyReduction_26 happyReduction_26 happy_x_1 = case happyOutTok happy_x_1 of { happy_var_1 -> happyIn29 - ((uncurry Language.Rzk.Syntax.Abs.BNFC'Position (tokenLineCol happy_var_1), ((\(PT _ (TL s)) -> s) happy_var_1)) + ((uncurry Language.Rzk.Syntax.Abs.BNFC'Position (tokenLineCol happy_var_1), (Data.Text.unpack ((\(PT _ (TL s)) -> s) happy_var_1))) )} happyReduce_27 = happySpecReduce_1 1# happyReduction_27 @@ -1845,7 +1846,7 @@ happyError ts = Left $ [Err _] -> " due to lexer error" t:_ -> " before `" ++ (prToken t) ++ "'" -myLexer :: String -> [Token] +myLexer :: Data.Text.Text -> [Token] myLexer = tokens -- Entrypoints diff --git a/rzk/src/Language/Rzk/Syntax/Par.y b/rzk/src/Language/Rzk/Syntax/Par.y index f2967f020..3544fd2a5 100644 --- a/rzk/src/Language/Rzk/Syntax/Par.y +++ b/rzk/src/Language/Rzk/Syntax/Par.y @@ -1,4 +1,4 @@ --- -*- haskell -*- File generated by the BNF Converter (bnfc 2.9.5). +-- -*- haskell -*- File generated by the BNF Converter (bnfc 2.9.6). -- Parser definition for use with Happy { @@ -40,6 +40,7 @@ import Prelude import qualified Language.Rzk.Syntax.Abs import Language.Rzk.Syntax.Lex +import qualified Data.Text } @@ -157,7 +158,7 @@ import Language.Rzk.Syntax.Lex %% String :: { (Language.Rzk.Syntax.Abs.BNFC'Position, String) } -String : L_quoted { (uncurry Language.Rzk.Syntax.Abs.BNFC'Position (tokenLineCol $1), ((\(PT _ (TL s)) -> s) $1)) } +String : L_quoted { (uncurry Language.Rzk.Syntax.Abs.BNFC'Position (tokenLineCol $1), (Data.Text.unpack ((\(PT _ (TL s)) -> s) $1))) } VarIdentToken :: { (Language.Rzk.Syntax.Abs.BNFC'Position, Language.Rzk.Syntax.Abs.VarIdentToken) } VarIdentToken : L_VarIdentToken { (uncurry Language.Rzk.Syntax.Abs.BNFC'Position (tokenLineCol $1), Language.Rzk.Syntax.Abs.VarIdentToken (tokenText $1)) } @@ -371,7 +372,7 @@ happyError ts = Left $ [Err _] -> " due to lexer error" t:_ -> " before `" ++ (prToken t) ++ "'" -myLexer :: String -> [Token] +myLexer :: Data.Text.Text -> [Token] myLexer = tokens -- Entrypoints diff --git a/rzk/src/Language/Rzk/Syntax/Print.hs b/rzk/src/Language/Rzk/Syntax/Print.hs index c31b45a6e..84954ee2b 100644 --- a/rzk/src/Language/Rzk/Syntax/Print.hs +++ b/rzk/src/Language/Rzk/Syntax/Print.hs @@ -1,4 +1,4 @@ --- File generated by the BNF Converter (bnfc 2.9.5). +-- File generated by the BNF Converter (bnfc 2.9.6). {-# LANGUAGE CPP #-} {-# LANGUAGE FlexibleInstances #-} @@ -21,6 +21,7 @@ import Prelude ) import Data.Char ( Char, isSpace ) import qualified Language.Rzk.Syntax.Abs +import qualified Data.Text -- | The top-level printing method. @@ -138,9 +139,9 @@ instance Print Double where prt _ x = doc (shows x) instance Print Language.Rzk.Syntax.Abs.VarIdentToken where - prt _ (Language.Rzk.Syntax.Abs.VarIdentToken i) = doc $ showString i + prt _ (Language.Rzk.Syntax.Abs.VarIdentToken i) = doc $ showString (Data.Text.unpack i) instance Print Language.Rzk.Syntax.Abs.HoleIdentToken where - prt _ (Language.Rzk.Syntax.Abs.HoleIdentToken i) = doc $ showString i + prt _ (Language.Rzk.Syntax.Abs.HoleIdentToken i) = doc $ showString (Data.Text.unpack i) instance Print (Language.Rzk.Syntax.Abs.Module' a) where prt i = \case Language.Rzk.Syntax.Abs.Module _ languagedecl commands -> prPrec i 0 (concatD [prt 0 languagedecl, prt 0 commands]) diff --git a/rzk/src/Language/Rzk/Syntax/Skel.hs b/rzk/src/Language/Rzk/Syntax/Skel.hs index f563fd074..3d2c6ca16 100644 --- a/rzk/src/Language/Rzk/Syntax/Skel.hs +++ b/rzk/src/Language/Rzk/Syntax/Skel.hs @@ -1,4 +1,4 @@ --- File generated by the BNF Converter (bnfc 2.9.5). +-- File generated by the BNF Converter (bnfc 2.9.6). -- Templates for pattern matching on abstract syntax diff --git a/rzk/src/Language/Rzk/Syntax/Test.hs b/rzk/src/Language/Rzk/Syntax/Test.hs index a1f0dd00a..50e0b37d8 100644 --- a/rzk/src/Language/Rzk/Syntax/Test.hs +++ b/rzk/src/Language/Rzk/Syntax/Test.hs @@ -1,4 +1,4 @@ --- File generated by the BNF Converter (bnfc 2.9.5). +-- File generated by the BNF Converter (bnfc 2.9.6). -- | Program to test parser. @@ -13,8 +13,9 @@ import Prelude , Show, show , IO, (>>), (>>=), mapM_, putStrLn , FilePath - , getContents, readFile ) +import Data.Text.IO ( getContents, readFile ) +import qualified Data.Text import System.Environment ( getArgs ) import System.Exit ( exitFailure ) import Control.Monad ( when ) @@ -36,7 +37,7 @@ putStrV v s = when (v > 1) $ putStrLn s runFile :: (Print a, Show a) => Verbosity -> ParseFun a -> FilePath -> IO () runFile v p f = putStrLn f >> readFile f >>= run v p -run :: (Print a, Show a) => Verbosity -> ParseFun a -> String -> IO () +run :: (Print a, Show a) => Verbosity -> ParseFun a -> Data.Text.Text -> IO () run v p s = case p ts of Left err -> do diff --git a/rzk/src/Language/Rzk/VSCode/Handlers.hs b/rzk/src/Language/Rzk/VSCode/Handlers.hs index 09695553b..c816c4d42 100644 --- a/rzk/src/Language/Rzk/VSCode/Handlers.hs +++ b/rzk/src/Language/Rzk/VSCode/Handlers.hs @@ -55,7 +55,7 @@ import Rzk.TypeCheck -- | Given a list of file paths, reads them and parses them as Rzk modules, -- returning the same list of file paths but with the parsed module (or parse error) -parseFiles :: [FilePath] -> IO [(FilePath, Either String Module)] +parseFiles :: [FilePath] -> IO [(FilePath, Either T.Text Module)] parseFiles [] = pure [] parseFiles (x:xs) = do errOrMod <- parseModuleFile x @@ -65,7 +65,7 @@ parseFiles (x:xs) = do -- | Given the list of possible modules returned by `parseFiles`, this segregates the errors -- from the successfully parsed modules and returns them in separate lists so the errors -- can be reported and the modules can be typechecked. -collectErrors :: [(FilePath, Either String Module)] -> ([(FilePath, String)], [(FilePath, Module)]) +collectErrors :: [(FilePath, Either T.Text Module)] -> ([(FilePath, T.Text)], [(FilePath, Module)]) collectErrors [] = ([], []) collectErrors ((path, result) : paths) = case result of @@ -81,6 +81,8 @@ maxDiagnosticCount = 100 filePathToNormalizedUri :: FilePath -> NormalizedUri filePathToNormalizedUri = toNormalizedUri . filePathToUri +tshow :: Show a => a -> T.Text +tshow = T.pack . show typecheckFromConfigFile :: LSP () typecheckFromConfigFile = do @@ -95,7 +97,7 @@ typecheckFromConfigFile = do eitherConfig <- liftIO $ Yaml.decodeFileEither @ProjectConfig rzkYamlPath case eitherConfig of Left err -> do - logError ("Invalid or missing rzk.yaml: " ++ Yaml.prettyPrintParseException err) + logError ("Invalid or missing rzk.yaml: " <> T.pack (Yaml.prettyPrintParseException err)) Right config -> do logDebug "Starting typechecking" @@ -106,8 +108,8 @@ typecheckFromConfigFile = do let cachedPaths = map fst cachedModules modifiedFiles = paths \\ cachedPaths - logDebug ("Found " ++ show (length cachedPaths) ++ " files in the cache") - logDebug (show (length modifiedFiles) ++ " files have been modified") + logDebug ("Found " <> tshow (length cachedPaths) <> " files in the cache") + logDebug (tshow (length modifiedFiles) <> " files have been modified") (parseErrors, parsedModules) <- liftIO $ collectErrors <$> parseFiles modifiedFiles tcResults <- liftIO $ try $ evaluate $ @@ -117,15 +119,15 @@ typecheckFromConfigFile = do Left (ex :: SomeException) -> do -- Just a warning to be logged in the "Output" panel and not shown to the user as an error message -- because exceptions are expected when the file has invalid syntax - logWarning ("Encountered an exception while typechecking:\n" ++ show ex) + logWarning ("Encountered an exception while typechecking:\n" <> tshow ex) return ([], []) Right (Left err) -> do - logError ("An impossible error happened! Please report a bug:\n" ++ ppTypeErrorInScopedContext' BottomUp err) + logError ("An impossible error happened! Please report a bug:\n" <> T.pack (ppTypeErrorInScopedContext' BottomUp err)) return ([err], []) -- sort of impossible Right (Right (checkedModules, errors)) -> do -- cache well-typed modules - logInfo (show (length checkedModules) ++ " modules successfully typechecked") - logInfo (show (length errors) ++ " errors found") + logInfo (tshow (length checkedModules) <> " modules successfully typechecked") + logInfo (tshow (length errors) <> " errors found") cacheTypecheckedModules checkedModules return (errors, checkedModules) @@ -177,13 +179,13 @@ typecheckFromConfigFile = do line = fromIntegral $ fromMaybe 0 $ extractLineNumber err - diagnosticOfParseError :: String -> Diagnostic + diagnosticOfParseError :: T.Text -> Diagnostic diagnosticOfParseError err = Diagnostic (Range (Position 0 0) (Position 0 0)) (Just DiagnosticSeverity_Error) (Just $ InR "parse-error") Nothing (Just "rzk") - (T.pack err) + err Nothing (Just []) Nothing @@ -199,7 +201,7 @@ provideCompletions req res = do when (isNothing root) $ logDebug "Not in a workspace. Cannot find root path for relative paths" let rootDir = fromMaybe "/" root cachedModules <- getCachedTypecheckedModules - logDebug ("Found " ++ show (length cachedModules) ++ " modules in the cache") + logDebug ("Found " <> tshow (length cachedModules) <> " modules in the cache") let currentFile = fromMaybe "" $ uriToFilePath $ req ^. params . textDocument . uri -- Take all the modules up to and including the currently open one let modules = takeWhileInc ((/= currentFile) . fst) cachedModules @@ -210,7 +212,7 @@ provideCompletions req res = do | otherwise = [x] let items = concatMap (declsToItems rootDir) modules - logDebug ("Sending " ++ show (length items) ++ " completion items") + logDebug ("Sending " <> T.pack (show (length items)) <> " completion items") res $ Right $ InL items where declsToItems :: FilePath -> (FilePath, [Decl']) -> [CompletionItem] @@ -236,19 +238,19 @@ formattingEditToTextEdit (FormattingEdit startLine startCol endLine endCol newTe (Position (fromIntegral startLine - 1) (fromIntegral startCol - 1)) (Position (fromIntegral endLine - 1) (fromIntegral endCol - 1)) ) - (T.pack newText) + newText formatDocument :: Handler LSP 'Method_TextDocumentFormatting formatDocument req res = do let doc = req ^. params . textDocument . uri . to toNormalizedUri - logInfo $ "Formatting document: " <> show doc + logInfo $ "Formatting document: " <> T.pack (show doc) ServerConfig {formatEnabled = fmtEnabled} <- getConfig if fmtEnabled then do mdoc <- getVirtualFile doc possibleEdits <- case virtualFileText <$> mdoc of Nothing -> return (Left "Failed to get file contents") Just sourceCode -> do - let edits = formatTextEdits (filter (/= '\r') $ T.unpack sourceCode) + let edits = formatTextEdits (T.filter (/= '\r') sourceCode) return (Right $ map formattingEditToTextEdit edits) case possibleEdits of Left err -> res $ Left $ ResponseError (InR ErrorCodes_InternalError) err Nothing @@ -265,11 +267,11 @@ provideSemanticTokens req responder = do possibleTokens <- case virtualFileText <$> mdoc of Nothing -> return (Left "Failed to get file content") Just sourceCode -> fmap (fmap tokenizeModule) $ liftIO $ - parseModuleSafe (filter (/= '\r') $ T.unpack sourceCode) + parseModuleSafe (T.filter (/= '\r') sourceCode) case possibleTokens of Left err -> do -- Exception occurred when parsing the module - logWarning ("Failed to tokenize file: " ++ err) + logWarning ("Failed to tokenize file: " <> err) Right tokens -> do let encoded = encodeTokens defaultSemanticTokensLegend $ relativizeTokens tokens case encoded of diff --git a/rzk/src/Language/Rzk/VSCode/Logging.hs b/rzk/src/Language/Rzk/VSCode/Logging.hs index 159eb29ac..adc2b8d66 100644 --- a/rzk/src/Language/Rzk/VSCode/Logging.hs +++ b/rzk/src/Language/Rzk/VSCode/Logging.hs @@ -6,15 +6,15 @@ import Language.LSP.Logging (defaultClientLogger) import Language.LSP.Server (MonadLsp) -logDebug :: MonadLsp c m => String -> m () -logDebug msg = defaultClientLogger <& T.pack msg `WithSeverity` Debug +logDebug :: MonadLsp c m => T.Text -> m () +logDebug msg = defaultClientLogger <& msg `WithSeverity` Debug -logInfo :: MonadLsp c m => String -> m () -logInfo msg = defaultClientLogger <& T.pack msg `WithSeverity` Info +logInfo :: MonadLsp c m => T.Text -> m () +logInfo msg = defaultClientLogger <& msg `WithSeverity` Info -logWarning :: MonadLsp c m => String -> m () -logWarning msg = defaultClientLogger <& T.pack msg `WithSeverity` Warning +logWarning :: MonadLsp c m => T.Text -> m () +logWarning msg = defaultClientLogger <& msg `WithSeverity` Warning -- | Error logs will also be shown to the user via `window/showMessage` -logError :: MonadLsp c m => String -> m () -logError msg = defaultClientLogger <& T.pack msg `WithSeverity` Error +logError :: MonadLsp c m => T.Text -> m () +logError msg = defaultClientLogger <& msg `WithSeverity` Error diff --git a/rzk/src/Rzk/Format.hs b/rzk/src/Rzk/Format.hs index 233f04448..ea6ca472c 100644 --- a/rzk/src/Rzk/Format.hs +++ b/rzk/src/Rzk/Format.hs @@ -17,33 +17,33 @@ module Rzk.Format ( isWellFormatted, isWellFormattedFile, ) where -import Data.List (elemIndex, sort) +import Data.List (sort) -import qualified Data.Text as T -import qualified Data.Text.IO as T +import qualified Data.Text as T +import qualified Data.Text.IO as T -import Language.Rzk.Syntax (tryExtractMarkdownCodeBlocks, - resolveLayout) -import Language.Rzk.Syntax.Lex (Posn (Pn), Tok (..), - TokSymbol (TokSymbol), - Token (PT), tokens) +import Language.Rzk.Syntax (resolveLayout, + tryExtractMarkdownCodeBlocks) +import Language.Rzk.Syntax.Lex (Posn (Pn), Tok (..), + TokSymbol (TokSymbol), Token (PT), + tokens) -- | All indices are 1-based (as received from the lexer) -- Note: LSP uses 0-based indices -data FormattingEdit = FormattingEdit Int Int Int Int String +data FormattingEdit = FormattingEdit Int Int Int Int T.Text deriving (Eq, Ord, Show) -- TODO: more patterns, e.g. for identifiers and literals -pattern Symbol :: String -> Tok +pattern Symbol :: T.Text -> Tok pattern Symbol s <- TK (TokSymbol s _) -pattern Token :: String -> Int -> Int -> Token +pattern Token :: T.Text -> Int -> Int -> Token pattern Token s line col <- PT (Pn _ line col) (Symbol s) -- pattern TokenSym :: String -> Int -> Int -> Token -- pattern TokenSym s line col <- PT (Pn _ line col) (Symbol s) -pattern TokenIdent :: String -> Int -> Int -> Token +pattern TokenIdent :: T.Text -> Int -> Int -> Token pattern TokenIdent s line col <- PT (Pn _ line col) (T_VarIdentToken s) data FormatState = FormatState @@ -54,21 +54,22 @@ data FormatState = FormatState } -- TODO: replace all tabs with 1 space before processing -formatTextEdits :: String -> [FormattingEdit] +formatTextEdits :: T.Text -> [FormattingEdit] formatTextEdits contents = case resolveLayout True (tokens rzkBlocks) of - Left _err -> [] -- TODO: log error (in a CLI and LSP friendly way) + Left _err -> [] -- TODO: log error (in a CLI and LSP friendly way) Right allToks -> go (initialState {allTokens = allToks}) allToks where initialState = FormatState { parensDepth = 0, definingName = False, lambdaArrow = False, allTokens = [] } incParensDepth s = s { parensDepth = parensDepth s + 1 } decParensDepth s = s { parensDepth = 0 `max` (parensDepth s - 1) } rzkBlocks = tryExtractMarkdownCodeBlocks "rzk" contents -- TODO: replace tabs with spaces - contentLines line = lines rzkBlocks !! (line - 1) -- Sorry + contentLines line = T.lines rzkBlocks !! (line - 1) -- Sorry lineTokensBefore toks line col = filter isBefore toks where isBefore (PT (Pn _ l c) _) = l == line && c < col isBefore _ = False + unicodeTokens :: [(T.Text, T.Text)] unicodeTokens = [ ("->", "→") , ("|->", "↦") @@ -131,7 +132,8 @@ formatTextEdits contents = spaceCol = col + 1 lineContent = contentLines line precededBySingleCharOnly = all isPunctuation (lineTokensBefore (allTokens s) line col) - singleCharUnicodeTokens = filter (\(_, unicode) -> length unicode == 1) unicodeTokens + singleCharUnicodeTokens = filter (\(_, unicode) -> T.length unicode == 1) unicodeTokens + punctuations :: [T.Text] punctuations = concat [ map fst singleCharUnicodeTokens -- ASCII sequences will be converted soon , map snd singleCharUnicodeTokens @@ -139,16 +141,16 @@ formatTextEdits contents = ] isPunctuation (Token tk _ _) = tk `elem` punctuations isPunctuation _ = False - spacesAfter = length $ takeWhile (== ' ') (drop col lineContent) - isLastNonSpaceChar = all (== ' ') (drop col lineContent) + spacesAfter = T.length $ T.takeWhile (== ' ') (T.drop col lineContent) + isLastNonSpaceChar = T.all (== ' ') (T.drop col lineContent) -- Remove any space before the closing paren go s (Token ")" line col : tks) = edits ++ go (decParensDepth s) tks where lineContent = contentLines line - isFirstNonSpaceChar = all (== ' ') (take (col - 1) lineContent) - spacesBefore = length $ takeWhile (== ' ') (reverse $ take (col - 1) lineContent) + isFirstNonSpaceChar = T.all (== ' ') (T.take (col - 1) lineContent) + spacesBefore = T.length $ T.takeWhile (== ' ') (T.reverse $ T.take (col - 1) lineContent) edits = map snd $ filter fst [ (not isFirstNonSpaceChar && spacesBefore > 0, FormattingEdit line (col - spacesBefore) line col "") @@ -161,11 +163,11 @@ formatTextEdits contents = where isDefinitionTypeSeparator = parensDepth s == 0 && definingName s lineContent = contentLines line - isFirstNonSpaceChar = all (== ' ') (take (col - 1) lineContent) - isLastNonSpaceChar = all (== ' ') (drop col lineContent) - spacesBefore = length $ takeWhile (== ' ') (reverse $ take (col - 1) lineContent) + isFirstNonSpaceChar = T.all (== ' ') (T.take (col - 1) lineContent) + isLastNonSpaceChar = T.all (== ' ') (T.drop col lineContent) + spacesBefore = T.length $ T.takeWhile (== ' ') (T.reverse $ T.take (col - 1) lineContent) spaceCol = col + 1 - spacesAfter = length $ takeWhile (== ' ') (drop col lineContent) + spacesAfter = T.length $ T.takeWhile (== ' ') (T.drop col lineContent) typeSepEdits = map snd $ filter fst -- Ensure line break before : (and remove any spaces before) [ (not isFirstNonSpaceChar, FormattingEdit line (col - spacesBefore) line col "\n ") @@ -188,9 +190,9 @@ formatTextEdits contents = = edits ++ go s tks where lineContent = contentLines line - isFirstNonSpaceChar = all (== ' ') (take (col - 1) lineContent) - spacesAfter = length $ takeWhile (== ' ') (drop (col + 1) lineContent) - spacesBefore = length $ takeWhile (== ' ') (reverse $ take (col - 1) lineContent) + isFirstNonSpaceChar = T.all (== ' ') (T.take (col - 1) lineContent) + spacesAfter = T.length $ T.takeWhile (== ' ') (T.drop (col + 1) lineContent) + spacesBefore = T.length $ T.takeWhile (== ' ') (T.reverse $ T.take (col - 1) lineContent) edits = map snd $ filter fst -- Ensure line break before `:=` [ (not isFirstNonSpaceChar, FormattingEdit line (col - spacesBefore) line col "\n ") @@ -198,7 +200,7 @@ formatTextEdits contents = , (isFirstNonSpaceChar && spacesBefore /= 2, FormattingEdit line 1 line col " ") -- Ensure exactly one space after - , (length lineContent > col + 2 && spacesAfter /= 1, + , (T.length lineContent > col + 2 && spacesAfter /= 1, FormattingEdit line (col + 2) line (col + 2 + spacesAfter) " ") ] @@ -207,8 +209,8 @@ formatTextEdits contents = = edits ++ go (s { lambdaArrow = True }) tks where lineContent = contentLines line - spacesAfter = length $ takeWhile (== ' ') (drop col lineContent) - isLastNonSpaceChar = all (== ' ') (drop col lineContent) + spacesAfter = T.length $ T.takeWhile (== ' ') (T.drop col lineContent) + isLastNonSpaceChar = T.all (== ' ') (T.drop col lineContent) spaceCol = col + 1 edits = map snd $ filter fst [ (not isLastNonSpaceChar && spacesAfter /= 1, @@ -225,17 +227,17 @@ formatTextEdits contents = | otherwise = s isArrow = tk `elem` ["->", "→"] lineContent = contentLines line - spacesBefore = length $ takeWhile (== ' ') (reverse $ take (col - 1) lineContent) - spacesAfter = length $ takeWhile (== ' ') (drop (col + length tk - 1) lineContent) - isFirstNonSpaceChar = all (== ' ') (take (col - 1) lineContent) - isLastNonSpaceChar = all (== ' ') (drop (col + length tk - 1) lineContent) + spacesBefore = T.length $ T.takeWhile (== ' ') (T.reverse $ T.take (col - 1) lineContent) + spacesAfter = T.length $ T.takeWhile (== ' ') (T.drop (col + T.length tk - 1) lineContent) + isFirstNonSpaceChar = T.all (== ' ') (T.take (col - 1) lineContent) + isLastNonSpaceChar = T.all (== ' ') (T.drop (col + T.length tk - 1) lineContent) prevLine | line > 0 = contentLines (line - 1) | otherwise = "" nextLine - | line + 1 < length (lines rzkBlocks) = contentLines (line + 1) + | line + 1 < length (T.lines rzkBlocks) = contentLines (line + 1) | otherwise = "" - spacesNextLine = length $ takeWhile (== ' ') nextLine + spacesNextLine = T.length $ T.takeWhile (== ' ') nextLine edits = spaceEdits ++ unicodeEdits spaceEdits | tk `elem` ["->", "→", ",", "*", "×", "="] = concatMap snd $ filter fst @@ -244,71 +246,71 @@ formatTextEdits contents = [FormattingEdit line (col - spacesBefore) line col " "]) -- Ensure exactly one space after (unless last char in line) , (not isLastNonSpaceChar && spacesAfter /= 1, - [FormattingEdit line (col + length tk) line (col + length tk + spacesAfter) " "]) + [FormattingEdit line (col + T.length tk) line (col + T.length tk + spacesAfter) " "]) -- If last char in line, move it to next line (except for lambda arrow) , (isLastNonSpaceChar && not (lambdaArrow s), -- This is split into 2 edits to avoid possible overlap with unicode replacement -- 1. Add a new line (with relevant spaces) before the token [ FormattingEdit line (col - spacesBefore) line col $ - "\n" ++ replicate (2 `max` (spacesNextLine - (spacesNextLine `min` 2))) ' ' + "\n" <> T.replicate (2 `max` (spacesNextLine - (spacesNextLine `min` 2))) " " -- 2. Replace the new line and spaces after the token with a single space - , FormattingEdit line (col + length tk) (line + 1) (spacesNextLine + 1) " " + , FormattingEdit line (col + T.length tk) (line + 1) (spacesNextLine + 1) " " ]) -- If lambda -> is first char in line, move it to the previous line , (isFirstNonSpaceChar && isArrow && lambdaArrow s, - [FormattingEdit (line - 1) (length prevLine + 1) line (col + length tk + spacesAfter) $ - " " ++ tk ++ "\n" ++ replicate spacesBefore ' ']) + [FormattingEdit (line - 1) (T.length prevLine + 1) line (col + T.length tk + spacesAfter) $ + " " <> tk <> "\n" <> T.replicate spacesBefore " "]) ] | otherwise = [] unicodeEdits | Just unicodeToken <- tk `lookup` unicodeTokens = - [ FormattingEdit line col line (col + length tk) unicodeToken + [ FormattingEdit line col line (col + T.length tk) unicodeToken ] | otherwise = [] go s (_:tks) = go s tks -- Adapted from https://hackage.haskell.org/package/lsp-types-2.1.0.0/docs/Language-LSP-Protocol-Types.html#g:7 -applyTextEdit :: FormattingEdit -> String -> String +applyTextEdit :: FormattingEdit -> T.Text -> T.Text applyTextEdit (FormattingEdit sl sc el ec newText) oldText = let (_, afterEnd) = splitAtPos (el-1, ec -1) oldText (beforeStart, _) = splitAtPos (sl-1, sc-1) oldText in mconcat [beforeStart, newText, afterEnd] where - splitAtPos :: (Int, Int) -> String -> (String, String) - splitAtPos (l, c) t = let index = c + startLineIndex l t in splitAt index t + splitAtPos :: (Int, Int) -> T.Text -> (T.Text, T.Text) + splitAtPos (l, c) t = let index = c + startLineIndex l t in T.splitAt index t - startLineIndex :: Int -> String -> Int + startLineIndex :: Int -> T.Text -> Int startLineIndex 0 _ = 0 startLineIndex line t' = - case elemIndex '\n' t' of - Just i -> i + 1 + startLineIndex (line - 1) (drop (i + 1) t') - Nothing -> length t' + case T.findIndex (=='\n') t' of + Just i -> i + 1 + startLineIndex (line - 1) (T.drop (i + 1) t') + Nothing -> T.length t' -applyTextEdits :: [FormattingEdit] -> String -> String +applyTextEdits :: [FormattingEdit] -> T.Text -> T.Text applyTextEdits edits contents = foldr applyTextEdit contents (sort edits) -- | Format Rzk code, returning the formatted version. -format :: String -> String +format :: T.Text -> T.Text format = applyTextEdits =<< formatTextEdits -- | Format Rzk code from a file -formatFile :: FilePath -> IO String +formatFile :: FilePath -> IO T.Text formatFile path = do contents <- T.readFile path - return (format (T.unpack contents)) + return (format contents) -- | Format the file and write the result back to the file. formatFileWrite :: FilePath -> IO () -formatFileWrite path = formatFile path >>= writeFile path +formatFileWrite path = formatFile path >>= T.writeFile path -- | Check if the given Rzk source code is well formatted. -- This is useful for automation tasks. -isWellFormatted :: String -> Bool +isWellFormatted :: T.Text -> Bool isWellFormatted src = null (formatTextEdits src) -- | Same as 'isWellFormatted', but reads the source code from a file. isWellFormattedFile :: FilePath -> IO Bool isWellFormattedFile path = do contents <- T.readFile path - return (isWellFormatted (T.unpack contents)) + return (isWellFormatted contents) diff --git a/rzk/src/Rzk/Main.hs b/rzk/src/Rzk/Main.hs index 4faacc738..57699d793 100644 --- a/rzk/src/Rzk/Main.hs +++ b/rzk/src/Rzk/Main.hs @@ -7,6 +7,8 @@ module Rzk.Main where import Control.Monad (forM, when) import Data.List (sort) +import qualified Data.Text as T +import qualified Data.Text.IO as T import qualified Data.Yaml as Yaml import System.Directory (doesPathExist) import System.FilePath.Glob (glob) @@ -19,11 +21,11 @@ import Rzk.TypeCheck parseStdin :: IO Rzk.Module parseStdin = do - result <- Rzk.parseModule <$> getContents + result <- Rzk.parseModule <$> T.getContents case result of Left err -> do - putStrLn ("An error occurred when parsing stdin") - error err + putStrLn "An error occurred when parsing stdin" + error (T.unpack err) Right rzkModule -> return rzkModule -- | Finds matches to the given pattern in the current working directory. @@ -80,23 +82,23 @@ parseRzkFilesOrStdin = \case expandedPaths <- foldMap globNonEmpty paths forM expandedPaths $ \path -> do putStrLn ("Loading file " <> path) - result <- Rzk.parseModule <$> readFile path + result <- Rzk.parseModule <$> T.readFile path case result of Left err -> do putStrLn ("An error occurred when parsing file " <> path) - error err + error (T.unpack err) Right rzkModule -> return (path, rzkModule) -typecheckString :: String -> Either String String +typecheckString :: T.Text -> Either T.Text T.Text typecheckString moduleString = do rzkModule <- Rzk.parseModule moduleString case defaultTypeCheck (typecheckModules [rzkModule]) of - Left err -> Left $ unlines + Left err -> Left $ T.unlines [ "An error occurred when typechecking!" , "Rendering type error... (this may take a few seconds)" - , unlines + , T.unlines [ "Type Error:" - , ppTypeErrorInScopedContext' BottomUp err + , T.pack $ ppTypeErrorInScopedContext' BottomUp err ] ] Right _ -> pure "Everything is ok!" diff --git a/rzk/test/Rzk/FormatSpec.hs b/rzk/test/Rzk/FormatSpec.hs index 0ac91d09d..81fe00f49 100644 --- a/rzk/test/Rzk/FormatSpec.hs +++ b/rzk/test/Rzk/FormatSpec.hs @@ -4,14 +4,15 @@ Description : Tests related to the formatter module -} module Rzk.FormatSpec where +import qualified Data.Text.IO as T import Test.Hspec -import Rzk.Format (format, isWellFormatted) +import Rzk.Format (format, isWellFormatted) formatsTo :: FilePath -> FilePath -> Expectation formatsTo beforePath afterPath = do - beforeSrc <- readFile ("test/files/" ++ beforePath) - afterSrc <- readFile ("test/files/" ++ afterPath) + beforeSrc <- T.readFile ("test/files/" ++ beforePath) + afterSrc <- T.readFile ("test/files/" ++ afterPath) format beforeSrc `shouldBe` afterSrc isWellFormatted afterSrc `shouldBe` True -- idempotency