From 38c082f4ca700f236c394b7f4eba887b7cac12ff Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 11 May 2014 21:55:20 -0700 Subject: [PATCH] Preliminary changes for UTF-8 support. This patch includes everything from the patch by mcmtroffaes in #42 except the compUTF8 option itself, which is commented out pending release of a version of regex-pcre-builtin that supports it. When a supporting version is released, we can remove the comment here, conditionally on the version of regex-pcre-builtin. See #42. --- Text/Highlighting/Kate/Common.hs | 15 ++++++++------- highlighting-kate.cabal | 2 +- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/Text/Highlighting/Kate/Common.hs b/Text/Highlighting/Kate/Common.hs index f623b02..fbcd931 100644 --- a/Text/Highlighting/Kate/Common.hs +++ b/Text/Highlighting/Kate/Common.hs @@ -15,8 +15,9 @@ module Text.Highlighting.Kate.Common where #ifdef _PCRE_LIGHT import Text.Regex.PCRE.Light.Char8 #else +import Data.ByteString.UTF8 (fromString, toString) import System.IO.Unsafe (unsafePerformIO) -import Text.Regex.PCRE.String +import Text.Regex.PCRE.ByteString #endif import Text.Highlighting.Kate.Types import Text.ParserCombinators.Parsec hiding (State) @@ -186,11 +187,10 @@ compileRegex caseSensitive regexpStr = let opts = [anchored] ++ [caseless | not caseSensitive] in compile ('.' : convertOctal regexpStr) opts #else - let opts = compAnchored + if caseSensitive - then 0 - else compCaseless + let opts = compAnchored + {- compUTF8 + -} + if caseSensitive then 0 else compCaseless in case unsafePerformIO $ compile opts (execNotEmpty) - ('.' : convertOctal regexpStr) of + (fromString ('.' 
: convertOctal regexpStr)) of Left _ -> error $ "Error compiling regex: " ++ show regexpStr Right r -> r #endif @@ -199,8 +199,9 @@ matchRegex :: Regex -> String -> KateParser (Maybe [String]) #ifdef _PCRE_LIGHT matchRegex r s = return $ match r s [exec_notempty] #else -matchRegex r s = case unsafePerformIO (regexec r s) of - Right (Just (_, mat, _ , capts)) -> return $ Just (mat : capts) +matchRegex r s = case unsafePerformIO (regexec r (fromString s)) of + Right (Just (_, mat, _ , capts)) -> return $ + Just $ map toString (mat : capts) Right Nothing -> return Nothing Left matchError -> fail $ show matchError #endif diff --git a/highlighting-kate.cabal b/highlighting-kate.cabal index 40b5eaf..a0f1577 100644 --- a/highlighting-kate.cabal +++ b/highlighting-kate.cabal @@ -154,7 +154,7 @@ Library cpp-options: -D_PCRE_LIGHT else Build-depends: regex-pcre-builtin - Build-Depends: parsec, mtl, blaze-html >= 0.4.2 && < 0.8 + Build-Depends: parsec, mtl, blaze-html >= 0.4.2 && < 0.8, utf8-string Exposed-Modules: Text.Highlighting.Kate Text.Highlighting.Kate.Syntax Text.Highlighting.Kate.Types