haskell · hdgarrood · Jun 28, 2014 · Apr 4, 2015 · Apr 5, 2015 · Apr 5, 2015
diff --git a/Cabal/Cabal.cabal b/Cabal/Cabal.cabal
@@ -163,6 +163,7 @@ library
     Distribution.Compat.Exception
     Distribution.Compat.ReadP
     Distribution.Compiler
+    Distribution.Utils.Glob
     Distribution.InstalledPackageInfo
     Distribution.License
     Distribution.Make
@@ -241,6 +242,9 @@ library
     Distribution.Simple.GHC.IPI641
     Distribution.Simple.GHC.IPI642
     Distribution.Simple.GHC.ImplInfo
+    Distribution.Utils.Glob.Type
+    Distribution.Utils.Glob.Parse
+    Distribution.Utils.Glob.Match
     Paths_Cabal
 
   if flag(bundled-binary-generic)
@@ -260,6 +264,7 @@ test-suite unit-tests
     UnitTests.Distribution.Compat.ReadP
     UnitTests.Distribution.Simple.Program.Internal
     UnitTests.Distribution.Utils.NubList
+    UnitTests.Distribution.Utils.Glob
   main-is: UnitTests.hs
   build-depends:
     base,

diff --git a/Cabal/Distribution/PackageDescription/Check.hs b/Cabal/Distribution/PackageDescription/Check.hs
@@ -55,7 +55,7 @@ import Distribution.License
 import Distribution.Simple.CCompiler
          ( filenameCDialect )
 import Distribution.Simple.Utils
-         ( cabalVersion, intercalate, parseFileGlob, FileGlob(..), lowercase )
+         ( cabalVersion, intercalate, parseFileGlob, isRealGlob, lowercase )
 
 import Distribution.Version
          ( Version(..)
@@ -1098,8 +1098,8 @@ checkCabalVersion pkg =
     dataFilesUsingGlobSyntax     = filter usesGlobSyntax (dataFiles pkg)
     extraSrcFilesUsingGlobSyntax = filter usesGlobSyntax (extraSrcFiles pkg)
     usesGlobSyntax str = case parseFileGlob str of
-      Just (FileGlob _ _) -> True
-      _                   -> False
+      Just g  -> isRealGlob g
+      Nothing -> False
 
     versionRangeExpressions =
         [ dep | dep@(Dependency _ vr) <- buildDepends pkg

diff --git a/Cabal/Distribution/Simple/Utils.hs b/Cabal/Distribution/Simple/Utils.hs
@@ -78,11 +78,12 @@ module Distribution.Simple.Utils (
         isInSearchPath,
         addLibraryPath,
 
-        -- * simple file globbing
+        -- * file globbing
         matchFileGlob,
         matchDirFileGlob,
         parseFileGlob,
-        FileGlob(..),
+        Glob(..),
+        isRealGlob,
 
         -- * modification time
         moreRecentFile,
@@ -156,7 +157,7 @@ import System.Exit
 import System.FilePath
     ( normalise, (</>), (<.>)
     , getSearchPath, joinPath, takeDirectory, splitFileName
-    , splitExtension, splitExtensions, splitDirectories
+    , splitExtension, splitDirectories
     , searchPathSeparator )
 import System.Directory
     ( createDirectory, renameFile, removeDirectoryRecursive )
@@ -199,6 +200,7 @@ import Distribution.Compat.TempFile
 import Distribution.Compat.Exception
          ( tryIO, catchIO, catchExit )
 import Distribution.Verbosity
+import Distribution.Utils.Glob
 
 #ifdef VERSION_base
 import qualified Paths_Cabal (version)
@@ -723,43 +725,22 @@ addLibraryPath os paths = addEnv
 ----------------
 -- File globbing
 
-data FileGlob
-   -- | No glob at all, just an ordinary file
-   = NoGlob FilePath
-
-   -- | dir prefix and extension, like @\"foo\/bar\/\*.baz\"@ corresponds to
-   --    @FileGlob \"foo\/bar\" \".baz\"@
-   | FileGlob FilePath String
-
-parseFileGlob :: FilePath -> Maybe FileGlob
-parseFileGlob filepath = case splitExtensions filepath of
-  (filepath', ext) -> case splitFileName filepath' of
-    (dir, "*") | '*' `elem` dir
-              || '*' `elem` ext
-              || null ext            -> Nothing
-               | null dir            -> Just (FileGlob "." ext)
-               | otherwise           -> Just (FileGlob dir ext)
-    _          | '*' `elem` filepath -> Nothing
-               | otherwise           -> Just (NoGlob filepath)
-
 matchFileGlob :: FilePath -> IO [FilePath]
 matchFileGlob = matchDirFileGlob "."
 
-matchDirFileGlob :: FilePath -> FilePath -> IO [FilePath]
-matchDirFileGlob dir filepath = case parseFileGlob filepath of
-  Nothing -> die $ "invalid file glob '" ++ filepath
-                ++ "'. Wildcards '*' are only allowed in place of the file"
-                ++ " name, not in the directory name or file extension."
-                ++ " If a wildcard is used it must be with an file extension."
-  Just (NoGlob filepath') -> return [filepath']
-  Just (FileGlob dir' ext) -> do
-    files <- getDirectoryContents (dir </> dir')
-    case   [ dir' </> file
-           | file <- files
-           , let (name, ext') = splitExtensions file
-           , not (null name) && ext' == ext ] of
-      []      -> die $ "filepath wildcard '" ++ filepath
-                    ++ "' does not match any files."
+-- | Return the files matching a glob pattern, relative to a given source
+-- directory. A plain path (no glob syntax) is returned without checking it.
+matchDirFileGlob :: FilePath -> String -> IO [FilePath]
+matchDirFileGlob dir pattern = case parseFileGlob pattern of
+  Nothing ->
+    die $ "invalid file glob '" ++ pattern ++ "'."
+  Just (NoGlob filepath') ->
+    return [filepath']
+  Just (Glob glob) -> do
+    files <- getDirectoryContentsRecursive dir
+    case filter (realIsMatch glob) files of
+      [] -> die $ "glob pattern '" ++ pattern
+                  ++ "' does not match any files."
       matches -> return matches
 
 --------------------

diff --git a/Cabal/Distribution/Utils/Glob.hs b/Cabal/Distribution/Utils/Glob.hs
@@ -0,0 +1,12 @@
+module Distribution.Utils.Glob
+  ( Glob(..)
+  , isRealGlob
+  , parseFileGlob
+  , isMatch
+  , realIsMatch
+  )
+  where
+
+import Distribution.Utils.Glob.Type
+import Distribution.Utils.Glob.Parse
+import Distribution.Utils.Glob.Match
diff --git a/Cabal/Distribution/Utils/Glob/Match.hs b/Cabal/Distribution/Utils/Glob/Match.hs
@@ -0,0 +1,103 @@
+module Distribution.Utils.Glob.Match where
+
+import Control.Monad
+    ( (>=>) )
+import Data.Maybe
+    ( listToMaybe )
+import Data.List
+    ( stripPrefix, tails )
+import Distribution.Utils.Glob.Type
+
+isMatch :: Glob -> FilePath -> Bool -- 'NoGlob' needs the exact same path
+isMatch (NoGlob expected) path = expected == path
+isMatch (Glob real) path       = realIsMatch real path
+
+realIsMatch :: RealGlob -> FilePath -> Bool -- begins at a segment start
+realIsMatch (RealGlob parts) = isMatch' True parts . toSegments
+
+toSegments :: FilePath -> [String] -- empty segments are dropped
+toSegments path = [ seg | seg <- endBy '/' path, not (null seg) ]
+
+-- Split a list on a single delimiter element, discarding the delimiters.
+-- Unlike the function of the same name in Data.List.Split, the delimiter is
+-- one element rather than a sublist; that is all we need here and it keeps
+-- the implementation simple. A trailing delimiter adds no empty final chunk.
+endBy :: Eq a => a -> [a] -> [[a]]
+endBy _ [] = []
+endBy splitter list = chunk : endBy splitter (drop 1 remainder)
+  where
+  (chunk, remainder) = break (== splitter) list
+
+-- | Match a list of glob parts against the segments of a file path. The
+-- 'Bool' records whether we are at the beginning of the current segment,
+-- where the wildcards refuse to match a leading dot. A segment that has been
+-- fully consumed stays in the list as an empty string until a
+-- 'PathSeparator' removes it, so a successful match ends with no parts left
+-- and exactly one empty segment; anything else falls through to 'False'.
+isMatch' :: Bool -> [GlobPart] -> [String] -> Bool
+isMatch' _ (Literal l : parts) (seg : segs) =
+  case stripPrefix l seg of
+    Just seg' -> isMatch' False parts (seg' : segs)
+    Nothing -> False
+isMatch' _ (PathSeparator : parts) (seg : segs)
+  | seg == "" = isMatch' True parts segs -- previous segment fully consumed
+  | otherwise = False
+isMatch' _ (CharList cs : parts) ((h:tl) : segs) =
+  if charListIsMatch cs h
+    then isMatch' False parts (tl : segs)
+    else False
+isMatch' _ (CharListComplement cs : parts) ((h:tl) : segs) =
+  if charListIsMatch cs h
+    then False
+    else isMatch' False parts (tl : segs)
+isMatch' startSegment (WildOne : parts) ((h:tl) : segs)
+  | startSegment && h == '.' = False
+  | otherwise = isMatch' False parts (tl : segs)
+isMatch' startSegment (WildMany : parts) segs
+  | startSegment && (listToMaybe >=> listToMaybe) segs == Just '.' = False
+  | otherwise =
+    case segs of
+      first : rest ->
+        let candidates = map (:rest) (tails first)
+        in  any (isMatch' False parts) candidates
+      [] ->
+        isMatch' startSegment parts segs
+isMatch' startSegment (WildManyRecursive : parts) segs
+   | startSegment && (listToMaybe >=> listToMaybe) segs == Just '.' = False
+   | otherwise =
+     anyCandidates || handlePathSep
+     where
+     anyCandidates =
+       any (\(start, segs') -> isMatch' start parts segs') candidates
+     candidates = iterateWhile (drop1' . snd) (False, segs)
+     handlePathSep =
+       case parts of
+         PathSeparator : parts' -> isMatch' startSegment parts' segs
+         _ -> False
+
+isMatch' startSegment (Choice gs : parts) segs =
+  any (\g -> isMatch' startSegment (g ++ parts) segs) gs
+isMatch' _ [] [""] = True -- no parts left and one fully consumed segment
+isMatch' _ _ _ = False
+
+charListIsMatch :: [CharListPart] -> Char -> Bool
+charListIsMatch parts c = any accepts parts
+  where
+  accepts (CharLiteral lit) = c == lit
+  accepts (Range lo hi)     = lo <= c && c <= hi
+
+-- | Step forward through the path segments: discard the head segment if it is
+-- empty (the 'Bool' is then 'True'), otherwise discard its first character.
+drop1' :: [String] -> Maybe (Bool, [String])
+drop1' []                 = Nothing
+drop1' ([] : later)       = Just (True, later)
+drop1' ((_ : cs) : later) = Just (False, cs : later)
+
+-- | Collect the initial value followed by each value obtained by applying the
+-- function again and again, stopping the first time it returns 'Nothing'.
+-- The initial value is always included, so the result is never empty; it is
+-- infinite if the function never returns 'Nothing'.
+iterateWhile :: (a -> Maybe a) -> a -> [a]
+iterateWhile step = go
+  where
+  go current = current : maybe [] go (step current)
diff --git a/Cabal/Distribution/Utils/Glob/Parse.hs b/Cabal/Distribution/Utils/Glob/Parse.hs
@@ -0,0 +1,139 @@
+module Distribution.Utils.Glob.Parse where
+
+import Control.Monad
+    ( unless, liftM2 )
+import Distribution.Compat.ReadP
+import Distribution.Utils.Glob.Type
+
+-- | The characters treated as path separators in glob patterns. This is
+-- deliberately independent of System.FilePath so that globs behave the same
+-- on every platform. Backslashes (as used on Windows) are not accepted as
+-- separators: supporting them would significantly complicate the
+-- implementation for little benefit.
+pathSeparators :: [Char]
+pathSeparators = "/"
+
+charIsPathSeparator :: Char -> Bool
+charIsPathSeparator = (`elem` pathSeparators)
+
+-- | Characters which are never parsed as part of a literal in a glob pattern
+-- unless they are escaped.
+globSpecialChars :: [Char]
+globSpecialChars = pathSeparators ++ "\\{}*[]?!^,"
+
+isSpecialChar :: Char -> Bool
+isSpecialChar = (`elem` globSpecialChars)
+
+-- | Characters which, placed at the start of a bracket pattern, turn it into
+-- its complement.
+bracketComplementors :: [Char]
+bracketComplementors = "^!"
+
+isBracketComplementor :: Char -> Bool
+isBracketComplementor = (`elem` bracketComplementors)
+
+-- | Characters which are never parsed as literals inside a bracket pattern
+-- unless they are escaped.
+bracketSpecialChars :: [Char]
+bracketSpecialChars = bracketComplementors ++ "-[]\\/"
+
+isBracketSpecialChar :: Char -> Bool
+isBracketSpecialChar = (`elem` bracketSpecialChars)
+
+-- | Like 'manyTill', but always consumes at least one occurrence of @p@.
+manyTill1 :: ReadP r a -> ReadP [a] end -> ReadP r [a]
+manyTill1 p end = liftM2 (:) p (manyTill p end)
+
+-- | A backslash plus the escaped character: anything but a path separator.
+escapeSequence :: ReadP r Char
+escapeSequence = do { _ <- char '\\'; satisfy (not . charIsPathSeparator) }
+
+-- | A literal: at least one plain or escaped character, ended where the
+-- remaining input is empty or starts with a special character.
+parseLiteral :: ReadP r GlobPart
+parseLiteral = fmap Literal (manyTill1 literalChar boundary)
+  where
+  literalChar = satisfy (not . isSpecialChar) +++ escapeSequence
+  -- Inspects the remaining input without consuming any of it.
+  boundary = look >>= atBoundary
+  atBoundary (c : _) | isSpecialChar c = return ()
+  atBoundary []                        = return ()
+  atBoundary _                         = pfail
+
+parsePathSeparator :: ReadP r GlobPart -- one or more slashes, one separator
+parsePathSeparator = do { _ <- munch1 (== '/'); return PathSeparator }
+
+-- | A bracket pattern such as @[abc]@ or @[a-z]@, with at least one entry.
+parseCharList :: ReadP r GlobPart
+parseCharList = between (char '[') (char ']') charList
+  where charList = fmap CharList (many1 parseCharListPart)
+
+-- | A complemented bracket pattern such as @[^abc]@ or @[!a-z]@.
+parseCharListComplement :: ReadP r GlobPart
+parseCharListComplement = between (char '[') (char ']') $ do
+  _ <- satisfy isBracketComplementor
+  fmap CharListComplement (many1 parseCharListPart)
+
+-- | One entry of a bracket pattern: a range such as @a-z@ when one can be
+-- parsed, otherwise a single (possibly escaped) character.
+parseCharListPart :: ReadP r CharListPart
+parseCharListPart = range <++ fmap CharLiteral literal
+  where
+  literal = satisfy (not . isBracketSpecialChar) +++ escapeSequence
+  range = do
+    lo <- literal
+    hi <- char '-' >> literal
+    unless (lo < hi) pfail -- only strictly increasing ranges are accepted
+    return (Range lo hi)
+
+parseWildOne :: ReadP r GlobPart -- a single question mark
+parseWildOne = do { _ <- char '?'; return WildOne }
+
+-- | Parse asterisks: exactly one gives 'WildMany', exactly two give
+-- 'WildManyRecursive', and any other count fails.
+parseWildMany :: ReadP r GlobPart
+parseWildMany = munch1 (== '*') >>= fromStars . length
+  where
+  fromStars 1 = return WildMany
+  fromStars 2 = return WildManyRecursive
+  fromStars _ = pfail
+
+-- | A brace pattern such as @{a,b}@ with at least two comma-separated
+-- alternatives; every alternative but the first may be empty.
+parseChoice :: ReadP r GlobPart
+parseChoice = between (char '{') (char '}') alternatives
+  where
+  alternatives = do
+    firstAlt <- parseGlobParts
+    otherAlts <- char ',' >> sepBy1 (parseGlobParts <++ return []) (char ',')
+    return (Choice (firstAlt : otherAlts))
+
+-- | Parse any single component of a glob pattern.
+parseGlobPart :: ReadP r GlobPart
+parseGlobPart =
+  parseLiteral
+  +++ parsePathSeparator
+  +++ parseCharList
+  +++ parseCharListComplement
+  +++ parseWildOne
+  +++ parseWildMany
+  +++ parseChoice
+
+parseGlobParts :: ReadP r [GlobPart]
+parseGlobParts = many1 parseGlobPart -- one or more parts, never empty
+
+-- | Parse a glob pattern. Succeeds only when exactly one parse consumes the
+-- whole input; patterns made purely of literals and separators give 'NoGlob'.
+parseFileGlob :: String -> Maybe Glob
+parseFileGlob fp =
+  case [ ps | (ps, "") <- readP_to_S parseGlobParts fp ] of
+    [parts] -> Just (mkGlob parts)
+    _       -> Nothing
+  where
+  -- Collapse to a plain path when no part needs real matching.
+  mkGlob parts =
+    maybe (Glob (RealGlob parts)) (NoGlob . concat) (mapM asLiteral parts)
+
+  asLiteral (Literal str) = Just str
+  asLiteral PathSeparator = Just "/"
+  asLiteral _             = Nothing