Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement bash (with globstar) style globbing #2522

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Cabal/Cabal.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ library
Distribution.Compat.Exception
Distribution.Compat.ReadP
Distribution.Compiler
Distribution.Utils.Glob
Distribution.InstalledPackageInfo
Distribution.License
Distribution.Make
Expand Down Expand Up @@ -241,6 +242,9 @@ library
Distribution.Simple.GHC.IPI641
Distribution.Simple.GHC.IPI642
Distribution.Simple.GHC.ImplInfo
Distribution.Utils.Glob.Type
Distribution.Utils.Glob.Parse
Distribution.Utils.Glob.Match
Paths_Cabal

if flag(bundled-binary-generic)
Expand All @@ -260,6 +264,7 @@ test-suite unit-tests
UnitTests.Distribution.Compat.ReadP
UnitTests.Distribution.Simple.Program.Internal
UnitTests.Distribution.Utils.NubList
UnitTests.Distribution.Utils.Glob
main-is: UnitTests.hs
build-depends:
base,
Expand Down
6 changes: 3 additions & 3 deletions Cabal/Distribution/PackageDescription/Check.hs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ import Distribution.License
import Distribution.Simple.CCompiler
( filenameCDialect )
import Distribution.Simple.Utils
( cabalVersion, intercalate, parseFileGlob, FileGlob(..), lowercase )
( cabalVersion, intercalate, parseFileGlob, isRealGlob, lowercase )

import Distribution.Version
( Version(..)
Expand Down Expand Up @@ -1098,8 +1098,8 @@ checkCabalVersion pkg =
dataFilesUsingGlobSyntax = filter usesGlobSyntax (dataFiles pkg)
extraSrcFilesUsingGlobSyntax = filter usesGlobSyntax (extraSrcFiles pkg)
usesGlobSyntax str = case parseFileGlob str of
Just (FileGlob _ _) -> True
_ -> False
Just g -> isRealGlob g
Nothing -> False

versionRangeExpressions =
[ dep | dep@(Dependency _ vr) <- buildDepends pkg
Expand Down
55 changes: 18 additions & 37 deletions Cabal/Distribution/Simple/Utils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,12 @@ module Distribution.Simple.Utils (
isInSearchPath,
addLibraryPath,

-- * simple file globbing
-- * file globbing
matchFileGlob,
matchDirFileGlob,
parseFileGlob,
FileGlob(..),
Glob(..),
isRealGlob,

-- * modification time
moreRecentFile,
Expand Down Expand Up @@ -156,7 +157,7 @@ import System.Exit
import System.FilePath
( normalise, (</>), (<.>)
, getSearchPath, joinPath, takeDirectory, splitFileName
, splitExtension, splitExtensions, splitDirectories
, splitExtension, splitDirectories
, searchPathSeparator )
import System.Directory
( createDirectory, renameFile, removeDirectoryRecursive )
Expand Down Expand Up @@ -199,6 +200,7 @@ import Distribution.Compat.TempFile
import Distribution.Compat.Exception
( tryIO, catchIO, catchExit )
import Distribution.Verbosity
import Distribution.Utils.Glob

#ifdef VERSION_base
import qualified Paths_Cabal (version)
Expand Down Expand Up @@ -723,43 +725,22 @@ addLibraryPath os paths = addEnv
----------------
-- File globbing

data FileGlob
-- | No glob at all, just an ordinary file
= NoGlob FilePath

-- | dir prefix and extension, like @\"foo\/bar\/\*.baz\"@ corresponds to
-- @FileGlob \"foo\/bar\" \".baz\"@
| FileGlob FilePath String

parseFileGlob :: FilePath -> Maybe FileGlob
parseFileGlob filepath = case splitExtensions filepath of
(filepath', ext) -> case splitFileName filepath' of
(dir, "*") | '*' `elem` dir
|| '*' `elem` ext
|| null ext -> Nothing
| null dir -> Just (FileGlob "." ext)
| otherwise -> Just (FileGlob dir ext)
_ | '*' `elem` filepath -> Nothing
| otherwise -> Just (NoGlob filepath)

matchFileGlob :: FilePath -> IO [FilePath]
matchFileGlob = matchDirFileGlob "."

matchDirFileGlob :: FilePath -> FilePath -> IO [FilePath]
matchDirFileGlob dir filepath = case parseFileGlob filepath of
Nothing -> die $ "invalid file glob '" ++ filepath
++ "'. Wildcards '*' are only allowed in place of the file"
++ " name, not in the directory name or file extension."
++ " If a wildcard is used it must be with an file extension."
Just (NoGlob filepath') -> return [filepath']
Just (FileGlob dir' ext) -> do
files <- getDirectoryContents (dir </> dir')
case [ dir' </> file
| file <- files
, let (name, ext') = splitExtensions file
, not (null name) && ext' == ext ] of
[] -> die $ "filepath wildcard '" ++ filepath
++ "' does not match any files."
-- | Return a list of files matching a glob pattern, relative to a given source
-- directory. Note that not all the returned files are guaranteed to exist.
matchDirFileGlob :: FilePath -> String -> IO [FilePath]
matchDirFileGlob dir pattern = case parseFileGlob pattern of
Nothing ->
die $ "invalid file glob '" ++ pattern ++ "'."
Just (NoGlob filepath') ->
return [filepath']
Just (Glob glob) -> do
files <- getDirectoryContentsRecursive dir
case filter (realIsMatch glob) files of
[] -> die $ "glob pattern '" ++ pattern
++ "' does not match any files."
matches -> return matches

--------------------
Expand Down
12 changes: 12 additions & 0 deletions Cabal/Distribution/Utils/Glob.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
module Distribution.Utils.Glob
( Glob(..)
, isRealGlob
, parseFileGlob
, isMatch
, realIsMatch
)
where

import Distribution.Utils.Glob.Type
import Distribution.Utils.Glob.Parse
import Distribution.Utils.Glob.Match
103 changes: 103 additions & 0 deletions Cabal/Distribution/Utils/Glob/Match.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
module Distribution.Utils.Glob.Match where

import Control.Monad
( (>=>) )
import Data.Maybe
( listToMaybe )
import Data.List
( stripPrefix, tails )
import Distribution.Utils.Glob.Type

isMatch :: Glob -> FilePath -> Bool
isMatch (Glob realGlob) fp = realIsMatch realGlob fp
isMatch (NoGlob fp') fp = fp' == fp

realIsMatch :: RealGlob -> FilePath -> Bool
realIsMatch (RealGlob parts) fp = isMatch' True parts (toSegments fp)

toSegments :: FilePath -> [String]
toSegments = filter (not . null) . endBy '/'

-- Not quite the same as the function from Data.List.Split (whose first
-- argument is a sublist, not a single list element). However, we only need to
-- split on individual elements here, and this allows for a simpler
-- implementation.
endBy :: Eq a => a -> [a] -> [[a]]
endBy _ [] = []
endBy splitter list =
let (next, rest) = span (/= splitter) list
in next : endBy splitter (drop 1 rest)

-- | Given:
-- * A Bool which records whether we are at the beginning of the current
-- segment
-- * A list of GlobParts
-- * A list of path segments in a file path
-- Return whether the glob parts list matches the file path.
isMatch' :: Bool -> [GlobPart] -> [String] -> Bool
isMatch' _ (Literal l : parts) (seg : segs) =
case stripPrefix l seg of
Just seg' -> isMatch' False parts (seg' : segs)
Nothing -> False
isMatch' _ (PathSeparator : parts) (seg : segs)
| seg == "" = isMatch' True parts segs
| otherwise = False
isMatch' _ (CharList cs : parts) ((h:tl) : segs) =
if charListIsMatch cs h
then isMatch' False parts (tl : segs)
else False
isMatch' _ (CharListComplement cs : parts) ((h:tl) : segs) =
if charListIsMatch cs h
then False
else isMatch' False parts (tl : segs)
isMatch' startSegment (WildOne : parts) ((h:tl) : segs)
| startSegment && h == '.' = False
| otherwise = isMatch' False parts (tl : segs)
isMatch' startSegment (WildMany : parts) segs
| startSegment && (listToMaybe >=> listToMaybe) segs == Just '.' = False
| otherwise =
case segs of
first : rest ->
let candidates = map (:rest) (tails first)
in any (isMatch' False parts) candidates
[] ->
isMatch' startSegment parts segs
isMatch' startSegment (WildManyRecursive : parts) segs
| startSegment && (listToMaybe >=> listToMaybe) segs == Just '.' = False
| otherwise =
anyCandidates || handlePathSep
where
anyCandidates =
any (\(start, segs') -> isMatch' start parts segs') candidates
candidates = iterateWhile (drop1' . snd) (False, segs)
handlePathSep =
case parts of
PathSeparator : parts' -> isMatch' startSegment parts' segs
_ -> False

isMatch' startSegment (Choice gs : parts) segs =
any (\g -> isMatch' startSegment (g ++ parts) segs) gs
isMatch' _ [] [""] = True
isMatch' _ _ _ = False

charListIsMatch :: [CharListPart] -> Char -> Bool
charListIsMatch parts c = any (matches c) parts
where
matches x (CharLiteral y) = x == y
matches x (Range start end) = start <= x && x <= end

-- | Drop one character from a list of path segments, or if the first segment
-- is empty, move on to the next segment.
drop1' :: [String] -> Maybe (Bool, [String])
drop1' [] = Nothing
drop1' ("" : segs) = Just (True, segs)
drop1' (seg : segs) = Just (False, drop 1 seg : segs)

-- | Generate a list of values obtained by repeatedly applying a function
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can just use unfoldr instead of this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just tried this and it's a bit awkward with unfoldr. Here's the best way I could think of for reimplementing iterateWhile in terms of unfoldr:

iterateWhile f x = x : unfoldr (fmap tuple . f) x
  where
  tuple z = (z, z)

In particular, unfoldr seems to always leave us with one fewer element in the list than we want (hence the x : ...).

I also can't see a way of rewriting the call sites of iterateWhile to use unfoldr, other than to inline the above definition, which again, seems a bit awkward.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, iterate is just \f x -> unfoldr (\x' -> Just (x', f x')) x, so iterateWhile is \f x -> unfoldr (fmap (\x' -> (x', f x'))) (Just x).

Rewriting the call sites is easy if you pass in a function of type a -> Maybe (a, a) instead of a -> Maybe a. E.g. iterateWhile tailMay == unfoldr (\l -> case l of [] -> Nothing; (_:tl) -> Just (tl, tl)) == unfoldr (fmap (\l -> (l,l)) . tailMay).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The last example should be iterateWhile tailMay == unfoldr (\l -> case l of [] -> Nothing; (_:tl) -> Just (l, tl)) == unfoldr (\l -> fmap (\l' -> (l, l')) . tailMay $ l).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not quite - iterateWhile tailMay [1,2,3] == [[1,2,3],[2,3],[3],[]] but unfoldr (\l -> fmap (\l' -> (l, l')) . tailMay $ l) [1,2,3] == [[1,2,3],[2,3],[3]].

It's just occurred to me that iterateWhile tailMay == Data.List.tails though, shall I make that replacement and remove tailMay?

That does still leave one call site of iterateWhile.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's just occurred to me that iterateWhile tailMay == Data.List.tails though, shall I make that replacement and remove tailMay?

Sure, the less code, the better.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, yeah, you're right, you need something like unfoldr (fmap (\x -> (x, case x of [] -> Nothing; (_:tl) -> Just tl))) (Just [1,2,3]) to express tails with unfoldr.

-- to an initial value, until it stops returning Just.
iterateWhile :: (a -> Maybe a) -> a -> [a]
iterateWhile f x = x : rest
where
rest = case f x of
Just y -> iterateWhile f y
Nothing -> []
139 changes: 139 additions & 0 deletions Cabal/Distribution/Utils/Glob/Parse.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
module Distribution.Utils.Glob.Parse where

import Control.Monad
( unless, liftM2 )
import Distribution.Compat.ReadP
import Distribution.Utils.Glob.Type

-- | We want to ensure this works the same way on all platforms, so we do not
-- use System.FilePath here.
--
-- Backslashes (like on Windows) may not be used as path separators, because
-- they would significantly complicate the implementation for little benefit.
pathSeparators :: [Char]
pathSeparators = "/"

charIsPathSeparator :: Char -> Bool
charIsPathSeparator x = x `elem` pathSeparators

-- | Characters which must not be parsed as literals if not escaped in glob
-- patterns
globSpecialChars :: [Char]
globSpecialChars = pathSeparators ++ "\\{}*[]?!^,"

isSpecialChar :: Char -> Bool
isSpecialChar x = x `elem` globSpecialChars

-- | Characters which can occur at the start of a bracket pattern to transform
-- it into its complement.
bracketComplementors :: [Char]
bracketComplementors = "^!"

isBracketComplementor :: Char -> Bool
isBracketComplementor x = x `elem` bracketComplementors

-- | Characters which must not be parsed as literals if not escaped in bracket
-- patterns.
bracketSpecialChars :: [Char]
bracketSpecialChars = bracketComplementors ++ "-[]\\/"

isBracketSpecialChar :: Char -> Bool
isBracketSpecialChar x = x `elem` bracketSpecialChars

-- | Like manyTill, but always consumes at least one occurence of 'p'.
manyTill1 :: ReadP r a -> ReadP [a] end -> ReadP r [a]
manyTill1 p end = liftM2 (:) p (manyTill p end)

-- | Parse an escape sequence. Anything is allowed, except a path separator.
escapeSequence :: ReadP r Char
escapeSequence = char '\\' >> satisfy (not . charIsPathSeparator)

parseLiteral :: ReadP r GlobPart
parseLiteral = fmap Literal $ manyTill1 literalSegment literalEnd
where
literalSegment = notSpecial +++ escapeSequence
notSpecial = satisfy (not . isSpecialChar)
literalEnd = do
str <- look
case str of
(x:_) | isSpecialChar x -> return ()
"" -> return ()
_ -> pfail

parsePathSeparator :: ReadP r GlobPart
parsePathSeparator = munch1 (== '/') >> return PathSeparator

parseCharList :: ReadP r GlobPart
parseCharList =
between (char '[') (char ']')
(fmap CharList (many1 parseCharListPart))

parseCharListComplement :: ReadP r GlobPart
parseCharListComplement =
between (char '[') (char ']')
(satisfy isBracketComplementor
>> fmap CharListComplement (many1 parseCharListPart))

parseCharListPart :: ReadP r CharListPart
parseCharListPart = range <++ fmap CharLiteral literal
where
range = do
start <- literal
_ <- char '-'
end <- literal
unless (start < end) pfail
return (Range start end)

literal = satisfy (not . isBracketSpecialChar) +++ escapeSequence

parseWildOne :: ReadP r GlobPart
parseWildOne = char '?' >> return WildOne

-- | Parses either a WildMany or a WildManyRecursive.
parseWildMany :: ReadP r GlobPart
parseWildMany = do
str <- munch1 (== '*')
case str of
"*" -> return WildMany
"**" -> return WildManyRecursive
_ -> pfail

parseChoice :: ReadP r GlobPart
parseChoice =
between (char '{') (char '}') $ do
first <- parseGlobParts
_ <- char ','
rest <- sepBy1 (parseGlobParts <++ emptyGlob) (char ',')
return (Choice (first : rest))
where
emptyGlob = return []

parseGlobPart :: ReadP r GlobPart
parseGlobPart = choice
[ parseLiteral
, parsePathSeparator
, parseCharList
, parseCharListComplement
, parseWildOne
, parseWildMany
, parseChoice
]

parseGlobParts :: ReadP r [GlobPart]
parseGlobParts = many1 parseGlobPart

parseFileGlob :: String -> Maybe Glob
parseFileGlob fp =
case fullyParsed (readP_to_S parseGlobParts fp) of
[parts] -> Just (mkGlob parts)
_ -> Nothing
where
fullyParsed = map fst . filter (null . snd)
mkGlob parts =
case sequence (map asLiteral parts) of
Just literalParts -> NoGlob (concat literalParts)
Nothing -> Glob (RealGlob parts)

asLiteral (Literal str) = Just str
asLiteral (PathSeparator) = Just "/"
asLiteral _ = Nothing
Loading