-
Notifications
You must be signed in to change notification settings - Fork 695
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement bash (with globstar) style globbing #2522
Changes from 4 commits
43dd0de
9434591
8f2e745
2bb20fa
72d038d
186c320
75a21e8
183a0aa
87678f0
829ccf3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
module Distribution.Utils.Glob | ||
( Glob(..) | ||
, isRealGlob | ||
, parseFileGlob | ||
, isMatch | ||
, realIsMatch | ||
) | ||
where | ||
|
||
import Distribution.Utils.Glob.Type | ||
import Distribution.Utils.Glob.Parse | ||
import Distribution.Utils.Glob.Match |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
module Distribution.Utils.Glob.Match where | ||
|
||
import Control.Monad | ||
( (>=>) ) | ||
import Data.Maybe | ||
( listToMaybe ) | ||
import Data.List | ||
( stripPrefix ) | ||
import Distribution.Utils.Glob.Type | ||
|
||
isMatch :: Glob -> FilePath -> Bool | ||
isMatch (Glob realGlob) fp = realIsMatch realGlob fp | ||
isMatch (NoGlob fp') fp = fp' == fp | ||
|
||
realIsMatch :: RealGlob -> FilePath -> Bool | ||
realIsMatch (RealGlob parts) fp = isMatch' True parts (toSegments fp) | ||
|
||
toSegments :: FilePath -> [String] | ||
toSegments = filter (not . null) . splitOn '/' | ||
|
||
-- Not quite the same as the function from Data.List.Split, but this allows | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
In which way it's "not quite the same"? Why not just copy the function from There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Copying the implementation from There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like this function is equivalent to |
||
-- for a simpler implementation | ||
splitOn :: Eq a => a -> [a] -> [[a]] | ||
splitOn _ [] = [] | ||
splitOn splitter list = | ||
let (next, rest) = span (/= splitter) list | ||
in next : splitOn splitter (drop 1 rest) | ||
|
||
-- | Given: | ||
-- * A Bool which records whether we are at the beginning of the current | ||
-- segment | ||
-- * A list of GlobParts | ||
-- * A list of path segments in a file path | ||
-- Return whether the glob parts list matches the file path. | ||
isMatch' :: Bool -> [GlobPart] -> [String] -> Bool | ||
isMatch' _ (Literal l : parts) (seg : segs) = | ||
case stripPrefix l seg of | ||
Just seg' -> isMatch' False parts (seg' : segs) | ||
Nothing -> False | ||
isMatch' _ (PathSeparator : parts) (seg : segs) | ||
| seg == "" = isMatch' True parts segs | ||
| otherwise = False | ||
isMatch' _ (CharList cs : parts) ((h:tl) : segs) = | ||
if charListIsMatch cs h | ||
then isMatch' False parts (tl : segs) | ||
else False | ||
isMatch' _ (CharListComplement cs : parts) ((h:tl) : segs) = | ||
if charListIsMatch cs h | ||
then False | ||
else isMatch' False parts (tl : segs) | ||
isMatch' startSegment (WildOne : parts) ((h:tl) : segs) | ||
| startSegment && h == '.' = False | ||
| otherwise = isMatch' False parts (tl : segs) | ||
isMatch' startSegment (WildMany : parts) segs | ||
| startSegment && (listToMaybe >=> listToMaybe) segs == Just '.' = False | ||
| otherwise = | ||
case segs of | ||
first : rest -> | ||
let candidates = map (:rest) (iterateWhile drop1 first) | ||
in any (isMatch' False parts) candidates | ||
[] -> | ||
isMatch' startSegment parts segs | ||
isMatch' startSegment (WildManyRecursive : parts) segs | ||
| startSegment && (listToMaybe >=> listToMaybe) segs == Just '.' = False | ||
| otherwise = | ||
anyCandidates || handlePathSep | ||
where | ||
anyCandidates = | ||
any (\(start, segs') -> isMatch' start parts segs') candidates | ||
candidates = iterateWhile (drop1' . snd) (False, segs) | ||
handlePathSep = | ||
case parts of | ||
PathSeparator : parts' -> isMatch' startSegment parts' segs | ||
_ -> False | ||
|
||
isMatch' startSegment (Choice gs : parts) segs = | ||
any (\g -> isMatch' startSegment (g ++ parts) segs) gs | ||
isMatch' _ [] [""] = True | ||
isMatch' _ _ _ = False | ||
|
||
charListIsMatch :: [CharListPart] -> Char -> Bool | ||
charListIsMatch parts c = any (matches c) parts | ||
where | ||
matches x (CharLiteral y) = x == y | ||
matches x (Range start end) = start <= x && x <= end | ||
|
||
-- | A safe version of 'tail'. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah cool, didn't know that, thanks! Will do. |
||
drop1 :: String -> Maybe String | ||
drop1 [] = Nothing | ||
drop1 (_ : tl) = Just tl | ||
|
||
-- | Drop one character from a list of path segments, or if the first segment | ||
-- is empty, move on to the next segment. | ||
drop1' :: [String] -> Maybe (Bool, [String]) | ||
drop1' [] = Nothing | ||
drop1' ("" : segs) = Just (True, segs) | ||
drop1' (seg : segs) = Just (False, drop 1 seg : segs) | ||
|
||
-- | Generate a list of values obtained by repeatedly applying a function | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you can just use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I just tried this and it's a bit awkward with
In particular, I also can't see a way of rewriting the call sites of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, Rewriting the call sites is easy if you pass in a function of type There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not quite - It's just occurred to me that That does still leave one call site of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Sure, the less code, the better. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, yeah, you're right, you need something like |
||
-- to an initial value, until it stops returning Just. | ||
iterateWhile :: (a -> Maybe a) -> a -> [a] | ||
iterateWhile f x = x : rest | ||
where | ||
rest = case f x of | ||
Just y -> iterateWhile f y | ||
Nothing -> [] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
module Distribution.Utils.Glob.Parse where | ||
|
||
import Control.Monad | ||
( unless, liftM2 ) | ||
import Distribution.Compat.ReadP | ||
import Distribution.Utils.Glob.Type | ||
|
||
-- | We want to ensure this works the same way on all platforms, so we do not | ||
-- use System.FilePath here. | ||
-- | ||
-- Backslashes (like on Windows) may not be used as path separators, because | ||
-- they would significantly complicate the implementation for little benefit. | ||
pathSeparators :: [Char] | ||
pathSeparators = "/" | ||
|
||
charIsPathSeparator :: Char -> Bool | ||
charIsPathSeparator x = x `elem` pathSeparators | ||
|
||
-- | Characters which must not be parsed as literals if not escaped in glob | ||
-- patterns | ||
globSpecialChars :: [Char] | ||
globSpecialChars = pathSeparators ++ "\\{}*[]?!^," | ||
|
||
isSpecialChar :: Char -> Bool | ||
isSpecialChar x = x `elem` globSpecialChars | ||
|
||
-- | Characters which can occur at the start of a bracket pattern to transform | ||
-- it into its complement. | ||
bracketComplementors :: [Char] | ||
bracketComplementors = "^!" | ||
|
||
isBracketComplementor :: Char -> Bool | ||
isBracketComplementor x = x `elem` bracketComplementors | ||
|
||
-- | Characters which must not be parsed as literals if not escaped in bracket | ||
-- patterns. | ||
bracketSpecialChars :: [Char] | ||
bracketSpecialChars = bracketComplementors ++ "-[]\\/" | ||
|
||
isBracketSpecialChar :: Char -> Bool | ||
isBracketSpecialChar x = x `elem` bracketSpecialChars | ||
|
||
-- | Like manyTill, but always consumes at least one occurence of 'p'. | ||
manyTill1 :: ReadP r a -> ReadP [a] end -> ReadP r [a] | ||
manyTill1 p end = liftM2 (:) p (manyTill p end) | ||
|
||
-- | Parse an escape sequence. Anything is allowed, except a path separator. | ||
escapeSequence :: ReadP r Char | ||
escapeSequence = char '\\' >> satisfy (not . charIsPathSeparator) | ||
|
||
parseLiteral :: ReadP r GlobPart | ||
parseLiteral = fmap Literal $ manyTill1 literalSegment literalEnd | ||
where | ||
literalSegment = notSpecial +++ escapeSequence | ||
notSpecial = satisfy (not . isSpecialChar) | ||
literalEnd = do | ||
str <- look | ||
case str of | ||
(x:_) | isSpecialChar x -> return () | ||
"" -> return () | ||
_ -> pfail | ||
|
||
parsePathSeparator :: ReadP r GlobPart | ||
parsePathSeparator = munch1 (== '/') >> return PathSeparator | ||
|
||
parseCharList :: ReadP r GlobPart | ||
parseCharList = | ||
between (char '[') (char ']') | ||
(fmap CharList (many1 parseCharListPart)) | ||
|
||
parseCharListComplement :: ReadP r GlobPart | ||
parseCharListComplement = | ||
between (char '[') (char ']') | ||
(satisfy isBracketComplementor | ||
>> fmap CharListComplement (many1 parseCharListPart)) | ||
|
||
parseCharListPart :: ReadP r CharListPart | ||
parseCharListPart = range <++ fmap CharLiteral literal | ||
where | ||
range = do | ||
start <- literal | ||
_ <- char '-' | ||
end <- literal | ||
unless (start < end) pfail | ||
return (Range start end) | ||
|
||
literal = satisfy (not . isBracketSpecialChar) +++ escapeSequence | ||
|
||
parseWildOne :: ReadP r GlobPart | ||
parseWildOne = char '?' >> return WildOne | ||
|
||
-- | Parses either a WildMany or a WildManyRecursive. | ||
parseWildMany :: ReadP r GlobPart | ||
parseWildMany = do | ||
str <- munch1 (== '*') | ||
case str of | ||
"*" -> return WildMany | ||
"**" -> return WildManyRecursive | ||
_ -> pfail | ||
|
||
parseChoice :: ReadP r GlobPart | ||
parseChoice = | ||
between (char '{') (char '}') $ do | ||
first <- parseGlobParts | ||
_ <- char ',' | ||
rest <- sepBy1 (parseGlobParts <++ emptyGlob) (char ',') | ||
return (Choice (first : rest)) | ||
where | ||
emptyGlob = return [] | ||
|
||
parseGlobPart :: ReadP r GlobPart | ||
parseGlobPart = choice | ||
[ parseLiteral | ||
, parsePathSeparator | ||
, parseCharList | ||
, parseCharListComplement | ||
, parseWildOne | ||
, parseWildMany | ||
, parseChoice | ||
] | ||
|
||
parseGlobParts :: ReadP r [GlobPart] | ||
parseGlobParts = many1 parseGlobPart | ||
|
||
parseFileGlob :: String -> Maybe Glob | ||
parseFileGlob fp = | ||
case fullyParsed (readP_to_S parseGlobParts fp) of | ||
[parts] -> Just (mkGlob parts) | ||
_ -> Nothing | ||
where | ||
fullyParsed = map fst . filter (null . snd) | ||
mkGlob parts = | ||
case sequence (map asLiteral parts) of | ||
Just literalParts -> NoGlob (concat literalParts) | ||
Nothing -> Glob (RealGlob parts) | ||
|
||
asLiteral (Literal str) = Just str | ||
asLiteral (PathSeparator) = Just "/" | ||
asLiteral _ = Nothing |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should now be
UnitTests.Distribution.Utils.Glob
.