From 9178340b13d6582bd5c015ab33e2d10ef0b3ed91 Mon Sep 17 00:00:00 2001 From: Jeremy Kuhne Date: Wed, 29 Nov 2017 16:17:33 -0800 Subject: [PATCH 01/10] Initial draft of file enumeration design doc. --- accepted/file-enumeration.md | 238 +++++++++++++++++++++++++++++++++++ 1 file changed, 238 insertions(+) create mode 100644 accepted/file-enumeration.md diff --git a/accepted/file-enumeration.md b/accepted/file-enumeration.md new file mode 100644 index 000000000..6192fb9cd --- /dev/null +++ b/accepted/file-enumeration.md @@ -0,0 +1,238 @@ +# Extensible File Enumeration + +**PM** [Immo Landwerth](https://github.com/terrajobst) | **Dev** [Jeremy Kuhne](https://github.com/jeremykuhne) + +Enumerating files in .NET provides limited configurability. You can specify a simple DOS style pattern and whether or not to look recursively. More complicated filtering requires post filtering all results which can introduce a significant performance drain. + +Recursive enumeration is also problematic in that there is no way to handle error states such as access issues or cycles created by links. + +These restrictions have a significant impact on file system intensive applications, a key example being MSBuild. We can address these restrictions and provide performant, configurable enumeration. + +## Scenarios and User Experience + +1. MSBuild can custom filter filesystem entries with limited allocations and form the results in any desired format. +2. Users can build custom enumerations utilizing completely custom or provided commonly used filters and transforms. + +## Requirements + +1. Filtering based on common file system data is possible + a. Name + b. Attributes + c. Time stamps + d. File size +2. Result transforms can be of any type +3. We provide common filters and transforms +4. Recursive behavior is configurable +5. Error handling is configurable + +### Goals + +1. API is cross platform generic +2. API minimizes allocations while meeting #1 + +### Non-Goals + +1. API will not expose platform specific data +3. Error handling configuration is fully customizable + +## Design + +### Proposed API surface + +``` C# +namespace System.IO +{ + /// + /// Delegate for filtering out find results. + /// + internal delegate bool FindPredicate(ref RawFindData findData, TState state); + + /// + /// Delegate for transforming raw find data into a result. + /// + internal delegate TResult FindTransform(ref RawFindData findData, TState state); + + [Flags] + public enum FindOptions + { + None = 0x0, + + // Enumerate subdirectories + Recurse = 0x1, + + // Skip files/directories when access is denied + IgnoreAccessDenied = 0x2, + + // Future: Add flags for tracking cycles, etc. + } + + public class FindEnumerable : CriticalFinalizerObject, IEnumerable, IEnumerator + { + public FindEnumerable( + string directory, + FindTransform transform, + FindPredicate predicate, + // Only used if FindOptions.Recurse is set. Default is to always recurse. + FindPredicate recursePredicate = null, + TState state = default, + FindOptions options = FindOptions.None) + } + + public static class Directory + { + public static IEnumerable Enumerate( + string path, + FindTransform transform, + FindPredicate predicate, + FindPredicate recursePredicate = null, + TState state = default, + FindOptions options = FindOptions.None); + } + + public static class DirectoryInfo + { + public static IEnumerable Enumerate( + FindTransform transform, + FindPredicate predicate, + FindPredicate recursePredicate = null, + TState state = default, + FindOptions options = FindOptions.None); + } + + /// + /// Used for processing and filtering find results. + /// + public ref struct RawFindData + { + // This will have private members that hold the native data and + // will lazily fill in data for properties where such data is not + // immediately available in the current platform's native results. + + // The full path to the directory the current result is in + public string Directory { get; } + + // The full path to the starting directory for enumeration + public string OriginalDirectory { get; } + + // The path to the starting directory as passed to the enumerable constructor + public string OriginalUserDirectory { get; } + + // Note: using a span allows us to reduce unneeded allocations + public ReadOnlySpan FileName { get; } + public FileAttributes Attributes { get; } + public long Length { get; } + + public DateTime CreationTimeUtc { get; } + public DateTime LastAccessTimeUtc { get; } + public DateTime LastWriteTimeUtc { get; } + } +} +``` + +### Transforms & Predicates + +We'll provide common predicates transforms for building searches. + +``` C# +namespace System.IO +{ + internal static partial class FindPredicates + { + internal static bool NotDotOrDotDot(ref RawFindData findData) + internal static bool IsDirectory(ref RawFindData findData) + } + + public static partial class FindTransforms + { + public static DirectoryInfo AsDirectoryInfo(ref RawFindData findData) + public static FileInfo AsFileInfo(ref RawFindData findData) + public static FileSystemInfo AsFileSystemInfo(ref RawFindData findData) + public static string AsFileName(ref RawFindData findData) + public static string AsFullPath(ref RawFindData findData) + } +} + +``` + +### DosMatcher + +We currently have an implementation of the algorithm used for matching files on Windows in FileSystemWatcher. Providing this publicly will allow consistently matching names cross platform according to Windows rules if such behavior is desired. + +``` C# +namespace System.IO +{ + public static class DosMatcher + { + /// + /// Change '*' and '?' to '<', '>' and '"' to match Win32 behavior. For compatibility, Windows + /// changes some wildcards to provide a closer match to historical DOS 8.3 filename matching. + /// + public unsafe static string TranslateExpression(string expression) + + /// + /// Return true if the given expression matches the given name. + /// + public unsafe static bool MatchPattern(string expression, ReadOnlySpan name, bool ignoreCase = true) + } +} +``` + +### Samples + +Getting full path of all files matching a given name pattern (close to what FindFiles does, but returning the full path): + +``` C# +public static FindEnumerable GetFiles(string directory, + string expression = "*", + bool recursive = false) +{ + return new FindEnumerable( + directory, + (ref RawFindData findData, string expr) => FindTransforms.AsFullPath(ref findData), + (ref RawFindData findData, string expr) => + { + return FindPredicates.NotDotOrDotDot(ref findData) + && !FindPredicates.IsDirectory(ref findData) + && DosMatcher.MatchPattern(expr, findData.FileName, ignoreCase: true); + }, + state: DosMatcher.TranslateExpression(expression), + options: recursive ? FindOptions.Recurse : FindOptions.None); +} + +``` + +### Existing API summary + +``` C# +namespace System.IO +{ + public static class Directory + { + public static IEnumerable EnumerateDirectories(string path, string searchPattern, SearchOption searchOption); + public static IEnumerable EnumerateFiles(string path, string searchPattern, SearchOption searchOption); + public static IEnumerable EnumerateFileSystemEntries(string path, string searchPattern, SearchOption searchOption); + public static string[] GetDirectories(string path, string searchPattern, SearchOption searchOption); + public static string[] GetFiles(string path, string searchPattern, SearchOption searchOption); + public static string[] GetFileSystemEntries(string path, string searchPattern, SearchOption searchOption); + } + + public sealed class DirectoryInfo : FileSystemInfo + { + public IEnumerable EnumerateDirectories(string searchPattern, SearchOption searchOption); + public IEnumerable EnumerateFiles(string searchPattern, SearchOption searchOption); + public IEnumerable EnumerateFileSystemInfos(string searchPattern, SearchOption searchOption); + public DirectoryInfo[] GetDirectories(string searchPattern, SearchOption searchOption); + public FileInfo[] GetFiles(string searchPattern, SearchOption searchOption); + public FileSystemInfo[] GetFileSystemInfos(string searchPattern, SearchOption searchOption); + } + + public enum SearchOption + { + AllDirectories, + TopDirectoryOnly + } +} +``` + + +## Q & A \ No newline at end of file From fea0bb229a4cf73b9a84cda050c65178397f2ed6 Mon Sep 17 00:00:00 2001 From: Jeremy Kuhne Date: Tue, 5 Dec 2017 11:03:25 -0800 Subject: [PATCH 02/10] Updates per feedback More to be made, pushing what I have to save progress. --- accepted/file-enumeration.md | 44 ++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/accepted/file-enumeration.md b/accepted/file-enumeration.md index 6192fb9cd..f5390022a 100644 --- a/accepted/file-enumeration.md +++ b/accepted/file-enumeration.md @@ -6,7 +6,7 @@ Enumerating files in .NET provides limited configurability. You can specify a si Recursive enumeration is also problematic in that there is no way to handle error states such as access issues or cycles created by links. -These restrictions have a significant impact on file system intensive applications, a key example being MSBuild. We can address these restrictions and provide performant, configurable enumeration. +These restrictions have a significant impact on file system intensive applications, a key example being MSBuild. This document proposes a new set of primitive file and directory traversal APIs that are optimized for providing more flexibility while keeping the overhead to a minimum so that enumeration becomes both more powerful as well as more performant. ## Scenarios and User Experience @@ -15,20 +15,31 @@ These restrictions have a significant impact on file system intensive applicatio ## Requirements -1. Filtering based on common file system data is possible - a. Name - b. Attributes - c. Time stamps - d. File size -2. Result transforms can be of any type -3. We provide common filters and transforms -4. Recursive behavior is configurable -5. Error handling is configurable ### Goals -1. API is cross platform generic -2. API minimizes allocations while meeting #1 + +1. Custom filtering based on common file system data + - Name + - Attributes + - Time stamps + - File size +2. Result transforms can be of any desired output type + - Like Linq Select(), but keeps FileData on the stack +3. API minimizes allocations +4. API is cross platform abstract +3. We provide common filters and transforms + - To file/directory name + - To full path + - To File/Directory/FileSystemInfo + - DOS style filters (Legacy- `*/?` with DOS semantics, e.g. `*.` is all files without an extension) + - Simple Regex filter + - Simpler globbing (`*/?` without DOS style variants) + - Set of extensions (`*.c`, `*.cpp`, `*.cc`, `*.cxx`, etc.) +4. Recursive behavior is configurable + - On/Off + - Predicate based on FileData +5. Can avoid throwing access denied exceptions ### Non-Goals @@ -45,12 +56,12 @@ namespace System.IO /// /// Delegate for filtering out find results. /// - internal delegate bool FindPredicate(ref RawFindData findData, TState state); + public delegate bool FindPredicate(ref RawFindData findData, TState state); /// /// Delegate for transforming raw find data into a result. /// - internal delegate TResult FindTransform(ref RawFindData findData, TState state); + public delegate TResult FindTransform(ref RawFindData findData, TState state); [Flags] public enum FindOptions @@ -89,7 +100,7 @@ namespace System.IO FindOptions options = FindOptions.None); } - public static class DirectoryInfo + public class DirectoryInfo { public static IEnumerable Enumerate( FindTransform transform, @@ -191,8 +202,7 @@ public static FindEnumerable GetFiles(string directory, (ref RawFindData findData, string expr) => FindTransforms.AsFullPath(ref findData), (ref RawFindData findData, string expr) => { - return FindPredicates.NotDotOrDotDot(ref findData) - && !FindPredicates.IsDirectory(ref findData) + return !FindPredicates.IsDirectory(ref findData) && DosMatcher.MatchPattern(expr, findData.FileName, ignoreCase: true); }, state: DosMatcher.TranslateExpression(expression), From 32f64408b4bd5c9f34ce61035d16444131675acf Mon Sep 17 00:00:00 2001 From: Jeremy Kuhne Date: Wed, 6 Dec 2017 15:10:15 -0800 Subject: [PATCH 03/10] Rewrite scenarios and goals/non goals --- accepted/file-enumeration.md | 178 ++++++++++++++++++++++++----------- 1 file changed, 124 insertions(+), 54 deletions(-) diff --git a/accepted/file-enumeration.md b/accepted/file-enumeration.md index f5390022a..186eb7181 100644 --- a/accepted/file-enumeration.md +++ b/accepted/file-enumeration.md @@ -10,41 +10,131 @@ These restrictions have a significant impact on file system intensive applicatio ## Scenarios and User Experience -1. MSBuild can custom filter filesystem entries with limited allocations and form the results in any desired format. -2. Users can build custom enumerations utilizing completely custom or provided commonly used filters and transforms. +### Getting full paths + +To get full paths for files, one would currently have to do the following: + +``` +public static IEnumerable GetFileFullPaths(string directory, + string expression = "*", + bool recursive = false) +{ + return new DirectoryInfo(directory) + .GetFiles(expression, recursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly) + .Select(r => r.FullName); +} + +``` + +While not complicated to write, it allocates far more than it needs to. A `FileInfo` will be allocated for every result, even though it strictly isn't needed. A new, lower allocating, full-path wrapper could be written as follows: + +``` C# +public static IEnumerable GetFileFullPaths(string directory, + string expression = "*", + bool recursive = false) +{ + return new FindEnumerable( + directory, + (ref FindData findData) => FindTransforms.AsUserFullPath(ref findData), + (ref FindData findData) => + { + return !FindPredicates.IsDirectory(ref findData) + && DosMatcher.MatchPattern(findData.State, findData.FileName, ignoreCase: true); + }, + state = DosMatcher.TranslateExpression(expression), + recursive = recursive); +} + +``` + +While using anonymous delegates isn't strictly necessary, they are cached, so all examples are written with them. The second argument above, for example, could simply be `FindTransforms.AsUserFullPath`, but it wouldn't take advantage of delegate caching. + + +### Getting full paths of a given extension + +Again, here is the current way to do this: + +``` C# +public static IEnumerable GetFilePathsWithExtensions(string directory, bool recursive, params string[] extensions) +{ + return new DirectoryInfo(directory) + .GetFiles("*", recursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly) + .Where(f => extensions.Any(e => f.Name.EndsWith(e, StringComparison.OrdinalIgnoreCase))) + .Select(r => r.FullName); +} +``` + +Again, not complicated to write, but this can do an enormous amount of extra allocations. You have to create full strings and FileInfo for every single item in the file system. We can cut this down significantly with the extension point: + +``` C# +public static IEnumerable GetFileFullPathsWithExtension(string directory, + bool recursive, params string[] extensions) +{ + return new FindEnumerable( + directory, + (ref FindData findData) => FindTransforms.AsUserFullPath(ref findData), + (ref FindData findData) => + { + return !FindPredicates.IsDirectory(ref findData) + && findData.State.Any(s => findData.FileName.EndsWith(s, StringComparison.OrdinalIgnoreCase)); + }, + state = extensions, + recursive = recursive); +} +``` + +The number of allocation reductions with the above solution is significant. + +- No `FileInfo` allocations +- No fullpath string allocations for paths that don't match +- No filename allocations for paths that don't match (as the filename will still be in the native buffer at this point) ## Requirements ### Goals - -1. Custom filtering based on common file system data +The key goal is to provide an advanced API that keeps allocations to a minimum by allowing access to enumeration internals. We're willing to sacrifice some usability to achieve this- but will keep the complexity in-line with similar `Span` based APIs, such as `String.Create()`. + +1. Keep allocations to a minimum + - Per result allocations are the priority + - Initial allocations should also be minimized, but not at the cost of per-result +2. Considers filter/transform method call overhead for large sets + - Delegates appear to be a better solution than using Interfaces +3. Filtering can be done on existing FileSystemInfo exposed data - Name - Attributes - Time stamps - File size -2. Result transforms can be of any desired output type +4. Transforms can provide results in any type - Like Linq Select(), but keeps FileData on the stack -3. API minimizes allocations -4. API is cross platform abstract -3. We provide common filters and transforms - - To file/directory name - - To full path - - To File/Directory/FileSystemInfo - - DOS style filters (Legacy- `*/?` with DOS semantics, e.g. `*.` is all files without an extension) - - Simple Regex filter - - Simpler globbing (`*/?` without DOS style variants) - - Set of extensions (`*.c`, `*.cpp`, `*.cc`, `*.cxx`, etc.) +5. Filters and transforms will be provided + - Criteria considered for inclusion + - Expected to be commonly used + - Can be optimized by being part of the platform + - Via access to internals + - Can abstract away expected future improvements (e.g. adding Span support to Regex) + - Expected initial transforms + - To file/directory name + - To full path + - To File/Directory/FileSystemInfo + - Expected initial filters + - (Pri0) DOS style filters (Legacy- `*/?` with DOS semantics, e.g. `*.` is all files without an extension) + - (Pri1) Simple Regex filter (e.g. IsMatch()) + - (Pri2) Simpler globbing (`*/?` without DOS style variants) + - (Pri2) Set of extensions (`*.c`, `*.cpp`, `*.cc`, `*.cxx`, etc.) 4. Recursive behavior is configurable - - On/Off + - On/Off via flag - Predicate based on FileData -5. Can avoid throwing access denied exceptions +5. Can avoid throwing access denied / security exceptions via Options flags +6. Can hint buffer allocation size via flag ### Non-Goals -1. API will not expose platform specific data -3. Error handling configuration is fully customizable +1. This will not replace existing APIs- this is an advanced way for people to write performance solutions +2. We will not provide a way to plug in custom platform APIs to provide the FileData +2. We will not expose platform specific data in this release +3. We will not expose raw errors for filtering ## Design @@ -56,12 +146,12 @@ namespace System.IO /// /// Delegate for filtering out find results. /// - public delegate bool FindPredicate(ref RawFindData findData, TState state); + public delegate bool FindPredicate(ref FindData findData, TState state); /// /// Delegate for transforming raw find data into a result. /// - public delegate TResult FindTransform(ref RawFindData findData, TState state); + public delegate TResult FindTransform(ref FindData findData, TState state); [Flags] public enum FindOptions @@ -71,8 +161,11 @@ namespace System.IO // Enumerate subdirectories Recurse = 0x1, - // Skip files/directories when access is denied - IgnoreAccessDenied = 0x2, + // Skip files/directories when access is denied (e.g. AccessDeniedException/SecurityException) + IgnoreInaccessable = 0x2, + + // Hint to use larger buffers for getting data (notably to help address remote enumeration perf) + UseLargeBuffer = 0x4 // Future: Add flags for tracking cycles, etc. } @@ -113,7 +206,7 @@ namespace System.IO /// /// Used for processing and filtering find results. /// - public ref struct RawFindData + public ref struct FindData { // This will have private members that hold the native data and // will lazily fill in data for properties where such data is not @@ -149,17 +242,17 @@ namespace System.IO { internal static partial class FindPredicates { - internal static bool NotDotOrDotDot(ref RawFindData findData) - internal static bool IsDirectory(ref RawFindData findData) + internal static bool NotDotOrDotDot(ref FindData findData) + internal static bool IsDirectory(ref FindData findData) } public static partial class FindTransforms { - public static DirectoryInfo AsDirectoryInfo(ref RawFindData findData) - public static FileInfo AsFileInfo(ref RawFindData findData) - public static FileSystemInfo AsFileSystemInfo(ref RawFindData findData) - public static string AsFileName(ref RawFindData findData) - public static string AsFullPath(ref RawFindData findData) + public static DirectoryInfo AsDirectoryInfo(ref FindData findData) + public static FileInfo AsFileInfo(ref FindData findData) + public static FileSystemInfo AsFileSystemInfo(ref FindData findData) + public static string AsFileName(ref FindData findData) + public static string AsFullPath(ref FindData findData) } } @@ -188,29 +281,6 @@ namespace System.IO } ``` -### Samples - -Getting full path of all files matching a given name pattern (close to what FindFiles does, but returning the full path): - -``` C# -public static FindEnumerable GetFiles(string directory, - string expression = "*", - bool recursive = false) -{ - return new FindEnumerable( - directory, - (ref RawFindData findData, string expr) => FindTransforms.AsFullPath(ref findData), - (ref RawFindData findData, string expr) => - { - return !FindPredicates.IsDirectory(ref findData) - && DosMatcher.MatchPattern(expr, findData.FileName, ignoreCase: true); - }, - state: DosMatcher.TranslateExpression(expression), - options: recursive ? FindOptions.Recurse : FindOptions.None); -} - -``` - ### Existing API summary ``` C# From cb3288b50262263b76d71a468a2f63206adc5242 Mon Sep 17 00:00:00 2001 From: Jeremy Kuhne Date: Wed, 6 Dec 2017 15:42:26 -0800 Subject: [PATCH 04/10] Add a flag for skipping locking Clean up flag goals a bit. --- accepted/file-enumeration.md | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/accepted/file-enumeration.md b/accepted/file-enumeration.md index 186eb7181..0fa1c18ee 100644 --- a/accepted/file-enumeration.md +++ b/accepted/file-enumeration.md @@ -123,11 +123,14 @@ The key goal is to provide an advanced API that keeps allocations to a minimum b - (Pri1) Simple Regex filter (e.g. IsMatch()) - (Pri2) Simpler globbing (`*/?` without DOS style variants) - (Pri2) Set of extensions (`*.c`, `*.cpp`, `*.cc`, `*.cxx`, etc.) -4. Recursive behavior is configurable +6. Recursive behavior is configurable - On/Off via flag - Predicate based on FileData -5. Can avoid throwing access denied / security exceptions via Options flags -6. Can hint buffer allocation size via flag +7. Behavior flags + 1. Recurse + 2. Ignore inaccessible files (e.g. no rights) + 3. Hint to use larger buffers (for remote access) + 4. Optimization to skip locking for single-thread enumeration ### Non-Goals @@ -165,7 +168,13 @@ namespace System.IO IgnoreInaccessable = 0x2, // Hint to use larger buffers for getting data (notably to help address remote enumeration perf) - UseLargeBuffer = 0x4 + UseLargeBuffer = 0x4, + + // Allow .NET to skip locking if you know the enumerator won't be used on multiple threads + // (Enumerating is inherently not thread-safe, but .NET needs to still lock by default to + // avoid access violations with native access should someone actually try to use the + // the same enumerator on multiple threads.) + NoLocking = 0x8 // Future: Add flags for tracking cycles, etc. } From 76ae3d59e3d34937069fa217e5d4053a0b4ac6f9 Mon Sep 17 00:00:00 2001 From: Jeremy Kuhne Date: Thu, 7 Dec 2017 14:14:04 -0800 Subject: [PATCH 05/10] Minor cleanup --- accepted/file-enumeration.md | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/accepted/file-enumeration.md b/accepted/file-enumeration.md index 0fa1c18ee..8dacf6f48 100644 --- a/accepted/file-enumeration.md +++ b/accepted/file-enumeration.md @@ -72,8 +72,8 @@ public static IEnumerable GetFileFullPathsWithExtension(string directory { return new FindEnumerable( directory, - (ref FindData findData) => FindTransforms.AsUserFullPath(ref findData), - (ref FindData findData) => + (ref FindData findData) => FindTransforms.AsUserFullPath(ref findData), + (ref FindData findData) => { return !FindPredicates.IsDirectory(ref findData) && findData.State.Any(s => findData.FileName.EndsWith(s, StringComparison.OrdinalIgnoreCase)); @@ -105,7 +105,7 @@ The key goal is to provide an advanced API that keeps allocations to a minimum b - Name - Attributes - Time stamps - - File size + - File size 4. Transforms can provide results in any type - Like Linq Select(), but keeps FileData on the stack 5. Filters and transforms will be provided @@ -167,14 +167,14 @@ namespace System.IO // Skip files/directories when access is denied (e.g. AccessDeniedException/SecurityException) IgnoreInaccessable = 0x2, - // Hint to use larger buffers for getting data (notably to help address remote enumeration perf) - UseLargeBuffer = 0x4, + // Hint to use larger buffers for getting data (notably to help address remote enumeration perf) + UseLargeBuffer = 0x4, - // Allow .NET to skip locking if you know the enumerator won't be used on multiple threads - // (Enumerating is inherently not thread-safe, but .NET needs to still lock by default to - // avoid access violations with native access should someone actually try to use the - // the same enumerator on multiple threads.) - NoLocking = 0x8 + // Allow .NET to skip locking if you know the enumerator won't be used on multiple threads + // (Enumerating is inherently not thread-safe, but .NET needs to still lock by default to + // avoid access violations with native access should someone actually try to use the + // the same enumerator on multiple threads.) + NoLocking = 0x8 // Future: Add flags for tracking cycles, etc. } @@ -197,7 +197,7 @@ namespace System.IO string path, FindTransform transform, FindPredicate predicate, - FindPredicate recursePredicate = null, + FindPredicate recursePredicate = null, TState state = default, FindOptions options = FindOptions.None); } @@ -207,7 +207,7 @@ namespace System.IO public static IEnumerable Enumerate( FindTransform transform, FindPredicate predicate, - FindPredicate recursePredicate = null, + FindPredicate recursePredicate = null, TState state = default, FindOptions options = FindOptions.None); } @@ -221,7 +221,7 @@ namespace System.IO // will lazily fill in data for properties where such data is not // immediately available in the current platform's native results. - // The full path to the directory the current result is in + // The full path to the directory the current result is in public string Directory { get; } // The full path to the starting directory for enumeration From 7edad28285702884015b633d0d99c847a634bea6 Mon Sep 17 00:00:00 2001 From: Jeremy Kuhne Date: Thu, 7 Dec 2017 17:06:31 -0800 Subject: [PATCH 06/10] Some Q&A --- accepted/file-enumeration.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/accepted/file-enumeration.md b/accepted/file-enumeration.md index 8dacf6f48..943f5ac39 100644 --- a/accepted/file-enumeration.md +++ b/accepted/file-enumeration.md @@ -324,4 +324,17 @@ namespace System.IO ``` -## Q & A \ No newline at end of file +## Q & A + +#### Why aren't you providing a filter that does _`X`_? + +We want to only provide pre-made filters that have broad applicability. Based on feedback we can and will consider adding new filters in the future. + +#### Why did you put data in the struct that is expensive to get on Unix? + +While Windows gives all of the data you see in `FindData` in a single enumeration call, this isn't true for Unix. We're trying to match the current `System.IO.*Info` class data, not the intersection of OS call data. We will lazily get the extra data (time stamps, for example) on Unix to avoid unnecessarily taxing solutions that don't need it. + +#### Why doesn’t the filename data type match the OS format? + +We believe that it is easier and more predictable to write cross-plat solutions based on `char` rather than have to deal directly with encoding. The current plan is that we'll optimize here by not converting from UTF-8 until/if needed and we'll also minimize/eliminate GC impact by keeping the converted data on the stack or in a cached array. + From 295198b2fa0f33e91e399a92a33a9e0954da1af6 Mon Sep 17 00:00:00 2001 From: Jeremy Kuhne Date: Fri, 8 Dec 2017 15:27:52 -0800 Subject: [PATCH 07/10] Missing on FindData struct. --- accepted/file-enumeration.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/accepted/file-enumeration.md b/accepted/file-enumeration.md index 943f5ac39..914880c9b 100644 --- a/accepted/file-enumeration.md +++ b/accepted/file-enumeration.md @@ -215,7 +215,7 @@ namespace System.IO /// /// Used for processing and filtering find results. /// - public ref struct FindData + public ref struct FindData { // This will have private members that hold the native data and // will lazily fill in data for properties where such data is not @@ -238,6 +238,8 @@ namespace System.IO public DateTime CreationTimeUtc { get; } public DateTime LastAccessTimeUtc { get; } public DateTime LastWriteTimeUtc { get; } + + public TState State; } } ``` From 9a010fc2cad4f52bf25e7100ffd4054be6075cd9 Mon Sep 17 00:00:00 2001 From: Jeremy Kuhne Date: Sun, 10 Dec 2017 16:12:01 -0800 Subject: [PATCH 08/10] Add testable FindData variant Lay out a potential variant for FindData to allow unit testing. --- accepted/file-enumeration.md | 43 ++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/accepted/file-enumeration.md b/accepted/file-enumeration.md index 914880c9b..0ca53522c 100644 --- a/accepted/file-enumeration.md +++ b/accepted/file-enumeration.md @@ -292,6 +292,49 @@ namespace System.IO } ``` +### Potential testability variant + +Having `FindData` be a struct is critical to meet the performance goals for the feature. To allow testing of Filters and Predicates we could also expose an interface of `IFindData`: + +``` C# +namespace System.IO +{ + public interface IFindData + { + string Directory { get; } + string OriginalDirectory{ get; } + string OriginalUserDirectory { get; } + TState State { get; } + + ReadOnlySpan FileName { get; } + FileAttributes Attributes { get; } + long Length { get; } + + DateTime CreationTimeUtc { get; } + DateTime LastAccessTimeUtc { get; } + DateTime LastWriteTimeUtc { get; } + } + + public unsafe struct FindData : IFindData + // ... same as defined earlier +} +``` + +Predicates and transforms could then be modified as in the following example: + +```C# +namespace System.IO +{ + public static partial class FindTransforms + { + public static DirectoryInfo AsDirectoryInfo(ref TFindData findData) + where TFindData : struct, IFindData + } +} +``` + +This adds a bit more syntactical complexity, but should allow testing transforms and predicates against mocked IFindData structs. + ### Existing API summary ``` C# From 9bdf78a136933f7fee49ddc1f0ed2bb87169eefe Mon Sep 17 00:00:00 2001 From: Jeremy Kuhne Date: Mon, 11 Dec 2017 16:42:01 -0800 Subject: [PATCH 09/10] Move future discussion to it's own header Also add some more details for future thoughts. --- accepted/file-enumeration.md | 138 ++++++++++++++++++++++++----------- 1 file changed, 97 insertions(+), 41 deletions(-) diff --git a/accepted/file-enumeration.md b/accepted/file-enumeration.md index 0ca53522c..4383ac9be 100644 --- a/accepted/file-enumeration.md +++ b/accepted/file-enumeration.md @@ -14,7 +14,7 @@ These restrictions have a significant impact on file system intensive applicatio To get full paths for files, one would currently have to do the following: -``` +``` C# public static IEnumerable GetFileFullPaths(string directory, string expression = "*", bool recursive = false) @@ -292,9 +292,76 @@ namespace System.IO } ``` -### Potential testability variant +### Existing API summary + +``` C# +namespace System.IO +{ + public static class Directory + { + public static IEnumerable EnumerateDirectories(string path, string searchPattern, SearchOption searchOption); + public static IEnumerable EnumerateFiles(string path, string searchPattern, SearchOption searchOption); + public static IEnumerable EnumerateFileSystemEntries(string path, string searchPattern, SearchOption searchOption); + public static string[] GetDirectories(string path, string searchPattern, SearchOption searchOption); + public static string[] GetFiles(string path, string searchPattern, SearchOption searchOption); + public static string[] GetFileSystemEntries(string path, string searchPattern, SearchOption searchOption); + } + + public sealed class DirectoryInfo : FileSystemInfo + { + public IEnumerable EnumerateDirectories(string searchPattern, SearchOption searchOption); + public IEnumerable EnumerateFiles(string searchPattern, SearchOption searchOption); + public IEnumerable EnumerateFileSystemInfos(string searchPattern, SearchOption searchOption); + public DirectoryInfo[] GetDirectories(string searchPattern, SearchOption searchOption); + public FileInfo[] GetFiles(string searchPattern, SearchOption searchOption); + public FileSystemInfo[] GetFileSystemInfos(string searchPattern, SearchOption searchOption); + } + + public enum SearchOption + { + AllDirectories, + TopDirectoryOnly + } +} +``` + + +## Q & A + +#### Why aren't you providing a filter that does _`X`_? + +We want to only provide pre-made filters that have broad applicability. Based on feedback we can and will consider adding new filters in the future. + +#### Why did you put data in the struct that is expensive to get on Unix? + +While Windows gives all of the data you see in `FindData` in a single enumeration call, this isn't true for Unix. We're trying to match the current `System.IO.*Info` class data, not the intersection of OS call data. We will lazily get the extra data (time stamps, for example) on Unix to avoid unnecessarily taxing solutions that don't need it. + +#### Why doesn’t the filename data type match the OS format? + +We believe that it is easier and more predictable to write cross-plat solutions based on `char` rather than have to deal directly with encoding. The current plan is that we'll optimize here by not converting from UTF-8 until/if needed and we'll also minimize/eliminate GC impact by keeping the converted data on the stack or in a cached array. + +#### How do I get platform specific data? + +This is something we're investigating for the future. See discussions below. + +## Future + +### Native Data Access + +Right now we provide a single cross platform view. Some scenario might benefit from viewing OS specific data, such as the `inode` or the underlying UTF-8 data. A few options: + +1. Provide interfaces for platform specific data on `FindData`, such as `UnixFindData`. +2. Make raw OS data structs public and provide a method to get the data: `bool TryGetNativeData(ref T nativeData)` -Having `FindData` be a struct is critical to meet the performance goals for the feature. To allow testing of Filters and Predicates we could also expose an interface of `IFindData`: +Related to this, we could potentially provide `FindOptions` flags to specify exactly what raw data we get. On Windows, for example, there are a number of different data structs that can be returned from [NtQueryDirectoryFile](https://msdn.microsoft.com/en-us/library/windows/hardware/ff567047.aspx). We could abstract this away. If we went this route we'd need to carefully measure performance impact of having conditions on our `FindData` properties. + +### Testability + +Having `FindData` be a struct is critical to meet the performance goals for the feature. Using a struct and keeping the performance goals met makes unit testing filters and predicates a little difficult. + +#### Interface Testability Option + +One option to allow testing of Filters and Predicates we could also expose an interface of `IFindData`: ``` C# namespace System.IO @@ -305,24 +372,22 @@ namespace System.IO string OriginalDirectory{ get; } string OriginalUserDirectory { get; } TState State { get; } - ReadOnlySpan FileName { get; } FileAttributes Attributes { get; } long Length { get; } - DateTime CreationTimeUtc { get; } DateTime LastAccessTimeUtc { get; } DateTime LastWriteTimeUtc { get; } } public unsafe struct FindData : IFindData - // ... same as defined earlier + // ... same as defined in the main proposal } ``` Predicates and transforms could then be modified as in the following example: -```C# +``` C# namespace System.IO { public static partial class FindTransforms @@ -333,53 +398,44 @@ namespace System.IO } ``` -This adds a bit more syntactical complexity, but should allow testing transforms and predicates against mocked IFindData structs. +This adds a fair bit more syntactical complexity, but should allow testing transforms and predicates against mocked IFindData structs. -### Existing API summary +#### Constructor Testability Option + +Another option for testability would involve providing a constructor that takes either the fields or perhaps an interface as in the interface option above: ``` C# namespace System.IO { - public static class Directory + public interface IFindData { - public static IEnumerable EnumerateDirectories(string path, string searchPattern, SearchOption searchOption); - public static IEnumerable EnumerateFiles(string path, string searchPattern, SearchOption searchOption); - public static IEnumerable EnumerateFileSystemEntries(string path, string searchPattern, SearchOption searchOption); - public static string[] GetDirectories(string path, string searchPattern, SearchOption searchOption); - public static string[] GetFiles(string path, string searchPattern, SearchOption searchOption); - public static string[] GetFileSystemEntries(string path, string searchPattern, SearchOption searchOption); + string Directory { get; } + string OriginalDirectory{ get; } + string OriginalUserDirectory { get; } + TState State { get; } + ReadOnlySpan FileName { get; } + FileAttributes Attributes { get; } + long Length { get; } + DateTime CreationTimeUtc { get; } + DateTime LastAccessTimeUtc { get; } + DateTime LastWriteTimeUtc { get; } } - public sealed class DirectoryInfo : FileSystemInfo + public unsafe struct FindData // : IFindData <- Possibly could still do this but not use for our APIs { - public IEnumerable EnumerateDirectories(string searchPattern, SearchOption searchOption); - public IEnumerable EnumerateFiles(string searchPattern, SearchOption searchOption); - public IEnumerable EnumerateFileSystemInfos(string searchPattern, SearchOption searchOption); - public DirectoryInfo[] GetDirectories(string searchPattern, SearchOption searchOption); - public FileInfo[] GetFiles(string searchPattern, SearchOption searchOption); - public FileSystemInfo[] GetFileSystemInfos(string searchPattern, SearchOption searchOption); + public FindData(IFindData data); + // ... same as defined in the main proposal } - public enum SearchOption + // *** OR *** + + public unsafe struct FindData { - AllDirectories, - TopDirectoryOnly + public FindData(string directory, string originalDirectory /*, etc */); + // ... same as defined in the main proposal } + } ``` - -## Q & A - -#### Why aren't you providing a filter that does _`X`_? - -We want to only provide pre-made filters that have broad applicability. Based on feedback we can and will consider adding new filters in the future. - -#### Why did you put data in the struct that is expensive to get on Unix? - -While Windows gives all of the data you see in `FindData` in a single enumeration call, this isn't true for Unix. We're trying to match the current `System.IO.*Info` class data, not the intersection of OS call data. We will lazily get the extra data (time stamps, for example) on Unix to avoid unnecessarily taxing solutions that don't need it. - -#### Why doesn’t the filename data type match the OS format? - -We believe that it is easier and more predictable to write cross-plat solutions based on `char` rather than have to deal directly with encoding. The current plan is that we'll optimize here by not converting from UTF-8 until/if needed and we'll also minimize/eliminate GC impact by keeping the converted data on the stack or in a cached array. - +These routes would require adding some overhead to check for the "mock" data on every property call. Perf impact would need to be measured. \ No newline at end of file From 50271fbb1284fbeff16ef0baf7e34d519ec789bc Mon Sep 17 00:00:00 2001 From: Jeremy Kuhne Date: Tue, 12 Dec 2017 12:40:28 -0800 Subject: [PATCH 10/10] Add more Q&A --- accepted/file-enumeration.md | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/accepted/file-enumeration.md b/accepted/file-enumeration.md index 4383ac9be..0c3b672b2 100644 --- a/accepted/file-enumeration.md +++ b/accepted/file-enumeration.md @@ -49,6 +49,8 @@ public static IEnumerable GetFileFullPaths(string directory, While using anonymous delegates isn't strictly necessary, they are cached, so all examples are written with them. The second argument above, for example, could simply be `FindTransforms.AsUserFullPath`, but it wouldn't take advantage of delegate caching. +> Note that while you can write a solution that doesn't allocate a `FileInfo` by using the `string[]` APIs and `GetFullPath()` it would still allocate unneeded strings and introduce costly normalization overhead. + ### Getting full paths of a given extension @@ -328,6 +330,19 @@ namespace System.IO ## Q & A +#### Why is this so complicated? + +This isn't intended to be a common-use API. The existing APIs will be kept, maintained, and extended based on customer demand. We don't want to: + +1. Have scenarios blocked waiting for new APIs to work their way through the system +2. Have to write "normal" APIs to address more corner cases + +In order to make this a usable extension point we have to sacrifice some usability to get the neccessary characteristics. Note that what people build on this will directly impact our future designs of the standard, usability focused, APIs. + +#### Why are you using Linq in your examples? + +For example clarity. Clearly if we were trying to fully optimize for perf/allocations we wouldn't. + #### Why aren't you providing a filter that does _`X`_? We want to only provide pre-made filters that have broad applicability. Based on feedback we can and will consider adding new filters in the future. @@ -351,7 +366,7 @@ This is something we're investigating for the future. See discussions below. Right now we provide a single cross platform view. Some scenario might benefit from viewing OS specific data, such as the `inode` or the underlying UTF-8 data. A few options: 1. Provide interfaces for platform specific data on `FindData`, such as `UnixFindData`. -2. Make raw OS data structs public and provide a method to get the data: `bool TryGetNativeData(ref T nativeData)` +2. Make raw OS data structs public and provide a method to get the data: `bool TryGetNativeData(ref T nativeData, ref FindData findData)` Related to this, we could potentially provide `FindOptions` flags to specify exactly what raw data we get. On Windows, for example, there are a number of different data structs that can be returned from [NtQueryDirectoryFile](https://msdn.microsoft.com/en-us/library/windows/hardware/ff567047.aspx). We could abstract this away. If we went this route we'd need to carefully measure performance impact of having conditions on our `FindData` properties.