From 7a61e69b2eef40a97a7b8d8e62698490e90481c9 Mon Sep 17 00:00:00 2001 From: Rushi Patel <84736613+rnpmsft@users.noreply.github.com> Date: Tue, 8 Jun 2021 14:30:44 -0400 Subject: [PATCH] Initial creation of filesystem scanner for DMLib (#21492) --- .../CHANGELOG.md | 2 +- .../README.md | 4 + .../Azure.Storage.Blobs.DataMovement.csproj | 2 +- ...re.Storage.Blobs.DataMovement.Tests.csproj | 17 +- .../CHANGELOG.md | 2 +- .../README.md | 4 + .../Azure.Storage.Common.DataMovement.csproj | 2 +- .../src/FilesystemScanner.cs | 114 ++++++++++ ...e.Storage.Common.DataMovement.Tests.csproj | 17 +- .../tests/DataMovementTestBase.cs | 3 +- .../tests/FilesystemScannerTests.cs | 205 ++++++++++++++++++ 11 files changed, 350 insertions(+), 22 deletions(-) create mode 100644 sdk/storage/Azure.Storage.Common.DataMovement/src/FilesystemScanner.cs create mode 100644 sdk/storage/Azure.Storage.Common.DataMovement/tests/FilesystemScannerTests.cs diff --git a/sdk/storage/Azure.Storage.Blobs.DataMovement/CHANGELOG.md b/sdk/storage/Azure.Storage.Blobs.DataMovement/CHANGELOG.md index ca4624cb29e41..08895a1208a3d 100644 --- a/sdk/storage/Azure.Storage.Blobs.DataMovement/CHANGELOG.md +++ b/sdk/storage/Azure.Storage.Blobs.DataMovement/CHANGELOG.md @@ -1,6 +1,6 @@ # Release History -## 3.0.0-preview.1 (Unreleased) +## 12.0.0-preview.1 (Unreleased) This preview is the first release of a ground-up rewrite of our client libraries to ensure consistency, idiomatic design, productivity, and an excellent developer experience. 
It was created following the Azure SDK Design diff --git a/sdk/storage/Azure.Storage.Blobs.DataMovement/README.md b/sdk/storage/Azure.Storage.Blobs.DataMovement/README.md index 135b1537e382b..127ff79ef5080 100644 --- a/sdk/storage/Azure.Storage.Blobs.DataMovement/README.md +++ b/sdk/storage/Azure.Storage.Blobs.DataMovement/README.md @@ -32,6 +32,10 @@ Here's an example using the Azure CLI: az storage account create --name MyStorageAccount --resource-group MyResourceGroup --location westus --sku Standard_LRS ``` +### Authenticate the Client + +Authentication works the same as in [Azure.Storage.Blobs][authenticating_with_blobs]. + ## Key concepts Blob storage is designed for: diff --git a/sdk/storage/Azure.Storage.Blobs.DataMovement/src/Azure.Storage.Blobs.DataMovement.csproj b/sdk/storage/Azure.Storage.Blobs.DataMovement/src/Azure.Storage.Blobs.DataMovement.csproj index eb3b833ad3fd6..321dbc85456eb 100644 --- a/sdk/storage/Azure.Storage.Blobs.DataMovement/src/Azure.Storage.Blobs.DataMovement.csproj +++ b/sdk/storage/Azure.Storage.Blobs.DataMovement/src/Azure.Storage.Blobs.DataMovement.csproj @@ -4,7 +4,7 @@ Microsoft Azure.Storage.DataMovement.Blobs client library - 0.1.0-beta.1 + 12.0.0-preview.1 BlobSDK;$(DefineConstants) Microsoft Azure Storage Blobs DataMovement;Microsoft;Azure;Blobs;Blob;Storage;DataMovement;StorageScalable;$(PackageCommonTags) diff --git a/sdk/storage/Azure.Storage.Blobs.DataMovement/tests/Azure.Storage.Blobs.DataMovement.Tests.csproj b/sdk/storage/Azure.Storage.Blobs.DataMovement/tests/Azure.Storage.Blobs.DataMovement.Tests.csproj index d648ff83d5920..4cbf5807cae68 100644 --- a/sdk/storage/Azure.Storage.Blobs.DataMovement/tests/Azure.Storage.Blobs.DataMovement.Tests.csproj +++ b/sdk/storage/Azure.Storage.Blobs.DataMovement/tests/Azure.Storage.Blobs.DataMovement.Tests.csproj @@ -1,20 +1,19 @@  - $(RequiredTargetFrameworks) Microsoft Azure.Storage.Common.DataMovement client library tests false - Azure.Storage.Tests - - + - - - - - + + + + 
PreserveNewest + + + \ No newline at end of file diff --git a/sdk/storage/Azure.Storage.Common.DataMovement/CHANGELOG.md b/sdk/storage/Azure.Storage.Common.DataMovement/CHANGELOG.md index ca4624cb29e41..08895a1208a3d 100644 --- a/sdk/storage/Azure.Storage.Common.DataMovement/CHANGELOG.md +++ b/sdk/storage/Azure.Storage.Common.DataMovement/CHANGELOG.md @@ -1,6 +1,6 @@ # Release History -## 3.0.0-preview.1 (Unreleased) +## 12.0.0-preview.1 (Unreleased) This preview is the first release of a ground-up rewrite of our client libraries to ensure consistency, idiomatic design, productivity, and an excellent developer experience. It was created following the Azure SDK Design diff --git a/sdk/storage/Azure.Storage.Common.DataMovement/README.md b/sdk/storage/Azure.Storage.Common.DataMovement/README.md index ce678ceec9985..8b53fc9837b51 100644 --- a/sdk/storage/Azure.Storage.Common.DataMovement/README.md +++ b/sdk/storage/Azure.Storage.Common.DataMovement/README.md @@ -38,6 +38,10 @@ Here's an example using the Azure CLI: az storage account create --name MyStorageAccount --resource-group MyResourceGroup --location westus --sku Standard_LRS ``` +### Authenticate the Client + +Authentication details to be written. 
+ ## Key concepts The Azure Storage Common client library contains shared infrastructure like diff --git a/sdk/storage/Azure.Storage.Common.DataMovement/src/Azure.Storage.Common.DataMovement.csproj b/sdk/storage/Azure.Storage.Common.DataMovement/src/Azure.Storage.Common.DataMovement.csproj index 0050f2c455b84..7e2635ffe3aa8 100644 --- a/sdk/storage/Azure.Storage.Common.DataMovement/src/Azure.Storage.Common.DataMovement.csproj +++ b/sdk/storage/Azure.Storage.Common.DataMovement/src/Azure.Storage.Common.DataMovement.csproj @@ -4,7 +4,7 @@ Microsoft Azure.Storage.Common.DataMovement client library - 0.1.0-beta.1 + 12.0.0-preview.1 CommonSDK;$(DefineConstants) Microsoft Azure Storage Common DataMovement, Microsoft, Azure, StorageScalable, azureofficial diff --git a/sdk/storage/Azure.Storage.Common.DataMovement/src/FilesystemScanner.cs b/sdk/storage/Azure.Storage.Common.DataMovement/src/FilesystemScanner.cs new file mode 100644 index 0000000000000..7364c49213716 --- /dev/null +++ b/sdk/storage/Azure.Storage.Common.DataMovement/src/FilesystemScanner.cs @@ -0,0 +1,114 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; + +namespace Azure.Storage.Common.DataMovement +{ + /// + /// Static helpers that enumerate files on the local filesystem, starting from one or more paths. + /// + public static class FilesystemScanner + { + /// + /// Enumerates all files pointed to by the provided path, including those in subdirectories (if path is a directory). + /// + /// Filesystem location to scan; may point to a single file or to a directory. + /// Lazily evaluated sequence of absolute file paths. Subdirectories that cannot be enumerated are skipped; files themselves are not opened, so unreadable files are still listed. 
+ public static IEnumerable ScanLocation(string path) + { + // Path type is ambiguous at start + bool isDirectory = false; + + try + { + // Make sure we're dealing with an absolute, well-formatted path + path = Path.GetFullPath(path); + + // Check if path points to a directory + if ((File.GetAttributes(path) & FileAttributes.Directory) == FileAttributes.Directory) + { + isDirectory = true; + } + } + catch (Exception) + { + // If there's an error here, there aren't any valid entries to scan at the given path; + // the path is either invalid or nonexistent. In this case, throw the resulting exception. + // + // TODO: Logging for invalid path exceptions + throw; + } + + // If we're given a directory, walk its subtree breadth-first using a queue + if (isDirectory) + { + // Create a queue of folders to enumerate files from, starting with provided path + Queue folders = new(); + folders.Enqueue(path); + + while (folders.Count > 0) + { + // Grab a folder from the queue + string dir = folders.Dequeue(); + + // Try to enumerate and queue all subdirectories of the current folder + try + { + foreach (string subdir in Directory.EnumerateDirectories(dir)) + { + folders.Enqueue(subdir); + } + } + // If enumeration fails (e.g. we lack permissions), skip the folder and continue processing + // the rest of the queue + catch (Exception) + { + // TODO: Logging for missing permissions to enumerate folder + if (dir == path) + { + // If we can't even enumerate the path supplied by the user, throw + // the error + throw; + } + + // Otherwise, just log the failed subdirectory and continue to list as many + // files as accessible. Maybe let users decide whether to always throw here? + continue; + } + + // Add all files in the directory to be returned + foreach (string file in Directory.EnumerateFiles(dir)) + { + yield return file; + } + } + } + // Otherwise the path is a single file; return it (already normalized to an absolute path) + else + { + yield return path; + } + } + + /// + /// Enumerates the files reachable from each of several paths, in the order the paths are given. 
+ /// + /// Filesystem locations to scan; each may point to a single file or to a directory. + /// Lazily evaluated sequence of absolute file paths. Subdirectories that cannot be enumerated are skipped; files themselves are not opened, so unreadable files are still listed. + public static IEnumerable ScanLocations(string[] paths) + { + // Forward each path to ScanLocation() and yield its results in the order the paths were supplied + foreach (string path in paths) + { + foreach (string file in ScanLocation(path)) + { + yield return file; + } + } + } + } +} diff --git a/sdk/storage/Azure.Storage.Common.DataMovement/tests/Azure.Storage.Common.DataMovement.Tests.csproj b/sdk/storage/Azure.Storage.Common.DataMovement/tests/Azure.Storage.Common.DataMovement.Tests.csproj index 5cdbea667d3c5..b4fe49459f04b 100644 --- a/sdk/storage/Azure.Storage.Common.DataMovement/tests/Azure.Storage.Common.DataMovement.Tests.csproj +++ b/sdk/storage/Azure.Storage.Common.DataMovement/tests/Azure.Storage.Common.DataMovement.Tests.csproj @@ -1,20 +1,21 @@  - $(RequiredTargetFrameworks) Microsoft Azure.Storage.Common.DataMovement client library tests false - Azure.Storage.Tests - + + - + + + + + + PreserveNewest + - - - - diff --git a/sdk/storage/Azure.Storage.Common.DataMovement/tests/DataMovementTestBase.cs b/sdk/storage/Azure.Storage.Common.DataMovement/tests/DataMovementTestBase.cs index 54e15f5be3a65..4fdcb1e79e57d 100644 --- a/sdk/storage/Azure.Storage.Common.DataMovement/tests/DataMovementTestBase.cs +++ b/sdk/storage/Azure.Storage.Common.DataMovement/tests/DataMovementTestBase.cs @@ -1,5 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. + using NUnit.Framework; using Azure.Core; using Azure.Core.Pipeline; @@ -21,7 +22,7 @@ namespace Azure.Storage.Tests public abstract class DataMovementTestBase : CommonTestBase { public DataMovementTestBase(bool async, BlobClientOptions.ServiceVersion serviceVersion, RecordedTestMode? 
mode = null) - : base(async, serviceVersion, null /* RecordedTestMode.Record /* to re-record */) + : base(async, serviceVersion, mode /* RecordedTestMode.Record /* to re-record */) { } diff --git a/sdk/storage/Azure.Storage.Common.DataMovement/tests/FilesystemScannerTests.cs b/sdk/storage/Azure.Storage.Common.DataMovement/tests/FilesystemScannerTests.cs new file mode 100644 index 0000000000000..4d66be1251946 --- /dev/null +++ b/sdk/storage/Azure.Storage.Common.DataMovement/tests/FilesystemScannerTests.cs @@ -0,0 +1,205 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.IO; +using System.Threading.Tasks; +using Azure.Storage.Common.DataMovement; +using NUnit.Framework; +using System.Reflection; +using System.Security.Principal; +using System.Security.AccessControl; +using System.Runtime.InteropServices; +#if !NETFRAMEWORK +using Mono.Unix.Native; +#endif + +namespace Azure.Storage.Tests +{ + public class FilesystemScannerTests + { + private readonly string _temp = Path.GetTempPath(); + private readonly FileSystemAccessRule _winAcl; + + public FilesystemScannerTests() + { + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + string currentUser = WindowsIdentity.GetCurrent().Name; + _winAcl = new FileSystemAccessRule(currentUser, FileSystemRights.ReadData, AccessControlType.Deny); + } + } + + [Test] + public void ScanFolderContainingMixedPermissions() + { + // Arrange + string folder = CreateRandomDirectory(_temp); + string openChild = CreateRandomFile(folder); + string lockedChild = CreateRandomFile(folder); + + string openSubfolder = CreateRandomDirectory(folder); + string openSubchild = CreateRandomFile(openSubfolder); + + string lockedSubfolder = CreateRandomDirectory(folder); + string lockedSubchild = CreateRandomFile(lockedSubfolder); + + AllowReadData(lockedChild, false, false); + 
AllowReadData(lockedSubfolder, true, false); + + // Act + IEnumerable result = FilesystemScanner.ScanLocation(folder); + + // Assert + Assert.Multiple(() => + { + CollectionAssert.Contains(result, openChild); + CollectionAssert.Contains(result, openSubchild); + CollectionAssert.Contains(result, lockedChild); // No permissions on file, but that should be dealt with by caller + CollectionAssert.DoesNotContain(result, lockedSubchild); // No permissions to enumerate folder, children not returned + }); + + // Cleanup + AllowReadData(lockedChild, false, true); + AllowReadData(lockedSubfolder, true, true); + + Directory.Delete(folder, true); + } + + [Test] + public void ScanFolderWithoutReadPermissions() + { + // Arrange + string folder = CreateRandomDirectory(_temp); + string child = CreateRandomFile(folder); + + AllowReadData(folder, true, false); + + // Act + IEnumerable result = FilesystemScanner.ScanLocation(folder); + + // Assert: the scan is a lazy iterator, so the failure only surfaces on the first MoveNext() + Assert.Throws(() => { + result.GetEnumerator().MoveNext(); + }); + + // Cleanup + AllowReadData(folder, true, true); + + Directory.Delete(folder, true); + } + + [Test] + public void ScanSingleFilePath() + { + // Arrange + string file = CreateRandomFile(_temp); + + // Act + IEnumerable result = FilesystemScanner.ScanLocation(file); + + // Assert + CollectionAssert.IsNotEmpty(result); + + // Cleanup + File.Delete(file); + } + + [Test] + public void ScanNonexistantItem() + { + // Arrange + string file = Path.Combine(_temp, Path.GetRandomFileName()); + + // Act + IEnumerable result = FilesystemScanner.ScanLocation(file); + + // Assert: the scan is a lazy iterator, so the failure only surfaces on the first MoveNext() + Assert.IsFalse(File.Exists(file)); + Assert.Throws(() => { + result.GetEnumerator().MoveNext(); + }); + } + + [Test] + public void ScanMultiplePaths() + { + // Arrange + string folder = CreateRandomDirectory(_temp); + string openChild = CreateRandomFile(folder); + string lockedChild = CreateRandomFile(folder); + + string openSubfolder = CreateRandomDirectory(folder); + string openSubchild = 
CreateRandomFile(openSubfolder); + + string lockedSubfolder = CreateRandomDirectory(folder); + string lockedSubchild = CreateRandomFile(lockedSubfolder); + + AllowReadData(lockedChild, false, false); + AllowReadData(lockedSubfolder, true, false); + + string file = CreateRandomFile(_temp); + string[] paths = { folder, file }; + + // Act + IEnumerable result = FilesystemScanner.ScanLocations(paths); + + // Assert + Assert.Multiple(() => + { + CollectionAssert.Contains(result, openChild); + CollectionAssert.Contains(result, openSubchild); + CollectionAssert.Contains(result, lockedChild); // No permissions on file, but that should be dealt with by caller + CollectionAssert.DoesNotContain(result, lockedSubchild); // No permissions to enumerate folder, children not returned + CollectionAssert.Contains(result, file); + }); + + // Cleanup + AllowReadData(lockedChild, false, true); + AllowReadData(lockedSubfolder, true, true); + + File.Delete(file); + Directory.Delete(folder, true); + } + + private static string CreateRandomDirectory(string parentPath) + { + return Directory.CreateDirectory(Path.Combine(parentPath, Path.GetRandomFileName())).FullName; + } + + private static string CreateRandomFile(string parentPath) + { + using (FileStream fs = File.Create(Path.Combine(parentPath, Path.GetRandomFileName()))) + { + return fs.Name; + } + } + + private void AllowReadData(string path, bool isDirectory, bool allowRead) + { + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + // Resolved at runtime to DirectoryInfo or FileInfo, depending on isDirectory + dynamic fsInfo = isDirectory ? new DirectoryInfo(path) : new FileInfo(path); + dynamic fsSec = FileSystemAclExtensions.GetAccessControl(fsInfo); + + fsSec.ModifyAccessRule(allowRead ? 
AccessControlModification.Remove : AccessControlModification.Add, _winAcl, out bool result); + + FileSystemAclExtensions.SetAccessControl(fsInfo, fsSec); + } +#if !NETFRAMEWORK + else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) + { + FilePermissions permissions = (allowRead ? + (FilePermissions.S_IRWXU | FilePermissions.S_IRWXG | FilePermissions.S_IRWXO) : + (FilePermissions.S_IWUSR | FilePermissions.S_IWGRP | FilePermissions.S_IWOTH)); + + Syscall.chmod(path, permissions); + } +#endif + } + } +}