From 7a61e69b2eef40a97a7b8d8e62698490e90481c9 Mon Sep 17 00:00:00 2001
From: Rushi Patel <84736613+rnpmsft@users.noreply.github.com>
Date: Tue, 8 Jun 2021 14:30:44 -0400
Subject: [PATCH] Initial creation of filesystem scanner for DMLib (#21492)
---
.../CHANGELOG.md | 2 +-
.../README.md | 4 +
.../Azure.Storage.Blobs.DataMovement.csproj | 2 +-
...re.Storage.Blobs.DataMovement.Tests.csproj | 17 +-
.../CHANGELOG.md | 2 +-
.../README.md | 4 +
.../Azure.Storage.Common.DataMovement.csproj | 2 +-
.../src/FilesystemScanner.cs | 114 ++++++++++
...e.Storage.Common.DataMovement.Tests.csproj | 17 +-
.../tests/DataMovementTestBase.cs | 3 +-
.../tests/FilesystemScannerTests.cs | 205 ++++++++++++++++++
11 files changed, 350 insertions(+), 22 deletions(-)
create mode 100644 sdk/storage/Azure.Storage.Common.DataMovement/src/FilesystemScanner.cs
create mode 100644 sdk/storage/Azure.Storage.Common.DataMovement/tests/FilesystemScannerTests.cs
diff --git a/sdk/storage/Azure.Storage.Blobs.DataMovement/CHANGELOG.md b/sdk/storage/Azure.Storage.Blobs.DataMovement/CHANGELOG.md
index ca4624cb29e41..08895a1208a3d 100644
--- a/sdk/storage/Azure.Storage.Blobs.DataMovement/CHANGELOG.md
+++ b/sdk/storage/Azure.Storage.Blobs.DataMovement/CHANGELOG.md
@@ -1,6 +1,6 @@
# Release History
-## 3.0.0-preview.1 (Unreleased)
+## 12.0.0-preview.1 (Unreleased)
This preview is the first release of a ground-up rewrite of our client
libraries to ensure consistency, idiomatic design, productivity, and an
excellent developer experience. It was created following the Azure SDK Design
diff --git a/sdk/storage/Azure.Storage.Blobs.DataMovement/README.md b/sdk/storage/Azure.Storage.Blobs.DataMovement/README.md
index 135b1537e382b..127ff79ef5080 100644
--- a/sdk/storage/Azure.Storage.Blobs.DataMovement/README.md
+++ b/sdk/storage/Azure.Storage.Blobs.DataMovement/README.md
@@ -32,6 +32,10 @@ Here's an example using the Azure CLI:
az storage account create --name MyStorageAccount --resource-group MyResourceGroup --location westus --sku Standard_LRS
```
+### Authenticate the Client
+
+Authentication works the same as in [Azure.Storage.Blobs][authenticating_with_blobs].
+
## Key concepts
Blob storage is designed for:
diff --git a/sdk/storage/Azure.Storage.Blobs.DataMovement/src/Azure.Storage.Blobs.DataMovement.csproj b/sdk/storage/Azure.Storage.Blobs.DataMovement/src/Azure.Storage.Blobs.DataMovement.csproj
index eb3b833ad3fd6..321dbc85456eb 100644
--- a/sdk/storage/Azure.Storage.Blobs.DataMovement/src/Azure.Storage.Blobs.DataMovement.csproj
+++ b/sdk/storage/Azure.Storage.Blobs.DataMovement/src/Azure.Storage.Blobs.DataMovement.csproj
@@ -4,7 +4,7 @@
Microsoft Azure.Storage.DataMovement.Blobs client library
- 0.1.0-beta.1
+ 12.0.0-preview.1
BlobSDK;$(DefineConstants)
Microsoft Azure Storage Blobs DataMovement;Microsoft;Azure;Blobs;Blob;Storage;DataMovement;StorageScalable;$(PackageCommonTags)
diff --git a/sdk/storage/Azure.Storage.Blobs.DataMovement/tests/Azure.Storage.Blobs.DataMovement.Tests.csproj b/sdk/storage/Azure.Storage.Blobs.DataMovement/tests/Azure.Storage.Blobs.DataMovement.Tests.csproj
index d648ff83d5920..4cbf5807cae68 100644
--- a/sdk/storage/Azure.Storage.Blobs.DataMovement/tests/Azure.Storage.Blobs.DataMovement.Tests.csproj
+++ b/sdk/storage/Azure.Storage.Blobs.DataMovement/tests/Azure.Storage.Blobs.DataMovement.Tests.csproj
@@ -1,20 +1,19 @@
-
$(RequiredTargetFrameworks)
Microsoft Azure.Storage.Common.DataMovement client library tests
false
- Azure.Storage.Tests
-
-
+
-
-
-
-
-
+
+
+
+ PreserveNewest
+
+
+
\ No newline at end of file
diff --git a/sdk/storage/Azure.Storage.Common.DataMovement/CHANGELOG.md b/sdk/storage/Azure.Storage.Common.DataMovement/CHANGELOG.md
index ca4624cb29e41..08895a1208a3d 100644
--- a/sdk/storage/Azure.Storage.Common.DataMovement/CHANGELOG.md
+++ b/sdk/storage/Azure.Storage.Common.DataMovement/CHANGELOG.md
@@ -1,6 +1,6 @@
# Release History
-## 3.0.0-preview.1 (Unreleased)
+## 12.0.0-preview.1 (Unreleased)
This preview is the first release of a ground-up rewrite of our client
libraries to ensure consistency, idiomatic design, productivity, and an
excellent developer experience. It was created following the Azure SDK Design
diff --git a/sdk/storage/Azure.Storage.Common.DataMovement/README.md b/sdk/storage/Azure.Storage.Common.DataMovement/README.md
index ce678ceec9985..8b53fc9837b51 100644
--- a/sdk/storage/Azure.Storage.Common.DataMovement/README.md
+++ b/sdk/storage/Azure.Storage.Common.DataMovement/README.md
@@ -38,6 +38,10 @@ Here's an example using the Azure CLI:
az storage account create --name MyStorageAccount --resource-group MyResourceGroup --location westus --sku Standard_LRS
```
+### Authenticate the Client
+
+Authentication details to be written.
+
## Key concepts
The Azure Storage Common client library contains shared infrastructure like
diff --git a/sdk/storage/Azure.Storage.Common.DataMovement/src/Azure.Storage.Common.DataMovement.csproj b/sdk/storage/Azure.Storage.Common.DataMovement/src/Azure.Storage.Common.DataMovement.csproj
index 0050f2c455b84..7e2635ffe3aa8 100644
--- a/sdk/storage/Azure.Storage.Common.DataMovement/src/Azure.Storage.Common.DataMovement.csproj
+++ b/sdk/storage/Azure.Storage.Common.DataMovement/src/Azure.Storage.Common.DataMovement.csproj
@@ -4,7 +4,7 @@
Microsoft Azure.Storage.Common.DataMovement client library
- 0.1.0-beta.1
+ 12.0.0-preview.1
CommonSDK;$(DefineConstants)
Microsoft Azure Storage Common DataMovement, Microsoft, Azure, StorageScalable, azureofficial
diff --git a/sdk/storage/Azure.Storage.Common.DataMovement/src/FilesystemScanner.cs b/sdk/storage/Azure.Storage.Common.DataMovement/src/FilesystemScanner.cs
new file mode 100644
index 0000000000000..7364c49213716
--- /dev/null
+++ b/sdk/storage/Azure.Storage.Common.DataMovement/src/FilesystemScanner.cs
@@ -0,0 +1,114 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace Azure.Storage.Common.DataMovement
+{
+    /// <summary>
+    /// Provides static helpers for enumerating the files found at one or more local filesystem paths.
+    /// </summary>
+ public static class FilesystemScanner
+ {
+        /// <summary>
+        /// Enumerates all files pointed to by the provided path, including those in subdirectories (if path is a directory).
+        /// </summary>
+        /// <remarks>
+        /// This method is an iterator: the path is validated and the filesystem is read only once enumeration begins.
+        /// </remarks>
+        /// <param name="path">Filesystem location.</param>
+        /// <returns>Enumerable list of absolute paths containing all relevant files the user has permission to access.</returns>
+        public static IEnumerable<string> ScanLocation(string path)
+        {
+            // Path type is ambiguous at start
+            bool isDirectory = false;
+
+            try
+            {
+                // Make sure we're dealing with absolute, well-formatted path
+                path = Path.GetFullPath(path);
+
+                // Check if path points to a directory
+                isDirectory = (File.GetAttributes(path) & FileAttributes.Directory) == FileAttributes.Directory;
+            }
+            catch (Exception)
+            {
+                // If there's an error here, there aren't any valid entries to scan at the given path;
+                // the path is either invalid or nonexistent. In this case, throw the resulting exception.
+                //
+                // TODO: Logging for invalid path exceptions
+                throw;
+            }
+
+            // If we're given a directory, parse its children recursively
+            if (isDirectory)
+            {
+                // Create a queue of folders to enumerate files from, starting with provided path
+                Queue<string> folders = new();
+                folders.Enqueue(path);
+
+                while (folders.Count > 0)
+                {
+                    // Grab a folder from the queue
+                    string dir = folders.Dequeue();
+
+                    // Try to enumerate and queue all subdirectories of the current folder
+                    try
+                    {
+                        foreach (string subdir in Directory.EnumerateDirectories(dir))
+                        {
+                            folders.Enqueue(subdir);
+                        }
+                    }
+                    // If we lack permissions to enumerate, skip the folder and continue processing
+                    // the rest of the queue
+                    catch (Exception)
+                    {
+                        // TODO: Logging for missing permissions to enumerate folder
+                        if (dir == path)
+                        {
+                            // If we can't even enumerate the path supplied by the user, throw
+                            // the error
+                            throw;
+                        }
+
+                        // Otherwise, just log the failed subdirectory and continue to list as many
+                        // files as accessible. Maybe let users decide whether to always throw here?
+                        continue;
+                    }
+
+                    // Add all files in the directory to be returned (errors raised here are not caught)
+                    foreach (string file in Directory.EnumerateFiles(dir))
+                    {
+                        yield return file;
+                    }
+                }
+            }
+            // Otherwise the path is a single file: return it in its absolute form
+            else
+            {
+                yield return path;
+            }
+        }
+
+        /// <summary>
+        /// Enumerates files pointed to by several paths.
+        /// </summary>
+        /// <param name="paths">Filesystem locations.</param>
+        /// <returns>Enumerable list of absolute paths containing all relevant files the user has permission to access.</returns>
+        public static IEnumerable<string> ScanLocations(string[] paths)
+        {
+            // Redirect all paths provided to ScanLocation(), and collect all results together
+            foreach (string path in paths)
+            {
+                foreach (string file in ScanLocation(path))
+                {
+                    yield return file;
+                }
+            }
+        }
+ }
+}
diff --git a/sdk/storage/Azure.Storage.Common.DataMovement/tests/Azure.Storage.Common.DataMovement.Tests.csproj b/sdk/storage/Azure.Storage.Common.DataMovement/tests/Azure.Storage.Common.DataMovement.Tests.csproj
index 5cdbea667d3c5..b4fe49459f04b 100644
--- a/sdk/storage/Azure.Storage.Common.DataMovement/tests/Azure.Storage.Common.DataMovement.Tests.csproj
+++ b/sdk/storage/Azure.Storage.Common.DataMovement/tests/Azure.Storage.Common.DataMovement.Tests.csproj
@@ -1,20 +1,21 @@
-
$(RequiredTargetFrameworks)
Microsoft Azure.Storage.Common.DataMovement client library tests
false
- Azure.Storage.Tests
-
+
+
-
+
+
+
+
+
+ PreserveNewest
+
-
-
-
-
diff --git a/sdk/storage/Azure.Storage.Common.DataMovement/tests/DataMovementTestBase.cs b/sdk/storage/Azure.Storage.Common.DataMovement/tests/DataMovementTestBase.cs
index 54e15f5be3a65..4fdcb1e79e57d 100644
--- a/sdk/storage/Azure.Storage.Common.DataMovement/tests/DataMovementTestBase.cs
+++ b/sdk/storage/Azure.Storage.Common.DataMovement/tests/DataMovementTestBase.cs
@@ -1,5 +1,6 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
+
using NUnit.Framework;
using Azure.Core;
using Azure.Core.Pipeline;
@@ -21,7 +22,7 @@ namespace Azure.Storage.Tests
public abstract class DataMovementTestBase : CommonTestBase
{
public DataMovementTestBase(bool async, BlobClientOptions.ServiceVersion serviceVersion, RecordedTestMode? mode = null)
- : base(async, serviceVersion, null /* RecordedTestMode.Record /* to re-record */)
+ : base(async, serviceVersion, mode /* RecordedTestMode.Record /* to re-record */)
{
}
diff --git a/sdk/storage/Azure.Storage.Common.DataMovement/tests/FilesystemScannerTests.cs b/sdk/storage/Azure.Storage.Common.DataMovement/tests/FilesystemScannerTests.cs
new file mode 100644
index 0000000000000..4d66be1251946
--- /dev/null
+++ b/sdk/storage/Azure.Storage.Common.DataMovement/tests/FilesystemScannerTests.cs
@@ -0,0 +1,205 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.IO;
+using System.Threading.Tasks;
+using Azure.Storage.Common.DataMovement;
+using NUnit.Framework;
+using System.Reflection;
+using System.Security.Principal;
+using System.Security.AccessControl;
+using System.Runtime.InteropServices;
+#if !NETFRAMEWORK
+using Mono.Unix.Native;
+#endif
+
+namespace Azure.Storage.Tests
+{
+ public class FilesystemScannerTests
+ {
+        private readonly string _temp = Path.GetTempPath(); // Root under which each test creates its scratch items
+        private readonly FileSystemAccessRule _winAcl; // Deny-ReadData rule for the current user; only assigned on Windows
+
+        public FilesystemScannerTests()
+        {
+            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
+            {
+                using WindowsIdentity identity = WindowsIdentity.GetCurrent(); // Disposed once the account name has been read
+                _winAcl = new FileSystemAccessRule(identity.Name, FileSystemRights.ReadData, AccessControlType.Deny);
+            }
+        }
+
+        [Test]
+        public void ScanFolderContainingMixedPermissions()
+        {
+            // Arrange
+            string folder = CreateRandomDirectory(_temp);
+            string openChild = CreateRandomFile(folder);
+            string lockedChild = CreateRandomFile(folder);
+            string openSubfolder = CreateRandomDirectory(folder);
+            string openSubchild = CreateRandomFile(openSubfolder);
+            string lockedSubfolder = CreateRandomDirectory(folder);
+            string lockedSubchild = CreateRandomFile(lockedSubfolder);
+
+            AllowReadData(lockedChild, false, false);
+            AllowReadData(lockedSubfolder, true, false);
+            try
+            {
+                // Act: materialize once so every assertion inspects the same scan
+                List<string> result = FilesystemScanner.ScanLocation(folder).ToList();
+                // Assert
+                Assert.Multiple(() =>
+                {
+                    CollectionAssert.Contains(result, openChild);
+                    CollectionAssert.Contains(result, openSubchild);
+                    CollectionAssert.Contains(result, lockedChild); // No permissions on file, but that should be dealt with by caller
+                    CollectionAssert.DoesNotContain(result, lockedSubchild); // No permissions to enumerate folder, children not returned
+                });
+            }
+            finally
+            {
+                // Cleanup: restore access even if an assertion failed, so the folder can be deleted
+                AllowReadData(lockedChild, false, true);
+                AllowReadData(lockedSubfolder, true, true);
+                Directory.Delete(folder, true);
+            }
+        }
+
+        [Test]
+        public void ScanFolderWithoutReadPermissions()
+        {
+            // Arrange
+            string folder = CreateRandomDirectory(_temp);
+            CreateRandomFile(folder);
+
+            AllowReadData(folder, true, false);
+            try
+            {
+                // Act: the scan is lazy, so the folder is not touched until enumeration starts
+                IEnumerable<string> result = FilesystemScanner.ScanLocation(folder);
+                // Assert: First() drives the first MoveNext() and disposes the enumerator it creates
+                Assert.Throws<UnauthorizedAccessException>(() => result.First());
+            }
+            finally
+            {
+                // Cleanup: restore access even if the assertion failed, so the folder can be deleted
+                AllowReadData(folder, true, true);
+                Directory.Delete(folder, true);
+            }
+        }
+
+        [Test]
+        public void ScanSingleFilePath()
+        {
+            // Arrange
+            string file = CreateRandomFile(_temp);
+
+            // Act
+            IEnumerable<string> result = FilesystemScanner.ScanLocation(file);
+
+            // Assert: a path that points at a file yields at least one entry
+            CollectionAssert.IsNotEmpty(result);
+
+            // Cleanup
+            File.Delete(file);
+        }
+
+        [Test]
+        public void ScanNonexistantItem()
+        {
+            // Arrange
+            string file = Path.Combine(_temp, Path.GetRandomFileName());
+
+            // Act
+            IEnumerable<string> result = FilesystemScanner.ScanLocation(file);
+
+            // Assert
+            Assert.IsFalse(File.Exists(file));
+            // The scan is lazy: the missing path is only detected once enumeration starts.
+            // First() drives the first MoveNext() and disposes the enumerator it creates.
+            Assert.Throws<FileNotFoundException>(() => result.First());
+        }
+
+        [Test]
+        public void ScanMultiplePaths()
+        {
+            // Arrange
+            string folder = CreateRandomDirectory(_temp);
+            string openChild = CreateRandomFile(folder);
+            string lockedChild = CreateRandomFile(folder);
+            string openSubfolder = CreateRandomDirectory(folder);
+            string openSubchild = CreateRandomFile(openSubfolder);
+            string lockedSubfolder = CreateRandomDirectory(folder);
+            string lockedSubchild = CreateRandomFile(lockedSubfolder);
+            string file = CreateRandomFile(_temp);
+            string[] paths = { folder, file };
+
+            AllowReadData(lockedChild, false, false);
+            AllowReadData(lockedSubfolder, true, false);
+            try
+            {
+                // Act: materialize once so every assertion inspects the same scan
+                List<string> result = FilesystemScanner.ScanLocations(paths).ToList();
+                // Assert
+                Assert.Multiple(() =>
+                {
+                    CollectionAssert.Contains(result, openChild);
+                    CollectionAssert.Contains(result, openSubchild);
+                    CollectionAssert.Contains(result, lockedChild); // No permissions on file, but that should be dealt with by caller
+                    CollectionAssert.DoesNotContain(result, lockedSubchild); // No permissions to enumerate folder, children not returned
+                    CollectionAssert.Contains(result, file);
+                });
+            }
+            finally
+            {
+                // Cleanup: restore access even if an assertion failed, so that
+                // the scratch file and folder can be deleted
+                AllowReadData(lockedChild, false, true);
+                AllowReadData(lockedSubfolder, true, true);
+                File.Delete(file);
+                Directory.Delete(folder, true);
+            }
+        }
+
+        // Creates a randomly named directory under parentPath and returns its full path.
+        private static string CreateRandomDirectory(string parentPath)
+            => Directory.CreateDirectory(
+                Path.Combine(parentPath, Path.GetRandomFileName())).FullName;
+
+        // Creates a randomly named file under parentPath and returns the path reported by its stream.
+        private static string CreateRandomFile(string parentPath)
+        {
+            string target = Path.Combine(parentPath, Path.GetRandomFileName());
+            using FileStream stream = File.Create(target);
+            return stream.Name;
+        }
+
+ private void AllowReadData(string path, bool isDirectory, bool allowRead) // Grants (allowRead == true) or revokes read access on path, using the mechanism of the current OS
+ {
+ if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
+ {
+ // dynamic lets one code path serve both DirectoryInfo and FileInfo; the matching overloads are bound at run time
+ dynamic fsInfo = isDirectory ? new DirectoryInfo(path) : new FileInfo(path);
+ dynamic fsSec = FileSystemAclExtensions.GetAccessControl(fsInfo);
+ // Revoking adds the rule held in _winAcl; allowing removes it again. The out value is not inspected.
+ fsSec.ModifyAccessRule(allowRead ? AccessControlModification.Remove : AccessControlModification.Add, _winAcl, out bool result);
+ // Write the modified security descriptor back to the file or directory
+ FileSystemAclExtensions.SetAccessControl(fsInfo, fsSec);
+ }
+#if !NETFRAMEWORK
+ else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
+ {
+ FilePermissions permissions = (allowRead ?
+ (FilePermissions.S_IRWXU | FilePermissions.S_IRWXG | FilePermissions.S_IRWXO) : // read/write/execute for user, group and others
+ (FilePermissions.S_IWUSR | FilePermissions.S_IWGRP | FilePermissions.S_IWOTH)); // write-only for user, group and others
+ // NOTE(review): the return value of chmod is ignored, so a failed permission change goes unnoticed
+ Syscall.chmod(path, permissions);
+ }
+#endif
+ }
+ }
+}