Skip to content

Commit

Permalink
Minor File.ReadAllBytes* improvements (#61519)
Browse files Browse the repository at this point in the history
* switch from FileStream to RandomAccess

* use Array.MaxLength as the limit for File.ReadAllBytes, which fixes an edge-case bug for files where Array.MaxLength < Length <= int.MaxValue

* there is no gain from using FileOptions.SequentialScan on Unix, as it requires an additional system call

Co-authored-by: Dan Moseley <[email protected]>
  • Loading branch information
adamsitnik and danmoseley authored Nov 17, 2021
1 parent 10e107d commit d1b3816
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 41 deletions.
15 changes: 15 additions & 0 deletions src/libraries/System.IO.FileSystem/tests/File/ReadWriteAllBytes.cs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,21 @@ public void ReadFileOver2GB()
Assert.Throws<IOException>(() => File.ReadAllBytes(path));
}

[Fact]
[OuterLoop]
[ActiveIssue("https://github.com/dotnet/runtime/issues/45954", TestPlatforms.Browser)]
public void ReadFileOverMaxArrayLength()
{
    // Exactly one byte longer than Array.MaxLength.
    long oversizedLength = Array.MaxLength + 1L;
    string filePath = GetTestFilePath();

    // The stream is disposed at the end of this using block, i.e. before the read below runs.
    using (FileStream stream = File.Create(filePath))
    {
        stream.SetLength(oversizedLength);
    }

    // The file cannot fit into a single byte[], so ReadAllBytes is expected to fail with IOException.
    Assert.Throws<IOException>(() => File.ReadAllBytes(filePath));
}

[Fact]
public void Overwrite()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,16 +73,31 @@ public Task AlreadyCanceledAsync()
[Fact]
[OuterLoop]
[ActiveIssue("https://github.com/dotnet/runtime/issues/45954", TestPlatforms.Browser)]
public Task ReadFileOver2GBAsync()
public async Task ReadFileOver2GBAsync()
{
string path = GetTestFilePath();
using (FileStream fs = File.Create(path))
{
fs.SetLength(int.MaxValue + 1L);
}

// File is too large for ReadAllBytes at once
return Assert.ThrowsAsync<IOException>(async () => await File.ReadAllBytesAsync(path));
// File is too large for ReadAllBytesAsync at once
await Assert.ThrowsAsync<IOException>(async () => await File.ReadAllBytesAsync(path));
}

[Fact]
[OuterLoop]
[ActiveIssue("https://github.com/dotnet/runtime/issues/45954", TestPlatforms.Browser)]
public async Task ReadFileOverMaxArrayLengthAsync()
{
    // Exactly one byte longer than Array.MaxLength.
    long oversizedLength = Array.MaxLength + 1L;
    string filePath = GetTestFilePath();

    // The stream is disposed at the end of this using block, i.e. before the read below runs.
    using (FileStream stream = File.Create(filePath))
    {
        stream.SetLength(oversizedLength);
    }

    // The file cannot fit into a single byte[], so ReadAllBytesAsync is expected to fail with IOException.
    await Assert.ThrowsAsync<IOException>(async () => await File.ReadAllBytesAsync(filePath));
}

[Fact]
Expand Down
73 changes: 35 additions & 38 deletions src/libraries/System.Private.CoreLib/src/System/IO/File.cs
Original file line number Diff line number Diff line change
Expand Up @@ -251,27 +251,33 @@ public static void WriteAllText(string path, string? contents, Encoding encoding

public static byte[] ReadAllBytes(string path)
{
// bufferSize == 1 used to avoid unnecessary buffer in FileStream
using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: 1, FileOptions.SequentialScan))
// SequentialScan is a perf hint that requires extra sys-call on non-Windows OSes.
FileOptions options = OperatingSystem.IsWindows() ? FileOptions.SequentialScan : FileOptions.None;
using (SafeFileHandle sfh = OpenHandle(path, FileMode.Open, FileAccess.Read, FileShare.Read, options))
{
long fileLength = 0;
if (fs.CanSeek && (fileLength = fs.Length) > int.MaxValue)
if (sfh.CanSeek && (fileLength = RandomAccess.GetFileLength(sfh)) > Array.MaxLength)
{
throw new IOException(SR.IO_FileTooLong2GB);
}

#if DEBUG
fileLength = 0; // improve the test coverage for ReadAllBytesUnknownLength
#endif

if (fileLength == 0)
{
// Some file systems (e.g. procfs on Linux) return 0 for length even when there's content; also there is non-seekable file stream.
// Some file systems (e.g. procfs on Linux) return 0 for length even when there's content; also there are non-seekable files.
// Thus we need to assume 0 doesn't mean empty.
return ReadAllBytesUnknownLength(fs);
return ReadAllBytesUnknownLength(sfh);
}

int index = 0;
int count = (int)fileLength;
byte[] bytes = new byte[count];
while (count > 0)
{
int n = fs.Read(bytes, index, count);
int n = RandomAccess.ReadAtOffset(sfh, bytes.AsSpan(index, count), index);
if (n == 0)
{
ThrowHelper.ThrowEndOfFileException();
Expand Down Expand Up @@ -519,44 +525,35 @@ private static async Task<string> InternalReadAllTextAsync(string path, Encoding
return Task.FromCanceled<byte[]>(cancellationToken);
}

var fs = new FileStream(
path, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: 1, // bufferSize == 1 used to avoid unnecessary buffer in FileStream
FileOptions.Asynchronous | FileOptions.SequentialScan);
// SequentialScan is a perf hint that requires extra sys-call on non-Windows OSes.
FileOptions options = FileOptions.Asynchronous | (OperatingSystem.IsWindows() ? FileOptions.SequentialScan : FileOptions.None);
SafeFileHandle sfh = OpenHandle(path, FileMode.Open, FileAccess.Read, FileShare.Read, options);

bool returningInternalTask = false;
try
long fileLength = 0L;
if (sfh.CanSeek && (fileLength = RandomAccess.GetFileLength(sfh)) > Array.MaxLength)
{
long fileLength = 0L;
if (fs.CanSeek && (fileLength = fs.Length) > int.MaxValue)
{
var e = new IOException(SR.IO_FileTooLong2GB);
ExceptionDispatchInfo.SetCurrentStackTrace(e);
return Task.FromException<byte[]>(e);
}

returningInternalTask = true;
return fileLength > 0 ?
InternalReadAllBytesAsync(fs, (int)fileLength, cancellationToken) :
InternalReadAllBytesUnknownLengthAsync(fs, cancellationToken);
}
finally
{
if (!returningInternalTask)
{
fs.Dispose();
}
sfh.Dispose();
return Task.FromException<byte[]>(ExceptionDispatchInfo.SetCurrentStackTrace(new IOException(SR.IO_FileTooLong2GB)));
}

#if DEBUG
fileLength = 0; // improve the test coverage for InternalReadAllBytesUnknownLengthAsync
#endif

return fileLength > 0 ?
InternalReadAllBytesAsync(sfh, (int)fileLength, cancellationToken) :
InternalReadAllBytesUnknownLengthAsync(sfh, cancellationToken);
}

private static async Task<byte[]> InternalReadAllBytesAsync(FileStream fs, int count, CancellationToken cancellationToken)
private static async Task<byte[]> InternalReadAllBytesAsync(SafeFileHandle sfh, int count, CancellationToken cancellationToken)
{
using (fs)
using (sfh)
{
int index = 0;
byte[] bytes = new byte[count];
do
{
int n = await fs.ReadAsync(new Memory<byte>(bytes, index, count - index), cancellationToken).ConfigureAwait(false);
int n = await RandomAccess.ReadAtOffsetAsync(sfh, bytes.AsMemory(index), index, cancellationToken).ConfigureAwait(false);
if (n == 0)
{
ThrowHelper.ThrowEndOfFileException();
Expand All @@ -569,7 +566,7 @@ private static async Task<byte[]> InternalReadAllBytesAsync(FileStream fs, int c
}
}

private static async Task<byte[]> InternalReadAllBytesUnknownLengthAsync(FileStream fs, CancellationToken cancellationToken)
private static async Task<byte[]> InternalReadAllBytesUnknownLengthAsync(SafeFileHandle sfh, CancellationToken cancellationToken)
{
byte[] rentedArray = ArrayPool<byte>.Shared.Rent(512);
try
Expand All @@ -595,7 +592,7 @@ private static async Task<byte[]> InternalReadAllBytesUnknownLengthAsync(FileStr
}

Debug.Assert(bytesRead < rentedArray.Length);
int n = await fs.ReadAsync(rentedArray.AsMemory(bytesRead), cancellationToken).ConfigureAwait(false);
int n = await RandomAccess.ReadAtOffsetAsync(sfh, rentedArray.AsMemory(bytesRead), bytesRead, cancellationToken).ConfigureAwait(false);
if (n == 0)
{
return rentedArray.AsSpan(0, bytesRead).ToArray();
Expand All @@ -605,7 +602,7 @@ private static async Task<byte[]> InternalReadAllBytesUnknownLengthAsync(FileStr
}
finally
{
fs.Dispose();
sfh.Dispose();
ArrayPool<byte>.Shared.Return(rentedArray);
}
}
Expand Down Expand Up @@ -775,7 +772,7 @@ private static void Validate(string path, Encoding encoding)
throw new ArgumentException(SR.Argument_EmptyPath, nameof(path));
}

private static byte[] ReadAllBytesUnknownLength(FileStream fs)
private static byte[] ReadAllBytesUnknownLength(SafeFileHandle sfh)
{
byte[]? rentedArray = null;
Span<byte> buffer = stackalloc byte[512];
Expand Down Expand Up @@ -803,7 +800,7 @@ private static byte[] ReadAllBytesUnknownLength(FileStream fs)
}

Debug.Assert(bytesRead < buffer.Length);
int n = fs.Read(buffer.Slice(bytesRead));
int n = RandomAccess.ReadAtOffset(sfh, buffer.Slice(bytesRead), bytesRead);
if (n == 0)
{
return buffer.Slice(0, bytesRead).ToArray();
Expand Down

0 comments on commit d1b3816

Please sign in to comment.