[storage][blob & datalake] Jumbo blob (#9480)
* Jumbo putBlock, putBlob, datalake file append

* undo upload ArrayBuffer

* minor edit

Co-authored-by: Lin Jian <[email protected]>
ljian3377 and Lin Jian authored Jun 12, 2020
1 parent 5deb5f5 commit e563ca4
Showing 7 changed files with 96 additions and 11 deletions.
1 change: 1 addition & 0 deletions sdk/storage/storage-blob/CHANGELOG.md
@@ -3,6 +3,7 @@
## 12.2.0-preview.1 (2020.06)

- Supported quick query. Added a new API `BlockBlobClient.query()`.
- Increased the maximum block size for Block Blob from 100 MiB to 4000 MiB (~4 GB), thereby raising the maximum Block Blob size to roughly 200 TB (50,000 blocks × 4000 MiB).
- Added support for blob versioning.

## 12.1.2 (2020.05)
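For context on the raised block-size entry above: a minimal sketch (not part of the diff) of the low-level path this change enables — staging a single block larger than the old 100 MiB cap and committing it. The connection string, container, blob, and file names are placeholders, and the account must be addressed with service version 2019-12-12 or later (see `SERVICE_VERSION` in the constants below).

```ts
import * as fs from "fs";
import { BlobServiceClient } from "@azure/storage-blob";

async function stageJumboBlock(): Promise<void> {
  const blockBlobClient = BlobServiceClient.fromConnectionString(
    process.env.AZURE_STORAGE_CONNECTION_STRING! // placeholder environment variable
  )
    .getContainerClient("my-container")
    .getBlockBlobClient("huge.bin");

  // Stage the whole local file as a single block; anything up to 4000 MiB now works.
  const blockSize = fs.statSync("./huge.bin").size;
  const blockId = Buffer.from("block-000001").toString("base64");

  // A stream factory is passed so the request body can be replayed on retry.
  await blockBlobClient.stageBlock(blockId, () => fs.createReadStream("./huge.bin"), blockSize);
  await blockBlobClient.commitBlockList([blockId]);
}
```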
2 changes: 1 addition & 1 deletion sdk/storage/storage-blob/src/utils/constants.ts
@@ -5,7 +5,7 @@ export const SDK_VERSION: string = "12.2.0-preview";
export const SERVICE_VERSION: string = "2019-12-12";

export const BLOCK_BLOB_MAX_UPLOAD_BLOB_BYTES: number = 256 * 1024 * 1024; // 256MB
export const BLOCK_BLOB_MAX_STAGE_BLOCK_BYTES: number = 100 * 1024 * 1024; // 100MB
export const BLOCK_BLOB_MAX_STAGE_BLOCK_BYTES: number = 4000 * 1024 * 1024; // 4000MB
export const BLOCK_BLOB_MAX_BLOCKS: number = 50000;
export const DEFAULT_BLOCK_BUFFER_SIZE_BYTES: number = 8 * 1024 * 1024; // 8MB
export const DEFAULT_BLOB_DOWNLOAD_BLOCK_BYTES: number = 4 * 1024 * 1024; // 4MB
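For context (not part of the diff), the new ceiling follows directly from these constants — 50,000 blocks of up to 4000 MiB each, i.e. roughly 200 TB per block blob — and the high-level `uploadFile` helper can take advantage of it through its `blockSize` option. A minimal sketch with placeholder names:

```ts
import { BlobServiceClient } from "@azure/storage-blob";

const MiB = 1024 * 1024;
// 50,000 blocks x 4000 MiB per block ≈ 209.7 TB (≈ 190.7 TiB).
const maxBlockBlobBytes = 50000 * 4000 * MiB;
console.log(`New theoretical block blob maximum: ${maxBlockBlobBytes} bytes`);

async function uploadHugeFile(): Promise<void> {
  const blockBlobClient = BlobServiceClient.fromConnectionString(
    process.env.AZURE_STORAGE_CONNECTION_STRING! // placeholder environment variable
  )
    .getContainerClient("my-container")
    .getBlockBlobClient("huge.bin");

  await blockBlobClient.uploadFile("./huge.bin", {
    blockSize: 512 * MiB, // any value up to BLOCK_BLOB_MAX_STAGE_BLOCK_BYTES (4000 MiB)
    concurrency: 4
  });
}
```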
34 changes: 33 additions & 1 deletion sdk/storage/storage-blob/test/node/highlevel.node.spec.ts
@@ -9,6 +9,7 @@ import { RetriableReadableStreamOptions } from "../../src/utils/RetriableReadabl
import { record, Recorder } from "@azure/test-utils-recorder";
import { ContainerClient, BlobClient, BlockBlobClient, BlobServiceClient } from "../../src";
import { readStreamToLocalFileWithLogs } from "../utils/testutils.node";
import { BLOCK_BLOB_MAX_STAGE_BLOCK_BYTES } from "../../src/utils/constants";

// tslint:disable:no-empty
describe("Highlevel", () => {
@@ -38,7 +39,7 @@ describe("Highlevel", () => {
blockBlobClient = blobClient.getBlockBlobClient();
});

afterEach(async function() {
afterEach(async function () {
if (!this.currentTest?.isPending()) {
await containerClient.delete();
recorder.stop();
@@ -64,6 +65,22 @@ describe("Highlevel", () => {
recorder.stop();
});

it("put blob with maximum size", async () => {
recorder.skip("node", "Temp file - recorder doesn't support saving the file");
const MB = 1024 * 1024;
const maxPutBlobSizeLimitInMB = 5000;
const tempFile = await createRandomLocalFile(tempFolderPath, maxPutBlobSizeLimitInMB, MB);
const inputStream = fs.createReadStream(tempFile);

try {
await blockBlobClient.upload(() => inputStream, maxPutBlobSizeLimitInMB * MB, {
abortSignal: AbortController.timeout(20 * 1000) // takes too long to upload the file
});
} catch (err) {
assert.equal(err.name, 'AbortError');
}
}).timeout(timeoutForLargeFileUploadingTest);

it("uploadFile should success when blob >= BLOCK_BLOB_MAX_UPLOAD_BLOB_BYTES", async () => {
recorder.skip("node", "Temp file - recorder doesn't support saving the file");
await blockBlobClient.uploadFile(tempFileLarge, {
@@ -193,6 +210,21 @@ describe("Highlevel", () => {
assert.ok(eventTriggered);
});

it("uploadFile should succeed with blockSize = BLOCK_BLOB_MAX_STAGE_BLOCK_BYTES", async () => {
recorder.skip("node", "Temp file - recorder doesn't support saving the file");
const tempFile = await createRandomLocalFile(tempFolderPath, BLOCK_BLOB_MAX_STAGE_BLOCK_BYTES / (1024 * 1024) + 1, 1024 * 1024);
try {
await blockBlobClient.uploadFile(tempFile, {
blockSize: BLOCK_BLOB_MAX_STAGE_BLOCK_BYTES,
abortSignal: AbortController.timeout(20 * 1000) // takes too long to upload the file
});
} catch (err) {
assert.equal(err.name, 'AbortError');
}

fs.unlinkSync(tempFile);
}).timeout(timeoutForLargeFileUploadingTest);

it("uploadStream should success", async () => {
recorder.skip("node", "Temp file - recorder doesn't support saving the file");
const rs = fs.createReadStream(tempFileLarge);
5 changes: 5 additions & 0 deletions sdk/storage/storage-file-datalake/CHANGELOG.md
@@ -1,5 +1,10 @@
# Release History

## 12.1.0-preview.1 (2020.06)

- Increased the maximum block size for a file from 100 MiB to 4000 MiB (~4 GB), thereby raising the maximum file size to roughly 200 TB.
- Added more host mappings between Blob and DFS endpoints. See [issue #8744](https://github.com/Azure/azure-sdk-for-js/issues/8744).

## 12.0.1 (2020.05)

- Fix data corruption failure error [issue #6411](https://github.com/Azure/azure-sdk-for-js/issues/6411) when downloading compressed files. [PR #7993](https://github.com/Azure/azure-sdk-for-js/pull/7993)
Expand Down
2 changes: 1 addition & 1 deletion sdk/storage/storage-file-datalake/src/clients.ts
@@ -1228,7 +1228,7 @@ export class DataLakeFileClient extends DataLakePathClient {
if (numBlocks > BLOCK_BLOB_MAX_BLOCKS) {
throw new RangeError(
`The data's size is too big or the chunkSize is too small;` +
`the number of chunks must be <= ${BLOCK_BLOB_MAX_BLOCKS}`
`the number of chunks must be <= ${BLOCK_BLOB_MAX_BLOCKS}`
);
}

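For context (not part of the diff): this guard rejects uploads whose data cannot fit into 50,000 chunks of the chosen chunk size, so the larger 4000 MiB chunk cap is what pushes the reachable file size to roughly 200 TB. A rough, self-contained illustration of the arithmetic — the helper below is hypothetical and only mirrors the shape of the check shown above, not the SDK's internal code:

```ts
const MB = 1024 * 1024;
const BLOCK_BLOB_MAX_BLOCKS = 50000;
const FILE_UPLOAD_MAX_CHUNK_SIZE = 4000 * MB;

// Hypothetical helper for illustration only.
function assertChunkCount(dataSize: number, chunkSize: number): number {
  const numBlocks = Math.ceil(dataSize / chunkSize);
  if (numBlocks > BLOCK_BLOB_MAX_BLOCKS) {
    throw new RangeError(
      `The data's size is too big or the chunkSize is too small; ` +
        `the number of chunks must be <= ${BLOCK_BLOB_MAX_BLOCKS}`
    );
  }
  return numBlocks;
}

const oneHundredFiftyTiB = 150 * 1024 * 1024 * MB;
assertChunkCount(oneHundredFiftyTiB, FILE_UPLOAD_MAX_CHUNK_SIZE); // ok: ~39,322 chunks
// assertChunkCount(oneHundredFiftyTiB, 8 * MB);                  // would throw: ~19.7 million chunks
```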
22 changes: 19 additions & 3 deletions sdk/storage/storage-file-datalake/src/utils/constants.ts
@@ -11,7 +11,7 @@ export const TB: number = GB * 1024;

export const DEFAULT_HIGH_LEVEL_CONCURRENCY: number = 5;
export const FILE_MAX_SINGLE_UPLOAD_THRESHOLD: number = 100 * MB;
export const FILE_UPLOAD_MAX_CHUNK_SIZE: number = 100 * MB;
export const FILE_UPLOAD_MAX_CHUNK_SIZE: number = 4000 * MB;
export const FILE_UPLOAD_DEFAULT_CHUNK_SIZE: number = 8 * MB;
export const BLOCK_BLOB_MAX_BLOCKS: number = 50000;
export const FILE_MAX_SIZE_BYTES: number = BLOCK_BLOB_MAX_BLOCKS * FILE_UPLOAD_MAX_CHUNK_SIZE;
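A minimal sketch (not part of the diff) of using the raised per-append cap via the high-level helper; the connection string, file system, and path names are placeholders, and `chunkSize` is the option exercised by the new tests further down:

```ts
import { DataLakeServiceClient } from "@azure/storage-file-datalake";

async function uploadHugeDatalakeFile(): Promise<void> {
  const fileClient = DataLakeServiceClient.fromConnectionString(
    process.env.AZURE_STORAGE_CONNECTION_STRING! // placeholder environment variable
  )
    .getFileSystemClient("my-filesystem")
    .getFileClient("data/huge.bin");

  const MB = 1024 * 1024;
  await fileClient.uploadFile("./huge.bin", {
    chunkSize: 512 * MB // any value up to FILE_UPLOAD_MAX_CHUNK_SIZE (4000 MB)
  });
}
```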
@@ -196,8 +196,24 @@ export const DevelopmentConnectionString = `DefaultEndpointsProtocol=http;Accoun

// Mapping pairs to transform url from dfs endpoint to blob endpoint
// Customize this value to add more mapping patterns
export const ToBlobEndpointHostMappings = [["dfs.core.windows.net", "blob.core.windows.net"]];
export const ToBlobEndpointHostMappings = [
["dfs.preprod.core.windows.net", "blob.preprod.core.windows.net"],
["dfs.core.windows.net", "blob.core.windows.net"],
["dfs.core.chinacloudapi.cn", "blob.core.chinacloudapi.cn"],
["dfs.core.usgovcloudapi.net", "blob.core.usgovcloudapi.net"],
["dfs.core.cloudapi.de", "blob.core.cloudapi.de"],
["dfs.core.microsoft.scloud", "blob.core.microsoft.scloud"],
["dfs.core.eaglex.ic.gov", "blob.core.eaglex.ic.gov"]
];

// Mapping pairs to transform url from blob endpoint to dfs endpoint
// Customize this value to add more mapping patterns
export const ToDfsEndpointHostMappings = [["blob.core.windows.net", "dfs.core.windows.net"]];
export const ToDfsEndpointHostMappings = [
["blob.preprod.core.windows.net", "dfs.preprod.core.windows.net"],
["blob.core.windows.net", "dfs.core.windows.net"],
["blob.core.chinacloudapi.cn", "dfs.core.chinacloudapi.cn"],
["blob.core.usgovcloudapi.net", "dfs.core.usgovcloudapi.net"],
["blob.core.cloudapi.de", "dfs.core.cloudapi.de"],
["blob.core.microsoft.scloud", "dfs.core.microsoft.scloud"],
["blob.core.eaglex.ic.gov", "dfs.core.eaglex.ic.gov"]
];
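For context (not part of the diff): these tables drive the SDK's internal translation between DFS and Blob hosts for the clouds listed above. The helper below is a hypothetical illustration of how such a mapping table might be applied to a URL — it is not the package's public API:

```ts
const ToBlobEndpointHostMappings: [string, string][] = [
  ["dfs.preprod.core.windows.net", "blob.preprod.core.windows.net"],
  ["dfs.core.windows.net", "blob.core.windows.net"],
  ["dfs.core.chinacloudapi.cn", "blob.core.chinacloudapi.cn"]
  // ...remaining pairs as listed in the constants above
];

// Hypothetical helper for illustration only.
function toBlobEndpointUrl(dfsUrl: string): string {
  const url = new URL(dfsUrl);
  for (const [dfsHost, blobHost] of ToBlobEndpointHostMappings) {
    if (url.host.endsWith(dfsHost)) {
      url.host = url.host.replace(dfsHost, blobHost);
      break;
    }
  }
  return url.toString();
}

// e.g. toBlobEndpointUrl("https://myaccount.dfs.core.windows.net/fs/dir/file")
//      -> "https://myaccount.blob.core.windows.net/fs/dir/file"
```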
41 changes: 36 additions & 5 deletions sdk/storage/storage-file-datalake/test/node/highlevel.node.spec.ts
@@ -14,7 +14,8 @@ import {
MB,
GB,
FILE_MAX_SINGLE_UPLOAD_THRESHOLD,
BLOCK_BLOB_MAX_BLOCKS
BLOCK_BLOB_MAX_BLOCKS,
FILE_UPLOAD_MAX_CHUNK_SIZE
} from "../../src/utils/constants";
import { readStreamToLocalFileWithLogs } from "../../test/utils/testutils.node";
const { Readable } = require("stream");
@@ -35,7 +36,7 @@ describe("Highlevel Node.js only", () => {

let recorder: any;

beforeEach(async function() {
beforeEach(async function () {
recorder = record(this, recorderEnvSetup);
const serviceClient = getDataLakeServiceClient();
fileSystemName = recorder.getUniqueName("filesystem");
@@ -45,14 +46,14 @@ describe("Highlevel Node.js only", () => {
fileClient = fileSystemClient.getFileClient(fileName);
});

afterEach(async function() {
afterEach(async function () {
if (!this.currentTest?.isPending()) {
await fileSystemClient.delete();
recorder.stop();
}
});

before(async function() {
before(async function () {
recorder = record(this, recorderEnvSetup);
if (!fs.existsSync(tempFolderPath)) {
fs.mkdirSync(tempFolderPath);
@@ -65,7 +66,7 @@
recorder.stop();
});

after(async function() {
after(async function () {
recorder = record(this, recorderEnvSetup);
fs.unlinkSync(tempFileLarge);
fs.unlinkSync(tempFileSmall);
@@ -484,6 +485,36 @@ describe("Highlevel Node.js only", () => {
fs.unlinkSync(tempFileEmpty);
});

it("uploadFile with chunkSize = FILE_UPLOAD_MAX_CHUNK_SIZE should succeed", async () => {
recorder.skip("node", "Temp file - recorder doesn't support saving the file");
const fileSize = FILE_UPLOAD_MAX_CHUNK_SIZE * 2 + MB;
const tempFile = await createRandomLocalFile(tempFolderPath, fileSize / MB, MB);
try {
await fileClient.uploadFile(tempFile, {
chunkSize: FILE_UPLOAD_MAX_CHUNK_SIZE,
abortSignal: AbortController.timeout(20 * 1000) // takes too long to upload the file
});
} catch (err) {
assert.equal(err.name, 'AbortError');
}

fs.unlinkSync(tempFile);
}).timeout(timeoutForLargeFileUploadingTest);

// Skipped because it throws an "invalid typed array length" error, probably due to an underlying bug.
it.skip("upload with chunkSize = FILE_UPLOAD_MAX_CHUNK_SIZE should succeed", async () => {
const fileSize = FILE_UPLOAD_MAX_CHUNK_SIZE * 2 + MB;
const arrayBuf = new ArrayBuffer(fileSize);
try {
await fileClient.upload(arrayBuf, {
chunkSize: FILE_UPLOAD_MAX_CHUNK_SIZE,
abortSignal: AbortController.timeout(20 * 1000) // takes too long to upload the file
});
} catch (err) {
assert.equal(err.name, 'AbortError');
}
}).timeout(timeoutForLargeFileUploadingTest);

it("readToBuffer should work", async () => {
recorder.skip("node", "Temp file - recorder doesn't support saving the file");

