From cb78e697bdfe6e3bbd09cf5e5b5fd4b935cffeb4 Mon Sep 17 00:00:00 2001
From: Dan Aprahamian
Date: Mon, 4 Nov 2019 13:19:02 -0500
Subject: [PATCH] fix(encryption): encryption uses smaller batch size

Part of NODE-2255
---
 lib/bulk/common.js                                 |  14 +-
 lib/bulk/ordered.js                                |  13 +-
 lib/bulk/unordered.js                              |  13 +-
 .../client_side_encryption_prose_tests.js          | 218 ++++++++++++++++++
 4 files changed, 246 insertions(+), 12 deletions(-)

diff --git a/lib/bulk/common.js b/lib/bulk/common.js
index f2e1a46b93..8e63ba6767 100644
--- a/lib/bulk/common.js
+++ b/lib/bulk/common.js
@@ -749,14 +749,19 @@ class BulkOperationBase {
     // Handle to the bson serializer, used to calculate running sizes
     const bson = topology.bson;
 
-    // Set max byte size
     const isMaster = topology.lastIsMaster();
-    const maxBatchSizeBytes =
+
+    // If we have autoEncryption on, batch-splitting must be done in 2 MiB chunks, but single
+    // documents over 2 MiB are still allowed
+    const autoEncrypter = topology.s.options && topology.s.options.autoEncrypter;
+    const maxBsonObjectSize =
       isMaster && isMaster.maxBsonObjectSize ? isMaster.maxBsonObjectSize : 1024 * 1024 * 16;
+    const maxBatchSizeBytes = autoEncrypter ? 1024 * 1024 * 2 : maxBsonObjectSize;
     const maxWriteBatchSize =
       isMaster && isMaster.maxWriteBatchSize ? isMaster.maxWriteBatchSize : 1000;
 
+
     // Calculates the largest possible size of an Array key, represented as a BSON string
     // element. This calculation:
     // 1 byte for BSON type
@@ -805,8 +810,9 @@ class BulkOperationBase {
       // Write concern
       writeConcern: writeConcern,
       // Max batch size options
-      maxBatchSizeBytes: maxBatchSizeBytes,
-      maxWriteBatchSize: maxWriteBatchSize,
+      maxBsonObjectSize,
+      maxBatchSizeBytes,
+      maxWriteBatchSize,
       maxKeySize,
       // Namespace
       namespace: namespace,
diff --git a/lib/bulk/ordered.js b/lib/bulk/ordered.js
index e2507b86b7..a976bed210 100644
--- a/lib/bulk/ordered.js
+++ b/lib/bulk/ordered.js
@@ -27,8 +27,8 @@ function addToOperationsList(bulkOperation, docType, document) {
   });
 
   // Throw error if the doc is bigger than the max BSON size
-  if (bsonSize >= bulkOperation.s.maxBatchSizeBytes)
-    throw toError('document is larger than the maximum size ' + bulkOperation.s.maxBatchSizeBytes);
+  if (bsonSize >= bulkOperation.s.maxBsonObjectSize)
+    throw toError('document is larger than the maximum size ' + bulkOperation.s.maxBsonObjectSize);
 
   // Create a new batch object if we don't have a current one
   if (bulkOperation.s.currentBatch == null)
@@ -38,9 +38,14 @@ function addToOperationsList(bulkOperation, docType, document) {
 
   // Check if we need to create a new batch
   if (
+    // New batch if we exceed the max batch op size
    bulkOperation.s.currentBatchSize + 1 >= bulkOperation.s.maxWriteBatchSize ||
-    bulkOperation.s.currentBatchSizeBytes + maxKeySize + bsonSize >=
-      bulkOperation.s.maxBatchSizeBytes ||
+    // New batch if we exceed the maxBatchSizeBytes. Only matters if the batch already has a doc,
+    // since we can't send an empty batch
+    (bulkOperation.s.currentBatchSize > 0 &&
+      bulkOperation.s.currentBatchSizeBytes + maxKeySize + bsonSize >=
+        bulkOperation.s.maxBatchSizeBytes) ||
+    // New batch if the new op does not have the same op type as the current batch
     bulkOperation.s.currentBatch.batchType !== docType
   ) {
     // Save the batch to the execution stack
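
Reviewer note: condensed into a standalone predicate, the splitting rule added above (and mirrored in lib/bulk/unordered.js below) reads as follows. This is a sketch with hypothetical names (`needsNewBatch`, `limits`), not the driver's actual API:

.. code:: javascript

    // limits.maxBatchSizeBytes is 2 MiB when auto encryption is on, otherwise
    // maxBsonObjectSize (16 MiB by default); see lib/bulk/common.js above.
    function needsNewBatch(batch, limits, maxKeySize, bsonSize, docType) {
      // New batch if we would exceed the max op count per batch
      if (batch.size + 1 >= limits.maxWriteBatchSize) return true;
      // New batch if adding this doc would exceed maxBatchSizeBytes -- but only
      // when the batch already holds a doc, since we can't send an empty batch
      // and a single document over 2 MiB (but under 16 MiB) is still allowed
      if (
        batch.size > 0 &&
        batch.sizeBytes + maxKeySize + bsonSize >= limits.maxBatchSizeBytes
      ) {
        return true;
      }
      // New batch if the op type differs from the current batch's type
      return batch.batchType !== docType;
    }

With auto encryption enabled, a lone 3 MiB document therefore still ships as a single batch (the byte-size check is skipped while the batch is empty), while two such documents split into two batches.
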
diff --git a/lib/bulk/unordered.js b/lib/bulk/unordered.js
index 999de3c466..37a278370e 100644
--- a/lib/bulk/unordered.js
+++ b/lib/bulk/unordered.js
@@ -26,8 +26,8 @@ function addToOperationsList(bulkOperation, docType, document) {
     ignoreUndefined: false
   });
   // Throw error if the doc is bigger than the max BSON size
-  if (bsonSize >= bulkOperation.s.maxBatchSizeBytes)
-    throw toError('document is larger than the maximum size ' + bulkOperation.s.maxBatchSizeBytes);
+  if (bsonSize >= bulkOperation.s.maxBsonObjectSize)
+    throw toError('document is larger than the maximum size ' + bulkOperation.s.maxBsonObjectSize);
   // Holds the current batch
   bulkOperation.s.currentBatch = null;
   // Get the right type of batch
@@ -47,9 +47,14 @@ function addToOperationsList(bulkOperation, docType, document) {
 
   // Check if we need to create a new batch
   if (
+    // New batch if we exceed the max batch op size
     bulkOperation.s.currentBatch.size + 1 >= bulkOperation.s.maxWriteBatchSize ||
-    bulkOperation.s.currentBatch.sizeBytes + maxKeySize + bsonSize >=
-      bulkOperation.s.maxBatchSizeBytes ||
+    // New batch if we exceed the maxBatchSizeBytes. Only matters if the batch already has a doc,
+    // since we can't send an empty batch
+    (bulkOperation.s.currentBatch.size > 0 &&
+      bulkOperation.s.currentBatch.sizeBytes + maxKeySize + bsonSize >=
+        bulkOperation.s.maxBatchSizeBytes) ||
+    // New batch if the new op does not have the same op type as the current batch
     bulkOperation.s.currentBatch.batchType !== docType
   ) {
     // Save the batch to the execution stack
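
Reviewer note: the prose tests below verify this behavior through command monitoring. As a rough standalone illustration of what they check — a sketch assuming a locally running server, the mongodb-client-encryption dependency, and a key vault populated as in the test setup; `demonstrateSplit`, `localKey`, and the namespaces are placeholders:

.. code:: javascript

    const { MongoClient } = require('mongodb');

    async function demonstrateSplit(localKey) {
      const client = new MongoClient('mongodb://localhost:27017', {
        useNewUrlParser: true,
        useUnifiedTopology: true,
        monitorCommands: true,
        autoEncryption: {
          keyVaultNamespace: 'admin.datakeys',
          kmsProviders: { local: { key: localKey } }
        }
      });
      let inserts = 0;
      client.on('commandStarted', e => {
        if (e.commandName === 'insert') inserts += 1;
      });
      await client.connect();
      const big = () => ({ unencrypted: 'a'.repeat(2097152) }); // 2 MiB of 'a's
      // Two 2 MiB docs no longer fit in one 2 MiB batch: expect the bulk write
      // to split into two insert commands rather than one
      await client.db('db').collection('coll').insertMany([big(), big()]);
      console.log(inserts); // 2
      await client.close();
    }
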
diff --git a/test/functional/client_side_encryption_prose_tests.js b/test/functional/client_side_encryption_prose_tests.js
index 7ae6a90388..ac2e82c9a2 100644
--- a/test/functional/client_side_encryption_prose_tests.js
+++ b/test/functional/client_side_encryption_prose_tests.js
@@ -464,5 +464,223 @@ describe(
         });
       });
     });
+
+    describe('BSON size limits and batch splitting', function() {
+      const fs = require('fs');
+      const path = require('path');
+      const EJSON = require('mongodb-extjson');
+      function loadLimits(file) {
+        return EJSON.parse(
+          fs.readFileSync(path.resolve(__dirname, 'spec', 'client-side-encryption', 'limits', file))
+        );
+      }
+
+      const limitsSchema = loadLimits('limits-schema.json');
+      const limitsKey = loadLimits('limits-key.json');
+      const limitsDoc = loadLimits('limits-doc.json');
+
+      before(function() {
+        // First, perform the setup.
+
+        // #. Create a MongoClient without encryption enabled (referred to as ``client``).
+        this.client = this.configuration.newClient(
+          {},
+          { useNewUrlParser: true, useUnifiedTopology: true }
+        );
+
+        this.events = new Set();
+
+        return (
+          this.client
+            .connect()
+            // #. Using ``client``, drop and create the collection ``db.coll`` configured with the included JSON schema `limits/limits-schema.json <../limits/limits-schema.json>`_.
+            .then(() => {
+              return this.client
+                .db(dataDbName)
+                .dropCollection(dataCollName)
+                .catch(noop);
+            })
+            .then(() => {
+              return this.client.db(dataDbName).createCollection(dataCollName, {
+                validator: { $jsonSchema: limitsSchema }
+              });
+            })
+            // #. Using ``client``, drop the collection ``admin.datakeys``. Insert the document `limits/limits-key.json <../limits/limits-key.json>`_.
+            .then(() => {
+              return this.client
+                .db(keyVaultDbName)
+                .dropCollection(keyVaultCollName)
+                .catch(noop);
+            })
+            .then(() => {
+              return this.client
+                .db(keyVaultDbName)
+                .collection(keyVaultCollName)
+                .insertOne(limitsKey);
+            })
+        );
+      });
+
+      beforeEach(function() {
+        // #. Create a MongoClient configured with auto encryption (referred to as ``client_encrypted``)
+        //    Configure with the ``local`` KMS provider as follows:
+        //    .. code:: javascript
+        //       { "local": { "key": <base64 decoding of LOCAL_MASTERKEY> } }
+        //    Configure with the ``keyVaultNamespace`` set to ``admin.datakeys``.
+        this.clientEncrypted = this.configuration.newClient(
+          {},
+          {
+            useNewUrlParser: true,
+            useUnifiedTopology: true,
+            monitorCommands: true,
+            autoEncryption: {
+              keyVaultNamespace,
+              kmsProviders
+            }
+          }
+        );
+        return this.clientEncrypted.connect().then(() => {
+          this.encryptedColl = this.clientEncrypted.db(dataDbName).collection(dataCollName);
+          this.events.clear();
+          this.clientEncrypted.on('commandStarted', e => {
+            if (e.commandName === 'insert') {
+              this.events.add(e);
+            }
+          });
+        });
+      });
+
+      afterEach(function() {
+        if (this.clientEncrypted) {
+          this.clientEncrypted.removeAllListeners('commandStarted');
+          return this.clientEncrypted.close();
+        }
+      });
+
+      after(function() {
+        return this.client && this.client.close();
+      });
+
+      // Using ``client_encrypted`` perform the following operations:
+
+      function repeatedChar(char, length) {
+        return Array.from({ length })
+          .map(() => char)
+          .join('');
+      }
+
+      const testCases = [
+        // #. Insert ``{ "_id": "over_2mib_under_16mib", "unencrypted": < the string "a" repeated 2097152 times > }``.
+        //    Expect this to succeed since this is still under the ``maxBsonObjectSize`` limit.
+        {
+          description: 'should succeed for over_2mib_under_16mib',
+          docs: () => [{ _id: 'over_2mib_under_16mib', unencrypted: repeatedChar('a', 2097152) }],
+          expectedEvents: [{ commandName: 'insert' }]
+        },
+        // #. Insert the document `limits/limits-doc.json <../limits/limits-doc.json>`_ concatenated with ``{ "_id": "encryption_exceeds_2mib", "unencrypted": < the string "a" repeated (2097152 - 2000) times > }``
+        //    Note: limits-doc.json is a 1005 byte BSON document that encrypts to a ~10,000 byte document.
+        //    Expect this to succeed since after encryption this still is below the normal maximum BSON document size.
+        //    Note, before auto encryption this document is under the 2 MiB limit. After encryption it exceeds the 2 MiB limit, but does NOT exceed the 16 MiB limit.
+        {
+          description: 'should succeed for encryption_exceeds_2mib',
+          docs: () => [
+            Object.assign({}, limitsDoc, {
+              _id: 'encryption_exceeds_2mib',
+              unencrypted: repeatedChar('a', 2097152 - 2000)
+            })
+          ],
+          expectedEvents: [{ commandName: 'insert' }]
+        },
+        // #. Bulk insert the following:
+        //    - ``{ "_id": "over_2mib_1", "unencrypted": < the string "a" repeated 2097152 times > }``
+        //    - ``{ "_id": "over_2mib_2", "unencrypted": < the string "a" repeated 2097152 times > }``
+        //    Expect the bulk write to succeed and split after the first doc (i.e. two inserts occur). This may be verified using command monitoring.
+        {
+          description: 'should succeed for bulk over_2mib',
+          docs: () => [
+            { _id: 'over_2mib_1', unencrypted: repeatedChar('a', 2097152) },
+            { _id: 'over_2mib_2', unencrypted: repeatedChar('a', 2097152) }
+          ],
+          expectedEvents: [{ commandName: 'insert' }, { commandName: 'insert' }]
+        },
+        // #. Bulk insert the following:
+        //    - The document `limits/limits-doc.json <../limits/limits-doc.json>`_ concatenated with ``{ "_id": "encryption_exceeds_2mib_1", "unencrypted": < the string "a" repeated (2097152 - 2000) times > }``
+        //    - The document `limits/limits-doc.json <../limits/limits-doc.json>`_ concatenated with ``{ "_id": "encryption_exceeds_2mib_2", "unencrypted": < the string "a" repeated (2097152 - 2000) times > }``
+        //    Expect the bulk write to succeed and split after the first doc (i.e. two inserts occur). This may be verified using command monitoring.
+        {
+          description: 'should succeed for bulk encryption_exceeds_2mib',
+          docs: () => [
+            Object.assign({}, limitsDoc, {
+              _id: 'encryption_exceeds_2mib_1',
+              unencrypted: repeatedChar('a', 2097152 - 2000)
+            }),
+            Object.assign({}, limitsDoc, {
+              _id: 'encryption_exceeds_2mib_2',
+              unencrypted: repeatedChar('a', 2097152 - 2000)
+            })
+          ],
+          expectedEvents: [{ commandName: 'insert' }, { commandName: 'insert' }]
+        },
+        // #. Insert ``{ "_id": "under_16mib", "unencrypted": < the string "a" repeated (16777216 - 2000) times > }``.
+        //    Expect this to succeed since this is still (just) under the ``maxBsonObjectSize`` limit.
+        {
+          description: 'should succeed for under_16mib',
+          docs: () => [{ _id: 'under_16mib', unencrypted: repeatedChar('a', 16777216 - 2000) }],
+          expectedEvents: [{ commandName: 'insert' }]
+        },
+        // #. Insert the document `limits/limits-doc.json <../limits/limits-doc.json>`_ concatenated with ``{ "_id": "encryption_exceeds_16mib", "unencrypted": < the string "a" repeated (16777216 - 2000) times > }``
+        //    Expect this to fail since encryption results in a document exceeding the ``maxBsonObjectSize`` limit.
+        {
+          description: 'should fail for encryption_exceeds_16mib',
+          docs: () => [
+            Object.assign({}, limitsDoc, {
+              _id: 'encryption_exceeds_16mib',
+              unencrypted: repeatedChar('a', 16777216 - 2000)
+            })
+          ],
+          error: true
+        }
+      ];
+
+      testCases.forEach(testCase => {
+        it(testCase.description, function() {
+          return this.encryptedColl.insertMany(testCase.docs()).then(
+            () => {
+              if (testCase.error) {
+                throw new Error('Expected this insert to fail, but it succeeded');
+              }
+              const expectedEvents = Array.from(testCase.expectedEvents);
+              const actualEvents = pruneEvents(this.events);
+
+              expect(actualEvents)
+                .to.have.a.lengthOf(expectedEvents.length)
+                .and.to.containSubset(expectedEvents);
+            },
+            err => {
+              if (!testCase.error) {
+                throw err;
+              }
+            }
+          );
+        });
+      });
+
+      function pruneEvents(events) {
+        return Array.from(events).map(event => {
+          // We prune out the long runs of repeated 'a's, mostly because
+          // a failed assertion would otherwise try to print 2 MiB of 'a's
+          // and not have a good time.
+          event.command = Object.assign({}, event.command);
+          event.command.documents = event.command.documents.map(doc => {
+            doc = Object.assign({}, doc);
+            if (doc.unencrypted) {
+              doc.unencrypted = "Lots of repeating 'a's";
+            }
+            return doc;
+          });
+          return event;
+        });
+      }
+    });
   }
 );
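
Reviewer note on the assertions: ``expect(...).to.containSubset(...)`` is not part of chai core. Presumably the suite's harness wires in the chai-subset plugin, roughly like the following sketch of the assumed setup (not part of this patch):

.. code:: javascript

    // Assumed harness setup: chai provides `expect`; chai-subset adds
    // `containSubset`, which checks that each expected object is a subset of
    // the corresponding actual object rather than a deep-equal match.
    const chai = require('chai');
    chai.use(require('chai-subset'));
    const expect = chai.expect;

    // Passes even though the actual event carries extra fields:
    expect([{ commandName: 'insert', command: {} }]).to.containSubset([
      { commandName: 'insert' }
    ]);

That subset semantics is why pruneEvents can safely replace the 2 MiB payloads before the comparison: only ``commandName`` has to match.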