From b8fcf148abde7fe905e8287b2164ea1343ec82c8 Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Thu, 19 Sep 2024 16:00:55 +0200 Subject: [PATCH] feat(NODE-6337): implement client bulk write batching --- src/cmap/commands.ts | 76 +++- src/cursor/client_bulk_write_cursor.ts | 4 +- src/error.ts | 31 +- src/index.ts | 3 +- .../client_bulk_write/command_builder.ts | 142 ++++++- src/operations/client_bulk_write/executor.ts | 26 +- .../client_bulk_write/results_merger.ts | 12 +- src/sdam/server_description.ts | 12 + src/sdam/topology_description.ts | 21 + test/integration/crud/crud.prose.test.ts | 129 ++++++ test/unit/cmap/commands.test.ts | 6 +- test/unit/index.test.ts | 3 +- .../client_bulk_write/command_builder.test.ts | 64 ++- .../client_bulk_write/results_merger.test.ts | 402 +++++++++++------- 14 files changed, 735 insertions(+), 196 deletions(-) diff --git a/src/cmap/commands.ts b/src/cmap/commands.ts index 9322fc53414..e04897eedf8 100644 --- a/src/cmap/commands.ts +++ b/src/cmap/commands.ts @@ -429,10 +429,66 @@ export interface OpMsgOptions { /** @internal */ export class DocumentSequence { + field: string; documents: Document[]; + serializedDocumentsLength: number; + private chunks: Uint8Array[]; + private header?: Buffer; - constructor(documents: Document[]) { - this.documents = documents; + /** + * Create a new document sequence for the provided field. + * @param field - The field it will replace. + */ + constructor(field: string, documents?: Document[]) { + this.field = field; + this.documents = []; + this.chunks = []; + this.serializedDocumentsLength = 0; + this.init(); + if (documents) { + for (const doc of documents) { + this.push(doc, BSON.serialize(doc)); + } + } + } + + /** + * Initialize the buffer chunks. + */ + private init() { + // Document sequences starts with type 1 at the first byte. + const buffer = Buffer.allocUnsafe(1 + 4 + this.field.length + 1); + buffer[0] = 1; + // Third part is the field name at offset 5 with trailing null byte. + encodeUTF8Into(buffer, `${this.field}\0`, 5); + this.chunks.push(buffer); + this.header = buffer; + } + + /** + * Push a document to the document sequence. Will serialize the document + * as well and return the current serialized length of all documents. + * @param document - The document to add. + * @param buffer - The serialized document in raw BSON. + * @returns The new totoal document sequence length. + */ + push(document: Document, buffer: Uint8Array): number { + this.serializedDocumentsLength += buffer.length; + // Push the document. + this.documents.push(document); + // Push the document raw bson. + this.chunks.push(buffer); + // Write the new length. + this.header?.writeInt32LE(4 + this.field.length + 1 + this.serializedDocumentsLength, 1); + return this.serializedDocumentsLength + (this.header?.length ?? 0); + } + + /** + * Get the fully serialized bytes for the document sequence section. + * @returns The section bytes. + */ + toBin(): Uint8Array { + return Buffer.concat(this.chunks); } } @@ -543,21 +599,7 @@ export class OpMsgRequest { const chunks = []; for (const [key, value] of Object.entries(document)) { if (value instanceof DocumentSequence) { - // Document sequences starts with type 1 at the first byte. - const buffer = Buffer.allocUnsafe(1 + 4 + key.length + 1); - buffer[0] = 1; - // Third part is the field name at offset 5 with trailing null byte. - encodeUTF8Into(buffer, `${key}\0`, 5); - chunks.push(buffer); - // Fourth part are the documents' bytes. - let docsLength = 0; - for (const doc of value.documents) { - const docBson = this.serializeBson(doc); - docsLength += docBson.length; - chunks.push(docBson); - } - // Second part of the sequence is the length at offset 1; - buffer.writeInt32LE(4 + key.length + 1 + docsLength, 1); + chunks.push(value.toBin()); // Why are we removing the field from the command? This is because it needs to be // removed in the OP_MSG request first section, and DocumentSequence is not a // BSON type and is specific to the MongoDB wire protocol so there's nothing diff --git a/src/cursor/client_bulk_write_cursor.ts b/src/cursor/client_bulk_write_cursor.ts index a1ae31fba30..cd853a46477 100644 --- a/src/cursor/client_bulk_write_cursor.ts +++ b/src/cursor/client_bulk_write_cursor.ts @@ -1,6 +1,6 @@ import type { Document } from '../bson'; import { type ClientBulkWriteCursorResponse } from '../cmap/wire_protocol/responses'; -import { MongoBulkWriteCursorError } from '../error'; +import { MongoClientBulkWriteCursorError } from '../error'; import type { MongoClient } from '../mongo_client'; import { ClientBulkWriteOperation } from '../operations/client_bulk_write/client_bulk_write'; import { type ClientBulkWriteOptions } from '../operations/client_bulk_write/common'; @@ -44,7 +44,7 @@ export class ClientBulkWriteCursor extends AbstractCursor { */ get response(): ClientBulkWriteCursorResponse { if (this.cursorResponse) return this.cursorResponse; - throw new MongoBulkWriteCursorError( + throw new MongoClientBulkWriteCursorError( 'No client bulk write cursor response returned from the server.' ); } diff --git a/src/error.ts b/src/error.ts index c9652877cb2..4aed6b93146 100644 --- a/src/error.ts +++ b/src/error.ts @@ -622,7 +622,7 @@ export class MongoGCPError extends MongoOIDCError { * @public * @category Error */ -export class MongoBulkWriteCursorError extends MongoRuntimeError { +export class MongoClientBulkWriteCursorError extends MongoRuntimeError { /** * **Do not use this constructor!** * @@ -639,7 +639,34 @@ export class MongoBulkWriteCursorError extends MongoRuntimeError { } override get name(): string { - return 'MongoBulkWriteCursorError'; + return 'MongoClientBulkWriteCursorError'; + } +} + +/** + * An error indicating that an error occurred on the client when executing a client bulk write. + * + * @public + * @category Error + */ +export class MongoClientBulkWriteExecutionError extends MongoRuntimeError { + /** + * **Do not use this constructor!** + * + * Meant for internal use only. + * + * @remarks + * This class is only meant to be constructed within the driver. This constructor is + * not subject to semantic versioning compatibility guarantees and may change at any time. + * + * @public + **/ + constructor(message: string) { + super(message); + } + + override get name(): string { + return 'MongoClientBulkWriteExecutionError'; } } diff --git a/src/index.ts b/src/index.ts index f68dd7699e0..97f964ce546 100644 --- a/src/index.ts +++ b/src/index.ts @@ -44,8 +44,9 @@ export { MongoAWSError, MongoAzureError, MongoBatchReExecutionError, - MongoBulkWriteCursorError, MongoChangeStreamError, + MongoClientBulkWriteCursorError, + MongoClientBulkWriteExecutionError, MongoCompatibilityError, MongoCursorExhaustedError, MongoCursorInUseError, diff --git a/src/operations/client_bulk_write/command_builder.ts b/src/operations/client_bulk_write/command_builder.ts index ad7ab953605..6b809a08c58 100644 --- a/src/operations/client_bulk_write/command_builder.ts +++ b/src/operations/client_bulk_write/command_builder.ts @@ -1,4 +1,4 @@ -import { type Document } from '../../bson'; +import { BSON, type Document } from '../../bson'; import { DocumentSequence } from '../../cmap/commands'; import { type PkFactory } from '../../mongo_client'; import type { Filter, OptionalId, UpdateFilter, WithoutId } from '../../mongo_types'; @@ -28,6 +28,11 @@ export interface ClientBulkWriteCommand { comment?: any; } +/** + * The bytes overhead for the extra fields added post command generation. + */ +const MESSAGE_OVERHEAD_BYTES = 1000; + /** @internal */ export class ClientBulkWriteCommandBuilder { models: AnyClientBulkWriteModel[]; @@ -62,32 +67,148 @@ export class ClientBulkWriteCommandBuilder { /** * Build the bulk write commands from the models. */ - buildCommands(): ClientBulkWriteCommand[] { + buildCommands(maxMessageSizeBytes: number, maxWriteBatchSize: number): ClientBulkWriteCommand[] { // Iterate the models to build the ops and nsInfo fields. - const operations = []; + // We need to do this in a loop which creates one command each up + // to the max bson size or max message size. + const commands: ClientBulkWriteCommand[] = []; + let currentCommandLength = 0; let currentNamespaceIndex = 0; + let currentCommand: ClientBulkWriteCommand = this.baseCommand(); const namespaces = new Map(); + for (const model of this.models) { const ns = model.namespace; const index = namespaces.get(ns); + + /** + * Convenience function for resetting everything when a new batch + * is started. + */ + const reset = () => { + commands.push(currentCommand); + namespaces.clear(); + currentNamespaceIndex = 0; + currentCommand = this.baseCommand(); + namespaces.set(ns, currentNamespaceIndex); + }; + if (index != null) { - operations.push(buildOperation(model, index, this.pkFactory)); + // Pushing to the ops document sequence returns the bytes length added. + const operation = buildOperation(model, index, this.pkFactory); + const operationBuffer = BSON.serialize(operation); + + // Check if the operation buffer can fit in the current command. If it can, + // then add the operation to the document sequence and increment the + // current length as long as the ops don't exceed the maxWriteBatchSize. + if ( + currentCommandLength + operationBuffer.length < maxMessageSizeBytes && + currentCommand.ops.documents.length < maxWriteBatchSize + ) { + // Pushing to the ops document sequence returns the bytes length added. + currentCommandLength = + MESSAGE_OVERHEAD_BYTES + this.addOperation(currentCommand, operation, operationBuffer); + } else { + // We need to batch. Push the current command to the commands + // array and create a new current command. We aslo need to clear the namespaces + // map for the new command. + reset(); + + const nsInfo = { ns: ns }; + const nsInfoBuffer = BSON.serialize(nsInfo); + currentCommandLength = + MESSAGE_OVERHEAD_BYTES + + this.addOperationAndNsInfo( + currentCommand, + operation, + operationBuffer, + nsInfo, + nsInfoBuffer + ); + } } else { namespaces.set(ns, currentNamespaceIndex); - operations.push(buildOperation(model, currentNamespaceIndex, this.pkFactory)); + const nsInfo = { ns: ns }; + const nsInfoBuffer = BSON.serialize(nsInfo); + const operation = buildOperation(model, currentNamespaceIndex, this.pkFactory); + const operationBuffer = BSON.serialize(operation); + + // Check if the operation and nsInfo buffers can fit in the command. If they + // can, then add the operation and nsInfo to their respective document + // sequences and increment the current length as long as the ops don't exceed + // the maxWriteBatchSize. + if ( + currentCommandLength + nsInfoBuffer.length + operationBuffer.length < + maxMessageSizeBytes && + currentCommand.ops.documents.length < maxWriteBatchSize + ) { + currentCommandLength = + MESSAGE_OVERHEAD_BYTES + + this.addOperationAndNsInfo( + currentCommand, + operation, + operationBuffer, + nsInfo, + nsInfoBuffer + ); + } else { + // We need to batch. Push the current command to the commands + // array and create a new current command. Aslo clear the namespaces map. + reset(); + + currentCommandLength = + MESSAGE_OVERHEAD_BYTES + + this.addOperationAndNsInfo( + currentCommand, + operation, + operationBuffer, + nsInfo, + nsInfoBuffer + ); + } + // We've added a new namespace, increment the namespace index. currentNamespaceIndex++; } } - const nsInfo = Array.from(namespaces.keys(), ns => ({ ns })); + // After we've finisihed iterating all the models put the last current command + // only if there are operations in it. + if (currentCommand.ops.documents.length > 0) { + commands.push(currentCommand); + } - // The base command. + return commands; + } + + private addOperation( + command: ClientBulkWriteCommand, + operation: Document, + operationBuffer: Uint8Array + ): number { + // Pushing to the ops document sequence returns the bytes length added. + return command.ops.push(operation, operationBuffer); + } + + private addOperationAndNsInfo( + command: ClientBulkWriteCommand, + operation: Document, + operationBuffer: Uint8Array, + nsInfo: Document, + nsInfoBuffer: Uint8Array + ): number { + // Pushing to the nsInfo document sequence returns the bytes length added. + const nsInfoLength = command.nsInfo.push(nsInfo, nsInfoBuffer); + const opsLength = this.addOperation(command, operation, operationBuffer); + return nsInfoLength + opsLength; + } + + private baseCommand(): ClientBulkWriteCommand { const command: ClientBulkWriteCommand = { bulkWrite: 1, errorsOnly: this.errorsOnly, ordered: this.options.ordered ?? true, - ops: new DocumentSequence(operations), - nsInfo: new DocumentSequence(nsInfo) + ops: new DocumentSequence('ops'), + nsInfo: new DocumentSequence('nsInfo') }; // Add bypassDocumentValidation if it was present in the options. if (this.options.bypassDocumentValidation != null) { @@ -103,7 +224,8 @@ export class ClientBulkWriteCommandBuilder { if (this.options.comment !== undefined) { command.comment = this.options.comment; } - return [command]; + + return command; } } diff --git a/src/operations/client_bulk_write/executor.ts b/src/operations/client_bulk_write/executor.ts index 74511ede9dd..1c02a42add6 100644 --- a/src/operations/client_bulk_write/executor.ts +++ b/src/operations/client_bulk_write/executor.ts @@ -1,6 +1,7 @@ import { type Document } from 'bson'; import { ClientBulkWriteCursor } from '../../cursor/client_bulk_write_cursor'; +import { MongoClientBulkWriteExecutionError } from '../../error'; import { type MongoClient } from '../../mongo_client'; import { WriteConcern } from '../../write_concern'; import { executeOperation } from '../execute_operation'; @@ -49,6 +50,23 @@ export class ClientBulkWriteExecutor { * @returns The result. */ async execute(): Promise { + const topologyDescription = this.client.topology?.description; + const maxMessageSizeBytes = topologyDescription?.maxMessageSizeBytes; + const maxWriteBatchSize = topologyDescription?.maxWriteBatchSize; + // If we don't know the maxMessageSizeBytes or for some reason it's 0 + // then we cannot calculate the batch. + if (!maxMessageSizeBytes) { + throw new MongoClientBulkWriteExecutionError( + 'No maxMessageSizeBytes value found - client bulk writes cannot execute without this value set from the monitoring connections.' + ); + } + + if (!maxWriteBatchSize) { + throw new MongoClientBulkWriteExecutionError( + 'No maxWriteBatchSize value found - client bulk writes cannot execute without this value set from the monitoring connections.' + ); + } + // The command builder will take the user provided models and potential split the batch // into multiple commands due to size. const pkFactory = this.client.s.options.pkFactory; @@ -57,7 +75,7 @@ export class ClientBulkWriteExecutor { this.options, pkFactory ); - const commands = commandBuilder.buildCommands(); + const commands = commandBuilder.buildCommands(maxMessageSizeBytes, maxWriteBatchSize); if (this.options.writeConcern?.w === 0) { return await executeUnacknowledged(this.client, this.options, commands); } @@ -75,10 +93,14 @@ async function executeAcknowledged( ): Promise { const resultsMerger = new ClientBulkWriteResultsMerger(options); // For each command will will create and exhaust a cursor for the results. + let currentBatchOffset = 0; for (const command of commands) { const cursor = new ClientBulkWriteCursor(client, command, options); const docs = await cursor.toArray(); - resultsMerger.merge(command.ops.documents, cursor.response, docs); + const operations = command.ops.documents; + resultsMerger.merge(currentBatchOffset, operations, cursor.response, docs); + // Set the new batch index so we can back back to the index in the original models. + currentBatchOffset += operations.length; } return resultsMerger.result; } diff --git a/src/operations/client_bulk_write/results_merger.ts b/src/operations/client_bulk_write/results_merger.ts index 48169b93b7d..ca5f3f16048 100644 --- a/src/operations/client_bulk_write/results_merger.ts +++ b/src/operations/client_bulk_write/results_merger.ts @@ -42,11 +42,13 @@ export class ClientBulkWriteResultsMerger { /** * Merge the results in the cursor to the existing result. + * @param currentBatchOffset - The offset index to the original models. * @param response - The cursor response. * @param documents - The documents in the cursor. * @returns The current result. */ merge( + currentBatchOffset: number, operations: Document[], response: ClientBulkWriteCursorResponse, documents: Document[] @@ -67,7 +69,9 @@ export class ClientBulkWriteResultsMerger { const operation = operations[document.idx]; // Handle insert results. if ('insert' in operation) { - this.result.insertResults?.set(document.idx, { insertedId: operation.document._id }); + this.result.insertResults?.set(document.idx + currentBatchOffset, { + insertedId: operation.document._id + }); } // Handle update results. if ('update' in operation) { @@ -80,11 +84,13 @@ export class ClientBulkWriteResultsMerger { if (document.upserted) { result.upsertedId = document.upserted._id; } - this.result.updateResults?.set(document.idx, result); + this.result.updateResults?.set(document.idx + currentBatchOffset, result); } // Handle delete results. if ('delete' in operation) { - this.result.deleteResults?.set(document.idx, { deletedCount: document.n }); + this.result.deleteResults?.set(document.idx + currentBatchOffset, { + deletedCount: document.n + }); } } } diff --git a/src/sdam/server_description.ts b/src/sdam/server_description.ts index cd32f4968b6..320a43bc168 100644 --- a/src/sdam/server_description.ts +++ b/src/sdam/server_description.ts @@ -18,6 +18,12 @@ const DATA_BEARING_SERVER_TYPES = new Set([ ServerType.LoadBalancer ]); +/** Default in case we are in load balanced mode. */ +const MAX_MESSAGE_SIZE_BYTES = 48000000; + +/** Default in case we are in load balanced mode. */ +const MAX_WRITE_BATCH_SIZE = 100000; + /** @public */ export interface TopologyVersion { processId: ObjectId; @@ -69,6 +75,10 @@ export class ServerDescription { setVersion: number | null; electionId: ObjectId | null; logicalSessionTimeoutMinutes: number | null; + /** The max message size in bytes for the server. */ + maxMessageSizeBytes: number; + /** The max number of writes in a bulk write command. */ + maxWriteBatchSize: number; // NOTE: does this belong here? It seems we should gossip the cluster time at the CMAP level $clusterTime?: ClusterTime; @@ -111,6 +121,8 @@ export class ServerDescription { this.setVersion = hello?.setVersion ?? null; this.electionId = hello?.electionId ?? null; this.logicalSessionTimeoutMinutes = hello?.logicalSessionTimeoutMinutes ?? null; + this.maxMessageSizeBytes = hello?.maxMessageSizeBytes ?? MAX_MESSAGE_SIZE_BYTES; + this.maxWriteBatchSize = hello?.maxWriteBatchSize ?? MAX_WRITE_BATCH_SIZE; this.primary = hello?.primary ?? null; this.me = hello?.me?.toLowerCase() ?? null; this.$clusterTime = hello?.$clusterTime ?? null; diff --git a/src/sdam/topology_description.ts b/src/sdam/topology_description.ts index 436321c7f1a..3f646975f28 100644 --- a/src/sdam/topology_description.ts +++ b/src/sdam/topology_description.ts @@ -43,6 +43,8 @@ export class TopologyDescription { heartbeatFrequencyMS: number; localThresholdMS: number; commonWireVersion: number; + maxMessageSizeBytes?: number; + maxWriteBatchSize?: number; /** * Create a TopologyDescription @@ -71,6 +73,25 @@ export class TopologyDescription { // determine server compatibility for (const serverDescription of this.servers.values()) { + // Find the lowest maxMessageSizeBytes from all the servers. + if (this.maxMessageSizeBytes == null) { + this.maxMessageSizeBytes = serverDescription.maxMessageSizeBytes; + } else { + this.maxMessageSizeBytes = Math.min( + this.maxMessageSizeBytes, + serverDescription.maxMessageSizeBytes + ); + } + + // Find the lowest maxWriteBatchSize from all the servers. + if (this.maxWriteBatchSize == null) { + this.maxWriteBatchSize = serverDescription.maxWriteBatchSize; + } else { + this.maxWriteBatchSize = Math.min( + this.maxWriteBatchSize, + serverDescription.maxWriteBatchSize + ); + } // Load balancer mode is always compatible. if ( serverDescription.type === ServerType.Unknown || diff --git a/test/integration/crud/crud.prose.test.ts b/test/integration/crud/crud.prose.test.ts index 3ddc126d333..bc081ed0aaa 100644 --- a/test/integration/crud/crud.prose.test.ts +++ b/test/integration/crud/crud.prose.test.ts @@ -3,6 +3,7 @@ import { once } from 'events'; import { type CommandStartedEvent } from '../../../mongodb'; import { + type AnyClientBulkWriteModel, type Collection, MongoBulkWriteError, type MongoClient, @@ -151,6 +152,134 @@ describe('CRUD Prose Spec Tests', () => { }); }); + describe('3. MongoClient.bulkWrite batch splits a writeModels input with greater than maxWriteBatchSize operations', function () { + // Test that MongoClient.bulkWrite properly handles writeModels inputs containing a number of writes greater than + // maxWriteBatchSize. + // This test must only be run on 8.0+ servers. This test must be skipped on Atlas Serverless. + // Construct a MongoClient (referred to as client) with command monitoring enabled to observe CommandStartedEvents. + // Perform a hello command using client and record the maxWriteBatchSize value contained in the response. Then, + // construct the following write model (referred to as model): + // InsertOne: { + // "namespace": "db.coll", + // "document": { "a": "b" } + // } + // Construct a list of write models (referred to as models) with model repeated maxWriteBatchSize + 1 times. Execute + // bulkWrite on client with models. Assert that the bulk write succeeds and returns a BulkWriteResult with an + // insertedCount value of maxWriteBatchSize + 1. + // Assert that two CommandStartedEvents (referred to as firstEvent and secondEvent) were observed for the bulkWrite + // command. Assert that the length of firstEvent.command.ops is maxWriteBatchSize. Assert that the length of + // secondEvent.command.ops is 1. If the driver exposes operationIds in its CommandStartedEvents, assert that + // firstEvent.operationId is equal to secondEvent.operationId. + let client: MongoClient; + let maxWriteBatchSize; + const models: AnyClientBulkWriteModel[] = []; + const commands: CommandStartedEvent[] = []; + + beforeEach(async function () { + client = this.configuration.newClient({}, { monitorCommands: true }); + await client.connect(); + const hello = await client.db('admin').command({ hello: 1 }); + maxWriteBatchSize = hello.maxWriteBatchSize; + + client.on('commandStarted', filterForCommands('bulkWrite', commands)); + commands.length = 0; + + Array.from({ length: maxWriteBatchSize + 1 }, () => { + models.push({ + namespace: 'db.coll', + name: 'insertOne', + document: { a: 'b' } + }); + }); + }); + + afterEach(async function () { + await client.close(); + }); + + it('splits the commands into 2 operations', { + metadata: { requires: { mongodb: '>=8.0.0' } }, + async test() { + const result = await client.bulkWrite(models); + expect(result.insertedCount).to.equal(maxWriteBatchSize + 1); + expect(commands.length).to.equal(2); + expect(commands[0].command.ops.length).to.equal(maxWriteBatchSize); + expect(commands[1].command.ops.length).to.equal(1); + } + }); + }); + + describe('4. MongoClient.bulkWrite batch splits when an ops payload exceeds maxMessageSizeBytes', function () { + // Test that MongoClient.bulkWrite properly handles a writeModels input which constructs an ops array larger + // than maxMessageSizeBytes. + // This test must only be run on 8.0+ servers. This test must be skipped on Atlas Serverless. + // Construct a MongoClient (referred to as client) with command monitoring enabled to observe CommandStartedEvents. + // Perform a hello command using client and record the following values from the response: maxBsonObjectSize + // and maxMessageSizeBytes. Then, construct the following document (referred to as document): + // { + // "a": "b".repeat(maxBsonObjectSize - 500) + // } + // Construct the following write model (referred to as model): + // InsertOne: { + // "namespace": "db.coll", + // "document": document + // } + // Use the following calculation to determine the number of inserts that should be provided to + // MongoClient.bulkWrite: maxMessageSizeBytes / maxBsonObjectSize + 1 (referred to as numModels). This number + // ensures that the inserts provided to MongoClient.bulkWrite will require multiple bulkWrite commands to be + // sent to the server. + // Construct as list of write models (referred to as models) with model repeated numModels times. Then execute + // bulkWrite on client with models. Assert that the bulk write succeeds and returns a BulkWriteResult with + // an insertedCount value of numModels. + // Assert that two CommandStartedEvents (referred to as firstEvent and secondEvent) were observed. Assert + // that the length of firstEvent.command.ops is numModels - 1. Assert that the length of secondEvent.command.ops + // is 1. If the driver exposes operationIds in its CommandStartedEvents, assert that firstEvent.operationId is + // equal to secondEvent.operationId. + let client: MongoClient; + let maxBsonObjectSize; + let maxMessageSizeBytes; + let numModels; + const models: AnyClientBulkWriteModel[] = []; + const commands: CommandStartedEvent[] = []; + + beforeEach(async function () { + client = this.configuration.newClient({}, { monitorCommands: true }); + await client.connect(); + const hello = await client.db('admin').command({ hello: 1 }); + maxBsonObjectSize = hello.maxBsonObjectSize; + maxMessageSizeBytes = hello.maxMessageSizeBytes; + numModels = Math.floor(maxMessageSizeBytes / maxBsonObjectSize + 1); + + client.on('commandStarted', filterForCommands('bulkWrite', commands)); + commands.length = 0; + + Array.from({ length: numModels }, () => { + models.push({ + name: 'insertOne', + namespace: 'db.coll', + document: { + a: 'b'.repeat(maxBsonObjectSize - 500) + } + }); + }); + }); + + afterEach(async function () { + await client.close(); + }); + + it('splits the commands into 2 operations', { + metadata: { requires: { mongodb: '>=8.0.0' } }, + async test() { + const result = await client.bulkWrite(models); + expect(result.insertedCount).to.equal(numModels); + expect(commands.length).to.equal(2); + expect(commands[0].command.ops.length).to.equal(numModels - 1); + expect(commands[1].command.ops.length).to.equal(1); + } + }); + }); + describe('14. `explain` helpers allow users to specify `maxTimeMS`', function () { let client: MongoClient; const commands: CommandStartedEvent[] = []; diff --git a/test/unit/cmap/commands.test.ts b/test/unit/cmap/commands.test.ts index f4b3fdf0252..5725f5b2490 100644 --- a/test/unit/cmap/commands.test.ts +++ b/test/unit/cmap/commands.test.ts @@ -15,7 +15,7 @@ describe('commands', function () { context('when there is one document sequence', function () { const command = { test: 1, - field: new DocumentSequence([{ test: 1 }]) + field: new DocumentSequence('field', [{ test: 1 }]) }; const msg = new OpMsgRequest('admin', command, {}); const buffers = msg.toBin(); @@ -53,8 +53,8 @@ describe('commands', function () { context('when there are multiple document sequences', function () { const command = { test: 1, - fieldOne: new DocumentSequence([{ test: 1 }]), - fieldTwo: new DocumentSequence([{ test: 1 }]) + fieldOne: new DocumentSequence('fieldOne', [{ test: 1 }]), + fieldTwo: new DocumentSequence('fieldTwo', [{ test: 1 }]) }; const msg = new OpMsgRequest('admin', command, {}); const buffers = msg.toBin(); diff --git a/test/unit/index.test.ts b/test/unit/index.test.ts index c8a1406a000..883cc4b4ba7 100644 --- a/test/unit/index.test.ts +++ b/test/unit/index.test.ts @@ -69,11 +69,12 @@ const EXPECTED_EXPORTS = [ 'MongoAWSError', 'MongoAzureError', 'MongoBatchReExecutionError', - 'MongoBulkWriteCursorError', 'MongoBulkWriteError', 'MongoChangeStreamError', 'MongoClient', 'MongoClientAuthProviders', + 'MongoClientBulkWriteCursorError', + 'MongoClientBulkWriteExecutionError', 'MongoCompatibilityError', 'MongoCryptAzureKMSRequestError', 'MongoCryptCreateDataKeyError', diff --git a/test/unit/operations/client_bulk_write/command_builder.test.ts b/test/unit/operations/client_bulk_write/command_builder.test.ts index 6b34ef9a817..fade57d408e 100644 --- a/test/unit/operations/client_bulk_write/command_builder.test.ts +++ b/test/unit/operations/client_bulk_write/command_builder.test.ts @@ -34,7 +34,7 @@ describe('ClientBulkWriteCommandBuilder', function () { ordered: false, comment: { bulk: 'write' } }); - const commands = builder.buildCommands(); + const commands = builder.buildCommands(48000000, 100000); it('sets the bulkWrite command', function () { expect(commands[0].bulkWrite).to.equal(1); @@ -79,7 +79,7 @@ describe('ClientBulkWriteCommandBuilder', function () { document: { _id: id, name: 1 } }; const builder = new ClientBulkWriteCommandBuilder([model], {}); - const commands = builder.buildCommands(); + const commands = builder.buildCommands(48000000, 100000); it('sets the bulkWrite command', function () { expect(commands[0].bulkWrite).to.equal(1); @@ -108,6 +108,60 @@ describe('ClientBulkWriteCommandBuilder', function () { }); context('when multiple models are provided', function () { + context('when exceeding the max batch size', function () { + const idOne = new ObjectId(); + const idTwo = new ObjectId(); + const modelOne: ClientInsertOneModel = { + name: 'insertOne', + namespace: 'test.coll', + document: { _id: idOne, name: 1 } + }; + const modelTwo: ClientInsertOneModel = { + name: 'insertOne', + namespace: 'test.coll', + document: { _id: idTwo, name: 2 } + }; + const builder = new ClientBulkWriteCommandBuilder([modelOne, modelTwo], {}); + const commands = builder.buildCommands(48000000, 1); + + it('splits the operations into multiple commands', function () { + expect(commands.length).to.equal(2); + expect(commands[0].ops.documents).to.deep.equal([ + { insert: 0, document: { _id: idOne, name: 1 } } + ]); + expect(commands[1].ops.documents).to.deep.equal([ + { insert: 0, document: { _id: idTwo, name: 2 } } + ]); + }); + }); + + context('when exceeding the max message size in bytes', function () { + const idOne = new ObjectId(); + const idTwo = new ObjectId(); + const modelOne: ClientInsertOneModel = { + name: 'insertOne', + namespace: 'test.coll', + document: { _id: idOne, name: 1 } + }; + const modelTwo: ClientInsertOneModel = { + name: 'insertOne', + namespace: 'test.coll', + document: { _id: idTwo, name: 2 } + }; + const builder = new ClientBulkWriteCommandBuilder([modelOne, modelTwo], {}); + const commands = builder.buildCommands(1090, 100000); + + it('splits the operations into multiple commands', function () { + expect(commands.length).to.equal(2); + expect(commands[0].ops.documents).to.deep.equal([ + { insert: 0, document: { _id: idOne, name: 1 } } + ]); + expect(commands[1].ops.documents).to.deep.equal([ + { insert: 0, document: { _id: idTwo, name: 2 } } + ]); + }); + }); + context('when the namespace is the same', function () { const idOne = new ObjectId(); const idTwo = new ObjectId(); @@ -122,7 +176,7 @@ describe('ClientBulkWriteCommandBuilder', function () { document: { _id: idTwo, name: 2 } }; const builder = new ClientBulkWriteCommandBuilder([modelOne, modelTwo], {}); - const commands = builder.buildCommands(); + const commands = builder.buildCommands(48000000, 100000); it('sets the bulkWrite command', function () { expect(commands[0].bulkWrite).to.equal(1); @@ -156,7 +210,7 @@ describe('ClientBulkWriteCommandBuilder', function () { document: { _id: idTwo, name: 2 } }; const builder = new ClientBulkWriteCommandBuilder([modelOne, modelTwo], {}); - const commands = builder.buildCommands(); + const commands = builder.buildCommands(48000000, 100000); it('sets the bulkWrite command', function () { expect(commands[0].bulkWrite).to.equal(1); @@ -199,7 +253,7 @@ describe('ClientBulkWriteCommandBuilder', function () { document: { _id: idThree, name: 2 } }; const builder = new ClientBulkWriteCommandBuilder([modelOne, modelTwo, modelThree], {}); - const commands = builder.buildCommands(); + const commands = builder.buildCommands(48000000, 100000); it('sets the bulkWrite command', function () { expect(commands[0].bulkWrite).to.equal(1); diff --git a/test/unit/operations/client_bulk_write/results_merger.test.ts b/test/unit/operations/client_bulk_write/results_merger.test.ts index ec43843af65..342502eebb4 100644 --- a/test/unit/operations/client_bulk_write/results_merger.test.ts +++ b/test/unit/operations/client_bulk_write/results_merger.test.ts @@ -28,180 +28,282 @@ describe('ClientBulkWriteResultsMerger', function () { describe('#merge', function () { context('when the bulk write is acknowledged', function () { - context('when requesting verbose results', function () { - // An example verbose response from the server without errors: - // { - // cursor: { - // id: Long('0'), - // firstBatch: [ { ok: 1, idx: 0, n: 1 }, { ok: 1, idx: 1, n: 1 } ], - // ns: 'admin.$cmd.bulkWrite' - // }, - // nErrors: 0, - // nInserted: 2, - // nMatched: 0, - // nModified: 0, - // nUpserted: 0, - // nDeleted: 0, - // ok: 1 - // } - context('when there are no errors', function () { - const operations = [ - { insert: 0, document: { _id: 1 } }, - { update: 0 }, - { update: 0 }, - { delete: 0 } - ]; - const documents = [ - { ok: 1, idx: 0, n: 1 }, // Insert - { ok: 1, idx: 1, n: 1, nModified: 1 }, // Update match - { ok: 1, idx: 2, n: 0, upserted: { _id: 1 } }, // Update no match with upsert - { ok: 1, idx: 3, n: 1 } // Delete - ]; - const serverResponse = { - cursor: { - id: new Long('0'), - firstBatch: documents, - ns: 'admin.$cmd.bulkWrite' - }, - nErrors: 0, - nInserted: 1, - nMatched: 1, - nModified: 1, - nUpserted: 1, - nDeleted: 1, - ok: 1 - }; - const response = new ClientBulkWriteCursorResponse(BSON.serialize(serverResponse), 0); - const merger = new ClientBulkWriteResultsMerger({ verboseResults: true }); - let result: ClientBulkWriteResult; - - before(function () { - result = merger.merge(operations, response, documents); - }); + context('when merging on the first batch', function () { + context('when requesting verbose results', function () { + // An example verbose response from the server without errors: + // { + // cursor: { + // id: Long('0'), + // firstBatch: [ { ok: 1, idx: 0, n: 1 }, { ok: 1, idx: 1, n: 1 } ], + // ns: 'admin.$cmd.bulkWrite' + // }, + // nErrors: 0, + // nInserted: 2, + // nMatched: 0, + // nModified: 0, + // nUpserted: 0, + // nDeleted: 0, + // ok: 1 + // } + context('when there are no errors', function () { + const operations = [ + { insert: 0, document: { _id: 1 } }, + { update: 0 }, + { update: 0 }, + { delete: 0 } + ]; + const documents = [ + { ok: 1, idx: 0, n: 1 }, // Insert + { ok: 1, idx: 1, n: 1, nModified: 1 }, // Update match + { ok: 1, idx: 2, n: 0, upserted: { _id: 1 } }, // Update no match with upsert + { ok: 1, idx: 3, n: 1 } // Delete + ]; + const serverResponse = { + cursor: { + id: new Long('0'), + firstBatch: documents, + ns: 'admin.$cmd.bulkWrite' + }, + nErrors: 0, + nInserted: 1, + nMatched: 1, + nModified: 1, + nUpserted: 1, + nDeleted: 1, + ok: 1 + }; + const response = new ClientBulkWriteCursorResponse(BSON.serialize(serverResponse), 0); + const merger = new ClientBulkWriteResultsMerger({ verboseResults: true }); + let result: ClientBulkWriteResult; - it('merges the inserted count', function () { - expect(result.insertedCount).to.equal(1); - }); + before(function () { + result = merger.merge(0, operations, response, documents); + }); - it('sets insert results', function () { - expect(result.insertResults.get(0).insertedId).to.equal(1); - }); + it('merges the inserted count', function () { + expect(result.insertedCount).to.equal(1); + }); - it('merges the upserted count', function () { - expect(result.upsertedCount).to.equal(1); - }); + it('sets insert results', function () { + expect(result.insertResults.get(0).insertedId).to.equal(1); + }); - it('merges the matched count', function () { - expect(result.matchedCount).to.equal(1); - }); + it('merges the upserted count', function () { + expect(result.upsertedCount).to.equal(1); + }); - it('merges the modified count', function () { - expect(result.modifiedCount).to.equal(1); - }); + it('merges the matched count', function () { + expect(result.matchedCount).to.equal(1); + }); - it('sets the update results', function () { - expect(result.updateResults.get(1)).to.deep.equal({ - matchedCount: 1, - modifiedCount: 1, - didUpsert: false + it('merges the modified count', function () { + expect(result.modifiedCount).to.equal(1); }); - }); - it('sets the upsert results', function () { - expect(result.updateResults.get(2)).to.deep.equal({ - matchedCount: 0, - modifiedCount: 0, - upsertedId: 1, - didUpsert: true + it('sets the update results', function () { + expect(result.updateResults.get(1)).to.deep.equal({ + matchedCount: 1, + modifiedCount: 1, + didUpsert: false + }); + }); + + it('sets the upsert results', function () { + expect(result.updateResults.get(2)).to.deep.equal({ + matchedCount: 0, + modifiedCount: 0, + upsertedId: 1, + didUpsert: true + }); }); - }); - it('merges the deleted count', function () { - expect(result.deletedCount).to.equal(1); + it('merges the deleted count', function () { + expect(result.deletedCount).to.equal(1); + }); + + it('sets the delete results', function () { + expect(result.deleteResults.get(3).deletedCount).to.equal(1); + }); }); + }); + + context('when not requesting verbose results', function () { + // An example verbose response from the server without errors: + // { + // cursor: { + // id: Long('0'), + // firstBatch: [], + // ns: 'admin.$cmd.bulkWrite' + // }, + // nErrors: 0, + // nInserted: 2, + // nMatched: 0, + // nModified: 0, + // nUpserted: 0, + // nDeleted: 0, + // ok: 1 + // } + context('when there are no errors', function () { + const operations = [ + { insert: 0, document: { _id: 1 } }, + { update: 0 }, + { update: 0 }, + { delete: 0 } + ]; + const documents = []; + const serverResponse = { + cursor: { + id: new Long('0'), + firstBatch: documents, + ns: 'admin.$cmd.bulkWrite' + }, + nErrors: 0, + nInserted: 1, + nMatched: 1, + nModified: 1, + nUpserted: 1, + nDeleted: 1, + ok: 1 + }; + const response = new ClientBulkWriteCursorResponse(BSON.serialize(serverResponse), 0); + const merger = new ClientBulkWriteResultsMerger({ verboseResults: false }); + let result: ClientBulkWriteResult; + + before(function () { + result = merger.merge(0, operations, response, documents); + }); - it('sets the delete results', function () { - expect(result.deleteResults.get(3).deletedCount).to.equal(1); + it('merges the inserted count', function () { + expect(result.insertedCount).to.equal(1); + }); + + it('sets no insert results', function () { + expect(result.insertResults).to.equal(undefined); + }); + + it('merges the upserted count', function () { + expect(result.upsertedCount).to.equal(1); + }); + + it('merges the matched count', function () { + expect(result.matchedCount).to.equal(1); + }); + + it('merges the modified count', function () { + expect(result.modifiedCount).to.equal(1); + }); + + it('sets no update results', function () { + expect(result.updateResults).to.equal(undefined); + }); + + it('merges the deleted count', function () { + expect(result.deletedCount).to.equal(1); + }); + + it('sets no delete results', function () { + expect(result.deleteResults).to.equal(undefined); + }); }); }); }); - context('when not requesting verbose results', function () { - // An example verbose response from the server without errors: - // { - // cursor: { - // id: Long('0'), - // firstBatch: [], - // ns: 'admin.$cmd.bulkWrite' - // }, - // nErrors: 0, - // nInserted: 2, - // nMatched: 0, - // nModified: 0, - // nUpserted: 0, - // nDeleted: 0, - // ok: 1 - // } - context('when there are no errors', function () { - const operations = [ - { insert: 0, document: { _id: 1 } }, - { update: 0 }, - { update: 0 }, - { delete: 0 } - ]; - const documents = []; - const serverResponse = { - cursor: { - id: new Long('0'), - firstBatch: documents, - ns: 'admin.$cmd.bulkWrite' - }, - nErrors: 0, - nInserted: 1, - nMatched: 1, - nModified: 1, - nUpserted: 1, - nDeleted: 1, - ok: 1 - }; - const response = new ClientBulkWriteCursorResponse(BSON.serialize(serverResponse), 0); - const merger = new ClientBulkWriteResultsMerger({ verboseResults: false }); - let result: ClientBulkWriteResult; - - before(function () { - result = merger.merge(operations, response, documents); - }); + context('when merging on a later batch', function () { + context('when requesting verbose results', function () { + // An example verbose response from the server without errors: + // { + // cursor: { + // id: Long('0'), + // firstBatch: [ { ok: 1, idx: 0, n: 1 }, { ok: 1, idx: 1, n: 1 } ], + // ns: 'admin.$cmd.bulkWrite' + // }, + // nErrors: 0, + // nInserted: 2, + // nMatched: 0, + // nModified: 0, + // nUpserted: 0, + // nDeleted: 0, + // ok: 1 + // } + context('when there are no errors', function () { + const operations = [ + { insert: 0, document: { _id: 1 } }, + { update: 0 }, + { update: 0 }, + { delete: 0 } + ]; + const documents = [ + { ok: 1, idx: 0, n: 1 }, // Insert + { ok: 1, idx: 1, n: 1, nModified: 1 }, // Update match + { ok: 1, idx: 2, n: 0, upserted: { _id: 1 } }, // Update no match with upsert + { ok: 1, idx: 3, n: 1 } // Delete + ]; + const serverResponse = { + cursor: { + id: new Long('0'), + firstBatch: documents, + ns: 'admin.$cmd.bulkWrite' + }, + nErrors: 0, + nInserted: 1, + nMatched: 1, + nModified: 1, + nUpserted: 1, + nDeleted: 1, + ok: 1 + }; + const response = new ClientBulkWriteCursorResponse(BSON.serialize(serverResponse), 0); + const merger = new ClientBulkWriteResultsMerger({ verboseResults: true }); + let result: ClientBulkWriteResult; - it('merges the inserted count', function () { - expect(result.insertedCount).to.equal(1); - }); + before(function () { + result = merger.merge(20, operations, response, documents); + }); - it('sets no insert results', function () { - expect(result.insertResults).to.equal(undefined); - }); + it('merges the inserted count', function () { + expect(result.insertedCount).to.equal(1); + }); - it('merges the upserted count', function () { - expect(result.upsertedCount).to.equal(1); - }); + it('sets insert results', function () { + expect(result.insertResults.get(20).insertedId).to.equal(1); + }); - it('merges the matched count', function () { - expect(result.matchedCount).to.equal(1); - }); + it('merges the upserted count', function () { + expect(result.upsertedCount).to.equal(1); + }); - it('merges the modified count', function () { - expect(result.modifiedCount).to.equal(1); - }); + it('merges the matched count', function () { + expect(result.matchedCount).to.equal(1); + }); - it('sets no update results', function () { - expect(result.updateResults).to.equal(undefined); - }); + it('merges the modified count', function () { + expect(result.modifiedCount).to.equal(1); + }); - it('merges the deleted count', function () { - expect(result.deletedCount).to.equal(1); - }); + it('sets the update results', function () { + expect(result.updateResults.get(21)).to.deep.equal({ + matchedCount: 1, + modifiedCount: 1, + didUpsert: false + }); + }); + + it('sets the upsert results', function () { + expect(result.updateResults.get(22)).to.deep.equal({ + matchedCount: 0, + modifiedCount: 0, + upsertedId: 1, + didUpsert: true + }); + }); - it('sets no delete results', function () { - expect(result.deleteResults).to.equal(undefined); + it('merges the deleted count', function () { + expect(result.deletedCount).to.equal(1); + }); + + it('sets the delete results', function () { + expect(result.deleteResults.get(23).deletedCount).to.equal(1); + }); }); }); });