From 722fb5d83702c5b16ffa658b5861f35bd9235e0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C3=B3=C5=BAd=C5=BA?= Date: Thu, 22 Aug 2024 21:05:32 +0200 Subject: [PATCH 01/26] feat(queue): observability for queue --- src/classes/queue.ts | 279 +++++++++++++++++++++++++- src/enums/index.ts | 1 + src/enums/opentelemetry-attributes.ts | 18 ++ src/interfaces/index.ts | 1 + src/interfaces/opentelemetry.ts | 25 +++ src/interfaces/queue-options.ts | 3 + 6 files changed, 317 insertions(+), 10 deletions(-) create mode 100644 src/enums/opentelemetry-attributes.ts create mode 100644 src/interfaces/opentelemetry.ts diff --git a/src/classes/queue.ts b/src/classes/queue.ts index 0e6b6cded4..f588e7a31c 100644 --- a/src/classes/queue.ts +++ b/src/classes/queue.ts @@ -6,12 +6,14 @@ import { QueueOptions, RepeatableJob, RepeatOptions, + Tracer, } from '../interfaces'; import { FinishedStatus, JobsOptions, MinimalQueue } from '../types'; import { Job } from './job'; import { QueueGetters } from './queue-getters'; import { Repeat } from './repeat'; import { RedisConnection } from './redis-connection'; +import { OpenTelemetryAttributes } from '../enums'; export interface ObliterateOpts { /** @@ -97,6 +99,7 @@ export class Queue< jobsOpts: BaseJobOptions; opts: QueueOptions; private _repeat?: Repeat; + private tracer: Tracer | undefined; constructor( name: string, @@ -114,6 +117,8 @@ export class Queue< this.jobsOpts = opts?.defaultJobOptions ?? {}; + this.tracer = opts?.telemetry.tracer; + this.waitUntilReady() .then(client => { if (!this.closing) { @@ -220,6 +225,15 @@ export class Queue< data: DataType, opts?: JobsOptions, ): Promise> { + let span; + if (this.tracer) { + const spanName = `${this.name}.${name} Queue.add`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [OpenTelemetryAttributes.QueueName]: this.name, + }); + } + if (opts && opts.repeat) { if (opts.repeat.endDate) { if (+new Date(opts.repeat.endDate) < Date.now()) { @@ -227,11 +241,17 @@ export class Queue< } } - return (await this.repeat).addNextRepeatableJob< + const repeatableJob = (await this.repeat).addNextRepeatableJob< DataType, ResultType, NameType >(name, data, { ...this.jobsOpts, ...opts }, true); + + if (this.tracer) { + span.end(); + } + + return repeatableJob; } else { const jobId = opts?.jobId; @@ -250,6 +270,11 @@ export class Queue< }, ); this.emit('waiting', job); + + if (this.tracer) { + span.end(); + } + return job; } } @@ -264,7 +289,19 @@ export class Queue< addBulk( jobs: { name: NameType; data: DataType; opts?: BulkJobOptions }[], ): Promise[]> { - return this.Job.createBulk( + let span; + if (this.tracer) { + const jobsInBulk = jobs.map(job => job.name); + const spanName = `${this.name} Queue.addBulk`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [OpenTelemetryAttributes.QueueName]: this.name, + [OpenTelemetryAttributes.BulkNames]: jobsInBulk, + [OpenTelemetryAttributes.BulkCount]: jobsInBulk.length, + }); + } + + const bulk = this.Job.createBulk( this as MinimalQueue, jobs.map(job => ({ name: job.name, @@ -276,6 +313,12 @@ export class Queue< }, })), ); + + if (this.tracer) { + span.end(); + } + + return bulk; } /** @@ -290,7 +333,21 @@ export class Queue< * and in that case it will add it there instead of the wait list. */ async pause(): Promise { + let span; + if (this.tracer) { + const spanName = `${this.name} Queue.pause`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [OpenTelemetryAttributes.QueueName]: this.name, + }); + } + await this.scripts.pause(true); + + if (this.tracer) { + span.end(); + } + this.emit('paused'); } @@ -299,12 +356,26 @@ export class Queue< * */ async close(): Promise { + let span; + if (this.tracer) { + const spanName = `${this.name} Queue.close`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [OpenTelemetryAttributes.QueueName]: this.name, + }); + } + if (!this.closing) { if (this._repeat) { await this._repeat.close(); } } - return super.close(); + + super.close(); + + if (this.tracer) { + span.end(); + } } /** * Resumes the processing of this queue globally. @@ -313,7 +384,21 @@ export class Queue< * queue. */ async resume(): Promise { + let span; + if (this.tracer) { + const spanName = `${this.name} Queue.resume`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [OpenTelemetryAttributes.QueueName]: this.name, + }); + } + await this.scripts.pause(false); + + if (this.tracer) { + span.end(); + } + this.emit('resumed'); } @@ -367,9 +452,22 @@ export class Queue< repeatOpts: RepeatOptions, jobId?: string, ): Promise { + let span; + if (this.tracer) { + const spanName = `${this.name} ${name} Queue.removeRepeatable`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [OpenTelemetryAttributes.QueueName]: this.name, + }); + } + const repeat = await this.repeat; const removed = await repeat.removeRepeatable(name, repeatOpts, jobId); + if (this.tracer) { + span.end(); + } + return !removed; } @@ -379,9 +477,24 @@ export class Queue< * @param id - identifier */ async removeDebounceKey(id: string): Promise { + let span; + if (this.tracer) { + const spanName = `${this.name} ${id} Queue.removeDebounceKey`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [OpenTelemetryAttributes.QueueName]: this.name, + }); + } + const client = await this.client; - return client.del(`${this.keys.de}:${id}`); + const debounced = await client.del(`${this.keys.de}:${id}`); + + if (this.tracer) { + span.end(); + } + + return debounced; } /** @@ -395,9 +508,23 @@ export class Queue< * @returns */ async removeRepeatableByKey(key: string): Promise { + let span; + if (this.tracer) { + const spanName = `${this.name} ${key} Queue.removeRepeatableByKey`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [OpenTelemetryAttributes.QueueName]: this.name, + [OpenTelemetryAttributes.JobKey]: key, + }); + } + const repeat = await this.repeat; const removed = await repeat.removeRepeatableByKey(key); + if (this.tracer) { + span.end(); + } + return !removed; } @@ -410,8 +537,27 @@ export class Queue< * @returns 1 if it managed to remove the job or 0 if the job or * any of its dependencies were locked. */ - remove(jobId: string, { removeChildren = true } = {}): Promise { - return this.scripts.remove(jobId, removeChildren); + async remove(jobId: string, { removeChildren = true } = {}): Promise { + let span; + if (this.tracer) { + const spanName = `${this.name} ${jobId} Queue.remove`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [OpenTelemetryAttributes.QueueName]: this.name, + [OpenTelemetryAttributes.JobId]: jobId, + [OpenTelemetryAttributes.JobOptions]: JSON.stringify({ + removeChildren, + }), + }); + } + + const status = await this.scripts.remove(jobId, removeChildren); + + if (this.tracer) { + span.end(); + } + + return status; } /** @@ -424,7 +570,22 @@ export class Queue< jobId: string, progress: number | object, ): Promise { - return this.scripts.updateProgress(jobId, progress); + let span; + if (this.tracer) { + const spanName = `${this.name} Queue.updateJobProgress`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [OpenTelemetryAttributes.QueueName]: this.name, + [OpenTelemetryAttributes.JobId]: jobId, + [OpenTelemetryAttributes.JobProgress]: JSON.stringify(progress), + }); + } + + await this.scripts.updateProgress(jobId, progress); + + if (this.tracer) { + span.end(); + } } /** @@ -451,8 +612,22 @@ export class Queue< * @param delayed - Pass true if it should also clean the * delayed jobs. */ - drain(delayed = false): Promise { - return this.scripts.drain(delayed); + async drain(delayed = false): Promise { + let span; + if (this.tracer) { + const spanName = `${this.name} Queue.drain`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [OpenTelemetryAttributes.QueueName]: this.name, + [OpenTelemetryAttributes.QueueDrainDelay]: delayed, + }); + } + + await this.scripts.drain(delayed); + + if (this.tracer) { + span.end(); + } } /** @@ -477,12 +652,30 @@ export class Queue< | 'delayed' | 'failed' = 'completed', ): Promise { + let span; + if (this.tracer) { + const spanName = `${this.name} Queue.clean`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [OpenTelemetryAttributes.QueueName]: this.name, + [OpenTelemetryAttributes.QueueGrace]: grace, + [OpenTelemetryAttributes.JobType]: type, + }); + } + const maxCount = limit || Infinity; const maxCountPerCall = Math.min(10000, maxCount); const timestamp = Date.now() - grace; let deletedCount = 0; const deletedJobsIds: string[] = []; + if (this.tracer) { + span.setAttributes({ + [OpenTelemetryAttributes.QueueCleanLimit]: maxCount, + [OpenTelemetryAttributes.JobTimestamp]: timestamp, + }); + } + while (deletedCount < maxCount) { const jobsIds = await this.scripts.cleanJobsInSet( type, @@ -498,6 +691,15 @@ export class Queue< break; } } + + if (this.tracer) { + span.setAttributes({ + [OpenTelemetryAttributes.JobId]: deletedJobsIds, + }); + + span.end(); + } + return deletedJobsIds; } @@ -513,6 +715,15 @@ export class Queue< * @param opts - Obliterate options. */ async obliterate(opts?: ObliterateOpts): Promise { + let span; + if (this.tracer) { + const spanName = `${this.name} Queue.obliterate`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [OpenTelemetryAttributes.QueueName]: this.name, + }); + } + await this.pause(); let cursor = 0; @@ -523,6 +734,10 @@ export class Queue< ...opts, }); } while (cursor); + + if (this.tracer) { + span.end(); + } } /** @@ -538,6 +753,16 @@ export class Queue< async retryJobs( opts: { count?: number; state?: FinishedStatus; timestamp?: number } = {}, ): Promise { + let span; + if (this.tracer) { + const spanName = `${this.name} Queue.retryJobs`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [OpenTelemetryAttributes.QueueName]: this.name, + [OpenTelemetryAttributes.QueueOptions]: JSON.stringify(opts), + }); + } + let cursor = 0; do { cursor = await this.scripts.retryJobs( @@ -546,6 +771,10 @@ export class Queue< opts.timestamp, ); } while (cursor); + + if (this.tracer) { + span.end(); + } } /** @@ -557,10 +786,24 @@ export class Queue< * @returns */ async promoteJobs(opts: { count?: number } = {}): Promise { + let span; + if (this.tracer) { + const spanName = `${this.name} Queue.promoteJobs`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [OpenTelemetryAttributes.QueueName]: this.name, + [OpenTelemetryAttributes.QueueOptions]: JSON.stringify(opts), + }); + } + let cursor = 0; do { cursor = await this.scripts.promoteJobs(opts.count); } while (cursor); + + if (this.tracer) { + span.end(); + } } /** @@ -569,8 +812,24 @@ export class Queue< * @param maxLength - */ async trimEvents(maxLength: number): Promise { + let span; + if (this.tracer) { + const spanName = `${this.name} Queue.trimEvents`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [OpenTelemetryAttributes.QueueName]: this.name, + [OpenTelemetryAttributes.QueueEventMaxLength]: maxLength, + }); + } + const client = await this.client; - return client.xtrim(this.keys.events, 'MAXLEN', '~', maxLength); + const trim = await client.xtrim(this.keys.events, 'MAXLEN', '~', maxLength); + + if (this.tracer) { + span.end(); + } + + return trim; } /** diff --git a/src/enums/index.ts b/src/enums/index.ts index d6bae934f4..4574120c86 100644 --- a/src/enums/index.ts +++ b/src/enums/index.ts @@ -2,3 +2,4 @@ export * from './child-command'; export * from './error-code'; export * from './parent-command'; export * from './metrics-time'; +export * from './opentelemetry-attributes'; diff --git a/src/enums/opentelemetry-attributes.ts b/src/enums/opentelemetry-attributes.ts new file mode 100644 index 0000000000..f3e0783594 --- /dev/null +++ b/src/enums/opentelemetry-attributes.ts @@ -0,0 +1,18 @@ +export enum OpenTelemetryAttributes { + QueueName = 'bullmq.queue.name', + WorkerName = 'bullmq.worker.name', + BulkCount = 'bullmq.job.bulk.count', + BulkNames = 'bullmq.job.bulk.names', + JobName = 'bullmq.job.name', + JobId = 'bullmq.job.id', + JobKey = 'bullmq.job.key', + JobOptions = 'bullmq.job.options', + JobProgress = 'bullmq.job.progress', + QueueDrainDelay = 'bullmq.queue.drain.delay', + QueueGrace = 'bullmq.queue.grace', + QueueCleanLimit = 'bullmq.queue.clean.limit', + JobType = 'bullmq.job.type', + JobTimestamp = 'bullmq.job.timestamp', + QueueOptions = 'bullmq.queue.options', + QueueEventMaxLength = 'bullmq.queue.event.max.length', +} diff --git a/src/interfaces/index.ts b/src/interfaces/index.ts index 65dfed0396..6b06010ec3 100644 --- a/src/interfaces/index.ts +++ b/src/interfaces/index.ts @@ -23,3 +23,4 @@ export * from './repeat-options'; export * from './sandboxed-job-processor'; export * from './sandboxed-job'; export * from './worker-options'; +export * from './opentelemetry'; diff --git a/src/interfaces/opentelemetry.ts b/src/interfaces/opentelemetry.ts new file mode 100644 index 0000000000..1947274d3c --- /dev/null +++ b/src/interfaces/opentelemetry.ts @@ -0,0 +1,25 @@ +export interface Telemetry { + tracer: Tracer; +} + +export interface Tracer { + startSpan(name: string): Span; +} + +export interface Span { + setAttribute(key: string, value: Attribute): Span; + setAttributes(attributes: Attributes): Span; + end(): void; +} + +export interface Attributes { + [attribute: string]: Attribute | undefined; +} + +export type Attribute = + | string + | number + | boolean + | null + | undefined + | (null | undefined | string | number | boolean)[]; diff --git a/src/interfaces/queue-options.ts b/src/interfaces/queue-options.ts index c8f00bd78c..20e05bf16e 100644 --- a/src/interfaces/queue-options.ts +++ b/src/interfaces/queue-options.ts @@ -1,6 +1,7 @@ import { AdvancedRepeatOptions } from './advanced-options'; import { DefaultJobOptions } from './base-job-options'; import { ConnectionOptions } from './redis-options'; +import { Telemetry } from './opentelemetry'; export enum ClientType { blocking = 'blocking', @@ -55,6 +56,8 @@ export interface QueueOptions extends QueueBaseOptions { }; settings?: AdvancedRepeatOptions; + + telemetry?: Telemetry; } /** From 1b2fd0b56bd5aa6ac7284a6e012acc032ac67650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C3=B3=C5=BAd=C5=BA?= Date: Fri, 23 Aug 2024 17:16:15 +0200 Subject: [PATCH 02/26] feat(worker): observability for worker --- src/classes/queue.ts | 76 ++++---- src/classes/worker.ts | 177 +++++++++++++++++- src/enums/index.ts | 2 +- ...-attributes.ts => telemetry-attributes.ts} | 9 +- src/interfaces/index.ts | 2 +- src/interfaces/queue-options.ts | 2 +- .../{opentelemetry.ts => telemetry.ts} | 0 src/interfaces/worker-options.ts | 3 + 8 files changed, 229 insertions(+), 42 deletions(-) rename src/enums/{opentelemetry-attributes.ts => telemetry-attributes.ts} (62%) rename src/interfaces/{opentelemetry.ts => telemetry.ts} (100%) diff --git a/src/classes/queue.ts b/src/classes/queue.ts index f588e7a31c..aa25343c63 100644 --- a/src/classes/queue.ts +++ b/src/classes/queue.ts @@ -13,7 +13,7 @@ import { Job } from './job'; import { QueueGetters } from './queue-getters'; import { Repeat } from './repeat'; import { RedisConnection } from './redis-connection'; -import { OpenTelemetryAttributes } from '../enums'; +import { TelemetryAttributes } from '../enums'; export interface ObliterateOpts { /** @@ -230,7 +230,7 @@ export class Queue< const spanName = `${this.name}.${name} Queue.add`; span = this.tracer.startSpan(spanName); span.setAttributes({ - [OpenTelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.QueueName]: this.name, }); } @@ -272,6 +272,10 @@ export class Queue< this.emit('waiting', job); if (this.tracer) { + span.setAttributes({ + [TelemetryAttributes.JobId]: job.id, + }); + span.end(); } @@ -286,7 +290,7 @@ export class Queue< * @param jobs - The array of jobs to add to the queue. Each job is defined by 3 * properties, 'name', 'data' and 'opts'. They follow the same signature as 'Queue.add'. */ - addBulk( + async addBulk( jobs: { name: NameType; data: DataType; opts?: BulkJobOptions }[], ): Promise[]> { let span; @@ -295,13 +299,13 @@ export class Queue< const spanName = `${this.name} Queue.addBulk`; span = this.tracer.startSpan(spanName); span.setAttributes({ - [OpenTelemetryAttributes.QueueName]: this.name, - [OpenTelemetryAttributes.BulkNames]: jobsInBulk, - [OpenTelemetryAttributes.BulkCount]: jobsInBulk.length, + [TelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.BulkNames]: jobsInBulk, + [TelemetryAttributes.BulkCount]: jobsInBulk.length, }); } - const bulk = this.Job.createBulk( + const bulk = await this.Job.createBulk( this as MinimalQueue, jobs.map(job => ({ name: job.name, @@ -338,7 +342,7 @@ export class Queue< const spanName = `${this.name} Queue.pause`; span = this.tracer.startSpan(spanName); span.setAttributes({ - [OpenTelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.QueueName]: this.name, }); } @@ -361,7 +365,7 @@ export class Queue< const spanName = `${this.name} Queue.close`; span = this.tracer.startSpan(spanName); span.setAttributes({ - [OpenTelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.QueueName]: this.name, }); } @@ -371,7 +375,7 @@ export class Queue< } } - super.close(); + await super.close(); if (this.tracer) { span.end(); @@ -389,7 +393,7 @@ export class Queue< const spanName = `${this.name} Queue.resume`; span = this.tracer.startSpan(spanName); span.setAttributes({ - [OpenTelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.QueueName]: this.name, }); } @@ -457,7 +461,7 @@ export class Queue< const spanName = `${this.name} ${name} Queue.removeRepeatable`; span = this.tracer.startSpan(spanName); span.setAttributes({ - [OpenTelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.QueueName]: this.name, }); } @@ -482,7 +486,7 @@ export class Queue< const spanName = `${this.name} ${id} Queue.removeDebounceKey`; span = this.tracer.startSpan(spanName); span.setAttributes({ - [OpenTelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.QueueName]: this.name, }); } @@ -513,8 +517,8 @@ export class Queue< const spanName = `${this.name} ${key} Queue.removeRepeatableByKey`; span = this.tracer.startSpan(spanName); span.setAttributes({ - [OpenTelemetryAttributes.QueueName]: this.name, - [OpenTelemetryAttributes.JobKey]: key, + [TelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.JobKey]: key, }); } @@ -543,9 +547,9 @@ export class Queue< const spanName = `${this.name} ${jobId} Queue.remove`; span = this.tracer.startSpan(spanName); span.setAttributes({ - [OpenTelemetryAttributes.QueueName]: this.name, - [OpenTelemetryAttributes.JobId]: jobId, - [OpenTelemetryAttributes.JobOptions]: JSON.stringify({ + [TelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.JobId]: jobId, + [TelemetryAttributes.JobOptions]: JSON.stringify({ removeChildren, }), }); @@ -575,9 +579,9 @@ export class Queue< const spanName = `${this.name} Queue.updateJobProgress`; span = this.tracer.startSpan(spanName); span.setAttributes({ - [OpenTelemetryAttributes.QueueName]: this.name, - [OpenTelemetryAttributes.JobId]: jobId, - [OpenTelemetryAttributes.JobProgress]: JSON.stringify(progress), + [TelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.JobId]: jobId, + [TelemetryAttributes.JobProgress]: JSON.stringify(progress), }); } @@ -618,8 +622,8 @@ export class Queue< const spanName = `${this.name} Queue.drain`; span = this.tracer.startSpan(spanName); span.setAttributes({ - [OpenTelemetryAttributes.QueueName]: this.name, - [OpenTelemetryAttributes.QueueDrainDelay]: delayed, + [TelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.QueueDrainDelay]: delayed, }); } @@ -657,9 +661,9 @@ export class Queue< const spanName = `${this.name} Queue.clean`; span = this.tracer.startSpan(spanName); span.setAttributes({ - [OpenTelemetryAttributes.QueueName]: this.name, - [OpenTelemetryAttributes.QueueGrace]: grace, - [OpenTelemetryAttributes.JobType]: type, + [TelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.QueueGrace]: grace, + [TelemetryAttributes.JobType]: type, }); } @@ -671,8 +675,8 @@ export class Queue< if (this.tracer) { span.setAttributes({ - [OpenTelemetryAttributes.QueueCleanLimit]: maxCount, - [OpenTelemetryAttributes.JobTimestamp]: timestamp, + [TelemetryAttributes.QueueCleanLimit]: maxCount, + [TelemetryAttributes.JobTimestamp]: timestamp, }); } @@ -694,7 +698,7 @@ export class Queue< if (this.tracer) { span.setAttributes({ - [OpenTelemetryAttributes.JobId]: deletedJobsIds, + [TelemetryAttributes.JobId]: deletedJobsIds, }); span.end(); @@ -720,7 +724,7 @@ export class Queue< const spanName = `${this.name} Queue.obliterate`; span = this.tracer.startSpan(spanName); span.setAttributes({ - [OpenTelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.QueueName]: this.name, }); } @@ -758,8 +762,8 @@ export class Queue< const spanName = `${this.name} Queue.retryJobs`; span = this.tracer.startSpan(spanName); span.setAttributes({ - [OpenTelemetryAttributes.QueueName]: this.name, - [OpenTelemetryAttributes.QueueOptions]: JSON.stringify(opts), + [TelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.QueueOptions]: JSON.stringify(opts), }); } @@ -791,8 +795,8 @@ export class Queue< const spanName = `${this.name} Queue.promoteJobs`; span = this.tracer.startSpan(spanName); span.setAttributes({ - [OpenTelemetryAttributes.QueueName]: this.name, - [OpenTelemetryAttributes.QueueOptions]: JSON.stringify(opts), + [TelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.QueueOptions]: JSON.stringify(opts), }); } @@ -817,8 +821,8 @@ export class Queue< const spanName = `${this.name} Queue.trimEvents`; span = this.tracer.startSpan(spanName); span.setAttributes({ - [OpenTelemetryAttributes.QueueName]: this.name, - [OpenTelemetryAttributes.QueueEventMaxLength]: maxLength, + [TelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.QueueEventMaxLength]: maxLength, }); } diff --git a/src/classes/worker.ts b/src/classes/worker.ts index bda6163f7e..344dfc2ca6 100644 --- a/src/classes/worker.ts +++ b/src/classes/worker.ts @@ -35,6 +35,8 @@ import { RATE_LIMIT_ERROR, WaitingChildrenError, } from './errors'; +import { Tracer } from '../interfaces'; +import { TelemetryAttributes } from '../enums'; // 10 seconds is the maximum time a BRPOPLPUSH can block. const maximumBlockTimeout = 10; @@ -190,6 +192,8 @@ export class Worker< protected processFn: Processor; protected running = false; + private tracer: Tracer | undefined; + static RateLimitError(): Error { return new RateLimitError(); } @@ -314,6 +318,8 @@ export class Worker< this.blockingConnection.on('ready', () => setTimeout(() => this.emit('ready'), 0), ); + + this.tracer = opts?.telemetry.tracer; } emit>( @@ -402,11 +408,30 @@ export class Worker< } async run() { + let span; + if (this.tracer) { + const spanName = `${this.name} ${this.id} Worker.run`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [TelemetryAttributes.WorkerName]: this.name, + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerOptions]: JSON.stringify(this.opts), + }); + } + if (!this.processFn) { + if (this.tracer) { + span.end(); + } + throw new Error('No process function is defined.'); } if (this.running) { + if (this.tracer) { + span.end(); + } + throw new Error('Worker is already running.'); } @@ -507,10 +532,14 @@ export class Worker< } this.running = false; - return asyncFifoQueue.waitAll(); + return await asyncFifoQueue.waitAll(); } catch (error) { this.running = false; throw error; + } finally { + if (this.tracer) { + span.end(); + } } } @@ -520,12 +549,30 @@ export class Worker< * @returns a Job or undefined if no job was available in the queue. */ async getNextJob(token: string, { block = true }: GetNextJobOptions = {}) { - return this._getNextJob( + let span; + if (this.tracer) { + const spanName = `${this.name} ${this.id} Worker.getNextJob`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [TelemetryAttributes.WorkerName]: this.name, + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerToken]: token, + [TelemetryAttributes.WorkerOptions]: JSON.stringify({ block }), + }); + } + + const nextJob = await this._getNextJob( await this.client, await this.blockingConnection.client, token, { block }, ); + + if (this.tracer) { + span.end(); + } + + return nextJob; } private async _getNextJob( @@ -585,6 +632,17 @@ export class Worker< * @param expireTimeMs - expire time in ms of this rate limit. */ async rateLimit(expireTimeMs: number): Promise { + let span; + if (this.tracer) { + const spanName = `${this.name} ${this.id} Worker.rateLimit`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [TelemetryAttributes.WorkerName]: this.name, + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerRateLimit]: expireTimeMs, + }); + } + await this.client.then(client => client.set( this.keys.limiter, @@ -593,6 +651,10 @@ export class Worker< expireTimeMs, ), ); + + if (this.tracer) { + span.end(); + } } get minimumBlockTimeout(): number { @@ -743,7 +805,23 @@ will never work with more accuracy than 1ms. */ fetchNextCallback = () => true, jobsInProgress: Set<{ job: Job; ts: number }>, ): Promise> { + let span; + if (this.tracer) { + const spanName = `${this.name} ${this.id} Worker.processJob`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [TelemetryAttributes.WorkerName]: this.name, + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerToken]: token, + [TelemetryAttributes.JobId]: job.id, + }); + } + if (!job || this.closing || this.paused) { + if (this.tracer) { + span.end(); + } + return; } @@ -808,6 +886,10 @@ will never work with more accuracy than 1ms. */ return handleFailed(err); } finally { jobsInProgress.delete(inProgressItem); + + if (this.tracer) { + span.end(); + } } } @@ -816,6 +898,17 @@ will never work with more accuracy than 1ms. */ * Pauses the processing of this queue only for this worker. */ async pause(doNotWaitActive?: boolean): Promise { + let span; + if (this.tracer) { + const spanName = `${this.name} ${this.id} Worker.pause`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [TelemetryAttributes.WorkerName]: this.name, + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerDoNotWaitActive]: doNotWaitActive, + }); + } + if (!this.paused) { this.paused = new Promise(resolve => { this.resumeWorker = function () { @@ -827,6 +920,10 @@ will never work with more accuracy than 1ms. */ await (!doNotWaitActive && this.whenCurrentJobsFinished()); this.emit('paused'); } + + if (this.tracer) { + span.end(); + } } /** @@ -834,10 +931,24 @@ will never work with more accuracy than 1ms. */ * Resumes processing of this worker (if paused). */ resume(): void { + let span; + if (this.tracer) { + const spanName = `${this.name} ${this.id} Worker.resume`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [TelemetryAttributes.WorkerName]: this.name, + [TelemetryAttributes.WorkerId]: this.id, + }); + } + if (this.resumeWorker) { this.resumeWorker(); this.emit('resumed'); } + + if (this.tracer) { + span.end(); + } } /** @@ -872,7 +983,22 @@ will never work with more accuracy than 1ms. */ * @returns Promise that resolves when the worker has been closed. */ close(force = false): Promise { + let span; + if (this.tracer) { + const spanName = `${this.name} ${this.id} Worker.close`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [TelemetryAttributes.WorkerName]: this.name, + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerForceClose]: force, + }); + } + if (this.closing) { + if (this.tracer) { + span.end(); + } + return this.closing; } this.closing = (async () => { @@ -905,6 +1031,10 @@ will never work with more accuracy than 1ms. */ this.closed = true; this.emit('closed'); + + if (this.tracer) { + span.end(); + } })(); return this.closing; } @@ -922,6 +1052,16 @@ will never work with more accuracy than 1ms. */ * @see {@link https://docs.bullmq.io/patterns/manually-fetching-jobs} */ async startStalledCheckTimer(): Promise { + let span; + if (this.tracer) { + const spanName = `${this.name} ${this.id} Worker.startStalledCheckTimer`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [TelemetryAttributes.WorkerName]: this.name, + [TelemetryAttributes.WorkerId]: this.id, + }); + } + if (!this.opts.skipStalledCheck) { clearTimeout(this.stalledCheckTimer); @@ -936,6 +1076,10 @@ will never work with more accuracy than 1ms. */ } } } + + if (this.tracer) { + span.end(); + } } private startLockExtenderTimer( @@ -1017,6 +1161,17 @@ will never work with more accuracy than 1ms. */ } protected async extendLocks(jobs: Job[]) { + let span; + if (this.tracer) { + const spanName = `${this.name} ${this.id} Worker.extendLocks`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [TelemetryAttributes.WorkerName]: this.name, + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerJobsInvolved]: JSON.stringify(jobs), + }); + } + try { const pipeline = (await this.client).pipeline(); for (const job of jobs) { @@ -1040,10 +1195,24 @@ will never work with more accuracy than 1ms. */ } } catch (err) { this.emit('error', err); + } finally { + if (this.tracer) { + span.end(); + } } } private async moveStalledJobsToWait() { + let span; + if (this.tracer) { + const spanName = `${this.name} ${this.id} Worker.moveStalledJobsToWait`; + span = this.tracer.startSpan(spanName); + span.setAttributes({ + [TelemetryAttributes.WorkerName]: this.name, + [TelemetryAttributes.WorkerId]: this.id, + }); + } + const chunkSize = 50; const [failed, stalled] = await this.scripts.moveStalledJobsToWait(); @@ -1065,6 +1234,10 @@ will never work with more accuracy than 1ms. */ } this.notifyFailedJobs(await Promise.all(jobPromises)); + + if (this.tracer) { + span.end(); + } } private notifyFailedJobs(failedJobs: Job[]) { diff --git a/src/enums/index.ts b/src/enums/index.ts index 4574120c86..3cab38de6c 100644 --- a/src/enums/index.ts +++ b/src/enums/index.ts @@ -2,4 +2,4 @@ export * from './child-command'; export * from './error-code'; export * from './parent-command'; export * from './metrics-time'; -export * from './opentelemetry-attributes'; +export * from './telemetry-attributes'; diff --git a/src/enums/opentelemetry-attributes.ts b/src/enums/telemetry-attributes.ts similarity index 62% rename from src/enums/opentelemetry-attributes.ts rename to src/enums/telemetry-attributes.ts index f3e0783594..8ef06bfb53 100644 --- a/src/enums/opentelemetry-attributes.ts +++ b/src/enums/telemetry-attributes.ts @@ -1,4 +1,4 @@ -export enum OpenTelemetryAttributes { +export enum TelemetryAttributes { QueueName = 'bullmq.queue.name', WorkerName = 'bullmq.worker.name', BulkCount = 'bullmq.job.bulk.count', @@ -15,4 +15,11 @@ export enum OpenTelemetryAttributes { JobTimestamp = 'bullmq.job.timestamp', QueueOptions = 'bullmq.queue.options', QueueEventMaxLength = 'bullmq.queue.event.max.length', + WorkerOptions = 'bullmq.worker.options', + WorkerToken = 'bullmq.worker.token', + WorkerId = 'bullmq.worker.id', + WorkerRateLimit = 'bullmq.worker.rate.limit', + WorkerDoNotWaitActive = 'bullmq.worker.do.not.wait.active', + WorkerForceClose = 'bullmq.worker.force.close', + WorkerJobsInvolved = 'bullmq.worker.jobs.involved', } diff --git a/src/interfaces/index.ts b/src/interfaces/index.ts index 6b06010ec3..0d8b4e96d2 100644 --- a/src/interfaces/index.ts +++ b/src/interfaces/index.ts @@ -23,4 +23,4 @@ export * from './repeat-options'; export * from './sandboxed-job-processor'; export * from './sandboxed-job'; export * from './worker-options'; -export * from './opentelemetry'; +export * from './telemetry'; diff --git a/src/interfaces/queue-options.ts b/src/interfaces/queue-options.ts index 20e05bf16e..d581652918 100644 --- a/src/interfaces/queue-options.ts +++ b/src/interfaces/queue-options.ts @@ -1,7 +1,7 @@ import { AdvancedRepeatOptions } from './advanced-options'; import { DefaultJobOptions } from './base-job-options'; import { ConnectionOptions } from './redis-options'; -import { Telemetry } from './opentelemetry'; +import { Telemetry } from './telemetry'; export enum ClientType { blocking = 'blocking', diff --git a/src/interfaces/opentelemetry.ts b/src/interfaces/telemetry.ts similarity index 100% rename from src/interfaces/opentelemetry.ts rename to src/interfaces/telemetry.ts diff --git a/src/interfaces/worker-options.ts b/src/interfaces/worker-options.ts index 77a204a23f..5f6f3133f0 100644 --- a/src/interfaces/worker-options.ts +++ b/src/interfaces/worker-options.ts @@ -4,6 +4,7 @@ import { QueueBaseOptions } from './queue-options'; import { RateLimiterOptions } from './rate-limiter-options'; import { MetricsOptions } from './metrics-options'; import { KeepJobs } from './keep-jobs'; +import { Telemetry } from './telemetry'; /** * An async function that receives `Job`s and handles them. @@ -144,6 +145,8 @@ export interface WorkerOptions extends QueueBaseOptions { * @default false */ useWorkerThreads?: boolean; + + telemetry?: Telemetry; } export interface GetNextJobOptions { From e6dfb85c8edae089b4c294fa9143a5247f95468f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C3=B3=C5=BAd=C5=BA?= Date: Mon, 26 Aug 2024 18:59:41 +0200 Subject: [PATCH 03/26] feat(queue, worker): fix telemetry setup --- src/classes/queue.ts | 2 +- src/classes/worker.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/classes/queue.ts b/src/classes/queue.ts index aa25343c63..068d007dcc 100644 --- a/src/classes/queue.ts +++ b/src/classes/queue.ts @@ -117,7 +117,7 @@ export class Queue< this.jobsOpts = opts?.defaultJobOptions ?? {}; - this.tracer = opts?.telemetry.tracer; + this.tracer = opts?.telemetry?.tracer; this.waitUntilReady() .then(client => { diff --git a/src/classes/worker.ts b/src/classes/worker.ts index 344dfc2ca6..dd096bc1ca 100644 --- a/src/classes/worker.ts +++ b/src/classes/worker.ts @@ -319,7 +319,7 @@ export class Worker< setTimeout(() => this.emit('ready'), 0), ); - this.tracer = opts?.telemetry.tracer; + this.tracer = opts?.telemetry?.tracer; } emit>( From a75a8dd4461a7a49fcfe58aa58278d59deeec7c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20G=C3=B3=C5=BAd=C5=BA?= Date: Tue, 27 Aug 2024 02:04:08 +0200 Subject: [PATCH 04/26] feat(queue, worker): describe features --- src/classes/queue.ts | 6 ++++++ src/classes/worker.ts | 5 +++++ src/interfaces/queue-options.ts | 3 +++ src/interfaces/worker-options.ts | 3 +++ 4 files changed, 17 insertions(+) diff --git a/src/classes/queue.ts b/src/classes/queue.ts index 068d007dcc..39964b1b80 100644 --- a/src/classes/queue.ts +++ b/src/classes/queue.ts @@ -99,6 +99,12 @@ export class Queue< jobsOpts: BaseJobOptions; opts: QueueOptions; private _repeat?: Repeat; + + /** + * Instance of a telemetry client + * To use it create if statement in a method to observe with start and end of a span + * It will check if tracer is provided and if not it will continue as is + */ private tracer: Tracer | undefined; constructor( diff --git a/src/classes/worker.ts b/src/classes/worker.ts index dd096bc1ca..48121bd960 100644 --- a/src/classes/worker.ts +++ b/src/classes/worker.ts @@ -192,6 +192,11 @@ export class Worker< protected processFn: Processor; protected running = false; + /** + * Instance of a telemetry client + * To use it create if statement in a method to observe with start and end of a span + * It will check if tracer is provided and if not it will continue as is + */ private tracer: Tracer | undefined; static RateLimitError(): Error { diff --git a/src/interfaces/queue-options.ts b/src/interfaces/queue-options.ts index d581652918..f1a006b059 100644 --- a/src/interfaces/queue-options.ts +++ b/src/interfaces/queue-options.ts @@ -57,6 +57,9 @@ export interface QueueOptions extends QueueBaseOptions { settings?: AdvancedRepeatOptions; + /** + * Telemetry client + */ telemetry?: Telemetry; } diff --git a/src/interfaces/worker-options.ts b/src/interfaces/worker-options.ts index 5f6f3133f0..39ba799a16 100644 --- a/src/interfaces/worker-options.ts +++ b/src/interfaces/worker-options.ts @@ -146,6 +146,9 @@ export interface WorkerOptions extends QueueBaseOptions { */ useWorkerThreads?: boolean; + /** + * Telemetry client + */ telemetry?: Telemetry; } From 66b85c56115c2f12d3c1c8b4f10da609e8a2fc87 Mon Sep 17 00:00:00 2001 From: fgozdz Date: Thu, 29 Aug 2024 09:44:06 +0200 Subject: [PATCH 05/26] feat(queue, worker): add exception handler --- src/classes/queue.ts | 31 ++++++++++++- src/classes/worker.ts | 72 +++++++++++++++++++++++++------ src/enums/telemetry-attributes.ts | 1 + src/interfaces/telemetry.ts | 42 ++++++++++++++++++ 4 files changed, 130 insertions(+), 16 deletions(-) diff --git a/src/classes/queue.ts b/src/classes/queue.ts index 39964b1b80..5c267668c8 100644 --- a/src/classes/queue.ts +++ b/src/classes/queue.ts @@ -7,6 +7,7 @@ import { RepeatableJob, RepeatOptions, Tracer, + StatusCode, } from '../interfaces'; import { FinishedStatus, JobsOptions, MinimalQueue } from '../types'; import { Job } from './job'; @@ -243,7 +244,20 @@ export class Queue< if (opts && opts.repeat) { if (opts.repeat.endDate) { if (+new Date(opts.repeat.endDate) < Date.now()) { - throw new Error('End date must be greater than current timestamp'); + const error = 'End date must be greater than current timestamp'; + + try { + if (this.tracer) { + span.recordException(error); + span.setStatus({ code: StatusCode.ERROR, message: error }); + } + + throw new Error(error); + } finally { + if (this.tracer) { + span.end(); + } + } } } @@ -262,7 +276,20 @@ export class Queue< const jobId = opts?.jobId; if (jobId == '0' || jobId?.startsWith('0:')) { - throw new Error("JobId cannot be '0' or start with 0:"); + const error = "JobId cannot be '0' or start with 0:"; + + try { + if (this.tracer) { + span.recordException(error); + span.setStatus({ code: StatusCode.ERROR, message: error }); + } + + throw new Error(error); + } finally { + if (this.tracer) { + span.end(); + } + } } const job = await this.Job.create( diff --git a/src/classes/worker.ts b/src/classes/worker.ts index 48121bd960..e8cd7e49db 100644 --- a/src/classes/worker.ts +++ b/src/classes/worker.ts @@ -13,6 +13,7 @@ import { JobJsonRaw, Processor, RedisClient, + StatusCode, WorkerOptions, } from '../interfaces'; import { MinimalQueue } from '../types'; @@ -425,25 +426,47 @@ export class Worker< } if (!this.processFn) { - if (this.tracer) { - span.end(); - } + const error = 'No process function is defined.'; + + try { + if (this.tracer) { + span.recordException(error); + span.setStatus({ code: StatusCode.ERROR, message: error }); + } - throw new Error('No process function is defined.'); + throw new Error(error); + } finally { + if (this.tracer) { + span.end(); + } + } } if (this.running) { - if (this.tracer) { - span.end(); - } + const error = 'Worker is already running.'; + + try { + if (this.tracer) { + span.recordException(error); + span.setStatus({ code: StatusCode.ERROR, message: error }); + } - throw new Error('Worker is already running.'); + throw new Error(error); + } finally { + if (this.tracer) { + span.end(); + } + } } try { this.running = true; if (this.closing) { + if (this.tracer) { + span.end(); + } + return; } @@ -539,6 +562,14 @@ export class Worker< this.running = false; return await asyncFifoQueue.waitAll(); } catch (error) { + if (this.tracer) { + span.recordException(JSON.stringify(error)); + span.setStatus({ + code: StatusCode.ERROR, + message: JSON.stringify(error), + }); + } + this.running = false; throw error; } finally { @@ -574,6 +605,10 @@ export class Worker< ); if (this.tracer) { + span.setAttributes({ + [TelemetryAttributes.JobId]: nextJob.id, + }); + span.end(); } @@ -987,7 +1022,7 @@ will never work with more accuracy than 1ms. */ * * @returns Promise that resolves when the worker has been closed. */ - close(force = false): Promise { + async close(force = false): Promise { let span; if (this.tracer) { const spanName = `${this.name} ${this.id} Worker.close`; @@ -1036,12 +1071,15 @@ will never work with more accuracy than 1ms. */ this.closed = true; this.emit('closed'); - - if (this.tracer) { - span.end(); - } })(); - return this.closing; + + await this.closing; + + if (this.tracer) { + span.end(); + } + + return; } /** @@ -1170,10 +1208,12 @@ will never work with more accuracy than 1ms. */ if (this.tracer) { const spanName = `${this.name} ${this.id} Worker.extendLocks`; span = this.tracer.startSpan(spanName); + const jobids = jobs.map(job => job.id); span.setAttributes({ [TelemetryAttributes.WorkerName]: this.name, [TelemetryAttributes.WorkerId]: this.id, [TelemetryAttributes.WorkerJobsInvolved]: JSON.stringify(jobs), + [TelemetryAttributes.WorkerJobsIdsInvolved]: jobids, }); } @@ -1241,6 +1281,10 @@ will never work with more accuracy than 1ms. */ this.notifyFailedJobs(await Promise.all(jobPromises)); if (this.tracer) { + span.setAttributes({ + [TelemetryAttributes.WorkerJobsIdsInvolved]: stalled, + }); + span.end(); } } diff --git a/src/enums/telemetry-attributes.ts b/src/enums/telemetry-attributes.ts index 8ef06bfb53..be2dcd9eea 100644 --- a/src/enums/telemetry-attributes.ts +++ b/src/enums/telemetry-attributes.ts @@ -22,4 +22,5 @@ export enum TelemetryAttributes { WorkerDoNotWaitActive = 'bullmq.worker.do.not.wait.active', WorkerForceClose = 'bullmq.worker.force.close', WorkerJobsInvolved = 'bullmq.worker.jobs.involved', + WorkerJobsIdsInvolved = 'bullmq.worker.jobs.ids.involved', } diff --git a/src/interfaces/telemetry.ts b/src/interfaces/telemetry.ts index 1947274d3c..418ed57505 100644 --- a/src/interfaces/telemetry.ts +++ b/src/interfaces/telemetry.ts @@ -9,6 +9,8 @@ export interface Tracer { export interface Span { setAttribute(key: string, value: Attribute): Span; setAttributes(attributes: Attributes): Span; + recordException(exception: Exception, time?: Time): void; + setStatus(code: SpanStatus): Span; end(): void; } @@ -23,3 +25,43 @@ export type Attribute = | null | undefined | (null | undefined | string | number | boolean)[]; + +export type Exception = string | ExceptionType; + +export type ExceptionType = CodeException | MessageException | NameException; + +interface CodeException { + code: string | number; + name?: string; + message?: string; + stack?: string; +} + +interface MessageException { + code?: string | number; + name?: string; + message: string; + stack?: string; +} + +interface NameException { + code?: string | number; + name: string; + message?: string; + stack?: string; +} + +export type Time = HighResolutionTime | number | Date; + +type HighResolutionTime = [number, number]; + +interface SpanStatus { + code: StatusCode; + message?: string; +} + +export enum StatusCode { + UNSET = 0, + OK = 1, + ERROR = 2, +} From 54a2c18c64f9a899b82efded517598c6b62f2046 Mon Sep 17 00:00:00 2001 From: fgozdz Date: Thu, 29 Aug 2024 17:08:49 +0200 Subject: [PATCH 06/26] feat(queue, worker): remove redundancy in code by creating helper method in queue base --- src/classes/queue-base.ts | 31 +- src/classes/queue.ts | 628 ++++++++++------------- src/classes/worker.ts | 853 ++++++++++++++------------------ src/interfaces/queue-options.ts | 5 + src/interfaces/telemetry.ts | 12 - 5 files changed, 671 insertions(+), 858 deletions(-) diff --git a/src/classes/queue-base.ts b/src/classes/queue-base.ts index a22cf424ac..eaeddf4208 100644 --- a/src/classes/queue-base.ts +++ b/src/classes/queue-base.ts @@ -1,5 +1,5 @@ import { EventEmitter } from 'events'; -import { QueueBaseOptions, RedisClient } from '../interfaces'; +import { QueueBaseOptions, RedisClient, Span, Tracer } from '../interfaces'; import { MinimalQueue } from '../types'; import { delay, @@ -30,6 +30,13 @@ export class QueueBase extends EventEmitter implements MinimalQueue { protected connection: RedisConnection; public readonly qualifiedName: string; + /** + * Instance of a telemetry client + * To use it create if statement in a method to observe with start and end of a span + * It will check if tracer is provided and if not it will continue as is + */ + private tracer: Tracer | undefined; + /** * * @param name - The name of the queue. @@ -72,6 +79,8 @@ export class QueueBase extends EventEmitter implements MinimalQueue { this.keys = queueKeys.getKeys(name); this.toKey = (type: string) => queueKeys.toKey(name, type); this.setScripts(); + + this.tracer = opts?.telemetry?.tracer; } /** @@ -171,4 +180,24 @@ export class QueueBase extends EventEmitter implements MinimalQueue { } } } + + protected trace( + getSpanName: () => string, + callback: (span?: Span) => Promise | T, + ) { + if (!this.tracer) { + return callback(); + } + + const span = this.tracer.startSpan(getSpanName()); + + try { + return callback(span); + } catch (err) { + span.recordException(err as Error); + throw err; + } finally { + span.end(); + } + } } diff --git a/src/classes/queue.ts b/src/classes/queue.ts index 5c267668c8..d28a65ce80 100644 --- a/src/classes/queue.ts +++ b/src/classes/queue.ts @@ -6,8 +6,6 @@ import { QueueOptions, RepeatableJob, RepeatOptions, - Tracer, - StatusCode, } from '../interfaces'; import { FinishedStatus, JobsOptions, MinimalQueue } from '../types'; import { Job } from './job'; @@ -101,13 +99,6 @@ export class Queue< opts: QueueOptions; private _repeat?: Repeat; - /** - * Instance of a telemetry client - * To use it create if statement in a method to observe with start and end of a span - * It will check if tracer is provided and if not it will continue as is - */ - private tracer: Tracer | undefined; - constructor( name: string, opts?: QueueOptions, @@ -124,8 +115,6 @@ export class Queue< this.jobsOpts = opts?.defaultJobOptions ?? {}; - this.tracer = opts?.telemetry?.tracer; - this.waitUntilReady() .then(client => { if (!this.closing) { @@ -232,88 +221,56 @@ export class Queue< data: DataType, opts?: JobsOptions, ): Promise> { - let span; - if (this.tracer) { - const spanName = `${this.name}.${name} Queue.add`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - }); - } - - if (opts && opts.repeat) { - if (opts.repeat.endDate) { - if (+new Date(opts.repeat.endDate) < Date.now()) { - const error = 'End date must be greater than current timestamp'; - - try { - if (this.tracer) { - span.recordException(error); - span.setStatus({ code: StatusCode.ERROR, message: error }); - } + return this.trace( + () => `${this.name}.${name} Queue.add`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.QueueName]: this.name, + }); - throw new Error(error); - } finally { - if (this.tracer) { - span.end(); + if (opts && opts.repeat) { + if (opts.repeat.endDate) { + if (+new Date(opts.repeat.endDate) < Date.now()) { + throw new Error( + 'End date must be greater than current timestamp', + ); } } - } - } - - const repeatableJob = (await this.repeat).addNextRepeatableJob< - DataType, - ResultType, - NameType - >(name, data, { ...this.jobsOpts, ...opts }, true); - - if (this.tracer) { - span.end(); - } - return repeatableJob; - } else { - const jobId = opts?.jobId; + const repeatableJob = (await this.repeat).addNextRepeatableJob< + DataType, + ResultType, + NameType + >(name, data, { ...this.jobsOpts, ...opts }, true); - if (jobId == '0' || jobId?.startsWith('0:')) { - const error = "JobId cannot be '0' or start with 0:"; + return repeatableJob; + } else { + const jobId = opts?.jobId; - try { - if (this.tracer) { - span.recordException(error); - span.setStatus({ code: StatusCode.ERROR, message: error }); + if (jobId == '0' || jobId?.startsWith('0:')) { + throw new Error("JobId cannot be '0' or start with 0:"); } - throw new Error(error); - } finally { - if (this.tracer) { - span.end(); - } + const job = await this.Job.create( + this as MinimalQueue, + name, + data, + { + ...this.jobsOpts, + ...opts, + jobId, + }, + ); + this.emit('waiting', job); + + span?.setAttributes({ + [TelemetryAttributes.JobId]: job.id, + }); + + return job; } - } - - const job = await this.Job.create( - this as MinimalQueue, - name, - data, - { - ...this.jobsOpts, - ...opts, - jobId, - }, - ); - this.emit('waiting', job); - - if (this.tracer) { - span.setAttributes({ - [TelemetryAttributes.JobId]: job.id, - }); - - span.end(); - } - - return job; - } + }, + ); } /** @@ -326,36 +283,29 @@ export class Queue< async addBulk( jobs: { name: NameType; data: DataType; opts?: BulkJobOptions }[], ): Promise[]> { - let span; - if (this.tracer) { - const jobsInBulk = jobs.map(job => job.name); - const spanName = `${this.name} Queue.addBulk`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - [TelemetryAttributes.BulkNames]: jobsInBulk, - [TelemetryAttributes.BulkCount]: jobsInBulk.length, - }); - } + return this.trace( + () => `${this.name} Queue.addBulk`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.BulkNames]: jobs.map(job => job.name), + [TelemetryAttributes.BulkCount]: jobs.length, + }); - const bulk = await this.Job.createBulk( - this as MinimalQueue, - jobs.map(job => ({ - name: job.name, - data: job.data, - opts: { - ...this.jobsOpts, - ...job.opts, - jobId: job.opts?.jobId, - }, - })), + return await this.Job.createBulk( + this as MinimalQueue, + jobs.map(job => ({ + name: job.name, + data: job.data, + opts: { + ...this.jobsOpts, + ...job.opts, + jobId: job.opts?.jobId, + }, + })), + ); + }, ); - - if (this.tracer) { - span.end(); - } - - return bulk; } /** @@ -370,22 +320,18 @@ export class Queue< * and in that case it will add it there instead of the wait list. */ async pause(): Promise { - let span; - if (this.tracer) { - const spanName = `${this.name} Queue.pause`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - }); - } - - await this.scripts.pause(true); + this.trace( + () => `${this.name} Queue.pause`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.QueueName]: this.name, + }); - if (this.tracer) { - span.end(); - } + await this.scripts.pause(true); - this.emit('paused'); + this.emit('paused'); + }, + ); } /** @@ -393,26 +339,22 @@ export class Queue< * */ async close(): Promise { - let span; - if (this.tracer) { - const spanName = `${this.name} Queue.close`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - }); - } - - if (!this.closing) { - if (this._repeat) { - await this._repeat.close(); - } - } + this.trace( + () => `${this.name} Queue.close`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.QueueName]: this.name, + }); - await super.close(); + if (!this.closing) { + if (this._repeat) { + await this._repeat.close(); + } + } - if (this.tracer) { - span.end(); - } + await super.close(); + }, + ); } /** * Resumes the processing of this queue globally. @@ -421,22 +363,18 @@ export class Queue< * queue. */ async resume(): Promise { - let span; - if (this.tracer) { - const spanName = `${this.name} Queue.resume`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - }); - } - - await this.scripts.pause(false); + this.trace( + () => `${this.name} Queue.resume`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.QueueName]: this.name, + }); - if (this.tracer) { - span.end(); - } + await this.scripts.pause(false); - this.emit('resumed'); + this.emit('resumed'); + }, + ); } /** @@ -489,23 +427,19 @@ export class Queue< repeatOpts: RepeatOptions, jobId?: string, ): Promise { - let span; - if (this.tracer) { - const spanName = `${this.name} ${name} Queue.removeRepeatable`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - }); - } + return this.trace( + () => `${this.name} ${name} Queue.removeRepeatable`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.QueueName]: this.name, + }); - const repeat = await this.repeat; - const removed = await repeat.removeRepeatable(name, repeatOpts, jobId); + const repeat = await this.repeat; + const removed = await repeat.removeRepeatable(name, repeatOpts, jobId); - if (this.tracer) { - span.end(); - } - - return !removed; + return !removed; + }, + ); } /** @@ -514,24 +448,18 @@ export class Queue< * @param id - identifier */ async removeDebounceKey(id: string): Promise { - let span; - if (this.tracer) { - const spanName = `${this.name} ${id} Queue.removeDebounceKey`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - }); - } - - const client = await this.client; + return this.trace( + () => `${this.name} ${id} Queue.removeDebounceKey`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.QueueName]: this.name, + }); - const debounced = await client.del(`${this.keys.de}:${id}`); + const client = await this.client; - if (this.tracer) { - span.end(); - } - - return debounced; + return await client.del(`${this.keys.de}:${id}`); + }, + ); } /** @@ -545,24 +473,20 @@ export class Queue< * @returns */ async removeRepeatableByKey(key: string): Promise { - let span; - if (this.tracer) { - const spanName = `${this.name} ${key} Queue.removeRepeatableByKey`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - [TelemetryAttributes.JobKey]: key, - }); - } + return this.trace( + () => `${this.name} ${key} Queue.removeRepeatableByKey`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.JobKey]: key, + }); - const repeat = await this.repeat; - const removed = await repeat.removeRepeatableByKey(key); + const repeat = await this.repeat; + const removed = await repeat.removeRepeatableByKey(key); - if (this.tracer) { - span.end(); - } - - return !removed; + return !removed; + }, + ); } /** @@ -575,26 +499,20 @@ export class Queue< * any of its dependencies were locked. */ async remove(jobId: string, { removeChildren = true } = {}): Promise { - let span; - if (this.tracer) { - const spanName = `${this.name} ${jobId} Queue.remove`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - [TelemetryAttributes.JobId]: jobId, - [TelemetryAttributes.JobOptions]: JSON.stringify({ - removeChildren, - }), - }); - } - - const status = await this.scripts.remove(jobId, removeChildren); - - if (this.tracer) { - span.end(); - } + return this.trace( + () => `${this.name} ${jobId} Queue.remove`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.JobId]: jobId, + [TelemetryAttributes.JobOptions]: JSON.stringify({ + removeChildren, + }), + }); - return status; + return await this.scripts.remove(jobId, removeChildren); + }, + ); } /** @@ -607,22 +525,18 @@ export class Queue< jobId: string, progress: number | object, ): Promise { - let span; - if (this.tracer) { - const spanName = `${this.name} Queue.updateJobProgress`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - [TelemetryAttributes.JobId]: jobId, - [TelemetryAttributes.JobProgress]: JSON.stringify(progress), - }); - } - - await this.scripts.updateProgress(jobId, progress); + this.trace( + () => `${this.name} Queue.updateJobProgress`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.JobId]: jobId, + [TelemetryAttributes.JobProgress]: JSON.stringify(progress), + }); - if (this.tracer) { - span.end(); - } + await this.scripts.updateProgress(jobId, progress); + }, + ); } /** @@ -650,21 +564,17 @@ export class Queue< * delayed jobs. */ async drain(delayed = false): Promise { - let span; - if (this.tracer) { - const spanName = `${this.name} Queue.drain`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - [TelemetryAttributes.QueueDrainDelay]: delayed, - }); - } - - await this.scripts.drain(delayed); + this.trace( + () => `${this.name} Queue.drain`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.QueueDrainDelay]: delayed, + }); - if (this.tracer) { - span.end(); - } + await this.scripts.drain(delayed); + }, + ); } /** @@ -689,55 +599,49 @@ export class Queue< | 'delayed' | 'failed' = 'completed', ): Promise { - let span; - if (this.tracer) { - const spanName = `${this.name} Queue.clean`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - [TelemetryAttributes.QueueGrace]: grace, - [TelemetryAttributes.JobType]: type, - }); - } - - const maxCount = limit || Infinity; - const maxCountPerCall = Math.min(10000, maxCount); - const timestamp = Date.now() - grace; - let deletedCount = 0; - const deletedJobsIds: string[] = []; + return this.trace( + () => `${this.name} Queue.clean`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.QueueGrace]: grace, + [TelemetryAttributes.JobType]: type, + }); - if (this.tracer) { - span.setAttributes({ - [TelemetryAttributes.QueueCleanLimit]: maxCount, - [TelemetryAttributes.JobTimestamp]: timestamp, - }); - } + const maxCount = limit || Infinity; + const maxCountPerCall = Math.min(10000, maxCount); + const timestamp = Date.now() - grace; + let deletedCount = 0; + const deletedJobsIds: string[] = []; - while (deletedCount < maxCount) { - const jobsIds = await this.scripts.cleanJobsInSet( - type, - timestamp, - maxCountPerCall, - ); + span?.setAttributes({ + [TelemetryAttributes.QueueCleanLimit]: maxCount, + [TelemetryAttributes.JobTimestamp]: timestamp, + }); - this.emit('cleaned', jobsIds, type); - deletedCount += jobsIds.length; - deletedJobsIds.push(...jobsIds); + while (deletedCount < maxCount) { + const jobsIds = await this.scripts.cleanJobsInSet( + type, + timestamp, + maxCountPerCall, + ); - if (jobsIds.length < maxCountPerCall) { - break; - } - } + this.emit('cleaned', jobsIds, type); + deletedCount += jobsIds.length; + deletedJobsIds.push(...jobsIds); - if (this.tracer) { - span.setAttributes({ - [TelemetryAttributes.JobId]: deletedJobsIds, - }); + if (jobsIds.length < maxCountPerCall) { + break; + } + } - span.end(); - } + span?.setAttributes({ + [TelemetryAttributes.JobId]: deletedJobsIds, + }); - return deletedJobsIds; + return deletedJobsIds; + }, + ); } /** @@ -752,29 +656,25 @@ export class Queue< * @param opts - Obliterate options. */ async obliterate(opts?: ObliterateOpts): Promise { - let span; - if (this.tracer) { - const spanName = `${this.name} Queue.obliterate`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - }); - } + this.trace( + () => `${this.name} Queue.obliterate`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.QueueName]: this.name, + }); - await this.pause(); + await this.pause(); - let cursor = 0; - do { - cursor = await this.scripts.obliterate({ - force: false, - count: 1000, - ...opts, - }); - } while (cursor); - - if (this.tracer) { - span.end(); - } + let cursor = 0; + do { + cursor = await this.scripts.obliterate({ + force: false, + count: 1000, + ...opts, + }); + } while (cursor); + }, + ); } /** @@ -790,28 +690,24 @@ export class Queue< async retryJobs( opts: { count?: number; state?: FinishedStatus; timestamp?: number } = {}, ): Promise { - let span; - if (this.tracer) { - const spanName = `${this.name} Queue.retryJobs`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - [TelemetryAttributes.QueueOptions]: JSON.stringify(opts), - }); - } + this.trace( + () => `${this.name} Queue.retryJobs`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.QueueOptions]: JSON.stringify(opts), + }); - let cursor = 0; - do { - cursor = await this.scripts.retryJobs( - opts.state, - opts.count, - opts.timestamp, - ); - } while (cursor); - - if (this.tracer) { - span.end(); - } + let cursor = 0; + do { + cursor = await this.scripts.retryJobs( + opts.state, + opts.count, + opts.timestamp, + ); + } while (cursor); + }, + ); } /** @@ -823,24 +719,20 @@ export class Queue< * @returns */ async promoteJobs(opts: { count?: number } = {}): Promise { - let span; - if (this.tracer) { - const spanName = `${this.name} Queue.promoteJobs`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - [TelemetryAttributes.QueueOptions]: JSON.stringify(opts), - }); - } - - let cursor = 0; - do { - cursor = await this.scripts.promoteJobs(opts.count); - } while (cursor); + this.trace( + () => `${this.name} Queue.promoteJobs`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.QueueOptions]: JSON.stringify(opts), + }); - if (this.tracer) { - span.end(); - } + let cursor = 0; + do { + cursor = await this.scripts.promoteJobs(opts.count); + } while (cursor); + }, + ); } /** @@ -849,24 +741,18 @@ export class Queue< * @param maxLength - */ async trimEvents(maxLength: number): Promise { - let span; - if (this.tracer) { - const spanName = `${this.name} Queue.trimEvents`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - [TelemetryAttributes.QueueEventMaxLength]: maxLength, - }); - } - - const client = await this.client; - const trim = await client.xtrim(this.keys.events, 'MAXLEN', '~', maxLength); - - if (this.tracer) { - span.end(); - } + return this.trace( + () => `${this.name} Queue.trimEvents`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.QueueEventMaxLength]: maxLength, + }); - return trim; + const client = await this.client; + return await client.xtrim(this.keys.events, 'MAXLEN', '~', maxLength); + }, + ); } /** diff --git a/src/classes/worker.ts b/src/classes/worker.ts index e8cd7e49db..46afb57f1d 100644 --- a/src/classes/worker.ts +++ b/src/classes/worker.ts @@ -13,7 +13,6 @@ import { JobJsonRaw, Processor, RedisClient, - StatusCode, WorkerOptions, } from '../interfaces'; import { MinimalQueue } from '../types'; @@ -193,13 +192,6 @@ export class Worker< protected processFn: Processor; protected running = false; - /** - * Instance of a telemetry client - * To use it create if statement in a method to observe with start and end of a span - * It will check if tracer is provided and if not it will continue as is - */ - private tracer: Tracer | undefined; - static RateLimitError(): Error { return new RateLimitError(); } @@ -324,8 +316,6 @@ export class Worker< this.blockingConnection.on('ready', () => setTimeout(() => this.emit('ready'), 0), ); - - this.tracer = opts?.telemetry?.tracer; } emit>( @@ -414,169 +404,127 @@ export class Worker< } async run() { - let span; - if (this.tracer) { - const spanName = `${this.name} ${this.id} Worker.run`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.WorkerName]: this.name, - [TelemetryAttributes.WorkerId]: this.id, - [TelemetryAttributes.WorkerOptions]: JSON.stringify(this.opts), - }); - } - - if (!this.processFn) { - const error = 'No process function is defined.'; - - try { - if (this.tracer) { - span.recordException(error); - span.setStatus({ code: StatusCode.ERROR, message: error }); - } - - throw new Error(error); - } finally { - if (this.tracer) { - span.end(); - } - } - } - - if (this.running) { - const error = 'Worker is already running.'; + this.trace( + () => `${this.name} ${this.id} Worker.run`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.WorkerName]: this.name, + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerOptions]: JSON.stringify(this.opts), + }); - try { - if (this.tracer) { - span.recordException(error); - span.setStatus({ code: StatusCode.ERROR, message: error }); + if (!this.processFn) { + throw new Error('No process function is defined.'); } - throw new Error(error); - } finally { - if (this.tracer) { - span.end(); + if (this.running) { + throw new Error('Worker is already running.'); } - } - } - try { - this.running = true; - - if (this.closing) { - if (this.tracer) { - span.end(); - } - - return; - } - - await this.startStalledCheckTimer(); - - const jobsInProgress = new Set<{ job: Job; ts: number }>(); - this.startLockExtenderTimer(jobsInProgress); - - const asyncFifoQueue = (this.asyncFifoQueue = - new AsyncFifoQueue>()); - - let tokenPostfix = 0; - - const client = await this.client; - const bclient = await this.blockingConnection.client; - - /** - * This is the main loop in BullMQ. Its goals are to fetch jobs from the queue - * as efficiently as possible, providing concurrency and minimal unnecessary calls - * to Redis. - */ - while (!this.closing) { - let numTotal = asyncFifoQueue.numTotal(); - - /** - * This inner loop tries to fetch jobs concurrently, but if we are waiting for a job - * to arrive at the queue we should not try to fetch more jobs (as it would be pointless) - */ - while ( - !this.waiting && - numTotal < this.opts.concurrency && - (!this.limitUntil || numTotal == 0) - ) { - const token = `${this.id}:${tokenPostfix++}`; - - const fetchedJob = this.retryIfFailed>( - () => this._getNextJob(client, bclient, token, { block: true }), - this.opts.runRetryDelay, - ); - asyncFifoQueue.add(fetchedJob); - - numTotal = asyncFifoQueue.numTotal(); + try { + this.running = true; - if (this.waiting && numTotal > 1) { - // We are waiting for jobs but we have others that we could start processing already - break; + if (this.closing) { + return; } - // We await here so that we fetch jobs in sequence, this is important to avoid unnecessary calls - // to Redis in high concurrency scenarios. - const job = await fetchedJob; + await this.startStalledCheckTimer(); - // No more jobs waiting but we have others that could start processing already - if (!job && numTotal > 1) { - break; - } + const jobsInProgress = new Set<{ job: Job; ts: number }>(); + this.startLockExtenderTimer(jobsInProgress); - // If there are potential jobs to be processed and blockUntil is set, we should exit to avoid waiting - // for processing this job. - if (this.blockUntil) { - break; - } - } + const asyncFifoQueue = (this.asyncFifoQueue = + new AsyncFifoQueue>()); + + let tokenPostfix = 0; + + const client = await this.client; + const bclient = await this.blockingConnection.client; + + /** + * This is the main loop in BullMQ. Its goals are to fetch jobs from the queue + * as efficiently as possible, providing concurrency and minimal unnecessary calls + * to Redis. + */ + while (!this.closing) { + let numTotal = asyncFifoQueue.numTotal(); + + /** + * This inner loop tries to fetch jobs concurrently, but if we are waiting for a job + * to arrive at the queue we should not try to fetch more jobs (as it would be pointless) + */ + while ( + !this.waiting && + numTotal < this.opts.concurrency && + (!this.limitUntil || numTotal == 0) + ) { + const token = `${this.id}:${tokenPostfix++}`; + + const fetchedJob = this.retryIfFailed>( + () => this._getNextJob(client, bclient, token, { block: true }), + this.opts.runRetryDelay, + ); + asyncFifoQueue.add(fetchedJob); + + numTotal = asyncFifoQueue.numTotal(); + + if (this.waiting && numTotal > 1) { + // We are waiting for jobs but we have others that we could start processing already + break; + } + + // We await here so that we fetch jobs in sequence, this is important to avoid unnecessary calls + // to Redis in high concurrency scenarios. + const job = await fetchedJob; + + // No more jobs waiting but we have others that could start processing already + if (!job && numTotal > 1) { + break; + } + + // If there are potential jobs to be processed and blockUntil is set, we should exit to avoid waiting + // for processing this job. + if (this.blockUntil) { + break; + } + } - // Since there can be undefined jobs in the queue (when a job fails or queue is empty) - // we iterate until we find a job. - let job: Job | void; - do { - job = await asyncFifoQueue.fetch(); - } while (!job && asyncFifoQueue.numQueued() > 0); - - if (job) { - const token = job.token; - asyncFifoQueue.add( - this.retryIfFailed>( - () => - this.processJob( - >job, - token, - () => asyncFifoQueue.numTotal() <= this.opts.concurrency, - jobsInProgress, + // Since there can be undefined jobs in the queue (when a job fails or queue is empty) + // we iterate until we find a job. + let job: Job | void; + do { + job = await asyncFifoQueue.fetch(); + } while (!job && asyncFifoQueue.numQueued() > 0); + + if (job) { + const token = job.token; + asyncFifoQueue.add( + this.retryIfFailed>( + () => + this.processJob( + >job, + token, + () => asyncFifoQueue.numTotal() <= this.opts.concurrency, + jobsInProgress, + ), + this.opts.runRetryDelay, ), - this.opts.runRetryDelay, - ), - ); - } - } - - this.running = false; - return await asyncFifoQueue.waitAll(); - } catch (error) { - if (this.tracer) { - span.recordException(JSON.stringify(error)); - span.setStatus({ - code: StatusCode.ERROR, - message: JSON.stringify(error), - }); - } + ); + } + } - this.running = false; - throw error; - } finally { - if (this.tracer) { - span.end(); - } - } + this.running = false; + return await asyncFifoQueue.waitAll(); + } catch (error) { + this.running = false; + throw error; + } + }, + ); } /** @@ -585,34 +533,30 @@ export class Worker< * @returns a Job or undefined if no job was available in the queue. */ async getNextJob(token: string, { block = true }: GetNextJobOptions = {}) { - let span; - if (this.tracer) { - const spanName = `${this.name} ${this.id} Worker.getNextJob`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.WorkerName]: this.name, - [TelemetryAttributes.WorkerId]: this.id, - [TelemetryAttributes.WorkerToken]: token, - [TelemetryAttributes.WorkerOptions]: JSON.stringify({ block }), - }); - } - - const nextJob = await this._getNextJob( - await this.client, - await this.blockingConnection.client, - token, - { block }, - ); + return this.trace( + () => `${this.name} ${this.id} Worker.getNextJob`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.WorkerName]: this.name, + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerToken]: token, + [TelemetryAttributes.WorkerOptions]: JSON.stringify({ block }), + }); - if (this.tracer) { - span.setAttributes({ - [TelemetryAttributes.JobId]: nextJob.id, - }); + const nextJob = await this._getNextJob( + await this.client, + await this.blockingConnection.client, + token, + { block }, + ); - span.end(); - } + span?.setAttributes({ + [TelemetryAttributes.JobId]: nextJob.id, + }); - return nextJob; + return nextJob; + }, + ); } private async _getNextJob( @@ -672,29 +616,25 @@ export class Worker< * @param expireTimeMs - expire time in ms of this rate limit. */ async rateLimit(expireTimeMs: number): Promise { - let span; - if (this.tracer) { - const spanName = `${this.name} ${this.id} Worker.rateLimit`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.WorkerName]: this.name, - [TelemetryAttributes.WorkerId]: this.id, - [TelemetryAttributes.WorkerRateLimit]: expireTimeMs, - }); - } + this.trace( + () => `${this.name} ${this.id} Worker.rateLimit`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.WorkerName]: this.name, + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerRateLimit]: expireTimeMs, + }); - await this.client.then(client => - client.set( - this.keys.limiter, - Number.MAX_SAFE_INTEGER, - 'PX', - expireTimeMs, - ), + await this.client.then(client => + client.set( + this.keys.limiter, + Number.MAX_SAFE_INTEGER, + 'PX', + expireTimeMs, + ), + ); + }, ); - - if (this.tracer) { - span.end(); - } } get minimumBlockTimeout(): number { @@ -845,92 +785,84 @@ will never work with more accuracy than 1ms. */ fetchNextCallback = () => true, jobsInProgress: Set<{ job: Job; ts: number }>, ): Promise> { - let span; - if (this.tracer) { - const spanName = `${this.name} ${this.id} Worker.processJob`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.WorkerName]: this.name, - [TelemetryAttributes.WorkerId]: this.id, - [TelemetryAttributes.WorkerToken]: token, - [TelemetryAttributes.JobId]: job.id, - }); - } - - if (!job || this.closing || this.paused) { - if (this.tracer) { - span.end(); - } - - return; - } + return this.trace( + () => `${this.name} ${this.id} Worker.processJob`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.WorkerName]: this.name, + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerToken]: token, + [TelemetryAttributes.JobId]: job.id, + }); - const handleCompleted = async (result: ResultType) => { - if (!this.connection.closing) { - const completed = await job.moveToCompleted( - result, - token, - fetchNextCallback() && !(this.closing || this.paused), - ); - this.emit('completed', job, result, 'active'); - const [jobData, jobId, limitUntil, delayUntil] = completed || []; - this.updateDelays(limitUntil, delayUntil); + if (!job || this.closing || this.paused) { + return; + } - return this.nextJobFromJobData(jobData, jobId, token); - } - }; + const handleCompleted = async (result: ResultType) => { + if (!this.connection.closing) { + const completed = await job.moveToCompleted( + result, + token, + fetchNextCallback() && !(this.closing || this.paused), + ); + this.emit('completed', job, result, 'active'); + const [jobData, jobId, limitUntil, delayUntil] = completed || []; + this.updateDelays(limitUntil, delayUntil); - const handleFailed = async (err: Error) => { - if (!this.connection.closing) { - try { - if (err.message == RATE_LIMIT_ERROR) { - this.limitUntil = await this.moveLimitedBackToWait(job, token); - return; + return this.nextJobFromJobData(jobData, jobId, token); } + }; - if ( - err instanceof DelayedError || - err.name == 'DelayedError' || - err instanceof WaitingChildrenError || - err.name == 'WaitingChildrenError' - ) { - return; + const handleFailed = async (err: Error) => { + if (!this.connection.closing) { + try { + if (err.message == RATE_LIMIT_ERROR) { + this.limitUntil = await this.moveLimitedBackToWait(job, token); + return; + } + + if ( + err instanceof DelayedError || + err.name == 'DelayedError' || + err instanceof WaitingChildrenError || + err.name == 'WaitingChildrenError' + ) { + return; + } + + const result = await job.moveToFailed(err, token, true); + this.emit('failed', job, err, 'active'); + + if (result) { + const [jobData, jobId, limitUntil, delayUntil] = result; + this.updateDelays(limitUntil, delayUntil); + return this.nextJobFromJobData(jobData, jobId, token); + } + } catch (err) { + this.emit('error', err); + // It probably means that the job has lost the lock before completion + // A worker will (or already has) moved the job back + // to the waiting list (as stalled) + } } + }; - const result = await job.moveToFailed(err, token, true); - this.emit('failed', job, err, 'active'); + this.emit('active', job, 'waiting'); - if (result) { - const [jobData, jobId, limitUntil, delayUntil] = result; - this.updateDelays(limitUntil, delayUntil); - return this.nextJobFromJobData(jobData, jobId, token); - } + const inProgressItem = { job, ts: Date.now() }; + + try { + jobsInProgress.add(inProgressItem); + const result = await this.callProcessJob(job, token); + return await handleCompleted(result); } catch (err) { - this.emit('error', err); - // It probably means that the job has lost the lock before completion - // A worker will (or already has) moved the job back - // to the waiting list (as stalled) + return handleFailed(err); + } finally { + jobsInProgress.delete(inProgressItem); } - } - }; - - this.emit('active', job, 'waiting'); - - const inProgressItem = { job, ts: Date.now() }; - - try { - jobsInProgress.add(inProgressItem); - const result = await this.callProcessJob(job, token); - return await handleCompleted(result); - } catch (err) { - return handleFailed(err); - } finally { - jobsInProgress.delete(inProgressItem); - - if (this.tracer) { - span.end(); - } - } + }, + ); } /** @@ -938,32 +870,28 @@ will never work with more accuracy than 1ms. */ * Pauses the processing of this queue only for this worker. */ async pause(doNotWaitActive?: boolean): Promise { - let span; - if (this.tracer) { - const spanName = `${this.name} ${this.id} Worker.pause`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.WorkerName]: this.name, - [TelemetryAttributes.WorkerId]: this.id, - [TelemetryAttributes.WorkerDoNotWaitActive]: doNotWaitActive, - }); - } - - if (!this.paused) { - this.paused = new Promise(resolve => { - this.resumeWorker = function () { - resolve(); - this.paused = null; // Allow pause to be checked externally for paused state. - this.resumeWorker = null; - }; - }); - await (!doNotWaitActive && this.whenCurrentJobsFinished()); - this.emit('paused'); - } + this.trace( + () => `${this.name} ${this.id} Worker.pause`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.WorkerName]: this.name, + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerDoNotWaitActive]: doNotWaitActive, + }); - if (this.tracer) { - span.end(); - } + if (!this.paused) { + this.paused = new Promise(resolve => { + this.resumeWorker = function () { + resolve(); + this.paused = null; // Allow pause to be checked externally for paused state. + this.resumeWorker = null; + }; + }); + await (!doNotWaitActive && this.whenCurrentJobsFinished()); + this.emit('paused'); + } + }, + ); } /** @@ -971,24 +899,20 @@ will never work with more accuracy than 1ms. */ * Resumes processing of this worker (if paused). */ resume(): void { - let span; - if (this.tracer) { - const spanName = `${this.name} ${this.id} Worker.resume`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.WorkerName]: this.name, - [TelemetryAttributes.WorkerId]: this.id, - }); - } - - if (this.resumeWorker) { - this.resumeWorker(); - this.emit('resumed'); - } + this.trace( + () => `${this.name} ${this.id} Worker.resume`, + span => { + span?.setAttributes({ + [TelemetryAttributes.WorkerName]: this.name, + [TelemetryAttributes.WorkerId]: this.id, + }); - if (this.tracer) { - span.end(); - } + if (this.resumeWorker) { + this.resumeWorker(); + this.emit('resumed'); + } + }, + ); } /** @@ -1023,63 +947,53 @@ will never work with more accuracy than 1ms. */ * @returns Promise that resolves when the worker has been closed. */ async close(force = false): Promise { - let span; - if (this.tracer) { - const spanName = `${this.name} ${this.id} Worker.close`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.WorkerName]: this.name, - [TelemetryAttributes.WorkerId]: this.id, - [TelemetryAttributes.WorkerForceClose]: force, - }); - } - - if (this.closing) { - if (this.tracer) { - span.end(); - } + this.trace( + () => `${this.name} ${this.id} Worker.close`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.WorkerName]: this.name, + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerForceClose]: force, + }); - return this.closing; - } - this.closing = (async () => { - this.emit('closing', 'closing queue'); - this.abortDelayController?.abort(); - - this.resume(); - - // Define the async cleanup functions - const asyncCleanups = [ - () => { - return force || this.whenCurrentJobsFinished(false); - }, - () => this.childPool?.clean(), - () => this.blockingConnection.close(force), - () => this.connection.close(force), - ]; - - // Run cleanup functions sequentially and make sure all are run despite any errors - for (const cleanup of asyncCleanups) { - try { - await cleanup(); - } catch (err) { - this.emit('error', err); + if (this.closing) { + return this.closing; } - } - - clearTimeout(this.extendLocksTimer); - clearTimeout(this.stalledCheckTimer); - - this.closed = true; - this.emit('closed'); - })(); + this.closing = (async () => { + this.emit('closing', 'closing queue'); + this.abortDelayController?.abort(); + + this.resume(); + + // Define the async cleanup functions + const asyncCleanups = [ + () => { + return force || this.whenCurrentJobsFinished(false); + }, + () => this.childPool?.clean(), + () => this.blockingConnection.close(force), + () => this.connection.close(force), + ]; + + // Run cleanup functions sequentially and make sure all are run despite any errors + for (const cleanup of asyncCleanups) { + try { + await cleanup(); + } catch (err) { + this.emit('error', err); + } + } - await this.closing; + clearTimeout(this.extendLocksTimer); + clearTimeout(this.stalledCheckTimer); - if (this.tracer) { - span.end(); - } + this.closed = true; + this.emit('closed'); + })(); - return; + return await this.closing; + }, + ); } /** @@ -1095,34 +1009,32 @@ will never work with more accuracy than 1ms. */ * @see {@link https://docs.bullmq.io/patterns/manually-fetching-jobs} */ async startStalledCheckTimer(): Promise { - let span; - if (this.tracer) { - const spanName = `${this.name} ${this.id} Worker.startStalledCheckTimer`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.WorkerName]: this.name, - [TelemetryAttributes.WorkerId]: this.id, - }); - } - - if (!this.opts.skipStalledCheck) { - clearTimeout(this.stalledCheckTimer); + this.trace( + () => `${this.name} ${this.id} Worker.startStalledCheckTimer`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.WorkerName]: this.name, + [TelemetryAttributes.WorkerId]: this.id, + }); - if (!this.closing) { - try { - await this.checkConnectionError(() => this.moveStalledJobsToWait()); - this.stalledCheckTimer = setTimeout(async () => { - await this.startStalledCheckTimer(); - }, this.opts.stalledInterval); - } catch (err) { - this.emit('error', err); + if (!this.opts.skipStalledCheck) { + clearTimeout(this.stalledCheckTimer); + + if (!this.closing) { + try { + await this.checkConnectionError(() => + this.moveStalledJobsToWait(), + ); + this.stalledCheckTimer = setTimeout(async () => { + await this.startStalledCheckTimer(); + }, this.opts.stalledInterval); + } catch (err) { + this.emit('error', err); + } + } } - } - } - - if (this.tracer) { - span.end(); - } + }, + ); } private startLockExtenderTimer( @@ -1204,89 +1116,82 @@ will never work with more accuracy than 1ms. */ } protected async extendLocks(jobs: Job[]) { - let span; - if (this.tracer) { - const spanName = `${this.name} ${this.id} Worker.extendLocks`; - span = this.tracer.startSpan(spanName); - const jobids = jobs.map(job => job.id); - span.setAttributes({ - [TelemetryAttributes.WorkerName]: this.name, - [TelemetryAttributes.WorkerId]: this.id, - [TelemetryAttributes.WorkerJobsInvolved]: JSON.stringify(jobs), - [TelemetryAttributes.WorkerJobsIdsInvolved]: jobids, - }); - } + this.trace( + () => `${this.name} ${this.id} Worker.extendLocks`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.WorkerName]: this.name, + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerJobsInvolved]: JSON.stringify(jobs), + [TelemetryAttributes.WorkerJobsIdsInvolved]: jobs.map(job => job.id), + }); - try { - const pipeline = (await this.client).pipeline(); - for (const job of jobs) { - await this.scripts.extendLock( - job.id, - job.token, - this.opts.lockDuration, - pipeline, - ); - } - const result = (await pipeline.exec()) as [Error, string][]; - - for (const [err, jobId] of result) { - if (err) { - // TODO: signal process function that the job has been lost. - this.emit( - 'error', - new Error(`could not renew lock for job ${jobId}`), - ); + try { + const pipeline = (await this.client).pipeline(); + for (const job of jobs) { + await this.scripts.extendLock( + job.id, + job.token, + this.opts.lockDuration, + pipeline, + ); + } + const result = (await pipeline.exec()) as [Error, string][]; + + for (const [err, jobId] of result) { + if (err) { + // TODO: signal process function that the job has been lost. + this.emit( + 'error', + new Error(`could not renew lock for job ${jobId}`), + ); + } + } + } catch (err) { + this.emit('error', err); } - } - } catch (err) { - this.emit('error', err); - } finally { - if (this.tracer) { - span.end(); - } - } + }, + ); } private async moveStalledJobsToWait() { - let span; - if (this.tracer) { - const spanName = `${this.name} ${this.id} Worker.moveStalledJobsToWait`; - span = this.tracer.startSpan(spanName); - span.setAttributes({ - [TelemetryAttributes.WorkerName]: this.name, - [TelemetryAttributes.WorkerId]: this.id, - }); - } - - const chunkSize = 50; - const [failed, stalled] = await this.scripts.moveStalledJobsToWait(); + this.trace( + () => `${this.name} ${this.id} Worker.moveStalledJobsToWait`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.WorkerName]: this.name, + [TelemetryAttributes.WorkerId]: this.id, + }); - stalled.forEach((jobId: string) => this.emit('stalled', jobId, 'active')); + const chunkSize = 50; + const [failed, stalled] = await this.scripts.moveStalledJobsToWait(); - const jobPromises: Promise>[] = []; - for (let i = 0; i < failed.length; i++) { - jobPromises.push( - Job.fromId( - this as MinimalQueue, - failed[i], - ), - ); + stalled.forEach((jobId: string) => + this.emit('stalled', jobId, 'active'), + ); - if ((i + 1) % chunkSize === 0) { - this.notifyFailedJobs(await Promise.all(jobPromises)); - jobPromises.length = 0; - } - } + const jobPromises: Promise>[] = []; + for (let i = 0; i < failed.length; i++) { + jobPromises.push( + Job.fromId( + this as MinimalQueue, + failed[i], + ), + ); - this.notifyFailedJobs(await Promise.all(jobPromises)); + if ((i + 1) % chunkSize === 0) { + this.notifyFailedJobs(await Promise.all(jobPromises)); + jobPromises.length = 0; + } + } - if (this.tracer) { - span.setAttributes({ - [TelemetryAttributes.WorkerJobsIdsInvolved]: stalled, - }); + this.notifyFailedJobs(await Promise.all(jobPromises)); - span.end(); - } + span?.setAttributes({ + [TelemetryAttributes.WorkerJobsIdsInvolved]: stalled, + }); + }, + ); } private notifyFailedJobs(failedJobs: Job[]) { diff --git a/src/interfaces/queue-options.ts b/src/interfaces/queue-options.ts index f1a006b059..5954df4617 100644 --- a/src/interfaces/queue-options.ts +++ b/src/interfaces/queue-options.ts @@ -32,6 +32,11 @@ export interface QueueBaseOptions { * @defaultValue false */ skipVersionCheck?: boolean; + + /** + * Telemetry client + */ + telemetry?: Telemetry; } /** diff --git a/src/interfaces/telemetry.ts b/src/interfaces/telemetry.ts index 418ed57505..5c78f79460 100644 --- a/src/interfaces/telemetry.ts +++ b/src/interfaces/telemetry.ts @@ -10,7 +10,6 @@ export interface Span { setAttribute(key: string, value: Attribute): Span; setAttributes(attributes: Attributes): Span; recordException(exception: Exception, time?: Time): void; - setStatus(code: SpanStatus): Span; end(): void; } @@ -54,14 +53,3 @@ interface NameException { export type Time = HighResolutionTime | number | Date; type HighResolutionTime = [number, number]; - -interface SpanStatus { - code: StatusCode; - message?: string; -} - -export enum StatusCode { - UNSET = 0, - OK = 1, - ERROR = 2, -} From 2e3fe3778583845fdaab9b0268c0a8d7d22932fe Mon Sep 17 00:00:00 2001 From: fgozdz Date: Wed, 4 Sep 2024 16:28:03 +0200 Subject: [PATCH 07/26] feat(queue, worker): telemetry attributes change, and fix promises --- src/classes/queue-base.ts | 5 ++ src/classes/queue.ts | 95 +++++++++---------------------- src/classes/worker.ts | 54 +++++++----------- src/enums/telemetry-attributes.ts | 5 +- 4 files changed, 52 insertions(+), 107 deletions(-) diff --git a/src/classes/queue-base.ts b/src/classes/queue-base.ts index eaeddf4208..60d04a28a9 100644 --- a/src/classes/queue-base.ts +++ b/src/classes/queue-base.ts @@ -11,6 +11,7 @@ import { RedisConnection } from './redis-connection'; import { Job } from './job'; import { KeysMap, QueueKeys } from './queue-keys'; import { Scripts } from './scripts'; +import { TelemetryAttributes } from '../enums'; /** * @class QueueBase @@ -191,6 +192,10 @@ export class QueueBase extends EventEmitter implements MinimalQueue { const span = this.tracer.startSpan(getSpanName()); + span.setAttributes({ + [TelemetryAttributes.QueueName]: this.name, + }); + try { return callback(span); } catch (err) { diff --git a/src/classes/queue.ts b/src/classes/queue.ts index d28a65ce80..5820cbe26c 100644 --- a/src/classes/queue.ts +++ b/src/classes/queue.ts @@ -221,13 +221,9 @@ export class Queue< data: DataType, opts?: JobsOptions, ): Promise> { - return this.trace( + return await this.trace>( () => `${this.name}.${name} Queue.add`, async span => { - span?.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - }); - if (opts && opts.repeat) { if (opts.repeat.endDate) { if (+new Date(opts.repeat.endDate) < Date.now()) { @@ -283,11 +279,10 @@ export class Queue< async addBulk( jobs: { name: NameType; data: DataType; opts?: BulkJobOptions }[], ): Promise[]> { - return this.trace( + return await this.trace[]>( () => `${this.name} Queue.addBulk`, async span => { span?.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, [TelemetryAttributes.BulkNames]: jobs.map(job => job.name), [TelemetryAttributes.BulkCount]: jobs.length, }); @@ -320,13 +315,9 @@ export class Queue< * and in that case it will add it there instead of the wait list. */ async pause(): Promise { - this.trace( + await this.trace( () => `${this.name} Queue.pause`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - }); - + async () => { await this.scripts.pause(true); this.emit('paused'); @@ -339,13 +330,9 @@ export class Queue< * */ async close(): Promise { - this.trace( + await this.trace( () => `${this.name} Queue.close`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - }); - + async () => { if (!this.closing) { if (this._repeat) { await this._repeat.close(); @@ -363,13 +350,9 @@ export class Queue< * queue. */ async resume(): Promise { - this.trace( + await this.trace( () => `${this.name} Queue.resume`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - }); - + async () => { await this.scripts.pause(false); this.emit('resumed'); @@ -427,13 +410,9 @@ export class Queue< repeatOpts: RepeatOptions, jobId?: string, ): Promise { - return this.trace( + return await this.trace( () => `${this.name} ${name} Queue.removeRepeatable`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - }); - + async () => { const repeat = await this.repeat; const removed = await repeat.removeRepeatable(name, repeatOpts, jobId); @@ -448,13 +427,9 @@ export class Queue< * @param id - identifier */ async removeDebounceKey(id: string): Promise { - return this.trace( + return await this.trace( () => `${this.name} ${id} Queue.removeDebounceKey`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - }); - + async () => { const client = await this.client; return await client.del(`${this.keys.de}:${id}`); @@ -473,11 +448,10 @@ export class Queue< * @returns */ async removeRepeatableByKey(key: string): Promise { - return this.trace( + return await this.trace( () => `${this.name} ${key} Queue.removeRepeatableByKey`, async span => { span?.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, [TelemetryAttributes.JobKey]: key, }); @@ -499,11 +473,10 @@ export class Queue< * any of its dependencies were locked. */ async remove(jobId: string, { removeChildren = true } = {}): Promise { - return this.trace( + return await this.trace( () => `${this.name} ${jobId} Queue.remove`, async span => { span?.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, [TelemetryAttributes.JobId]: jobId, [TelemetryAttributes.JobOptions]: JSON.stringify({ removeChildren, @@ -525,11 +498,10 @@ export class Queue< jobId: string, progress: number | object, ): Promise { - this.trace( + await this.trace( () => `${this.name} Queue.updateJobProgress`, async span => { span?.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, [TelemetryAttributes.JobId]: jobId, [TelemetryAttributes.JobProgress]: JSON.stringify(progress), }); @@ -564,11 +536,10 @@ export class Queue< * delayed jobs. */ async drain(delayed = false): Promise { - this.trace( + await this.trace( () => `${this.name} Queue.drain`, async span => { span?.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, [TelemetryAttributes.QueueDrainDelay]: delayed, }); @@ -599,26 +570,15 @@ export class Queue< | 'delayed' | 'failed' = 'completed', ): Promise { - return this.trace( + return await this.trace( () => `${this.name} Queue.clean`, async span => { - span?.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - [TelemetryAttributes.QueueGrace]: grace, - [TelemetryAttributes.JobType]: type, - }); - const maxCount = limit || Infinity; const maxCountPerCall = Math.min(10000, maxCount); const timestamp = Date.now() - grace; let deletedCount = 0; const deletedJobsIds: string[] = []; - span?.setAttributes({ - [TelemetryAttributes.QueueCleanLimit]: maxCount, - [TelemetryAttributes.JobTimestamp]: timestamp, - }); - while (deletedCount < maxCount) { const jobsIds = await this.scripts.cleanJobsInSet( type, @@ -636,6 +596,10 @@ export class Queue< } span?.setAttributes({ + [TelemetryAttributes.QueueGrace]: grace, + [TelemetryAttributes.JobType]: type, + [TelemetryAttributes.QueueCleanLimit]: maxCount, + [TelemetryAttributes.JobTimestamp]: timestamp, [TelemetryAttributes.JobId]: deletedJobsIds, }); @@ -656,13 +620,9 @@ export class Queue< * @param opts - Obliterate options. */ async obliterate(opts?: ObliterateOpts): Promise { - this.trace( + await this.trace( () => `${this.name} Queue.obliterate`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - }); - + async () => { await this.pause(); let cursor = 0; @@ -690,11 +650,10 @@ export class Queue< async retryJobs( opts: { count?: number; state?: FinishedStatus; timestamp?: number } = {}, ): Promise { - this.trace( + await this.trace( () => `${this.name} Queue.retryJobs`, async span => { span?.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, [TelemetryAttributes.QueueOptions]: JSON.stringify(opts), }); @@ -719,11 +678,10 @@ export class Queue< * @returns */ async promoteJobs(opts: { count?: number } = {}): Promise { - this.trace( + await this.trace( () => `${this.name} Queue.promoteJobs`, async span => { span?.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, [TelemetryAttributes.QueueOptions]: JSON.stringify(opts), }); @@ -741,11 +699,10 @@ export class Queue< * @param maxLength - */ async trimEvents(maxLength: number): Promise { - return this.trace( + return await this.trace( () => `${this.name} Queue.trimEvents`, async span => { span?.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, [TelemetryAttributes.QueueEventMaxLength]: maxLength, }); diff --git a/src/classes/worker.ts b/src/classes/worker.ts index 46afb57f1d..c6810f5fd6 100644 --- a/src/classes/worker.ts +++ b/src/classes/worker.ts @@ -35,7 +35,6 @@ import { RATE_LIMIT_ERROR, WaitingChildrenError, } from './errors'; -import { Tracer } from '../interfaces'; import { TelemetryAttributes } from '../enums'; // 10 seconds is the maximum time a BRPOPLPUSH can block. @@ -404,11 +403,10 @@ export class Worker< } async run() { - this.trace( + await this.trace( () => `${this.name} ${this.id} Worker.run`, async span => { span?.setAttributes({ - [TelemetryAttributes.WorkerName]: this.name, [TelemetryAttributes.WorkerId]: this.id, [TelemetryAttributes.WorkerOptions]: JSON.stringify(this.opts), }); @@ -533,16 +531,9 @@ export class Worker< * @returns a Job or undefined if no job was available in the queue. */ async getNextJob(token: string, { block = true }: GetNextJobOptions = {}) { - return this.trace( + return await this.trace>( () => `${this.name} ${this.id} Worker.getNextJob`, async span => { - span?.setAttributes({ - [TelemetryAttributes.WorkerName]: this.name, - [TelemetryAttributes.WorkerId]: this.id, - [TelemetryAttributes.WorkerToken]: token, - [TelemetryAttributes.WorkerOptions]: JSON.stringify({ block }), - }); - const nextJob = await this._getNextJob( await this.client, await this.blockingConnection.client, @@ -551,7 +542,10 @@ export class Worker< ); span?.setAttributes({ - [TelemetryAttributes.JobId]: nextJob.id, + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerToken]: token, + [TelemetryAttributes.WorkerOptions]: JSON.stringify({ block }), + [TelemetryAttributes.JobId]: nextJob?.id, }); return nextJob; @@ -616,11 +610,10 @@ export class Worker< * @param expireTimeMs - expire time in ms of this rate limit. */ async rateLimit(expireTimeMs: number): Promise { - this.trace( + await this.trace( () => `${this.name} ${this.id} Worker.rateLimit`, async span => { span?.setAttributes({ - [TelemetryAttributes.WorkerName]: this.name, [TelemetryAttributes.WorkerId]: this.id, [TelemetryAttributes.WorkerRateLimit]: expireTimeMs, }); @@ -785,11 +778,10 @@ will never work with more accuracy than 1ms. */ fetchNextCallback = () => true, jobsInProgress: Set<{ job: Job; ts: number }>, ): Promise> { - return this.trace( + return await this.trace>( () => `${this.name} ${this.id} Worker.processJob`, async span => { span?.setAttributes({ - [TelemetryAttributes.WorkerName]: this.name, [TelemetryAttributes.WorkerId]: this.id, [TelemetryAttributes.WorkerToken]: token, [TelemetryAttributes.JobId]: job.id, @@ -870,11 +862,10 @@ will never work with more accuracy than 1ms. */ * Pauses the processing of this queue only for this worker. */ async pause(doNotWaitActive?: boolean): Promise { - this.trace( + await this.trace( () => `${this.name} ${this.id} Worker.pause`, async span => { span?.setAttributes({ - [TelemetryAttributes.WorkerName]: this.name, [TelemetryAttributes.WorkerId]: this.id, [TelemetryAttributes.WorkerDoNotWaitActive]: doNotWaitActive, }); @@ -899,11 +890,10 @@ will never work with more accuracy than 1ms. */ * Resumes processing of this worker (if paused). */ resume(): void { - this.trace( + this.trace( () => `${this.name} ${this.id} Worker.resume`, span => { span?.setAttributes({ - [TelemetryAttributes.WorkerName]: this.name, [TelemetryAttributes.WorkerId]: this.id, }); @@ -947,11 +937,10 @@ will never work with more accuracy than 1ms. */ * @returns Promise that resolves when the worker has been closed. */ async close(force = false): Promise { - this.trace( + await this.trace( () => `${this.name} ${this.id} Worker.close`, async span => { span?.setAttributes({ - [TelemetryAttributes.WorkerName]: this.name, [TelemetryAttributes.WorkerId]: this.id, [TelemetryAttributes.WorkerForceClose]: force, }); @@ -1009,11 +998,10 @@ will never work with more accuracy than 1ms. */ * @see {@link https://docs.bullmq.io/patterns/manually-fetching-jobs} */ async startStalledCheckTimer(): Promise { - this.trace( + await this.trace( () => `${this.name} ${this.id} Worker.startStalledCheckTimer`, async span => { span?.setAttributes({ - [TelemetryAttributes.WorkerName]: this.name, [TelemetryAttributes.WorkerId]: this.id, }); @@ -1116,14 +1104,14 @@ will never work with more accuracy than 1ms. */ } protected async extendLocks(jobs: Job[]) { - this.trace( + await this.trace( () => `${this.name} ${this.id} Worker.extendLocks`, async span => { span?.setAttributes({ - [TelemetryAttributes.WorkerName]: this.name, [TelemetryAttributes.WorkerId]: this.id, - [TelemetryAttributes.WorkerJobsInvolved]: JSON.stringify(jobs), - [TelemetryAttributes.WorkerJobsIdsInvolved]: jobs.map(job => job.id), + [TelemetryAttributes.WorkerJobsToExtendLocks]: jobs.map( + job => job.id, + ), }); try { @@ -1155,14 +1143,9 @@ will never work with more accuracy than 1ms. */ } private async moveStalledJobsToWait() { - this.trace( + await this.trace( () => `${this.name} ${this.id} Worker.moveStalledJobsToWait`, async span => { - span?.setAttributes({ - [TelemetryAttributes.WorkerName]: this.name, - [TelemetryAttributes.WorkerId]: this.id, - }); - const chunkSize = 50; const [failed, stalled] = await this.scripts.moveStalledJobsToWait(); @@ -1188,7 +1171,8 @@ will never work with more accuracy than 1ms. */ this.notifyFailedJobs(await Promise.all(jobPromises)); span?.setAttributes({ - [TelemetryAttributes.WorkerJobsIdsInvolved]: stalled, + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerStalledJobs]: stalled, }); }, ); diff --git a/src/enums/telemetry-attributes.ts b/src/enums/telemetry-attributes.ts index be2dcd9eea..78c80ff658 100644 --- a/src/enums/telemetry-attributes.ts +++ b/src/enums/telemetry-attributes.ts @@ -1,6 +1,5 @@ export enum TelemetryAttributes { QueueName = 'bullmq.queue.name', - WorkerName = 'bullmq.worker.name', BulkCount = 'bullmq.job.bulk.count', BulkNames = 'bullmq.job.bulk.names', JobName = 'bullmq.job.name', @@ -21,6 +20,6 @@ export enum TelemetryAttributes { WorkerRateLimit = 'bullmq.worker.rate.limit', WorkerDoNotWaitActive = 'bullmq.worker.do.not.wait.active', WorkerForceClose = 'bullmq.worker.force.close', - WorkerJobsInvolved = 'bullmq.worker.jobs.involved', - WorkerJobsIdsInvolved = 'bullmq.worker.jobs.ids.involved', + WorkerStalledJobs = 'bullmq.worker.stalled.jobs', + WorkerJobsToExtendLocks = 'bullmq.worker.jobs.to.extend.locks', } From 7f6c674cf7fef0350d7b8b45429ff47f520ed916 Mon Sep 17 00:00:00 2001 From: fgozdz Date: Thu, 5 Sep 2024 11:15:16 +0200 Subject: [PATCH 08/26] Revert "feat(queue, worker): resolve conflict" This reverts commit c4cb51736ba8acf1d163b34ab1763018b8896478, reversing changes made to 69c129670a468b6b6ce1b8600e1928826d53f8bb. --- README.md | 40 +- docs/gitbook/SUMMARY.md | 222 ++++----- docs/gitbook/guide/jobs/repeatable.md | 15 +- docs/gitbook/patterns/timeout-jobs.md | 25 +- docs/gitbook/python/changelog.md | 130 ++---- docs/gitbook/python/introduction.md | 2 +- src/classes/queue-base.ts | 36 +- src/classes/queue.ts | 361 +++++---------- src/classes/worker.ts | 643 +++++++++++--------------- src/enums/index.ts | 1 - src/enums/telemetry-attributes.ts | 25 - src/interfaces/index.ts | 1 - src/interfaces/queue-options.ts | 11 - src/interfaces/telemetry.ts | 55 --- src/interfaces/worker-options.ts | 6 - 15 files changed, 583 insertions(+), 990 deletions(-) delete mode 100644 src/enums/telemetry-attributes.ts delete mode 100644 src/interfaces/telemetry.ts diff --git a/README.md b/README.md index a07ebe6342..9e718318aa 100644 --- a/README.md +++ b/README.md @@ -220,26 +220,26 @@ This is just scratching the surface, check all the features and more in the offi Since there are a few job queue solutions, here is a table comparing them: -| Feature | [BullMQ-Pro](https://bullmq.io/#bullmq-pro) | [BullMQ](https://bullmq.io) | Bull | Kue | Bee | Agenda | -| :------------------------ | :-----------------------------------------: | :-------------------------: | :-------------: | :---: | -------- | ------ | -| Backend | redis | redis | redis | redis | redis | mongo | -| Observables | ✓ | | | | | | -| Group Rate Limit | ✓ | | | | | | -| Group Support | ✓ | | | | | | -| Batches Support | ✓ | | | | | | -| Parent/Child Dependencies | ✓ | ✓ | | | | | -| Priorities | ✓ | ✓ | ✓ | ✓ | | ✓ | -| Concurrency | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | -| Delayed jobs | ✓ | ✓ | ✓ | ✓ | | ✓ | -| Global events | ✓ | ✓ | ✓ | ✓ | | | -| Rate Limiter | ✓ | ✓ | ✓ | | | | -| Pause/Resume | ✓ | ✓ | ✓ | ✓ | | | -| Sandboxed worker | ✓ | ✓ | ✓ | | | | -| Repeatable jobs | ✓ | ✓ | ✓ | | | ✓ | -| Atomic ops | ✓ | ✓ | ✓ | | ✓ | | -| Persistence | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | -| UI | ✓ | ✓ | ✓ | ✓ | | ✓ | -| Optimized for | Jobs / Messages | Jobs / Messages | Jobs / Messages | Jobs | Messages | Jobs | +| Feature | [BullMQ-Pro](https://bullmq.io/#bullmq-pro) | [BullMQ](https://bullmq.io) | Bull | Kue | Bee | Agenda | +| :------------------------ | :-------------: | :-------------: | :-------------: | :---: | -------- | ------ | +| Backend | redis | redis | redis | redis | redis | mongo | +| Observables | ✓ | | | | | | +| Group Rate Limit | ✓ | | | | | | +| Group Support | ✓ | | | | | | +| Batches Support | ✓ | | | | | | +| Parent/Child Dependencies | ✓ | ✓ | | | | | +| Priorities | ✓ | ✓ | ✓ | ✓ | | ✓ | +| Concurrency | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +| Delayed jobs | ✓ | ✓ | ✓ | ✓ | | ✓ | +| Global events | ✓ | ✓ | ✓ | ✓ | | | +| Rate Limiter | ✓ | ✓ | ✓ | | | | +| Pause/Resume | ✓ | ✓ | ✓ | ✓ | | | +| Sandboxed worker | ✓ | ✓ | ✓ | | | | +| Repeatable jobs | ✓ | ✓ | ✓ | | | ✓ | +| Atomic ops | ✓ | ✓ | ✓ | | ✓ | | +| Persistence | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +| UI | ✓ | ✓ | ✓ | ✓ | | ✓ | +| Optimized for | Jobs / Messages | Jobs / Messages | Jobs / Messages | Jobs | Messages | Jobs | ## Contributing diff --git a/docs/gitbook/SUMMARY.md b/docs/gitbook/SUMMARY.md index 476c6548bb..c4c338c6e3 100644 --- a/docs/gitbook/SUMMARY.md +++ b/docs/gitbook/SUMMARY.md @@ -1,131 +1,131 @@ # Table of contents -- [What is BullMQ](README.md) -- [Quick Start]() -- [API Reference](https://api.docs.bullmq.io) -- [Changelogs](changelog.md) - - [v4](changelogs/changelog-v4.md) - - [v3](changelogs/changelog-v3.md) - - [v2](changelogs/changelog-v2.md) - - [v1](changelogs/changelog-v1.md) +* [What is BullMQ](README.md) +* [Quick Start]() +* [API Reference](https://api.docs.bullmq.io) +* [Changelogs](changelog.md) + * [v4](changelogs/changelog-v4.md) + * [v3](changelogs/changelog-v3.md) + * [v2](changelogs/changelog-v2.md) + * [v1](changelogs/changelog-v1.md) ## Guide -- [Introduction](guide/introduction.md) -- [Connections](guide/connections.md) -- [Queues](guide/queues/README.md) - - [Auto-removal of jobs](guide/queues/auto-removal-of-jobs.md) - - [Adding jobs in bulk](guide/queues/adding-bulks.md) - - [Global Concurrency](guide/queues/global-concurrency.md) - - [Removing Jobs](guide/queues/removing-jobs.md) -- [Workers](guide/workers/README.md) - - [Auto-removal of jobs](guide/workers/auto-removal-of-jobs.md) - - [Concurrency](guide/workers/concurrency.md) - - [Graceful shutdown](guide/workers/graceful-shutdown.md) - - [Stalled Jobs](guide/workers/stalled-jobs.md) - - [Sandboxed processors](guide/workers/sandboxed-processors.md) - - [Pausing queues](guide/workers/pausing-queues.md) -- [Jobs](guide/jobs/README.md) - - [FIFO](guide/jobs/fifo.md) - - [LIFO](guide/jobs/lifo.md) - - [Job Ids](guide/jobs/job-ids.md) - - [Job Data](guide/jobs/job-data.md) - - [Debouncing](guide/jobs/debouncing.md) - - [Delayed](guide/jobs/delayed.md) - - [Repeatable](guide/jobs/repeatable.md) - - [Prioritized](guide/jobs/prioritized.md) - - [Removing jobs](guide/jobs/removing-job.md) - - [Stalled](guide/jobs/stalled.md) - - [Getters](guide/jobs/getters.md) -- [Flows](guide/flows/README.md) - - [Adding flows in bulk](guide/flows/adding-bulks.md) - - [Get Flow Tree](guide/flows/get-flow-tree.md) - - [Fail Parent](guide/flows/fail-parent.md) - - [Remove Dependency](guide/flows/remove-dependency.md) - - [Ignore Dependency](guide/flows/ignore-dependency.md) - - [Remove Child Dependency](guide/flows/remove-child-dependency.md) -- [Metrics](guide/metrics/metrics.md) -- [Rate limiting](guide/rate-limiting.md) -- [Parallelism and Concurrency](guide/parallelism-and-concurrency.md) -- [Retrying failing jobs](guide/retrying-failing-jobs.md) -- [Returning job data](guide/returning-job-data.md) -- [Events](guide/events.md) -- [QueueScheduler](guide/queuescheduler.md) -- [Redis™ Compatibility](guide/redis-tm-compatibility/README.md) - - [Dragonfly](guide/redis-tm-compatibility/dragonfly.md) -- [Redis™ hosting](guide/redis-tm-hosting/README.md) - - [AWS MemoryDB](guide/redis-tm-hosting/aws-memorydb.md) - - [AWS Elasticache](guide/redis-tm-hosting/aws-elasticache.md) -- [Architecture](guide/architecture.md) -- [NestJs](guide/nestjs/README.md) - - [Producers](guide/nestjs/producers.md) - - [Queue Events Listeners](guide/nestjs/queue-events-listeners.md) -- [Going to production](guide/going-to-production.md) -- [Migration to newer versions](guide/migration-to-newer-versions.md) -- [Troubleshooting](guide/troubleshooting.md) +* [Introduction](guide/introduction.md) +* [Connections](guide/connections.md) +* [Queues](guide/queues/README.md) + * [Auto-removal of jobs](guide/queues/auto-removal-of-jobs.md) + * [Adding jobs in bulk](guide/queues/adding-bulks.md) + * [Global Concurrency](guide/queues/global-concurrency.md) + * [Removing Jobs](guide/queues/removing-jobs.md) +* [Workers](guide/workers/README.md) + * [Auto-removal of jobs](guide/workers/auto-removal-of-jobs.md) + * [Concurrency](guide/workers/concurrency.md) + * [Graceful shutdown](guide/workers/graceful-shutdown.md) + * [Stalled Jobs](guide/workers/stalled-jobs.md) + * [Sandboxed processors](guide/workers/sandboxed-processors.md) + * [Pausing queues](guide/workers/pausing-queues.md) +* [Jobs](guide/jobs/README.md) + * [FIFO](guide/jobs/fifo.md) + * [LIFO](guide/jobs/lifo.md) + * [Job Ids](guide/jobs/job-ids.md) + * [Job Data](guide/jobs/job-data.md) + * [Debouncing](guide/jobs/debouncing.md) + * [Delayed](guide/jobs/delayed.md) + * [Repeatable](guide/jobs/repeatable.md) + * [Prioritized](guide/jobs/prioritized.md) + * [Removing jobs](guide/jobs/removing-job.md) + * [Stalled](guide/jobs/stalled.md) + * [Getters](guide/jobs/getters.md) +* [Flows](guide/flows/README.md) + * [Adding flows in bulk](guide/flows/adding-bulks.md) + * [Get Flow Tree](guide/flows/get-flow-tree.md) + * [Fail Parent](guide/flows/fail-parent.md) + * [Remove Dependency](guide/flows/remove-dependency.md) + * [Ignore Dependency](guide/flows/ignore-dependency.md) + * [Remove Child Dependency](guide/flows/remove-child-dependency.md) +* [Metrics](guide/metrics/metrics.md) +* [Rate limiting](guide/rate-limiting.md) +* [Parallelism and Concurrency](guide/parallelism-and-concurrency.md) +* [Retrying failing jobs](guide/retrying-failing-jobs.md) +* [Returning job data](guide/returning-job-data.md) +* [Events](guide/events.md) +* [QueueScheduler](guide/queuescheduler.md) +* [Redis™ Compatibility](guide/redis-tm-compatibility/README.md) + * [Dragonfly](guide/redis-tm-compatibility/dragonfly.md) +* [Redis™ hosting](guide/redis-tm-hosting/README.md) + * [AWS MemoryDB](guide/redis-tm-hosting/aws-memorydb.md) + * [AWS Elasticache](guide/redis-tm-hosting/aws-elasticache.md) +* [Architecture](guide/architecture.md) +* [NestJs](guide/nestjs/README.md) + * [Producers](guide/nestjs/producers.md) + * [Queue Events Listeners](guide/nestjs/queue-events-listeners.md) +* [Going to production](guide/going-to-production.md) +* [Migration to newer versions](guide/migration-to-newer-versions.md) +* [Troubleshooting](guide/troubleshooting.md) ## Patterns -- [Adding jobs in bulk across different queues](patterns/adding-bulks.md) -- [Manually processing jobs](patterns/manually-fetching-jobs.md) -- [Named Processor](patterns/named-processor.md) -- [Flows](patterns/flows.md) -- [Idempotent jobs](patterns/idempotent-jobs.md) -- [Throttle jobs](patterns/throttle-jobs.md) -- [Process Step Jobs](patterns/process-step-jobs.md) -- [Failing fast when Redis is down](patterns/failing-fast-when-redis-is-down.md) -- [Stop retrying jobs](patterns/stop-retrying-jobs.md) -- [Timeout jobs](patterns/timeout-jobs.md) -- [Redis Cluster](patterns/redis-cluster.md) +* [Adding jobs in bulk across different queues](patterns/adding-bulks.md) +* [Manually processing jobs](patterns/manually-fetching-jobs.md) +* [Named Processor](patterns/named-processor.md) +* [Flows](patterns/flows.md) +* [Idempotent jobs](patterns/idempotent-jobs.md) +* [Throttle jobs](patterns/throttle-jobs.md) +* [Process Step Jobs](patterns/process-step-jobs.md) +* [Failing fast when Redis is down](patterns/failing-fast-when-redis-is-down.md) +* [Stop retrying jobs](patterns/stop-retrying-jobs.md) +* [Timeout jobs](patterns/timeout-jobs.md) +* [Redis Cluster](patterns/redis-cluster.md) ## BullMQ Pro -- [Introduction](bullmq-pro/introduction.md) -- [Install](bullmq-pro/install.md) -- [Observables](bullmq-pro/observables/README.md) - - [Cancelation](bullmq-pro/observables/cancelation.md) -- [Groups](bullmq-pro/groups/README.md) - - [Getters](bullmq-pro/groups/getters.md) - - [Rate limiting](bullmq-pro/groups/rate-limiting.md) - - [Concurrency](bullmq-pro/groups/concurrency.md) - - [Local group concurrency](bullmq-pro/groups/local-group-concurrency.md) - - [Max group size](bullmq-pro/groups/max-group-size.md) - - [Pausing groups](bullmq-pro/groups/pausing-groups.md) - - [Prioritized intra-groups](bullmq-pro/groups/prioritized.md) - - [Sandboxes for groups](bullmq-pro/groups/sandboxes-for-groups.md) -- [Batches](bullmq-pro/batches.md) -- [NestJs](bullmq-pro/nestjs/README.md) - - [Producers](bullmq-pro/nestjs/producers.md) - - [Queue Events Listeners](bullmq-pro/nestjs/queue-events-listeners.md) - - [API Reference](https://nestjs.bullmq.pro/) - - [Changelog](bullmq-pro/nestjs/changelog.md) -- [API Reference](https://api.bullmq.pro) -- [Changelog](bullmq-pro/changelog.md) -- [Support](bullmq-pro/support.md) +* [Introduction](bullmq-pro/introduction.md) +* [Install](bullmq-pro/install.md) +* [Observables](bullmq-pro/observables/README.md) + * [Cancelation](bullmq-pro/observables/cancelation.md) +* [Groups](bullmq-pro/groups/README.md) + * [Getters](bullmq-pro/groups/getters.md) + * [Rate limiting](bullmq-pro/groups/rate-limiting.md) + * [Concurrency](bullmq-pro/groups/concurrency.md) + * [Local group concurrency](bullmq-pro/groups/local-group-concurrency.md) + * [Max group size](bullmq-pro/groups/max-group-size.md) + * [Pausing groups](bullmq-pro/groups/pausing-groups.md) + * [Prioritized intra-groups](bullmq-pro/groups/prioritized.md) + * [Sandboxes for groups](bullmq-pro/groups/sandboxes-for-groups.md) +* [Batches](bullmq-pro/batches.md) +* [NestJs](bullmq-pro/nestjs/README.md) + * [Producers](bullmq-pro/nestjs/producers.md) + * [Queue Events Listeners](bullmq-pro/nestjs/queue-events-listeners.md) + * [API Reference](https://nestjs.bullmq.pro/) + * [Changelog](bullmq-pro/nestjs/changelog.md) +* [API Reference](https://api.bullmq.pro) +* [Changelog](bullmq-pro/changelog.md) +* [Support](bullmq-pro/support.md) ## Bull -- [Introduction](bull/introduction.md) -- [Install](bull/install.md) -- [Quick Guide](bull/quick-guide.md) -- [Important Notes](bull/important-notes.md) -- [Reference](https://github.com/OptimalBits/bull/blob/develop/REFERENCE.md) -- [Patterns](bull/patterns/README.md) - - [Persistent connections](bull/patterns/persistent-connections.md) - - [Message queue](bull/patterns/message-queue.md) - - [Returning Job Completions](bull/patterns/returning-job-completions.md) - - [Reusing Redis Connections](bull/patterns/reusing-redis-connections.md) - - [Redis cluster](bull/patterns/redis-cluster.md) - - [Custom backoff strategy](bull/patterns/custom-backoff-strategy.md) - - [Debugging](bull/patterns/debugging.md) - - [Manually fetching jobs](bull/patterns/manually-fetching-jobs.md) +* [Introduction](bull/introduction.md) +* [Install](bull/install.md) +* [Quick Guide](bull/quick-guide.md) +* [Important Notes](bull/important-notes.md) +* [Reference](https://github.com/OptimalBits/bull/blob/develop/REFERENCE.md) +* [Patterns](bull/patterns/README.md) + * [Persistent connections](bull/patterns/persistent-connections.md) + * [Message queue](bull/patterns/message-queue.md) + * [Returning Job Completions](bull/patterns/returning-job-completions.md) + * [Reusing Redis Connections](bull/patterns/reusing-redis-connections.md) + * [Redis cluster](bull/patterns/redis-cluster.md) + * [Custom backoff strategy](bull/patterns/custom-backoff-strategy.md) + * [Debugging](bull/patterns/debugging.md) + * [Manually fetching jobs](bull/patterns/manually-fetching-jobs.md) ## Bull 3.x Migration -- [Compatibility class](bull-3.x-migration/compatibility-class.md) -- [Migration](bull-3.x-migration/migration.md) +* [Compatibility class](bull-3.x-migration/compatibility-class.md) +* [Migration](bull-3.x-migration/migration.md) ## Python -- [Introduction](python/introduction.md) -- [Changelog](python/changelog.md) +* [Introduction](python/introduction.md) +* [Changelog](python/changelog.md) diff --git a/docs/gitbook/guide/jobs/repeatable.md b/docs/gitbook/guide/jobs/repeatable.md index 50a424b59d..8c0c3af497 100644 --- a/docs/gitbook/guide/jobs/repeatable.md +++ b/docs/gitbook/guide/jobs/repeatable.md @@ -48,9 +48,9 @@ await myQueue.add( There are some important considerations regarding repeatable jobs: -- Bull is smart enough not to add the same repeatable job if the repeat options are the same. -- If there are no workers running, repeatable jobs will not accumulate next time a worker is online. -- Repeatable jobs can be removed using the [`removeRepeatable`](https://api.docs.bullmq.io/classes/v5.Queue.html#removeRepeatable) or [`removeRepeatableByKey`](https://api.docs.bullmq.io/classes/v5.Queue.html#removeRepeatableByKey) methods. +* Bull is smart enough not to add the same repeatable job if the repeat options are the same. +* If there are no workers running, repeatable jobs will not accumulate next time a worker is online. +* Repeatable jobs can be removed using the [`removeRepeatable`](https://api.docs.bullmq.io/classes/v5.Queue.html#removeRepeatable) or [`removeRepeatableByKey`](https://api.docs.bullmq.io/classes/v5.Queue.html#removeRepeatableByKey) methods. ```typescript import { Queue } from 'bullmq'; @@ -61,7 +61,7 @@ const myQueue = new Queue('Paint'); const job1 = await myQueue.add('red', { foo: 'bar' }, { repeat }); const job2 = await myQueue.add('blue', { foo: 'baz' }, { repeat }); - + const isRemoved1 = await myQueue.removeRepeatableByKey(job1.repeatJobKey); const isRemoved2 = await queue.removeRepeatable('blue', repeat); ``` @@ -223,6 +223,7 @@ await myQueue.add( }, }, ); + ``` #### Updating repeatable job's options @@ -247,6 +248,6 @@ The code above will not create a new repeatable meta job, it will just update th ### Read more: -- 💡 [Repeat Strategy API Reference](https://api.docs.bullmq.io/types/v5.RepeatStrategy.html) -- 💡 [Remove Repeatable Job API Reference](https://api.docs.bullmq.io/classes/v5.Queue.html#removeRepeatable) -- 💡 [Remove Repeatable Job by Key API Reference](https://api.docs.bullmq.io/classes/v5.Queue.html#removeRepeatableByKey) +* 💡 [Repeat Strategy API Reference](https://api.docs.bullmq.io/types/v5.RepeatStrategy.html) +* 💡 [Remove Repeatable Job API Reference](https://api.docs.bullmq.io/classes/v5.Queue.html#removeRepeatable) +* 💡 [Remove Repeatable Job by Key API Reference](https://api.docs.bullmq.io/classes/v5.Queue.html#removeRepeatableByKey) diff --git a/docs/gitbook/patterns/timeout-jobs.md b/docs/gitbook/patterns/timeout-jobs.md index 8e43dc91fb..a70bc9845d 100644 --- a/docs/gitbook/patterns/timeout-jobs.md +++ b/docs/gitbook/patterns/timeout-jobs.md @@ -2,18 +2,18 @@ BullMQ does not provide a specific mechanism to timeout jobs, however this can be accomplished in many cases with a custom timeout code in the worker's process function. -The basic concept is to set up a timeout callback that will abort the job processing, and throw an UnrecoverableError (to avoid retries, although this may not alway be the desired behaviour, if so just throw a normal Error). Note how we specified the timeout as a property of the job's data, in case we want to have different timeouts depending on the job, but we could also have a fixed constant timeout for all jobs if we wanted. +The basic concept is to set up a timeout callback that will abort the job processing, and throw an UnrecoverableError (to avoid retries, although this may not alway be the desired behaviour, if so just throw a normal Error). Note how we specified the timeout as a property of the job's data, in case we want to have different timeouts depending on the job, but we could also have a fixed constant timeout for all jobs if we wanted. ```typescript const worker = new Worker('foo', async job => { let controller = new AbortController(); const timer = setTimeout(() => controller.abort(), job.data.timeout); - + try { await doSomethingAbortable(controller.signal); - } catch (err) { - if (err.name == 'AbortError') { - throw new UnrecoverableError('Timeout'); + } catch(err) { + if (err.name == "AbortError") { + throw new UnrecoverableError("Timeout"); } else { throw err; } @@ -28,22 +28,22 @@ In this simple example we assume that doSomethingAbortable is an asynchronous fu Now let's see another case when we want to timeout a fetch call, it would look like this: ```typescript -const worker = new Worker('foo', async job => { +const worker = new Worker("foo", async (job) => { let controller = new AbortController(); const timer = setTimeout(() => controller.abort(), job.data.timeout); try { - let response = await fetch('/slowserver.com', { + let response = await fetch("/slowserver.com", { signal: controller.signal, - }); + }); const result = await response.text(); } catch (err) { - if (err.name == 'AbortError') { - throw new UnrecoverableError('Timeout'); + if (err.name == "AbortError") { + throw new UnrecoverableError("Timeout"); } else { throw err; } } finally { - clearTimeout(timer); + clearTimeout(timer) } }); ``` @@ -51,3 +51,6 @@ const worker = new Worker('foo', async job => { In this example we are aborting the fetch call using [AbortController](https://developer.mozilla.org/en-US/docs/Web/API/AbortController), which is the default mechanism provided by fetch to abort calls. Note that abort will even cause the async call to response.text() to also throw an Abort exception. In summary, while it is possible to implement timeout in your jobs, the mechanism to do it may vary depending on the type of asynchronous operations your jobs is performing, but in many cases using AbortController in combination with a setTimeout is more than enough. + + + diff --git a/docs/gitbook/python/changelog.md b/docs/gitbook/python/changelog.md index 09f53fa11f..aa4f42f1cc 100644 --- a/docs/gitbook/python/changelog.md +++ b/docs/gitbook/python/changelog.md @@ -3,135 +3,105 @@ ## v2.9.3 (2024-08-31) - ### Fix - -- **flows:** Throw error when queueName contains colon (#2719) fixes #2718 ([`9ef97c3`](https://github.com/taskforcesh/bullmq/commit/9ef97c37663e209f03c501a357b6b1a662b24d99)) -- **sandboxed:** Properly update data on wrapped job (#2739) fixes #2731 ([`9c4b245`](https://github.com/taskforcesh/bullmq/commit/9c4b2454025a14459de47b0586a09130d7a93cae)) -- **flow:** Remove debounce key when parent is moved to fail ([#2720](https://github.com/taskforcesh/bullmq/issues/2720)) ([`d51aabe`](https://github.com/taskforcesh/bullmq/commit/d51aabe999a489c285f871d21e36c3c84e2bef33)) -- **flow:** Recursive ignoreDependencyOnFailure option ([#2712](https://github.com/taskforcesh/bullmq/issues/2712)) ([`53bc9eb`](https://github.com/taskforcesh/bullmq/commit/53bc9eb68b5bb0a470a8fe64ef78ece5cde44632)) -- **job:** Throw error if removeDependencyOnFailure and ignoreDependencyOnFailure are used together ([#2711](https://github.com/taskforcesh/bullmq/issues/2711)) ([`967632c`](https://github.com/taskforcesh/bullmq/commit/967632c9ef8468aab59f0b36d1d828bcde1fbd70)) -- **stalled:** Support removeDependencyOnFailure option when job is stalled ([#2708](https://github.com/taskforcesh/bullmq/issues/2708)) ([`e0d3790`](https://github.com/taskforcesh/bullmq/commit/e0d3790e755c4dfe31006b52f177f08b40348e61)) -- **job:** Change moveToFinished return type to reflect jobData (#2706) ref #2342 ([`de094a3`](https://github.com/taskforcesh/bullmq/commit/de094a361a25886acbee0112bb4341c6b285b1c9)) -- **connection:** Remove unnecessary process.env.CI reference ([#2705](https://github.com/taskforcesh/bullmq/issues/2705)) ([`53de304`](https://github.com/taskforcesh/bullmq/commit/53de3049493ef79e02af40e8e450e2056c134155)) -- **worker:** Fix close sequence to reduce risk for open handlers ([#2656](https://github.com/taskforcesh/bullmq/issues/2656)) ([`8468e44`](https://github.com/taskforcesh/bullmq/commit/8468e44e5e9e39c7b65691945c26688a9e5d2275)) +* **flows:** Throw error when queueName contains colon (#2719) fixes #2718 ([`9ef97c3`](https://github.com/taskforcesh/bullmq/commit/9ef97c37663e209f03c501a357b6b1a662b24d99)) +* **sandboxed:** Properly update data on wrapped job (#2739) fixes #2731 ([`9c4b245`](https://github.com/taskforcesh/bullmq/commit/9c4b2454025a14459de47b0586a09130d7a93cae)) +* **flow:** Remove debounce key when parent is moved to fail ([#2720](https://github.com/taskforcesh/bullmq/issues/2720)) ([`d51aabe`](https://github.com/taskforcesh/bullmq/commit/d51aabe999a489c285f871d21e36c3c84e2bef33)) +* **flow:** Recursive ignoreDependencyOnFailure option ([#2712](https://github.com/taskforcesh/bullmq/issues/2712)) ([`53bc9eb`](https://github.com/taskforcesh/bullmq/commit/53bc9eb68b5bb0a470a8fe64ef78ece5cde44632)) +* **job:** Throw error if removeDependencyOnFailure and ignoreDependencyOnFailure are used together ([#2711](https://github.com/taskforcesh/bullmq/issues/2711)) ([`967632c`](https://github.com/taskforcesh/bullmq/commit/967632c9ef8468aab59f0b36d1d828bcde1fbd70)) +* **stalled:** Support removeDependencyOnFailure option when job is stalled ([#2708](https://github.com/taskforcesh/bullmq/issues/2708)) ([`e0d3790`](https://github.com/taskforcesh/bullmq/commit/e0d3790e755c4dfe31006b52f177f08b40348e61)) +* **job:** Change moveToFinished return type to reflect jobData (#2706) ref #2342 ([`de094a3`](https://github.com/taskforcesh/bullmq/commit/de094a361a25886acbee0112bb4341c6b285b1c9)) +* **connection:** Remove unnecessary process.env.CI reference ([#2705](https://github.com/taskforcesh/bullmq/issues/2705)) ([`53de304`](https://github.com/taskforcesh/bullmq/commit/53de3049493ef79e02af40e8e450e2056c134155)) +* **worker:** Fix close sequence to reduce risk for open handlers ([#2656](https://github.com/taskforcesh/bullmq/issues/2656)) ([`8468e44`](https://github.com/taskforcesh/bullmq/commit/8468e44e5e9e39c7b65691945c26688a9e5d2275)) ### Documentation - -- **connection:** Add decode_responses warning [python] (#2745) fixes #2695 ([`188192c`](https://github.com/taskforcesh/bullmq/commit/188192c1b15e77013ad78a4904f099cd314b4b86)) -- Update README.md sponsors ([`e7e7193`](https://github.com/taskforcesh/bullmq/commit/e7e7193bacf048162d7095ba003169ee81a3766e)) -- Update copyright holder ([`13516c6`](https://github.com/taskforcesh/bullmq/commit/13516c6f5a982b44f3bb49024ec8d11a90c6c9ab)) -- **debouncing:** Add remove debounce key sub-section ([#2724](https://github.com/taskforcesh/bullmq/issues/2724)) ([`b8d8886`](https://github.com/taskforcesh/bullmq/commit/b8d8886999dca42165c83ae1951edd293e8187dd)) +* **connection:** Add decode_responses warning [python] (#2745) fixes #2695 ([`188192c`](https://github.com/taskforcesh/bullmq/commit/188192c1b15e77013ad78a4904f099cd314b4b86)) +* Update README.md sponsors ([`e7e7193`](https://github.com/taskforcesh/bullmq/commit/e7e7193bacf048162d7095ba003169ee81a3766e)) +* Update copyright holder ([`13516c6`](https://github.com/taskforcesh/bullmq/commit/13516c6f5a982b44f3bb49024ec8d11a90c6c9ab)) +* **debouncing:** Add remove debounce key sub-section ([#2724](https://github.com/taskforcesh/bullmq/issues/2724)) ([`b8d8886`](https://github.com/taskforcesh/bullmq/commit/b8d8886999dca42165c83ae1951edd293e8187dd)) ### Performance - -- **fifo-queue:** Use linked list structure for queue ([#2629](https://github.com/taskforcesh/bullmq/issues/2629)) ([`df74578`](https://github.com/taskforcesh/bullmq/commit/df7457844a769e5644eb11d31d1a05a9d5b4e084)) +* **fifo-queue:** Use linked list structure for queue ([#2629](https://github.com/taskforcesh/bullmq/issues/2629)) ([`df74578`](https://github.com/taskforcesh/bullmq/commit/df7457844a769e5644eb11d31d1a05a9d5b4e084)) ## v2.9.2 (2024-08-10) - ### Fix - -- **flow:** Validate parentData before ignoreDependencyOnFailure when stalled check happens (#2702) (python) ([`9416501`](https://github.com/taskforcesh/bullmq/commit/9416501551b1ad464e59bdba1045a5a9955e2ea4)) +* **flow:** Validate parentData before ignoreDependencyOnFailure when stalled check happens (#2702) (python) ([`9416501`](https://github.com/taskforcesh/bullmq/commit/9416501551b1ad464e59bdba1045a5a9955e2ea4)) ### Documentation - -- **bullmq-pro:** Update changelog to v7.14.1 ([#2698](https://github.com/taskforcesh/bullmq/issues/2698)) ([`8260582`](https://github.com/taskforcesh/bullmq/commit/826058207f8b75fa77432df9e3a9c3b0b31ffc69)) +* **bullmq-pro:** Update changelog to v7.14.1 ([#2698](https://github.com/taskforcesh/bullmq/issues/2698)) ([`8260582`](https://github.com/taskforcesh/bullmq/commit/826058207f8b75fa77432df9e3a9c3b0b31ffc69)) ### Performance - -- **worker:** Promote delayed jobs while queue is rate limited (#2697) ref #2582 ([`f3290ac`](https://github.com/taskforcesh/bullmq/commit/f3290ace2f117e26357f9fae611a255af26b950b)) +* **worker:** Promote delayed jobs while queue is rate limited (#2697) ref #2582 ([`f3290ac`](https://github.com/taskforcesh/bullmq/commit/f3290ace2f117e26357f9fae611a255af26b950b)) ## v2.9.1 (2024-08-08) - ### Fix - -- **job:** Consider passing stackTraceLimit as 0 (#2692) ref #2487 ([`509a36b`](https://github.com/taskforcesh/bullmq/commit/509a36baf8d8cf37176e406fd28e33f712229d27)) +* **job:** Consider passing stackTraceLimit as 0 (#2692) ref #2487 ([`509a36b`](https://github.com/taskforcesh/bullmq/commit/509a36baf8d8cf37176e406fd28e33f712229d27)) ### Documentation - -- Update example in introduction [python] ([#2677](https://github.com/taskforcesh/bullmq/issues/2677)) ([`c67ce33`](https://github.com/taskforcesh/bullmq/commit/c67ce331e31c1312e85dfde46bc92a7985b0d493)) -- **guide:** Clarify example in retrying failing jobs section (#2690) ref #2602 ([`5e6154f`](https://github.com/taskforcesh/bullmq/commit/5e6154f220bf89dc1dd9d3f0a49ff5c35200557b)) +* Update example in introduction [python] ([#2677](https://github.com/taskforcesh/bullmq/issues/2677)) ([`c67ce33`](https://github.com/taskforcesh/bullmq/commit/c67ce331e31c1312e85dfde46bc92a7985b0d493)) +* **guide:** Clarify example in retrying failing jobs section (#2690) ref #2602 ([`5e6154f`](https://github.com/taskforcesh/bullmq/commit/5e6154f220bf89dc1dd9d3f0a49ff5c35200557b)) ## v2.9.0 (2024-08-02) - ### Feature - -- **queue-events:** Pass debounceId as a param of debounced event ([#2678](https://github.com/taskforcesh/bullmq/issues/2678)) ([`97fb97a`](https://github.com/taskforcesh/bullmq/commit/97fb97a054d6cebbe1d7ff1cb5c46d7da1c018d8)) -- **job:** Allow passing a debounce as option ([#2666](https://github.com/taskforcesh/bullmq/issues/2666)) ([`163ccea`](https://github.com/taskforcesh/bullmq/commit/163ccea19ef48191c4db6da27638ff6fb0080a74)) -- **repeatable:** New repeatables structure (#2617) ref #2612 fixes #2399 #2596 ([`8376a9a`](https://github.com/taskforcesh/bullmq/commit/8376a9a9007f58ac7eab1a3a1c2f9e7ec373bbd6)) -- **queue:** Support global concurrency (#2496) ref #2465 ([`47ba055`](https://github.com/taskforcesh/bullmq/commit/47ba055c1ea36178b684fd11c1e82cde7ec93ac8)) +* **queue-events:** Pass debounceId as a param of debounced event ([#2678](https://github.com/taskforcesh/bullmq/issues/2678)) ([`97fb97a`](https://github.com/taskforcesh/bullmq/commit/97fb97a054d6cebbe1d7ff1cb5c46d7da1c018d8)) +* **job:** Allow passing a debounce as option ([#2666](https://github.com/taskforcesh/bullmq/issues/2666)) ([`163ccea`](https://github.com/taskforcesh/bullmq/commit/163ccea19ef48191c4db6da27638ff6fb0080a74)) +* **repeatable:** New repeatables structure (#2617) ref #2612 fixes #2399 #2596 ([`8376a9a`](https://github.com/taskforcesh/bullmq/commit/8376a9a9007f58ac7eab1a3a1c2f9e7ec373bbd6)) +* **queue:** Support global concurrency (#2496) ref #2465 ([`47ba055`](https://github.com/taskforcesh/bullmq/commit/47ba055c1ea36178b684fd11c1e82cde7ec93ac8)) ### Fix - -- **job:** Make sure json.dumps return JSON compliant JSON [python] ([#2683](https://github.com/taskforcesh/bullmq/issues/2683)) ([`4441711`](https://github.com/taskforcesh/bullmq/commit/4441711a986a9f6a326100308d639eb0a2ea8c8d)) -- **repeatable:** Remove repeat hash when removing repeatable job ([#2676](https://github.com/taskforcesh/bullmq/issues/2676)) ([`97a297d`](https://github.com/taskforcesh/bullmq/commit/97a297d90ad8b27bcddb7db6a8a158acfb549389)) -- **repeatable:** Keep legacy repeatables if it exists instead of creating one with new structure ([#2665](https://github.com/taskforcesh/bullmq/issues/2665)) ([`93fad41`](https://github.com/taskforcesh/bullmq/commit/93fad41a9520961d0e6814d82454bc916a039501)) -- **repeatable:** Consider removing legacy repeatable job (#2658) fixes #2661 ([`a6764ae`](https://github.com/taskforcesh/bullmq/commit/a6764aecb557fb918d061f5e5c2e26e4afa3e8ee)) -- **repeatable:** Pass custom key as an args in addRepeatableJob to prevent CROSSSLOT issue (#2662) fixes #2660 ([`9d8f874`](https://github.com/taskforcesh/bullmq/commit/9d8f874b959e09662985f38c4614b95ab4d5e89c)) +* **job:** Make sure json.dumps return JSON compliant JSON [python] ([#2683](https://github.com/taskforcesh/bullmq/issues/2683)) ([`4441711`](https://github.com/taskforcesh/bullmq/commit/4441711a986a9f6a326100308d639eb0a2ea8c8d)) +* **repeatable:** Remove repeat hash when removing repeatable job ([#2676](https://github.com/taskforcesh/bullmq/issues/2676)) ([`97a297d`](https://github.com/taskforcesh/bullmq/commit/97a297d90ad8b27bcddb7db6a8a158acfb549389)) +* **repeatable:** Keep legacy repeatables if it exists instead of creating one with new structure ([#2665](https://github.com/taskforcesh/bullmq/issues/2665)) ([`93fad41`](https://github.com/taskforcesh/bullmq/commit/93fad41a9520961d0e6814d82454bc916a039501)) +* **repeatable:** Consider removing legacy repeatable job (#2658) fixes #2661 ([`a6764ae`](https://github.com/taskforcesh/bullmq/commit/a6764aecb557fb918d061f5e5c2e26e4afa3e8ee)) +* **repeatable:** Pass custom key as an args in addRepeatableJob to prevent CROSSSLOT issue (#2662) fixes #2660 ([`9d8f874`](https://github.com/taskforcesh/bullmq/commit/9d8f874b959e09662985f38c4614b95ab4d5e89c)) ### Documentation - -- **pro:** Update changelog to v7.12.0 ([#2675](https://github.com/taskforcesh/bullmq/issues/2675)) ([`45fa04e`](https://github.com/taskforcesh/bullmq/commit/45fa04e74b90ee9b331aed1b7aaa81ca1e0219c7)) -- **repeatable:** Fix examples when passing repeat.key ([#2669](https://github.com/taskforcesh/bullmq/issues/2669)) ([`e99fc2d`](https://github.com/taskforcesh/bullmq/commit/e99fc2d6435704c407e4c4bdeaeda580dd20bf8b)) -- **guide:** Add queue global concurrency section ([#2667](https://github.com/taskforcesh/bullmq/issues/2667)) ([`c905d62`](https://github.com/taskforcesh/bullmq/commit/c905d6206aa3d1b0a8f315da220cbc27f6a397c3)) -- **changelog:** Remove documentation section ([`b5500c4`](https://github.com/taskforcesh/bullmq/commit/b5500c4b24fefb2f0646839331f242642a4adcb8)) -- **metrics:** Fix markdown typo in metrics.md ([#2655](https://github.com/taskforcesh/bullmq/issues/2655)) ([`3e071fa`](https://github.com/taskforcesh/bullmq/commit/3e071fa17c8cf9c96003293d75ffd961b22e750c)) +* **pro:** Update changelog to v7.12.0 ([#2675](https://github.com/taskforcesh/bullmq/issues/2675)) ([`45fa04e`](https://github.com/taskforcesh/bullmq/commit/45fa04e74b90ee9b331aed1b7aaa81ca1e0219c7)) +* **repeatable:** Fix examples when passing repeat.key ([#2669](https://github.com/taskforcesh/bullmq/issues/2669)) ([`e99fc2d`](https://github.com/taskforcesh/bullmq/commit/e99fc2d6435704c407e4c4bdeaeda580dd20bf8b)) +* **guide:** Add queue global concurrency section ([#2667](https://github.com/taskforcesh/bullmq/issues/2667)) ([`c905d62`](https://github.com/taskforcesh/bullmq/commit/c905d6206aa3d1b0a8f315da220cbc27f6a397c3)) +* **changelog:** Remove documentation section ([`b5500c4`](https://github.com/taskforcesh/bullmq/commit/b5500c4b24fefb2f0646839331f242642a4adcb8)) +* **metrics:** Fix markdown typo in metrics.md ([#2655](https://github.com/taskforcesh/bullmq/issues/2655)) ([`3e071fa`](https://github.com/taskforcesh/bullmq/commit/3e071fa17c8cf9c96003293d75ffd961b22e750c)) ### Performance - -- **worker:** Fetch next job on failure ([#2342](https://github.com/taskforcesh/bullmq/issues/2342)) ([`f917b80`](https://github.com/taskforcesh/bullmq/commit/f917b8090f306c0580aac12f6bd4394fd9ef003d)) +* **worker:** Fetch next job on failure ([#2342](https://github.com/taskforcesh/bullmq/issues/2342)) ([`f917b80`](https://github.com/taskforcesh/bullmq/commit/f917b8090f306c0580aac12f6bd4394fd9ef003d)) ## v2.8.1 (2024-07-11) - ### Fix - -- **delayed:** Avoid using jobId in order to schedule delayed jobs (#2587) (python) ([`228db2c`](https://github.com/taskforcesh/bullmq/commit/228db2c780a1ca8323900fc568156495a13355a3)) +* **delayed:** Avoid using jobId in order to schedule delayed jobs (#2587) (python) ([`228db2c`](https://github.com/taskforcesh/bullmq/commit/228db2c780a1ca8323900fc568156495a13355a3)) ### Performance - -- **delayed:** Keep moving delayed jobs to waiting when queue is paused (#2640) (python) ([`b89e2e0`](https://github.com/taskforcesh/bullmq/commit/b89e2e0913c0886561fc1c2470771232f17f5b3b)) +* **delayed:** Keep moving delayed jobs to waiting when queue is paused (#2640) (python) ([`b89e2e0`](https://github.com/taskforcesh/bullmq/commit/b89e2e0913c0886561fc1c2470771232f17f5b3b)) ## v2.8.0 (2024-07-10) - ### Feature - -- **queue:** Add getCountsPerPriority method [python] ([#2607](https://github.com/taskforcesh/bullmq/issues/2607)) ([`02b8338`](https://github.com/taskforcesh/bullmq/commit/02b83380334879cc2434043141566f2a375db958)) +* **queue:** Add getCountsPerPriority method [python] ([#2607](https://github.com/taskforcesh/bullmq/issues/2607)) ([`02b8338`](https://github.com/taskforcesh/bullmq/commit/02b83380334879cc2434043141566f2a375db958)) ### Fix - -- **parent:** Consider re-adding child that is in completed state using same jobIds (#2627) (python) fixes #2554 ([`00cd017`](https://github.com/taskforcesh/bullmq/commit/00cd0174539fbe1cc4628b9b6e1a7eb87a5ef705)) -- **priority:** Consider paused state when calling getCountsPerPriority (python) ([#2609](https://github.com/taskforcesh/bullmq/issues/2609)) ([`6e99250`](https://github.com/taskforcesh/bullmq/commit/6e992504b2a7a2fa76f1d04ad53d1512e98add7f)) -- **priority:** Use module instead of bit.band to keep order (python) ([#2597](https://github.com/taskforcesh/bullmq/issues/2597)) ([`9ece15b`](https://github.com/taskforcesh/bullmq/commit/9ece15b17420fe0bee948a5307e870915e3bce87)) +* **parent:** Consider re-adding child that is in completed state using same jobIds (#2627) (python) fixes #2554 ([`00cd017`](https://github.com/taskforcesh/bullmq/commit/00cd0174539fbe1cc4628b9b6e1a7eb87a5ef705)) +* **priority:** Consider paused state when calling getCountsPerPriority (python) ([#2609](https://github.com/taskforcesh/bullmq/issues/2609)) ([`6e99250`](https://github.com/taskforcesh/bullmq/commit/6e992504b2a7a2fa76f1d04ad53d1512e98add7f)) +* **priority:** Use module instead of bit.band to keep order (python) ([#2597](https://github.com/taskforcesh/bullmq/issues/2597)) ([`9ece15b`](https://github.com/taskforcesh/bullmq/commit/9ece15b17420fe0bee948a5307e870915e3bce87)) ## v2.7.8 (2024-06-05) - ### Fix - -- Remove print calls [python] ([#2579](https://github.com/taskforcesh/bullmq/issues/2579)) ([`f957186`](https://github.com/taskforcesh/bullmq/commit/f95718689864dbaca8a6b4113a6b37727919d6df)) +* Remove print calls [python] ([#2579](https://github.com/taskforcesh/bullmq/issues/2579)) ([`f957186`](https://github.com/taskforcesh/bullmq/commit/f95718689864dbaca8a6b4113a6b37727919d6df)) ## v2.7.7 (2024-06-04) - ### Fix - -- **retry-job:** Throw error when job is not in active state ([#2576](https://github.com/taskforcesh/bullmq/issues/2576)) ([`ca207f5`](https://github.com/taskforcesh/bullmq/commit/ca207f593d0ed455ecc59d9e0ef389a9a50d9634)) -- **job:** Validate job existence when adding a log ([#2562](https://github.com/taskforcesh/bullmq/issues/2562)) ([`f87e3fe`](https://github.com/taskforcesh/bullmq/commit/f87e3fe029e48d8964722da762326e531c2256ee)) +* **retry-job:** Throw error when job is not in active state ([#2576](https://github.com/taskforcesh/bullmq/issues/2576)) ([`ca207f5`](https://github.com/taskforcesh/bullmq/commit/ca207f593d0ed455ecc59d9e0ef389a9a50d9634)) +* **job:** Validate job existence when adding a log ([#2562](https://github.com/taskforcesh/bullmq/issues/2562)) ([`f87e3fe`](https://github.com/taskforcesh/bullmq/commit/f87e3fe029e48d8964722da762326e531c2256ee)) ### Performance - -- **job:** Set processedBy using hmset (#2592) (python) ([`238680b`](https://github.com/taskforcesh/bullmq/commit/238680b84593690a73d542dbe1120611c3508b47)) +* **job:** Set processedBy using hmset (#2592) (python) ([`238680b`](https://github.com/taskforcesh/bullmq/commit/238680b84593690a73d542dbe1120611c3508b47)) ## v2.7.6 (2024-05-09) - ### Fix - -- **connection:** Use async Retry (#2555) [python] ([`d6dd21d`](https://github.com/taskforcesh/bullmq/commit/d6dd21d3ac28660bbfa7825bba0b586328769709)) -- **worker:** Make sure clearTimeout is always called after bzpopmin ([`782382e`](https://github.com/taskforcesh/bullmq/commit/782382e599218024bb9912ff0572c4aa9b1f22a3)) -- **worker:** Force timeout on bzpopmin command ([#2543](https://github.com/taskforcesh/bullmq/issues/2543)) ([`ae7cb6c`](https://github.com/taskforcesh/bullmq/commit/ae7cb6caefdbfa5ca0d28589cef4b896ffcce2db)) +* **connection:** Use async Retry (#2555) [python] ([`d6dd21d`](https://github.com/taskforcesh/bullmq/commit/d6dd21d3ac28660bbfa7825bba0b586328769709)) +* **worker:** Make sure clearTimeout is always called after bzpopmin ([`782382e`](https://github.com/taskforcesh/bullmq/commit/782382e599218024bb9912ff0572c4aa9b1f22a3)) +* **worker:** Force timeout on bzpopmin command ([#2543](https://github.com/taskforcesh/bullmq/issues/2543)) ([`ae7cb6c`](https://github.com/taskforcesh/bullmq/commit/ae7cb6caefdbfa5ca0d28589cef4b896ffcce2db)) ### Documentation - -- **changelog:** Update bullmq-pro to v7.7.1 ([#2550](https://github.com/taskforcesh/bullmq/issues/2550)) ([`b37edfe`](https://github.com/taskforcesh/bullmq/commit/b37edfedb53ab8a6811dcd7d37c2e55e61ed65c3)) -- Update README.md ([`7ab5a9b`](https://github.com/taskforcesh/bullmq/commit/7ab5a9bff29af0eb6c3dc487f65f8b74f02b70f4)) +* **changelog:** Update bullmq-pro to v7.7.1 ([#2550](https://github.com/taskforcesh/bullmq/issues/2550)) ([`b37edfe`](https://github.com/taskforcesh/bullmq/commit/b37edfedb53ab8a6811dcd7d37c2e55e61ed65c3)) +* Update README.md ([`7ab5a9b`](https://github.com/taskforcesh/bullmq/commit/7ab5a9bff29af0eb6c3dc487f65f8b74f02b70f4)) ## v2.7.5 (2024-04-28) diff --git a/docs/gitbook/python/introduction.md b/docs/gitbook/python/introduction.md index 16b4d704bd..ce57106264 100644 --- a/docs/gitbook/python/introduction.md +++ b/docs/gitbook/python/introduction.md @@ -77,5 +77,5 @@ if __name__ == "__main__": ``` {% hint style="warning" %} -If Redis responses are in binary format, you should pass (decode_responses)[https://redis-py.readthedocs.io/en/latest/examples/connection_examples.html#By-default-Redis-return-binary-responses,-to-decode-them-use-decode_responses=True] option as _True_. +If Redis responses are in binary format, you should pass (decode_responses)[https://redis-py.readthedocs.io/en/latest/examples/connection_examples.html#By-default-Redis-return-binary-responses,-to-decode-them-use-decode_responses=True] option as *True*. {% endhint %} diff --git a/src/classes/queue-base.ts b/src/classes/queue-base.ts index 483324c2ae..41097e966d 100644 --- a/src/classes/queue-base.ts +++ b/src/classes/queue-base.ts @@ -1,5 +1,5 @@ import { EventEmitter } from 'events'; -import { QueueBaseOptions, RedisClient, Span, Tracer } from '../interfaces'; +import { QueueBaseOptions, RedisClient } from '../interfaces'; import { MinimalQueue } from '../types'; import { delay, @@ -11,7 +11,6 @@ import { RedisConnection } from './redis-connection'; import { Job } from './job'; import { KeysMap, QueueKeys } from './queue-keys'; import { Scripts } from './scripts'; -import { TelemetryAttributes } from '../enums'; /** * @class QueueBase @@ -31,13 +30,6 @@ export class QueueBase extends EventEmitter implements MinimalQueue { protected connection: RedisConnection; public readonly qualifiedName: string; - /** - * Instance of a telemetry client - * To use it create if statement in a method to observe with start and end of a span - * It will check if tracer is provided and if not it will continue as is - */ - private tracer: Tracer | undefined; - /** * * @param name - The name of the queue. @@ -84,8 +76,6 @@ export class QueueBase extends EventEmitter implements MinimalQueue { this.keys = queueKeys.getKeys(name); this.toKey = (type: string) => queueKeys.toKey(name, type); this.setScripts(); - - this.tracer = opts?.telemetry?.tracer; } /** @@ -185,28 +175,4 @@ export class QueueBase extends EventEmitter implements MinimalQueue { } } } - - protected trace( - getSpanName: () => string, - callback: (span?: Span) => Promise | T, - ) { - if (!this.tracer) { - return callback(); - } - - const span = this.tracer.startSpan(getSpanName()); - - span.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, - }); - - try { - return callback(span); - } catch (err) { - span.recordException(err as Error); - throw err; - } finally { - span.end(); - } - } } diff --git a/src/classes/queue.ts b/src/classes/queue.ts index e074296339..25f796af1d 100644 --- a/src/classes/queue.ts +++ b/src/classes/queue.ts @@ -12,7 +12,6 @@ import { Job } from './job'; import { QueueGetters } from './queue-getters'; import { Repeat } from './repeat'; import { RedisConnection } from './redis-connection'; -import { TelemetryAttributes } from '../enums'; export interface ObliterateOpts { /** @@ -221,50 +220,38 @@ export class Queue< data: DataType, opts?: JobsOptions, ): Promise> { - return await this.trace>( - () => `${this.name}.${name} Queue.add`, - async span => { - if (opts && opts.repeat) { - if (opts.repeat.endDate) { - if (+new Date(opts.repeat.endDate) < Date.now()) { - throw new Error( - 'End date must be greater than current timestamp', - ); - } - } - - return (await this.repeat).updateRepeatableJob< - DataType, - ResultType, - NameType - >(name, data, { ...this.jobsOpts, ...opts }, { override: true }); - } else { - const jobId = opts?.jobId; - - if (jobId == '0' || jobId?.startsWith('0:')) { - throw new Error("JobId cannot be '0' or start with 0:"); - } - - const job = await this.Job.create( - this as MinimalQueue, - name, - data, - { - ...this.jobsOpts, - ...opts, - jobId, - }, - ); - this.emit('waiting', job); - - span?.setAttributes({ - [TelemetryAttributes.JobId]: job.id, - }); - - return job; + if (opts && opts.repeat) { + if (opts.repeat.endDate) { + if (+new Date(opts.repeat.endDate) < Date.now()) { + throw new Error('End date must be greater than current timestamp'); } - }, - ); + } + + return (await this.repeat).updateRepeatableJob< + DataType, + ResultType, + NameType + >(name, data, { ...this.jobsOpts, ...opts }, { override: true }); + } else { + const jobId = opts?.jobId; + + if (jobId == '0' || jobId?.startsWith('0:')) { + throw new Error("JobId cannot be '0' or start with 0:"); + } + + const job = await this.Job.create( + this as MinimalQueue, + name, + data, + { + ...this.jobsOpts, + ...opts, + jobId, + }, + ); + this.emit('waiting', job); + return job; + } } /** @@ -274,30 +261,20 @@ export class Queue< * @param jobs - The array of jobs to add to the queue. Each job is defined by 3 * properties, 'name', 'data' and 'opts'. They follow the same signature as 'Queue.add'. */ - async addBulk( + addBulk( jobs: { name: NameType; data: DataType; opts?: BulkJobOptions }[], ): Promise[]> { - return await this.trace[]>( - () => `${this.name} Queue.addBulk`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.BulkNames]: jobs.map(job => job.name), - [TelemetryAttributes.BulkCount]: jobs.length, - }); - - return await this.Job.createBulk( - this as MinimalQueue, - jobs.map(job => ({ - name: job.name, - data: job.data, - opts: { - ...this.jobsOpts, - ...job.opts, - jobId: job.opts?.jobId, - }, - })), - ); - }, + return this.Job.createBulk( + this as MinimalQueue, + jobs.map(job => ({ + name: job.name, + data: job.data, + opts: { + ...this.jobsOpts, + ...job.opts, + jobId: job.opts?.jobId, + }, + })), ); } @@ -313,14 +290,8 @@ export class Queue< * and in that case it will add it there instead of the wait list. */ async pause(): Promise { - await this.trace( - () => `${this.name} Queue.pause`, - async () => { - await this.scripts.pause(true); - - this.emit('paused'); - }, - ); + await this.scripts.pause(true); + this.emit('paused'); } /** @@ -328,18 +299,12 @@ export class Queue< * */ async close(): Promise { - await this.trace( - () => `${this.name} Queue.close`, - async () => { - if (!this.closing) { - if (this._repeat) { - await this._repeat.close(); - } - } - - await super.close(); - }, - ); + if (!this.closing) { + if (this._repeat) { + await this._repeat.close(); + } + } + return super.close(); } /** * Resumes the processing of this queue globally. @@ -348,14 +313,8 @@ export class Queue< * queue. */ async resume(): Promise { - await this.trace( - () => `${this.name} Queue.resume`, - async () => { - await this.scripts.pause(false); - - this.emit('resumed'); - }, - ); + await this.scripts.pause(false); + this.emit('resumed'); } /** @@ -408,15 +367,10 @@ export class Queue< repeatOpts: RepeatOptions, jobId?: string, ): Promise { - return await this.trace( - () => `${this.name} ${name} Queue.removeRepeatable`, - async () => { - const repeat = await this.repeat; - const removed = await repeat.removeRepeatable(name, repeatOpts, jobId); + const repeat = await this.repeat; + const removed = await repeat.removeRepeatable(name, repeatOpts, jobId); - return !removed; - }, - ); + return !removed; } /** @@ -425,14 +379,9 @@ export class Queue< * @param id - identifier */ async removeDebounceKey(id: string): Promise { - return await this.trace( - () => `${this.name} ${id} Queue.removeDebounceKey`, - async () => { - const client = await this.client; + const client = await this.client; - return await client.del(`${this.keys.de}:${id}`); - }, - ); + return client.del(`${this.keys.de}:${id}`); } /** @@ -446,19 +395,10 @@ export class Queue< * @returns */ async removeRepeatableByKey(key: string): Promise { - return await this.trace( - () => `${this.name} ${key} Queue.removeRepeatableByKey`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.JobKey]: key, - }); + const repeat = await this.repeat; + const removed = await repeat.removeRepeatableByKey(key); - const repeat = await this.repeat; - const removed = await repeat.removeRepeatableByKey(key); - - return !removed; - }, - ); + return !removed; } /** @@ -470,20 +410,8 @@ export class Queue< * @returns 1 if it managed to remove the job or 0 if the job or * any of its dependencies were locked. */ - async remove(jobId: string, { removeChildren = true } = {}): Promise { - return await this.trace( - () => `${this.name} ${jobId} Queue.remove`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.JobId]: jobId, - [TelemetryAttributes.JobOptions]: JSON.stringify({ - removeChildren, - }), - }); - - return await this.scripts.remove(jobId, removeChildren); - }, - ); + remove(jobId: string, { removeChildren = true } = {}): Promise { + return this.scripts.remove(jobId, removeChildren); } /** @@ -496,17 +424,7 @@ export class Queue< jobId: string, progress: number | object, ): Promise { - await this.trace( - () => `${this.name} Queue.updateJobProgress`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.JobId]: jobId, - [TelemetryAttributes.JobProgress]: JSON.stringify(progress), - }); - - await this.scripts.updateProgress(jobId, progress); - }, - ); + return this.scripts.updateProgress(jobId, progress); } /** @@ -533,17 +451,8 @@ export class Queue< * @param delayed - Pass true if it should also clean the * delayed jobs. */ - async drain(delayed = false): Promise { - await this.trace( - () => `${this.name} Queue.drain`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.QueueDrainDelay]: delayed, - }); - - await this.scripts.drain(delayed); - }, - ); + drain(delayed = false): Promise { + return this.scripts.drain(delayed); } /** @@ -568,42 +477,28 @@ export class Queue< | 'delayed' | 'failed' = 'completed', ): Promise { - return await this.trace( - () => `${this.name} Queue.clean`, - async span => { - const maxCount = limit || Infinity; - const maxCountPerCall = Math.min(10000, maxCount); - const timestamp = Date.now() - grace; - let deletedCount = 0; - const deletedJobsIds: string[] = []; - - while (deletedCount < maxCount) { - const jobsIds = await this.scripts.cleanJobsInSet( - type, - timestamp, - maxCountPerCall, - ); - - this.emit('cleaned', jobsIds, type); - deletedCount += jobsIds.length; - deletedJobsIds.push(...jobsIds); - - if (jobsIds.length < maxCountPerCall) { - break; - } - } - - span?.setAttributes({ - [TelemetryAttributes.QueueGrace]: grace, - [TelemetryAttributes.JobType]: type, - [TelemetryAttributes.QueueCleanLimit]: maxCount, - [TelemetryAttributes.JobTimestamp]: timestamp, - [TelemetryAttributes.JobId]: deletedJobsIds, - }); - - return deletedJobsIds; - }, - ); + const maxCount = limit || Infinity; + const maxCountPerCall = Math.min(10000, maxCount); + const timestamp = Date.now() - grace; + let deletedCount = 0; + const deletedJobsIds: string[] = []; + + while (deletedCount < maxCount) { + const jobsIds = await this.scripts.cleanJobsInSet( + type, + timestamp, + maxCountPerCall, + ); + + this.emit('cleaned', jobsIds, type); + deletedCount += jobsIds.length; + deletedJobsIds.push(...jobsIds); + + if (jobsIds.length < maxCountPerCall) { + break; + } + } + return deletedJobsIds; } /** @@ -618,21 +513,16 @@ export class Queue< * @param opts - Obliterate options. */ async obliterate(opts?: ObliterateOpts): Promise { - await this.trace( - () => `${this.name} Queue.obliterate`, - async () => { - await this.pause(); - - let cursor = 0; - do { - cursor = await this.scripts.obliterate({ - force: false, - count: 1000, - ...opts, - }); - } while (cursor); - }, - ); + await this.pause(); + + let cursor = 0; + do { + cursor = await this.scripts.obliterate({ + force: false, + count: 1000, + ...opts, + }); + } while (cursor); } /** @@ -648,23 +538,14 @@ export class Queue< async retryJobs( opts: { count?: number; state?: FinishedStatus; timestamp?: number } = {}, ): Promise { - await this.trace( - () => `${this.name} Queue.retryJobs`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.QueueOptions]: JSON.stringify(opts), - }); - - let cursor = 0; - do { - cursor = await this.scripts.retryJobs( - opts.state, - opts.count, - opts.timestamp, - ); - } while (cursor); - }, - ); + let cursor = 0; + do { + cursor = await this.scripts.retryJobs( + opts.state, + opts.count, + opts.timestamp, + ); + } while (cursor); } /** @@ -676,19 +557,10 @@ export class Queue< * @returns */ async promoteJobs(opts: { count?: number } = {}): Promise { - await this.trace( - () => `${this.name} Queue.promoteJobs`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.QueueOptions]: JSON.stringify(opts), - }); - - let cursor = 0; - do { - cursor = await this.scripts.promoteJobs(opts.count); - } while (cursor); - }, - ); + let cursor = 0; + do { + cursor = await this.scripts.promoteJobs(opts.count); + } while (cursor); } /** @@ -697,17 +569,8 @@ export class Queue< * @param maxLength - */ async trimEvents(maxLength: number): Promise { - return await this.trace( - () => `${this.name} Queue.trimEvents`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.QueueEventMaxLength]: maxLength, - }); - - const client = await this.client; - return await client.xtrim(this.keys.events, 'MAXLEN', '~', maxLength); - }, - ); + const client = await this.client; + return client.xtrim(this.keys.events, 'MAXLEN', '~', maxLength); } /** diff --git a/src/classes/worker.ts b/src/classes/worker.ts index 945b736d19..881a25ef9a 100644 --- a/src/classes/worker.ts +++ b/src/classes/worker.ts @@ -35,7 +35,6 @@ import { RATE_LIMIT_ERROR, WaitingChildrenError, } from './errors'; -import { TelemetryAttributes } from '../enums'; // 10 seconds is the maximum time a BRPOPLPUSH can block. const maximumBlockTimeout = 10; @@ -403,126 +402,116 @@ export class Worker< } async run() { - await this.trace( - () => `${this.name} ${this.id} Worker.run`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.WorkerId]: this.id, - [TelemetryAttributes.WorkerOptions]: JSON.stringify(this.opts), - }); + if (!this.processFn) { + throw new Error('No process function is defined.'); + } - if (!this.processFn) { - throw new Error('No process function is defined.'); - } + if (this.running) { + throw new Error('Worker is already running.'); + } - if (this.running) { - throw new Error('Worker is already running.'); - } + try { + this.running = true; - try { - this.running = true; + if (this.closing) { + return; + } - if (this.closing) { - return; - } + await this.startStalledCheckTimer(); - await this.startStalledCheckTimer(); + const jobsInProgress = new Set<{ job: Job; ts: number }>(); + this.startLockExtenderTimer(jobsInProgress); - const jobsInProgress = new Set<{ job: Job; ts: number }>(); - this.startLockExtenderTimer(jobsInProgress); + const asyncFifoQueue = (this.asyncFifoQueue = + new AsyncFifoQueue>()); - const asyncFifoQueue = (this.asyncFifoQueue = - new AsyncFifoQueue>()); - - let tokenPostfix = 0; - - const client = await this.client; - const bclient = await this.blockingConnection.client; - - /** - * This is the main loop in BullMQ. Its goals are to fetch jobs from the queue - * as efficiently as possible, providing concurrency and minimal unnecessary calls - * to Redis. - */ - while (!this.closing) { - let numTotal = asyncFifoQueue.numTotal(); - - /** - * This inner loop tries to fetch jobs concurrently, but if we are waiting for a job - * to arrive at the queue we should not try to fetch more jobs (as it would be pointless) - */ - while ( - !this.waiting && - numTotal < this.opts.concurrency && - (!this.limitUntil || numTotal == 0) - ) { - const token = `${this.id}:${tokenPostfix++}`; - - const fetchedJob = this.retryIfFailed>( - () => this._getNextJob(client, bclient, token, { block: true }), - this.opts.runRetryDelay, - ); - asyncFifoQueue.add(fetchedJob); - - numTotal = asyncFifoQueue.numTotal(); - - if (this.waiting && numTotal > 1) { - // We are waiting for jobs but we have others that we could start processing already - break; - } - - // We await here so that we fetch jobs in sequence, this is important to avoid unnecessary calls - // to Redis in high concurrency scenarios. - const job = await fetchedJob; - - // No more jobs waiting but we have others that could start processing already - if (!job && numTotal > 1) { - break; - } - - // If there are potential jobs to be processed and blockUntil is set, we should exit to avoid waiting - // for processing this job. - if (this.blockUntil) { - break; - } - } + let tokenPostfix = 0; - // Since there can be undefined jobs in the queue (when a job fails or queue is empty) - // we iterate until we find a job. - let job: Job | void; - do { - job = await asyncFifoQueue.fetch(); - } while (!job && asyncFifoQueue.numQueued() > 0); - - if (job) { - const token = job.token; - asyncFifoQueue.add( - this.retryIfFailed>( - () => - this.processJob( - >job, - token, - () => asyncFifoQueue.numTotal() <= this.opts.concurrency, - jobsInProgress, - ), - this.opts.runRetryDelay, - ), - ); - } + const client = await this.client; + const bclient = await this.blockingConnection.client; + + /** + * This is the main loop in BullMQ. Its goals are to fetch jobs from the queue + * as efficiently as possible, providing concurrency and minimal unnecessary calls + * to Redis. + */ + while (!this.closing) { + let numTotal = asyncFifoQueue.numTotal(); + + /** + * This inner loop tries to fetch jobs concurrently, but if we are waiting for a job + * to arrive at the queue we should not try to fetch more jobs (as it would be pointless) + */ + while ( + !this.waiting && + numTotal < this.opts.concurrency && + (!this.limitUntil || numTotal == 0) + ) { + const token = `${this.id}:${tokenPostfix++}`; + + const fetchedJob = this.retryIfFailed>( + () => this._getNextJob(client, bclient, token, { block: true }), + this.opts.runRetryDelay, + ); + asyncFifoQueue.add(fetchedJob); + + numTotal = asyncFifoQueue.numTotal(); + + if (this.waiting && numTotal > 1) { + // We are waiting for jobs but we have others that we could start processing already + break; + } + + // We await here so that we fetch jobs in sequence, this is important to avoid unnecessary calls + // to Redis in high concurrency scenarios. + const job = await fetchedJob; + + // No more jobs waiting but we have others that could start processing already + if (!job && numTotal > 1) { + break; } - this.running = false; - return await asyncFifoQueue.waitAll(); - } catch (error) { - this.running = false; - throw error; + // If there are potential jobs to be processed and blockUntil is set, we should exit to avoid waiting + // for processing this job. + if (this.blockUntil) { + break; + } } - }, - ); + + // Since there can be undefined jobs in the queue (when a job fails or queue is empty) + // we iterate until we find a job. + let job: Job | void; + do { + job = await asyncFifoQueue.fetch(); + } while (!job && asyncFifoQueue.numQueued() > 0); + + if (job) { + const token = job.token; + asyncFifoQueue.add( + this.retryIfFailed>( + () => + this.processJob( + >job, + token, + () => asyncFifoQueue.numTotal() <= this.opts.concurrency, + jobsInProgress, + ), + this.opts.runRetryDelay, + ), + ); + } + } + + this.running = false; + return asyncFifoQueue.waitAll(); + } catch (error) { + this.running = false; + throw error; + } } /** @@ -531,25 +520,11 @@ export class Worker< * @returns a Job or undefined if no job was available in the queue. */ async getNextJob(token: string, { block = true }: GetNextJobOptions = {}) { - return await this.trace>( - () => `${this.name} ${this.id} Worker.getNextJob`, - async span => { - const nextJob = await this._getNextJob( - await this.client, - await this.blockingConnection.client, - token, - { block }, - ); - - span?.setAttributes({ - [TelemetryAttributes.WorkerId]: this.id, - [TelemetryAttributes.WorkerToken]: token, - [TelemetryAttributes.WorkerOptions]: JSON.stringify({ block }), - [TelemetryAttributes.JobId]: nextJob?.id, - }); - - return nextJob; - }, + return this._getNextJob( + await this.client, + await this.blockingConnection.client, + token, + { block }, ); } @@ -610,23 +585,13 @@ export class Worker< * @param expireTimeMs - expire time in ms of this rate limit. */ async rateLimit(expireTimeMs: number): Promise { - await this.trace( - () => `${this.name} ${this.id} Worker.rateLimit`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.WorkerId]: this.id, - [TelemetryAttributes.WorkerRateLimit]: expireTimeMs, - }); - - await this.client.then(client => - client.set( - this.keys.limiter, - Number.MAX_SAFE_INTEGER, - 'PX', - expireTimeMs, - ), - ); - }, + await this.client.then(client => + client.set( + this.keys.limiter, + Number.MAX_SAFE_INTEGER, + 'PX', + expireTimeMs, + ), ); } @@ -780,83 +745,72 @@ will never work with more accuracy than 1ms. */ fetchNextCallback = () => true, jobsInProgress: Set<{ job: Job; ts: number }>, ): Promise> { - return await this.trace>( - () => `${this.name} ${this.id} Worker.processJob`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.WorkerId]: this.id, - [TelemetryAttributes.WorkerToken]: token, - [TelemetryAttributes.JobId]: job.id, - }); + if (!job || this.closing || this.paused) { + return; + } - if (!job || this.closing || this.paused) { - return; - } + const handleCompleted = async (result: ResultType) => { + if (!this.connection.closing) { + const completed = await job.moveToCompleted( + result, + token, + fetchNextCallback() && !(this.closing || this.paused), + ); + this.emit('completed', job, result, 'active'); + const [jobData, jobId, limitUntil, delayUntil] = completed || []; + this.updateDelays(limitUntil, delayUntil); - const handleCompleted = async (result: ResultType) => { - if (!this.connection.closing) { - const completed = await job.moveToCompleted( - result, - token, - fetchNextCallback() && !(this.closing || this.paused), - ); - this.emit('completed', job, result, 'active'); - const [jobData, jobId, limitUntil, delayUntil] = completed || []; - this.updateDelays(limitUntil, delayUntil); + return this.nextJobFromJobData(jobData, jobId, token); + } + }; - return this.nextJobFromJobData(jobData, jobId, token); + const handleFailed = async (err: Error) => { + if (!this.connection.closing) { + try { + if (err.message == RATE_LIMIT_ERROR) { + this.limitUntil = await this.moveLimitedBackToWait(job, token); + return; } - }; - const handleFailed = async (err: Error) => { - if (!this.connection.closing) { - try { - if (err.message == RATE_LIMIT_ERROR) { - this.limitUntil = await this.moveLimitedBackToWait(job, token); - return; - } - - if ( - err instanceof DelayedError || - err.name == 'DelayedError' || - err instanceof WaitingChildrenError || - err.name == 'WaitingChildrenError' - ) { - return; - } - - const result = await job.moveToFailed(err, token, true); - this.emit('failed', job, err, 'active'); - - if (result) { - const [jobData, jobId, limitUntil, delayUntil] = result; - this.updateDelays(limitUntil, delayUntil); - return this.nextJobFromJobData(jobData, jobId, token); - } - } catch (err) { - this.emit('error', err); - // It probably means that the job has lost the lock before completion - // A worker will (or already has) moved the job back - // to the waiting list (as stalled) - } + if ( + err instanceof DelayedError || + err.name == 'DelayedError' || + err instanceof WaitingChildrenError || + err.name == 'WaitingChildrenError' + ) { + return; } - }; - this.emit('active', job, 'waiting'); + const result = await job.moveToFailed(err, token, true); + this.emit('failed', job, err, 'active'); - const inProgressItem = { job, ts: Date.now() }; - - try { - jobsInProgress.add(inProgressItem); - const result = await this.callProcessJob(job, token); - return await handleCompleted(result); + if (result) { + const [jobData, jobId, limitUntil, delayUntil] = result; + this.updateDelays(limitUntil, delayUntil); + return this.nextJobFromJobData(jobData, jobId, token); + } } catch (err) { - return handleFailed(err); - } finally { - jobsInProgress.delete(inProgressItem); + this.emit('error', err); + // It probably means that the job has lost the lock before completion + // A worker will (or already has) moved the job back + // to the waiting list (as stalled) } - }, - ); + } + }; + + this.emit('active', job, 'waiting'); + + const inProgressItem = { job, ts: Date.now() }; + + try { + jobsInProgress.add(inProgressItem); + const result = await this.callProcessJob(job, token); + return await handleCompleted(result); + } catch (err) { + return handleFailed(err); + } finally { + jobsInProgress.delete(inProgressItem); + } } /** @@ -864,27 +818,17 @@ will never work with more accuracy than 1ms. */ * Pauses the processing of this queue only for this worker. */ async pause(doNotWaitActive?: boolean): Promise { - await this.trace( - () => `${this.name} ${this.id} Worker.pause`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.WorkerId]: this.id, - [TelemetryAttributes.WorkerDoNotWaitActive]: doNotWaitActive, - }); - - if (!this.paused) { - this.paused = new Promise(resolve => { - this.resumeWorker = function () { - resolve(); - this.paused = null; // Allow pause to be checked externally for paused state. - this.resumeWorker = null; - }; - }); - await (!doNotWaitActive && this.whenCurrentJobsFinished()); - this.emit('paused'); - } - }, - ); + if (!this.paused) { + this.paused = new Promise(resolve => { + this.resumeWorker = function () { + resolve(); + this.paused = null; // Allow pause to be checked externally for paused state. + this.resumeWorker = null; + }; + }); + await (!doNotWaitActive && this.whenCurrentJobsFinished()); + this.emit('paused'); + } } /** @@ -892,19 +836,10 @@ will never work with more accuracy than 1ms. */ * Resumes processing of this worker (if paused). */ resume(): void { - this.trace( - () => `${this.name} ${this.id} Worker.resume`, - span => { - span?.setAttributes({ - [TelemetryAttributes.WorkerId]: this.id, - }); - - if (this.resumeWorker) { - this.resumeWorker(); - this.emit('resumed'); - } - }, - ); + if (this.resumeWorker) { + this.resumeWorker(); + this.emit('resumed'); + } } /** @@ -938,53 +873,42 @@ will never work with more accuracy than 1ms. */ * * @returns Promise that resolves when the worker has been closed. */ - async close(force = false): Promise { - await this.trace( - () => `${this.name} ${this.id} Worker.close`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.WorkerId]: this.id, - [TelemetryAttributes.WorkerForceClose]: force, - }); - - if (this.closing) { - return this.closing; + close(force = false): Promise { + if (this.closing) { + return this.closing; + } + this.closing = (async () => { + this.emit('closing', 'closing queue'); + this.abortDelayController?.abort(); + + this.resume(); + + // Define the async cleanup functions + const asyncCleanups = [ + () => { + return force || this.whenCurrentJobsFinished(false); + }, + () => this.childPool?.clean(), + () => this.blockingConnection.close(force), + () => this.connection.close(force), + ]; + + // Run cleanup functions sequentially and make sure all are run despite any errors + for (const cleanup of asyncCleanups) { + try { + await cleanup(); + } catch (err) { + this.emit('error', err); } - this.closing = (async () => { - this.emit('closing', 'closing queue'); - this.abortDelayController?.abort(); - - this.resume(); - - // Define the async cleanup functions - const asyncCleanups = [ - () => { - return force || this.whenCurrentJobsFinished(false); - }, - () => this.childPool?.clean(), - () => this.blockingConnection.close(force), - () => this.connection.close(force), - ]; - - // Run cleanup functions sequentially and make sure all are run despite any errors - for (const cleanup of asyncCleanups) { - try { - await cleanup(); - } catch (err) { - this.emit('error', err); - } - } - - clearTimeout(this.extendLocksTimer); - clearTimeout(this.stalledCheckTimer); + } - this.closed = true; - this.emit('closed'); - })(); + clearTimeout(this.extendLocksTimer); + clearTimeout(this.stalledCheckTimer); - return await this.closing; - }, - ); + this.closed = true; + this.emit('closed'); + })(); + return this.closing; } /** @@ -1000,31 +924,20 @@ will never work with more accuracy than 1ms. */ * @see {@link https://docs.bullmq.io/patterns/manually-fetching-jobs} */ async startStalledCheckTimer(): Promise { - await this.trace( - () => `${this.name} ${this.id} Worker.startStalledCheckTimer`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.WorkerId]: this.id, - }); + if (!this.opts.skipStalledCheck) { + clearTimeout(this.stalledCheckTimer); - if (!this.opts.skipStalledCheck) { - clearTimeout(this.stalledCheckTimer); - - if (!this.closing) { - try { - await this.checkConnectionError(() => - this.moveStalledJobsToWait(), - ); - this.stalledCheckTimer = setTimeout(async () => { - await this.startStalledCheckTimer(); - }, this.opts.stalledInterval); - } catch (err) { - this.emit('error', err); - } - } + if (!this.closing) { + try { + await this.checkConnectionError(() => this.moveStalledJobsToWait()); + this.stalledCheckTimer = setTimeout(async () => { + await this.startStalledCheckTimer(); + }, this.opts.stalledInterval); + } catch (err) { + this.emit('error', err); } - }, - ); + } + } } private startLockExtenderTimer( @@ -1106,78 +1019,54 @@ will never work with more accuracy than 1ms. */ } protected async extendLocks(jobs: Job[]) { - await this.trace( - () => `${this.name} ${this.id} Worker.extendLocks`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.WorkerId]: this.id, - [TelemetryAttributes.WorkerJobsToExtendLocks]: jobs.map( - job => job.id, - ), - }); - - try { - const pipeline = (await this.client).pipeline(); - for (const job of jobs) { - await this.scripts.extendLock( - job.id, - job.token, - this.opts.lockDuration, - pipeline, - ); - } - const result = (await pipeline.exec()) as [Error, string][]; - - for (const [err, jobId] of result) { - if (err) { - // TODO: signal process function that the job has been lost. - this.emit( - 'error', - new Error(`could not renew lock for job ${jobId}`), - ); - } - } - } catch (err) { - this.emit('error', err); + try { + const pipeline = (await this.client).pipeline(); + for (const job of jobs) { + await this.scripts.extendLock( + job.id, + job.token, + this.opts.lockDuration, + pipeline, + ); + } + const result = (await pipeline.exec()) as [Error, string][]; + + for (const [err, jobId] of result) { + if (err) { + // TODO: signal process function that the job has been lost. + this.emit( + 'error', + new Error(`could not renew lock for job ${jobId}`), + ); } - }, - ); + } + } catch (err) { + this.emit('error', err); + } } private async moveStalledJobsToWait() { - await this.trace( - () => `${this.name} ${this.id} Worker.moveStalledJobsToWait`, - async span => { - const chunkSize = 50; - const [failed, stalled] = await this.scripts.moveStalledJobsToWait(); - - stalled.forEach((jobId: string) => - this.emit('stalled', jobId, 'active'), - ); + const chunkSize = 50; + const [failed, stalled] = await this.scripts.moveStalledJobsToWait(); - const jobPromises: Promise>[] = []; - for (let i = 0; i < failed.length; i++) { - jobPromises.push( - Job.fromId( - this as MinimalQueue, - failed[i], - ), - ); + stalled.forEach((jobId: string) => this.emit('stalled', jobId, 'active')); - if ((i + 1) % chunkSize === 0) { - this.notifyFailedJobs(await Promise.all(jobPromises)); - jobPromises.length = 0; - } - } + const jobPromises: Promise>[] = []; + for (let i = 0; i < failed.length; i++) { + jobPromises.push( + Job.fromId( + this as MinimalQueue, + failed[i], + ), + ); + if ((i + 1) % chunkSize === 0) { this.notifyFailedJobs(await Promise.all(jobPromises)); + jobPromises.length = 0; + } + } - span?.setAttributes({ - [TelemetryAttributes.WorkerId]: this.id, - [TelemetryAttributes.WorkerStalledJobs]: stalled, - }); - }, - ); + this.notifyFailedJobs(await Promise.all(jobPromises)); } private notifyFailedJobs(failedJobs: Job[]) { diff --git a/src/enums/index.ts b/src/enums/index.ts index 3cab38de6c..d6bae934f4 100644 --- a/src/enums/index.ts +++ b/src/enums/index.ts @@ -2,4 +2,3 @@ export * from './child-command'; export * from './error-code'; export * from './parent-command'; export * from './metrics-time'; -export * from './telemetry-attributes'; diff --git a/src/enums/telemetry-attributes.ts b/src/enums/telemetry-attributes.ts deleted file mode 100644 index 78c80ff658..0000000000 --- a/src/enums/telemetry-attributes.ts +++ /dev/null @@ -1,25 +0,0 @@ -export enum TelemetryAttributes { - QueueName = 'bullmq.queue.name', - BulkCount = 'bullmq.job.bulk.count', - BulkNames = 'bullmq.job.bulk.names', - JobName = 'bullmq.job.name', - JobId = 'bullmq.job.id', - JobKey = 'bullmq.job.key', - JobOptions = 'bullmq.job.options', - JobProgress = 'bullmq.job.progress', - QueueDrainDelay = 'bullmq.queue.drain.delay', - QueueGrace = 'bullmq.queue.grace', - QueueCleanLimit = 'bullmq.queue.clean.limit', - JobType = 'bullmq.job.type', - JobTimestamp = 'bullmq.job.timestamp', - QueueOptions = 'bullmq.queue.options', - QueueEventMaxLength = 'bullmq.queue.event.max.length', - WorkerOptions = 'bullmq.worker.options', - WorkerToken = 'bullmq.worker.token', - WorkerId = 'bullmq.worker.id', - WorkerRateLimit = 'bullmq.worker.rate.limit', - WorkerDoNotWaitActive = 'bullmq.worker.do.not.wait.active', - WorkerForceClose = 'bullmq.worker.force.close', - WorkerStalledJobs = 'bullmq.worker.stalled.jobs', - WorkerJobsToExtendLocks = 'bullmq.worker.jobs.to.extend.locks', -} diff --git a/src/interfaces/index.ts b/src/interfaces/index.ts index 0d8b4e96d2..65dfed0396 100644 --- a/src/interfaces/index.ts +++ b/src/interfaces/index.ts @@ -23,4 +23,3 @@ export * from './repeat-options'; export * from './sandboxed-job-processor'; export * from './sandboxed-job'; export * from './worker-options'; -export * from './telemetry'; diff --git a/src/interfaces/queue-options.ts b/src/interfaces/queue-options.ts index 5954df4617..c8f00bd78c 100644 --- a/src/interfaces/queue-options.ts +++ b/src/interfaces/queue-options.ts @@ -1,7 +1,6 @@ import { AdvancedRepeatOptions } from './advanced-options'; import { DefaultJobOptions } from './base-job-options'; import { ConnectionOptions } from './redis-options'; -import { Telemetry } from './telemetry'; export enum ClientType { blocking = 'blocking', @@ -32,11 +31,6 @@ export interface QueueBaseOptions { * @defaultValue false */ skipVersionCheck?: boolean; - - /** - * Telemetry client - */ - telemetry?: Telemetry; } /** @@ -61,11 +55,6 @@ export interface QueueOptions extends QueueBaseOptions { }; settings?: AdvancedRepeatOptions; - - /** - * Telemetry client - */ - telemetry?: Telemetry; } /** diff --git a/src/interfaces/telemetry.ts b/src/interfaces/telemetry.ts deleted file mode 100644 index 5c78f79460..0000000000 --- a/src/interfaces/telemetry.ts +++ /dev/null @@ -1,55 +0,0 @@ -export interface Telemetry { - tracer: Tracer; -} - -export interface Tracer { - startSpan(name: string): Span; -} - -export interface Span { - setAttribute(key: string, value: Attribute): Span; - setAttributes(attributes: Attributes): Span; - recordException(exception: Exception, time?: Time): void; - end(): void; -} - -export interface Attributes { - [attribute: string]: Attribute | undefined; -} - -export type Attribute = - | string - | number - | boolean - | null - | undefined - | (null | undefined | string | number | boolean)[]; - -export type Exception = string | ExceptionType; - -export type ExceptionType = CodeException | MessageException | NameException; - -interface CodeException { - code: string | number; - name?: string; - message?: string; - stack?: string; -} - -interface MessageException { - code?: string | number; - name?: string; - message: string; - stack?: string; -} - -interface NameException { - code?: string | number; - name: string; - message?: string; - stack?: string; -} - -export type Time = HighResolutionTime | number | Date; - -type HighResolutionTime = [number, number]; diff --git a/src/interfaces/worker-options.ts b/src/interfaces/worker-options.ts index 39ba799a16..77a204a23f 100644 --- a/src/interfaces/worker-options.ts +++ b/src/interfaces/worker-options.ts @@ -4,7 +4,6 @@ import { QueueBaseOptions } from './queue-options'; import { RateLimiterOptions } from './rate-limiter-options'; import { MetricsOptions } from './metrics-options'; import { KeepJobs } from './keep-jobs'; -import { Telemetry } from './telemetry'; /** * An async function that receives `Job`s and handles them. @@ -145,11 +144,6 @@ export interface WorkerOptions extends QueueBaseOptions { * @default false */ useWorkerThreads?: boolean; - - /** - * Telemetry client - */ - telemetry?: Telemetry; } export interface GetNextJobOptions { From 059d50bdadf043dd27ec0c49db2d653fa66f4ad6 Mon Sep 17 00:00:00 2001 From: fgozdz Date: Thu, 5 Sep 2024 12:22:02 +0200 Subject: [PATCH 09/26] fix(documentation): fix documentation formatting --- src/classes/queue-base.ts | 36 +- src/classes/queue.ts | 361 +++++++++++------ src/classes/worker.ts | 643 ++++++++++++++++++------------ src/enums/index.ts | 1 + src/enums/telemetry-attributes.ts | 25 ++ src/interfaces/index.ts | 1 + src/interfaces/queue-options.ts | 11 + src/interfaces/telemetry.ts | 55 +++ src/interfaces/worker-options.ts | 6 + 9 files changed, 760 insertions(+), 379 deletions(-) create mode 100644 src/enums/telemetry-attributes.ts create mode 100644 src/interfaces/telemetry.ts diff --git a/src/classes/queue-base.ts b/src/classes/queue-base.ts index 41097e966d..483324c2ae 100644 --- a/src/classes/queue-base.ts +++ b/src/classes/queue-base.ts @@ -1,5 +1,5 @@ import { EventEmitter } from 'events'; -import { QueueBaseOptions, RedisClient } from '../interfaces'; +import { QueueBaseOptions, RedisClient, Span, Tracer } from '../interfaces'; import { MinimalQueue } from '../types'; import { delay, @@ -11,6 +11,7 @@ import { RedisConnection } from './redis-connection'; import { Job } from './job'; import { KeysMap, QueueKeys } from './queue-keys'; import { Scripts } from './scripts'; +import { TelemetryAttributes } from '../enums'; /** * @class QueueBase @@ -30,6 +31,13 @@ export class QueueBase extends EventEmitter implements MinimalQueue { protected connection: RedisConnection; public readonly qualifiedName: string; + /** + * Instance of a telemetry client + * To use it create if statement in a method to observe with start and end of a span + * It will check if tracer is provided and if not it will continue as is + */ + private tracer: Tracer | undefined; + /** * * @param name - The name of the queue. @@ -76,6 +84,8 @@ export class QueueBase extends EventEmitter implements MinimalQueue { this.keys = queueKeys.getKeys(name); this.toKey = (type: string) => queueKeys.toKey(name, type); this.setScripts(); + + this.tracer = opts?.telemetry?.tracer; } /** @@ -175,4 +185,28 @@ export class QueueBase extends EventEmitter implements MinimalQueue { } } } + + protected trace( + getSpanName: () => string, + callback: (span?: Span) => Promise | T, + ) { + if (!this.tracer) { + return callback(); + } + + const span = this.tracer.startSpan(getSpanName()); + + span.setAttributes({ + [TelemetryAttributes.QueueName]: this.name, + }); + + try { + return callback(span); + } catch (err) { + span.recordException(err as Error); + throw err; + } finally { + span.end(); + } + } } diff --git a/src/classes/queue.ts b/src/classes/queue.ts index 25f796af1d..e074296339 100644 --- a/src/classes/queue.ts +++ b/src/classes/queue.ts @@ -12,6 +12,7 @@ import { Job } from './job'; import { QueueGetters } from './queue-getters'; import { Repeat } from './repeat'; import { RedisConnection } from './redis-connection'; +import { TelemetryAttributes } from '../enums'; export interface ObliterateOpts { /** @@ -220,38 +221,50 @@ export class Queue< data: DataType, opts?: JobsOptions, ): Promise> { - if (opts && opts.repeat) { - if (opts.repeat.endDate) { - if (+new Date(opts.repeat.endDate) < Date.now()) { - throw new Error('End date must be greater than current timestamp'); + return await this.trace>( + () => `${this.name}.${name} Queue.add`, + async span => { + if (opts && opts.repeat) { + if (opts.repeat.endDate) { + if (+new Date(opts.repeat.endDate) < Date.now()) { + throw new Error( + 'End date must be greater than current timestamp', + ); + } + } + + return (await this.repeat).updateRepeatableJob< + DataType, + ResultType, + NameType + >(name, data, { ...this.jobsOpts, ...opts }, { override: true }); + } else { + const jobId = opts?.jobId; + + if (jobId == '0' || jobId?.startsWith('0:')) { + throw new Error("JobId cannot be '0' or start with 0:"); + } + + const job = await this.Job.create( + this as MinimalQueue, + name, + data, + { + ...this.jobsOpts, + ...opts, + jobId, + }, + ); + this.emit('waiting', job); + + span?.setAttributes({ + [TelemetryAttributes.JobId]: job.id, + }); + + return job; } - } - - return (await this.repeat).updateRepeatableJob< - DataType, - ResultType, - NameType - >(name, data, { ...this.jobsOpts, ...opts }, { override: true }); - } else { - const jobId = opts?.jobId; - - if (jobId == '0' || jobId?.startsWith('0:')) { - throw new Error("JobId cannot be '0' or start with 0:"); - } - - const job = await this.Job.create( - this as MinimalQueue, - name, - data, - { - ...this.jobsOpts, - ...opts, - jobId, - }, - ); - this.emit('waiting', job); - return job; - } + }, + ); } /** @@ -261,20 +274,30 @@ export class Queue< * @param jobs - The array of jobs to add to the queue. Each job is defined by 3 * properties, 'name', 'data' and 'opts'. They follow the same signature as 'Queue.add'. */ - addBulk( + async addBulk( jobs: { name: NameType; data: DataType; opts?: BulkJobOptions }[], ): Promise[]> { - return this.Job.createBulk( - this as MinimalQueue, - jobs.map(job => ({ - name: job.name, - data: job.data, - opts: { - ...this.jobsOpts, - ...job.opts, - jobId: job.opts?.jobId, - }, - })), + return await this.trace[]>( + () => `${this.name} Queue.addBulk`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.BulkNames]: jobs.map(job => job.name), + [TelemetryAttributes.BulkCount]: jobs.length, + }); + + return await this.Job.createBulk( + this as MinimalQueue, + jobs.map(job => ({ + name: job.name, + data: job.data, + opts: { + ...this.jobsOpts, + ...job.opts, + jobId: job.opts?.jobId, + }, + })), + ); + }, ); } @@ -290,8 +313,14 @@ export class Queue< * and in that case it will add it there instead of the wait list. */ async pause(): Promise { - await this.scripts.pause(true); - this.emit('paused'); + await this.trace( + () => `${this.name} Queue.pause`, + async () => { + await this.scripts.pause(true); + + this.emit('paused'); + }, + ); } /** @@ -299,12 +328,18 @@ export class Queue< * */ async close(): Promise { - if (!this.closing) { - if (this._repeat) { - await this._repeat.close(); - } - } - return super.close(); + await this.trace( + () => `${this.name} Queue.close`, + async () => { + if (!this.closing) { + if (this._repeat) { + await this._repeat.close(); + } + } + + await super.close(); + }, + ); } /** * Resumes the processing of this queue globally. @@ -313,8 +348,14 @@ export class Queue< * queue. */ async resume(): Promise { - await this.scripts.pause(false); - this.emit('resumed'); + await this.trace( + () => `${this.name} Queue.resume`, + async () => { + await this.scripts.pause(false); + + this.emit('resumed'); + }, + ); } /** @@ -367,10 +408,15 @@ export class Queue< repeatOpts: RepeatOptions, jobId?: string, ): Promise { - const repeat = await this.repeat; - const removed = await repeat.removeRepeatable(name, repeatOpts, jobId); + return await this.trace( + () => `${this.name} ${name} Queue.removeRepeatable`, + async () => { + const repeat = await this.repeat; + const removed = await repeat.removeRepeatable(name, repeatOpts, jobId); - return !removed; + return !removed; + }, + ); } /** @@ -379,9 +425,14 @@ export class Queue< * @param id - identifier */ async removeDebounceKey(id: string): Promise { - const client = await this.client; + return await this.trace( + () => `${this.name} ${id} Queue.removeDebounceKey`, + async () => { + const client = await this.client; - return client.del(`${this.keys.de}:${id}`); + return await client.del(`${this.keys.de}:${id}`); + }, + ); } /** @@ -395,10 +446,19 @@ export class Queue< * @returns */ async removeRepeatableByKey(key: string): Promise { - const repeat = await this.repeat; - const removed = await repeat.removeRepeatableByKey(key); + return await this.trace( + () => `${this.name} ${key} Queue.removeRepeatableByKey`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.JobKey]: key, + }); - return !removed; + const repeat = await this.repeat; + const removed = await repeat.removeRepeatableByKey(key); + + return !removed; + }, + ); } /** @@ -410,8 +470,20 @@ export class Queue< * @returns 1 if it managed to remove the job or 0 if the job or * any of its dependencies were locked. */ - remove(jobId: string, { removeChildren = true } = {}): Promise { - return this.scripts.remove(jobId, removeChildren); + async remove(jobId: string, { removeChildren = true } = {}): Promise { + return await this.trace( + () => `${this.name} ${jobId} Queue.remove`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.JobId]: jobId, + [TelemetryAttributes.JobOptions]: JSON.stringify({ + removeChildren, + }), + }); + + return await this.scripts.remove(jobId, removeChildren); + }, + ); } /** @@ -424,7 +496,17 @@ export class Queue< jobId: string, progress: number | object, ): Promise { - return this.scripts.updateProgress(jobId, progress); + await this.trace( + () => `${this.name} Queue.updateJobProgress`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.JobId]: jobId, + [TelemetryAttributes.JobProgress]: JSON.stringify(progress), + }); + + await this.scripts.updateProgress(jobId, progress); + }, + ); } /** @@ -451,8 +533,17 @@ export class Queue< * @param delayed - Pass true if it should also clean the * delayed jobs. */ - drain(delayed = false): Promise { - return this.scripts.drain(delayed); + async drain(delayed = false): Promise { + await this.trace( + () => `${this.name} Queue.drain`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.QueueDrainDelay]: delayed, + }); + + await this.scripts.drain(delayed); + }, + ); } /** @@ -477,28 +568,42 @@ export class Queue< | 'delayed' | 'failed' = 'completed', ): Promise { - const maxCount = limit || Infinity; - const maxCountPerCall = Math.min(10000, maxCount); - const timestamp = Date.now() - grace; - let deletedCount = 0; - const deletedJobsIds: string[] = []; - - while (deletedCount < maxCount) { - const jobsIds = await this.scripts.cleanJobsInSet( - type, - timestamp, - maxCountPerCall, - ); - - this.emit('cleaned', jobsIds, type); - deletedCount += jobsIds.length; - deletedJobsIds.push(...jobsIds); - - if (jobsIds.length < maxCountPerCall) { - break; - } - } - return deletedJobsIds; + return await this.trace( + () => `${this.name} Queue.clean`, + async span => { + const maxCount = limit || Infinity; + const maxCountPerCall = Math.min(10000, maxCount); + const timestamp = Date.now() - grace; + let deletedCount = 0; + const deletedJobsIds: string[] = []; + + while (deletedCount < maxCount) { + const jobsIds = await this.scripts.cleanJobsInSet( + type, + timestamp, + maxCountPerCall, + ); + + this.emit('cleaned', jobsIds, type); + deletedCount += jobsIds.length; + deletedJobsIds.push(...jobsIds); + + if (jobsIds.length < maxCountPerCall) { + break; + } + } + + span?.setAttributes({ + [TelemetryAttributes.QueueGrace]: grace, + [TelemetryAttributes.JobType]: type, + [TelemetryAttributes.QueueCleanLimit]: maxCount, + [TelemetryAttributes.JobTimestamp]: timestamp, + [TelemetryAttributes.JobId]: deletedJobsIds, + }); + + return deletedJobsIds; + }, + ); } /** @@ -513,16 +618,21 @@ export class Queue< * @param opts - Obliterate options. */ async obliterate(opts?: ObliterateOpts): Promise { - await this.pause(); - - let cursor = 0; - do { - cursor = await this.scripts.obliterate({ - force: false, - count: 1000, - ...opts, - }); - } while (cursor); + await this.trace( + () => `${this.name} Queue.obliterate`, + async () => { + await this.pause(); + + let cursor = 0; + do { + cursor = await this.scripts.obliterate({ + force: false, + count: 1000, + ...opts, + }); + } while (cursor); + }, + ); } /** @@ -538,14 +648,23 @@ export class Queue< async retryJobs( opts: { count?: number; state?: FinishedStatus; timestamp?: number } = {}, ): Promise { - let cursor = 0; - do { - cursor = await this.scripts.retryJobs( - opts.state, - opts.count, - opts.timestamp, - ); - } while (cursor); + await this.trace( + () => `${this.name} Queue.retryJobs`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.QueueOptions]: JSON.stringify(opts), + }); + + let cursor = 0; + do { + cursor = await this.scripts.retryJobs( + opts.state, + opts.count, + opts.timestamp, + ); + } while (cursor); + }, + ); } /** @@ -557,10 +676,19 @@ export class Queue< * @returns */ async promoteJobs(opts: { count?: number } = {}): Promise { - let cursor = 0; - do { - cursor = await this.scripts.promoteJobs(opts.count); - } while (cursor); + await this.trace( + () => `${this.name} Queue.promoteJobs`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.QueueOptions]: JSON.stringify(opts), + }); + + let cursor = 0; + do { + cursor = await this.scripts.promoteJobs(opts.count); + } while (cursor); + }, + ); } /** @@ -569,8 +697,17 @@ export class Queue< * @param maxLength - */ async trimEvents(maxLength: number): Promise { - const client = await this.client; - return client.xtrim(this.keys.events, 'MAXLEN', '~', maxLength); + return await this.trace( + () => `${this.name} Queue.trimEvents`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.QueueEventMaxLength]: maxLength, + }); + + const client = await this.client; + return await client.xtrim(this.keys.events, 'MAXLEN', '~', maxLength); + }, + ); } /** diff --git a/src/classes/worker.ts b/src/classes/worker.ts index 881a25ef9a..945b736d19 100644 --- a/src/classes/worker.ts +++ b/src/classes/worker.ts @@ -35,6 +35,7 @@ import { RATE_LIMIT_ERROR, WaitingChildrenError, } from './errors'; +import { TelemetryAttributes } from '../enums'; // 10 seconds is the maximum time a BRPOPLPUSH can block. const maximumBlockTimeout = 10; @@ -402,116 +403,126 @@ export class Worker< } async run() { - if (!this.processFn) { - throw new Error('No process function is defined.'); - } - - if (this.running) { - throw new Error('Worker is already running.'); - } - - try { - this.running = true; - - if (this.closing) { - return; - } - - await this.startStalledCheckTimer(); - - const jobsInProgress = new Set<{ job: Job; ts: number }>(); - this.startLockExtenderTimer(jobsInProgress); - - const asyncFifoQueue = (this.asyncFifoQueue = - new AsyncFifoQueue>()); - - let tokenPostfix = 0; - - const client = await this.client; - const bclient = await this.blockingConnection.client; + await this.trace( + () => `${this.name} ${this.id} Worker.run`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerOptions]: JSON.stringify(this.opts), + }); - /** - * This is the main loop in BullMQ. Its goals are to fetch jobs from the queue - * as efficiently as possible, providing concurrency and minimal unnecessary calls - * to Redis. - */ - while (!this.closing) { - let numTotal = asyncFifoQueue.numTotal(); + if (!this.processFn) { + throw new Error('No process function is defined.'); + } - /** - * This inner loop tries to fetch jobs concurrently, but if we are waiting for a job - * to arrive at the queue we should not try to fetch more jobs (as it would be pointless) - */ - while ( - !this.waiting && - numTotal < this.opts.concurrency && - (!this.limitUntil || numTotal == 0) - ) { - const token = `${this.id}:${tokenPostfix++}`; - - const fetchedJob = this.retryIfFailed>( - () => this._getNextJob(client, bclient, token, { block: true }), - this.opts.runRetryDelay, - ); - asyncFifoQueue.add(fetchedJob); + if (this.running) { + throw new Error('Worker is already running.'); + } - numTotal = asyncFifoQueue.numTotal(); + try { + this.running = true; - if (this.waiting && numTotal > 1) { - // We are waiting for jobs but we have others that we could start processing already - break; + if (this.closing) { + return; } - // We await here so that we fetch jobs in sequence, this is important to avoid unnecessary calls - // to Redis in high concurrency scenarios. - const job = await fetchedJob; + await this.startStalledCheckTimer(); - // No more jobs waiting but we have others that could start processing already - if (!job && numTotal > 1) { - break; - } + const jobsInProgress = new Set<{ job: Job; ts: number }>(); + this.startLockExtenderTimer(jobsInProgress); - // If there are potential jobs to be processed and blockUntil is set, we should exit to avoid waiting - // for processing this job. - if (this.blockUntil) { - break; - } - } + const asyncFifoQueue = (this.asyncFifoQueue = + new AsyncFifoQueue>()); + + let tokenPostfix = 0; + + const client = await this.client; + const bclient = await this.blockingConnection.client; + + /** + * This is the main loop in BullMQ. Its goals are to fetch jobs from the queue + * as efficiently as possible, providing concurrency and minimal unnecessary calls + * to Redis. + */ + while (!this.closing) { + let numTotal = asyncFifoQueue.numTotal(); + + /** + * This inner loop tries to fetch jobs concurrently, but if we are waiting for a job + * to arrive at the queue we should not try to fetch more jobs (as it would be pointless) + */ + while ( + !this.waiting && + numTotal < this.opts.concurrency && + (!this.limitUntil || numTotal == 0) + ) { + const token = `${this.id}:${tokenPostfix++}`; + + const fetchedJob = this.retryIfFailed>( + () => this._getNextJob(client, bclient, token, { block: true }), + this.opts.runRetryDelay, + ); + asyncFifoQueue.add(fetchedJob); + + numTotal = asyncFifoQueue.numTotal(); + + if (this.waiting && numTotal > 1) { + // We are waiting for jobs but we have others that we could start processing already + break; + } + + // We await here so that we fetch jobs in sequence, this is important to avoid unnecessary calls + // to Redis in high concurrency scenarios. + const job = await fetchedJob; + + // No more jobs waiting but we have others that could start processing already + if (!job && numTotal > 1) { + break; + } + + // If there are potential jobs to be processed and blockUntil is set, we should exit to avoid waiting + // for processing this job. + if (this.blockUntil) { + break; + } + } - // Since there can be undefined jobs in the queue (when a job fails or queue is empty) - // we iterate until we find a job. - let job: Job | void; - do { - job = await asyncFifoQueue.fetch(); - } while (!job && asyncFifoQueue.numQueued() > 0); - - if (job) { - const token = job.token; - asyncFifoQueue.add( - this.retryIfFailed>( - () => - this.processJob( - >job, - token, - () => asyncFifoQueue.numTotal() <= this.opts.concurrency, - jobsInProgress, + // Since there can be undefined jobs in the queue (when a job fails or queue is empty) + // we iterate until we find a job. + let job: Job | void; + do { + job = await asyncFifoQueue.fetch(); + } while (!job && asyncFifoQueue.numQueued() > 0); + + if (job) { + const token = job.token; + asyncFifoQueue.add( + this.retryIfFailed>( + () => + this.processJob( + >job, + token, + () => asyncFifoQueue.numTotal() <= this.opts.concurrency, + jobsInProgress, + ), + this.opts.runRetryDelay, ), - this.opts.runRetryDelay, - ), - ); - } - } + ); + } + } - this.running = false; - return asyncFifoQueue.waitAll(); - } catch (error) { - this.running = false; - throw error; - } + this.running = false; + return await asyncFifoQueue.waitAll(); + } catch (error) { + this.running = false; + throw error; + } + }, + ); } /** @@ -520,11 +531,25 @@ export class Worker< * @returns a Job or undefined if no job was available in the queue. */ async getNextJob(token: string, { block = true }: GetNextJobOptions = {}) { - return this._getNextJob( - await this.client, - await this.blockingConnection.client, - token, - { block }, + return await this.trace>( + () => `${this.name} ${this.id} Worker.getNextJob`, + async span => { + const nextJob = await this._getNextJob( + await this.client, + await this.blockingConnection.client, + token, + { block }, + ); + + span?.setAttributes({ + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerToken]: token, + [TelemetryAttributes.WorkerOptions]: JSON.stringify({ block }), + [TelemetryAttributes.JobId]: nextJob?.id, + }); + + return nextJob; + }, ); } @@ -585,13 +610,23 @@ export class Worker< * @param expireTimeMs - expire time in ms of this rate limit. */ async rateLimit(expireTimeMs: number): Promise { - await this.client.then(client => - client.set( - this.keys.limiter, - Number.MAX_SAFE_INTEGER, - 'PX', - expireTimeMs, - ), + await this.trace( + () => `${this.name} ${this.id} Worker.rateLimit`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerRateLimit]: expireTimeMs, + }); + + await this.client.then(client => + client.set( + this.keys.limiter, + Number.MAX_SAFE_INTEGER, + 'PX', + expireTimeMs, + ), + ); + }, ); } @@ -745,72 +780,83 @@ will never work with more accuracy than 1ms. */ fetchNextCallback = () => true, jobsInProgress: Set<{ job: Job; ts: number }>, ): Promise> { - if (!job || this.closing || this.paused) { - return; - } + return await this.trace>( + () => `${this.name} ${this.id} Worker.processJob`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerToken]: token, + [TelemetryAttributes.JobId]: job.id, + }); - const handleCompleted = async (result: ResultType) => { - if (!this.connection.closing) { - const completed = await job.moveToCompleted( - result, - token, - fetchNextCallback() && !(this.closing || this.paused), - ); - this.emit('completed', job, result, 'active'); - const [jobData, jobId, limitUntil, delayUntil] = completed || []; - this.updateDelays(limitUntil, delayUntil); + if (!job || this.closing || this.paused) { + return; + } - return this.nextJobFromJobData(jobData, jobId, token); - } - }; + const handleCompleted = async (result: ResultType) => { + if (!this.connection.closing) { + const completed = await job.moveToCompleted( + result, + token, + fetchNextCallback() && !(this.closing || this.paused), + ); + this.emit('completed', job, result, 'active'); + const [jobData, jobId, limitUntil, delayUntil] = completed || []; + this.updateDelays(limitUntil, delayUntil); - const handleFailed = async (err: Error) => { - if (!this.connection.closing) { - try { - if (err.message == RATE_LIMIT_ERROR) { - this.limitUntil = await this.moveLimitedBackToWait(job, token); - return; + return this.nextJobFromJobData(jobData, jobId, token); } + }; - if ( - err instanceof DelayedError || - err.name == 'DelayedError' || - err instanceof WaitingChildrenError || - err.name == 'WaitingChildrenError' - ) { - return; + const handleFailed = async (err: Error) => { + if (!this.connection.closing) { + try { + if (err.message == RATE_LIMIT_ERROR) { + this.limitUntil = await this.moveLimitedBackToWait(job, token); + return; + } + + if ( + err instanceof DelayedError || + err.name == 'DelayedError' || + err instanceof WaitingChildrenError || + err.name == 'WaitingChildrenError' + ) { + return; + } + + const result = await job.moveToFailed(err, token, true); + this.emit('failed', job, err, 'active'); + + if (result) { + const [jobData, jobId, limitUntil, delayUntil] = result; + this.updateDelays(limitUntil, delayUntil); + return this.nextJobFromJobData(jobData, jobId, token); + } + } catch (err) { + this.emit('error', err); + // It probably means that the job has lost the lock before completion + // A worker will (or already has) moved the job back + // to the waiting list (as stalled) + } } + }; - const result = await job.moveToFailed(err, token, true); - this.emit('failed', job, err, 'active'); + this.emit('active', job, 'waiting'); - if (result) { - const [jobData, jobId, limitUntil, delayUntil] = result; - this.updateDelays(limitUntil, delayUntil); - return this.nextJobFromJobData(jobData, jobId, token); - } + const inProgressItem = { job, ts: Date.now() }; + + try { + jobsInProgress.add(inProgressItem); + const result = await this.callProcessJob(job, token); + return await handleCompleted(result); } catch (err) { - this.emit('error', err); - // It probably means that the job has lost the lock before completion - // A worker will (or already has) moved the job back - // to the waiting list (as stalled) + return handleFailed(err); + } finally { + jobsInProgress.delete(inProgressItem); } - } - }; - - this.emit('active', job, 'waiting'); - - const inProgressItem = { job, ts: Date.now() }; - - try { - jobsInProgress.add(inProgressItem); - const result = await this.callProcessJob(job, token); - return await handleCompleted(result); - } catch (err) { - return handleFailed(err); - } finally { - jobsInProgress.delete(inProgressItem); - } + }, + ); } /** @@ -818,17 +864,27 @@ will never work with more accuracy than 1ms. */ * Pauses the processing of this queue only for this worker. */ async pause(doNotWaitActive?: boolean): Promise { - if (!this.paused) { - this.paused = new Promise(resolve => { - this.resumeWorker = function () { - resolve(); - this.paused = null; // Allow pause to be checked externally for paused state. - this.resumeWorker = null; - }; - }); - await (!doNotWaitActive && this.whenCurrentJobsFinished()); - this.emit('paused'); - } + await this.trace( + () => `${this.name} ${this.id} Worker.pause`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerDoNotWaitActive]: doNotWaitActive, + }); + + if (!this.paused) { + this.paused = new Promise(resolve => { + this.resumeWorker = function () { + resolve(); + this.paused = null; // Allow pause to be checked externally for paused state. + this.resumeWorker = null; + }; + }); + await (!doNotWaitActive && this.whenCurrentJobsFinished()); + this.emit('paused'); + } + }, + ); } /** @@ -836,10 +892,19 @@ will never work with more accuracy than 1ms. */ * Resumes processing of this worker (if paused). */ resume(): void { - if (this.resumeWorker) { - this.resumeWorker(); - this.emit('resumed'); - } + this.trace( + () => `${this.name} ${this.id} Worker.resume`, + span => { + span?.setAttributes({ + [TelemetryAttributes.WorkerId]: this.id, + }); + + if (this.resumeWorker) { + this.resumeWorker(); + this.emit('resumed'); + } + }, + ); } /** @@ -873,42 +938,53 @@ will never work with more accuracy than 1ms. */ * * @returns Promise that resolves when the worker has been closed. */ - close(force = false): Promise { - if (this.closing) { - return this.closing; - } - this.closing = (async () => { - this.emit('closing', 'closing queue'); - this.abortDelayController?.abort(); - - this.resume(); - - // Define the async cleanup functions - const asyncCleanups = [ - () => { - return force || this.whenCurrentJobsFinished(false); - }, - () => this.childPool?.clean(), - () => this.blockingConnection.close(force), - () => this.connection.close(force), - ]; - - // Run cleanup functions sequentially and make sure all are run despite any errors - for (const cleanup of asyncCleanups) { - try { - await cleanup(); - } catch (err) { - this.emit('error', err); + async close(force = false): Promise { + await this.trace( + () => `${this.name} ${this.id} Worker.close`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerForceClose]: force, + }); + + if (this.closing) { + return this.closing; } - } + this.closing = (async () => { + this.emit('closing', 'closing queue'); + this.abortDelayController?.abort(); + + this.resume(); + + // Define the async cleanup functions + const asyncCleanups = [ + () => { + return force || this.whenCurrentJobsFinished(false); + }, + () => this.childPool?.clean(), + () => this.blockingConnection.close(force), + () => this.connection.close(force), + ]; + + // Run cleanup functions sequentially and make sure all are run despite any errors + for (const cleanup of asyncCleanups) { + try { + await cleanup(); + } catch (err) { + this.emit('error', err); + } + } - clearTimeout(this.extendLocksTimer); - clearTimeout(this.stalledCheckTimer); + clearTimeout(this.extendLocksTimer); + clearTimeout(this.stalledCheckTimer); + + this.closed = true; + this.emit('closed'); + })(); - this.closed = true; - this.emit('closed'); - })(); - return this.closing; + return await this.closing; + }, + ); } /** @@ -924,20 +1000,31 @@ will never work with more accuracy than 1ms. */ * @see {@link https://docs.bullmq.io/patterns/manually-fetching-jobs} */ async startStalledCheckTimer(): Promise { - if (!this.opts.skipStalledCheck) { - clearTimeout(this.stalledCheckTimer); + await this.trace( + () => `${this.name} ${this.id} Worker.startStalledCheckTimer`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.WorkerId]: this.id, + }); - if (!this.closing) { - try { - await this.checkConnectionError(() => this.moveStalledJobsToWait()); - this.stalledCheckTimer = setTimeout(async () => { - await this.startStalledCheckTimer(); - }, this.opts.stalledInterval); - } catch (err) { - this.emit('error', err); + if (!this.opts.skipStalledCheck) { + clearTimeout(this.stalledCheckTimer); + + if (!this.closing) { + try { + await this.checkConnectionError(() => + this.moveStalledJobsToWait(), + ); + this.stalledCheckTimer = setTimeout(async () => { + await this.startStalledCheckTimer(); + }, this.opts.stalledInterval); + } catch (err) { + this.emit('error', err); + } + } } - } - } + }, + ); } private startLockExtenderTimer( @@ -1019,54 +1106,78 @@ will never work with more accuracy than 1ms. */ } protected async extendLocks(jobs: Job[]) { - try { - const pipeline = (await this.client).pipeline(); - for (const job of jobs) { - await this.scripts.extendLock( - job.id, - job.token, - this.opts.lockDuration, - pipeline, - ); - } - const result = (await pipeline.exec()) as [Error, string][]; - - for (const [err, jobId] of result) { - if (err) { - // TODO: signal process function that the job has been lost. - this.emit( - 'error', - new Error(`could not renew lock for job ${jobId}`), - ); + await this.trace( + () => `${this.name} ${this.id} Worker.extendLocks`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerJobsToExtendLocks]: jobs.map( + job => job.id, + ), + }); + + try { + const pipeline = (await this.client).pipeline(); + for (const job of jobs) { + await this.scripts.extendLock( + job.id, + job.token, + this.opts.lockDuration, + pipeline, + ); + } + const result = (await pipeline.exec()) as [Error, string][]; + + for (const [err, jobId] of result) { + if (err) { + // TODO: signal process function that the job has been lost. + this.emit( + 'error', + new Error(`could not renew lock for job ${jobId}`), + ); + } + } + } catch (err) { + this.emit('error', err); } - } - } catch (err) { - this.emit('error', err); - } + }, + ); } private async moveStalledJobsToWait() { - const chunkSize = 50; - const [failed, stalled] = await this.scripts.moveStalledJobsToWait(); + await this.trace( + () => `${this.name} ${this.id} Worker.moveStalledJobsToWait`, + async span => { + const chunkSize = 50; + const [failed, stalled] = await this.scripts.moveStalledJobsToWait(); + + stalled.forEach((jobId: string) => + this.emit('stalled', jobId, 'active'), + ); - stalled.forEach((jobId: string) => this.emit('stalled', jobId, 'active')); + const jobPromises: Promise>[] = []; + for (let i = 0; i < failed.length; i++) { + jobPromises.push( + Job.fromId( + this as MinimalQueue, + failed[i], + ), + ); - const jobPromises: Promise>[] = []; - for (let i = 0; i < failed.length; i++) { - jobPromises.push( - Job.fromId( - this as MinimalQueue, - failed[i], - ), - ); + if ((i + 1) % chunkSize === 0) { + this.notifyFailedJobs(await Promise.all(jobPromises)); + jobPromises.length = 0; + } + } - if ((i + 1) % chunkSize === 0) { this.notifyFailedJobs(await Promise.all(jobPromises)); - jobPromises.length = 0; - } - } - this.notifyFailedJobs(await Promise.all(jobPromises)); + span?.setAttributes({ + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerStalledJobs]: stalled, + }); + }, + ); } private notifyFailedJobs(failedJobs: Job[]) { diff --git a/src/enums/index.ts b/src/enums/index.ts index d6bae934f4..3cab38de6c 100644 --- a/src/enums/index.ts +++ b/src/enums/index.ts @@ -2,3 +2,4 @@ export * from './child-command'; export * from './error-code'; export * from './parent-command'; export * from './metrics-time'; +export * from './telemetry-attributes'; diff --git a/src/enums/telemetry-attributes.ts b/src/enums/telemetry-attributes.ts new file mode 100644 index 0000000000..78c80ff658 --- /dev/null +++ b/src/enums/telemetry-attributes.ts @@ -0,0 +1,25 @@ +export enum TelemetryAttributes { + QueueName = 'bullmq.queue.name', + BulkCount = 'bullmq.job.bulk.count', + BulkNames = 'bullmq.job.bulk.names', + JobName = 'bullmq.job.name', + JobId = 'bullmq.job.id', + JobKey = 'bullmq.job.key', + JobOptions = 'bullmq.job.options', + JobProgress = 'bullmq.job.progress', + QueueDrainDelay = 'bullmq.queue.drain.delay', + QueueGrace = 'bullmq.queue.grace', + QueueCleanLimit = 'bullmq.queue.clean.limit', + JobType = 'bullmq.job.type', + JobTimestamp = 'bullmq.job.timestamp', + QueueOptions = 'bullmq.queue.options', + QueueEventMaxLength = 'bullmq.queue.event.max.length', + WorkerOptions = 'bullmq.worker.options', + WorkerToken = 'bullmq.worker.token', + WorkerId = 'bullmq.worker.id', + WorkerRateLimit = 'bullmq.worker.rate.limit', + WorkerDoNotWaitActive = 'bullmq.worker.do.not.wait.active', + WorkerForceClose = 'bullmq.worker.force.close', + WorkerStalledJobs = 'bullmq.worker.stalled.jobs', + WorkerJobsToExtendLocks = 'bullmq.worker.jobs.to.extend.locks', +} diff --git a/src/interfaces/index.ts b/src/interfaces/index.ts index 65dfed0396..0d8b4e96d2 100644 --- a/src/interfaces/index.ts +++ b/src/interfaces/index.ts @@ -23,3 +23,4 @@ export * from './repeat-options'; export * from './sandboxed-job-processor'; export * from './sandboxed-job'; export * from './worker-options'; +export * from './telemetry'; diff --git a/src/interfaces/queue-options.ts b/src/interfaces/queue-options.ts index c8f00bd78c..5954df4617 100644 --- a/src/interfaces/queue-options.ts +++ b/src/interfaces/queue-options.ts @@ -1,6 +1,7 @@ import { AdvancedRepeatOptions } from './advanced-options'; import { DefaultJobOptions } from './base-job-options'; import { ConnectionOptions } from './redis-options'; +import { Telemetry } from './telemetry'; export enum ClientType { blocking = 'blocking', @@ -31,6 +32,11 @@ export interface QueueBaseOptions { * @defaultValue false */ skipVersionCheck?: boolean; + + /** + * Telemetry client + */ + telemetry?: Telemetry; } /** @@ -55,6 +61,11 @@ export interface QueueOptions extends QueueBaseOptions { }; settings?: AdvancedRepeatOptions; + + /** + * Telemetry client + */ + telemetry?: Telemetry; } /** diff --git a/src/interfaces/telemetry.ts b/src/interfaces/telemetry.ts new file mode 100644 index 0000000000..5c78f79460 --- /dev/null +++ b/src/interfaces/telemetry.ts @@ -0,0 +1,55 @@ +export interface Telemetry { + tracer: Tracer; +} + +export interface Tracer { + startSpan(name: string): Span; +} + +export interface Span { + setAttribute(key: string, value: Attribute): Span; + setAttributes(attributes: Attributes): Span; + recordException(exception: Exception, time?: Time): void; + end(): void; +} + +export interface Attributes { + [attribute: string]: Attribute | undefined; +} + +export type Attribute = + | string + | number + | boolean + | null + | undefined + | (null | undefined | string | number | boolean)[]; + +export type Exception = string | ExceptionType; + +export type ExceptionType = CodeException | MessageException | NameException; + +interface CodeException { + code: string | number; + name?: string; + message?: string; + stack?: string; +} + +interface MessageException { + code?: string | number; + name?: string; + message: string; + stack?: string; +} + +interface NameException { + code?: string | number; + name: string; + message?: string; + stack?: string; +} + +export type Time = HighResolutionTime | number | Date; + +type HighResolutionTime = [number, number]; diff --git a/src/interfaces/worker-options.ts b/src/interfaces/worker-options.ts index 77a204a23f..39ba799a16 100644 --- a/src/interfaces/worker-options.ts +++ b/src/interfaces/worker-options.ts @@ -4,6 +4,7 @@ import { QueueBaseOptions } from './queue-options'; import { RateLimiterOptions } from './rate-limiter-options'; import { MetricsOptions } from './metrics-options'; import { KeepJobs } from './keep-jobs'; +import { Telemetry } from './telemetry'; /** * An async function that receives `Job`s and handles them. @@ -144,6 +145,11 @@ export interface WorkerOptions extends QueueBaseOptions { * @default false */ useWorkerThreads?: boolean; + + /** + * Telemetry client + */ + telemetry?: Telemetry; } export interface GetNextJobOptions { From f70cc8a1983498e79cf20adb4f0ab517a83de478 Mon Sep 17 00:00:00 2001 From: fgozdz Date: Fri, 6 Sep 2024 01:29:54 +0200 Subject: [PATCH 10/26] feat(queue-base): add context manager for telemetry --- src/classes/queue-base.ts | 29 ++++++++++++++++++++++++++--- src/interfaces/telemetry.ts | 23 ++++++++++++++++++++++- 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/src/classes/queue-base.ts b/src/classes/queue-base.ts index 483324c2ae..cad1c868a9 100644 --- a/src/classes/queue-base.ts +++ b/src/classes/queue-base.ts @@ -1,5 +1,12 @@ import { EventEmitter } from 'events'; -import { QueueBaseOptions, RedisClient, Span, Tracer } from '../interfaces'; +import { + QueueBaseOptions, + RedisClient, + Span, + Tracer, + SetSpan, + ContextManager, +} from '../interfaces'; import { MinimalQueue } from '../types'; import { delay, @@ -37,6 +44,8 @@ export class QueueBase extends EventEmitter implements MinimalQueue { * It will check if tracer is provided and if not it will continue as is */ private tracer: Tracer | undefined; + private setSpan: SetSpan | undefined; + private contextManager: ContextManager | undefined; /** * @@ -85,7 +94,11 @@ export class QueueBase extends EventEmitter implements MinimalQueue { this.toKey = (type: string) => queueKeys.toKey(name, type); this.setScripts(); - this.tracer = opts?.telemetry?.tracer; + if (opts?.telemetry) { + this.tracer = opts.telemetry.trace.getTracer(opts.telemetry.tracerName); + this.setSpan = opts.telemetry.trace.setSpan; + this.contextManager = opts.telemetry.contextManager; + } } /** @@ -186,6 +199,13 @@ export class QueueBase extends EventEmitter implements MinimalQueue { } } + /** + * Wraps the code with telemetry and provides span for configuration. + * + * @param getSpanName - name of the span + * @param callback - code to wrap with telemetry + * @returns + */ protected trace( getSpanName: () => string, callback: (span?: Span) => Promise | T, @@ -201,7 +221,10 @@ export class QueueBase extends EventEmitter implements MinimalQueue { }); try { - return callback(span); + return this.contextManager.with( + this.setSpan(this.contextManager.active(), span), + () => callback(span), + ); } catch (err) { span.recordException(err as Error); throw err; diff --git a/src/interfaces/telemetry.ts b/src/interfaces/telemetry.ts index 5c78f79460..297db9f4a8 100644 --- a/src/interfaces/telemetry.ts +++ b/src/interfaces/telemetry.ts @@ -1,5 +1,26 @@ export interface Telemetry { - tracer: Tracer; + trace: Trace; + contextManager: ContextManager; + tracerName: string; +} + +export interface ContextManager { + with any>( + context: Context, + fn: A, + ): ReturnType; + active(): Context; +} + +export interface Trace { + getTracer(name: string, version?: string): Tracer; + setSpan: SetSpan; +} + +export type SetSpan = (context: Context, span: Span) => Context; + +export interface Context { + [key: string]: Function; } export interface Tracer { From 8f38087be95390231f8e28f442534df4b5749659 Mon Sep 17 00:00:00 2001 From: fgozdz Date: Tue, 10 Sep 2024 09:56:12 +0200 Subject: [PATCH 11/26] feat(worker, queue): add spankind and distributed context propagation --- src/classes/queue-base.ts | 34 ++++++++++++++++++++++++---- src/classes/queue.ts | 45 ++++++++++++++++++++++++++++--------- src/classes/worker.ts | 18 +++++++++++++-- src/interfaces/telemetry.ts | 34 +++++++++++++++++++++++++++- 4 files changed, 113 insertions(+), 18 deletions(-) diff --git a/src/classes/queue-base.ts b/src/classes/queue-base.ts index cad1c868a9..2368a9727b 100644 --- a/src/classes/queue-base.ts +++ b/src/classes/queue-base.ts @@ -6,6 +6,8 @@ import { Tracer, SetSpan, ContextManager, + SpanKind, + Propagation, } from '../interfaces'; import { MinimalQueue } from '../types'; import { @@ -19,6 +21,7 @@ import { Job } from './job'; import { KeysMap, QueueKeys } from './queue-keys'; import { Scripts } from './scripts'; import { TelemetryAttributes } from '../enums'; +import { Context } from 'vm'; /** * @class QueueBase @@ -45,7 +48,8 @@ export class QueueBase extends EventEmitter implements MinimalQueue { */ private tracer: Tracer | undefined; private setSpan: SetSpan | undefined; - private contextManager: ContextManager | undefined; + protected contextManager: ContextManager | undefined; + protected propagation: Propagation | undefined; /** * @@ -98,6 +102,7 @@ export class QueueBase extends EventEmitter implements MinimalQueue { this.tracer = opts.telemetry.trace.getTracer(opts.telemetry.tracerName); this.setSpan = opts.telemetry.trace.setSpan; this.contextManager = opts.telemetry.contextManager; + this.propagation = opts.telemetry.propagation; } } @@ -202,28 +207,49 @@ export class QueueBase extends EventEmitter implements MinimalQueue { /** * Wraps the code with telemetry and provides span for configuration. * + * @param spanType - type of the span: Producer, Consumer, Internal * @param getSpanName - name of the span * @param callback - code to wrap with telemetry * @returns */ protected trace( + getSpanType: () => SpanKind, getSpanName: () => string, - callback: (span?: Span) => Promise | T, + callback: ( + span?: Span, + telemetryHeaders?: Record, + ) => Promise | T, + activeTelemetryHeaders?: Record, ) { if (!this.tracer) { return callback(); } - const span = this.tracer.startSpan(getSpanName()); + const span = this.tracer.startSpan(getSpanName(), { + kind: getSpanType(), + }); span.setAttributes({ [TelemetryAttributes.QueueName]: this.name, }); try { + if (activeTelemetryHeaders) { + const activeContext = this.propagation.extract( + this.contextManager.active(), + activeTelemetryHeaders, + ); + + return this.contextManager.with(activeContext, () => callback(span)); + } + + const telemetryHeaders: Record = {}; + + this.propagation.inject(this.contextManager.active(), telemetryHeaders); + return this.contextManager.with( this.setSpan(this.contextManager.active(), span), - () => callback(span), + () => callback(span, telemetryHeaders), ); } catch (err) { span.recordException(err as Error); diff --git a/src/classes/queue.ts b/src/classes/queue.ts index e074296339..6cb860ca60 100644 --- a/src/classes/queue.ts +++ b/src/classes/queue.ts @@ -221,9 +221,14 @@ export class Queue< data: DataType, opts?: JobsOptions, ): Promise> { - return await this.trace>( + return this.trace>( + () => 3, () => `${this.name}.${name} Queue.add`, - async span => { + async (span, telemetryHeaders) => { + if (telemetryHeaders) { + data = { ...data, telemetryHeaders }; + } + if (opts && opts.repeat) { if (opts.repeat.endDate) { if (+new Date(opts.repeat.endDate) < Date.now()) { @@ -277,9 +282,10 @@ export class Queue< async addBulk( jobs: { name: NameType; data: DataType; opts?: BulkJobOptions }[], ): Promise[]> { - return await this.trace[]>( + return this.trace[]>( + () => 3, () => `${this.name} Queue.addBulk`, - async span => { + async (span, telemetryHeaders) => { span?.setAttributes({ [TelemetryAttributes.BulkNames]: jobs.map(job => job.name), [TelemetryAttributes.BulkCount]: jobs.length, @@ -289,7 +295,10 @@ export class Queue< this as MinimalQueue, jobs.map(job => ({ name: job.name, - data: job.data, + data: { + ...job.data, + ...(span && telemetryHeaders), + }, opts: { ...this.jobsOpts, ...job.opts, @@ -314,6 +323,7 @@ export class Queue< */ async pause(): Promise { await this.trace( + () => 3, () => `${this.name} Queue.pause`, async () => { await this.scripts.pause(true); @@ -329,6 +339,7 @@ export class Queue< */ async close(): Promise { await this.trace( + () => 3, () => `${this.name} Queue.close`, async () => { if (!this.closing) { @@ -349,6 +360,7 @@ export class Queue< */ async resume(): Promise { await this.trace( + () => 3, () => `${this.name} Queue.resume`, async () => { await this.scripts.pause(false); @@ -408,7 +420,8 @@ export class Queue< repeatOpts: RepeatOptions, jobId?: string, ): Promise { - return await this.trace( + return this.trace( + () => 3, () => `${this.name} ${name} Queue.removeRepeatable`, async () => { const repeat = await this.repeat; @@ -425,7 +438,8 @@ export class Queue< * @param id - identifier */ async removeDebounceKey(id: string): Promise { - return await this.trace( + return this.trace( + () => 3, () => `${this.name} ${id} Queue.removeDebounceKey`, async () => { const client = await this.client; @@ -446,7 +460,8 @@ export class Queue< * @returns */ async removeRepeatableByKey(key: string): Promise { - return await this.trace( + return this.trace( + () => 3, () => `${this.name} ${key} Queue.removeRepeatableByKey`, async span => { span?.setAttributes({ @@ -471,7 +486,8 @@ export class Queue< * any of its dependencies were locked. */ async remove(jobId: string, { removeChildren = true } = {}): Promise { - return await this.trace( + return this.trace( + () => 3, () => `${this.name} ${jobId} Queue.remove`, async span => { span?.setAttributes({ @@ -497,6 +513,7 @@ export class Queue< progress: number | object, ): Promise { await this.trace( + () => 3, () => `${this.name} Queue.updateJobProgress`, async span => { span?.setAttributes({ @@ -535,6 +552,7 @@ export class Queue< */ async drain(delayed = false): Promise { await this.trace( + () => 3, () => `${this.name} Queue.drain`, async span => { span?.setAttributes({ @@ -568,7 +586,8 @@ export class Queue< | 'delayed' | 'failed' = 'completed', ): Promise { - return await this.trace( + return this.trace( + () => 3, () => `${this.name} Queue.clean`, async span => { const maxCount = limit || Infinity; @@ -619,6 +638,7 @@ export class Queue< */ async obliterate(opts?: ObliterateOpts): Promise { await this.trace( + () => 3, () => `${this.name} Queue.obliterate`, async () => { await this.pause(); @@ -649,6 +669,7 @@ export class Queue< opts: { count?: number; state?: FinishedStatus; timestamp?: number } = {}, ): Promise { await this.trace( + () => 3, () => `${this.name} Queue.retryJobs`, async span => { span?.setAttributes({ @@ -677,6 +698,7 @@ export class Queue< */ async promoteJobs(opts: { count?: number } = {}): Promise { await this.trace( + () => 3, () => `${this.name} Queue.promoteJobs`, async span => { span?.setAttributes({ @@ -697,7 +719,8 @@ export class Queue< * @param maxLength - */ async trimEvents(maxLength: number): Promise { - return await this.trace( + return this.trace( + () => 3, () => `${this.name} Queue.trimEvents`, async span => { span?.setAttributes({ diff --git a/src/classes/worker.ts b/src/classes/worker.ts index 945b736d19..c9989bb565 100644 --- a/src/classes/worker.ts +++ b/src/classes/worker.ts @@ -10,6 +10,7 @@ import { AbortController } from 'node-abort-controller'; import { GetNextJobOptions, IoredisListener, + JobDataWithHeaders, JobJsonRaw, Processor, RedisClient, @@ -404,6 +405,7 @@ export class Worker< async run() { await this.trace( + () => 3, () => `${this.name} ${this.id} Worker.run`, async span => { span?.setAttributes({ @@ -531,7 +533,8 @@ export class Worker< * @returns a Job or undefined if no job was available in the queue. */ async getNextJob(token: string, { block = true }: GetNextJobOptions = {}) { - return await this.trace>( + return this.trace>( + () => 3, () => `${this.name} ${this.id} Worker.getNextJob`, async span => { const nextJob = await this._getNextJob( @@ -611,6 +614,7 @@ export class Worker< */ async rateLimit(expireTimeMs: number): Promise { await this.trace( + () => 3, () => `${this.name} ${this.id} Worker.rateLimit`, async span => { span?.setAttributes({ @@ -780,7 +784,10 @@ will never work with more accuracy than 1ms. */ fetchNextCallback = () => true, jobsInProgress: Set<{ job: Job; ts: number }>, ): Promise> { - return await this.trace>( + const { telemetryHeaders } = job.data as JobDataWithHeaders; + + return this.trace>( + () => 3, () => `${this.name} ${this.id} Worker.processJob`, async span => { span?.setAttributes({ @@ -856,6 +863,7 @@ will never work with more accuracy than 1ms. */ jobsInProgress.delete(inProgressItem); } }, + telemetryHeaders, ); } @@ -865,6 +873,7 @@ will never work with more accuracy than 1ms. */ */ async pause(doNotWaitActive?: boolean): Promise { await this.trace( + () => 3, () => `${this.name} ${this.id} Worker.pause`, async span => { span?.setAttributes({ @@ -893,6 +902,7 @@ will never work with more accuracy than 1ms. */ */ resume(): void { this.trace( + () => 3, () => `${this.name} ${this.id} Worker.resume`, span => { span?.setAttributes({ @@ -940,6 +950,7 @@ will never work with more accuracy than 1ms. */ */ async close(force = false): Promise { await this.trace( + () => 3, () => `${this.name} ${this.id} Worker.close`, async span => { span?.setAttributes({ @@ -1001,6 +1012,7 @@ will never work with more accuracy than 1ms. */ */ async startStalledCheckTimer(): Promise { await this.trace( + () => 3, () => `${this.name} ${this.id} Worker.startStalledCheckTimer`, async span => { span?.setAttributes({ @@ -1107,6 +1119,7 @@ will never work with more accuracy than 1ms. */ protected async extendLocks(jobs: Job[]) { await this.trace( + () => 0, () => `${this.name} ${this.id} Worker.extendLocks`, async span => { span?.setAttributes({ @@ -1146,6 +1159,7 @@ will never work with more accuracy than 1ms. */ private async moveStalledJobsToWait() { await this.trace( + () => 0, () => `${this.name} ${this.id} Worker.moveStalledJobsToWait`, async span => { const chunkSize = 50; diff --git a/src/interfaces/telemetry.ts b/src/interfaces/telemetry.ts index 297db9f4a8..283be4f0cc 100644 --- a/src/interfaces/telemetry.ts +++ b/src/interfaces/telemetry.ts @@ -2,6 +2,7 @@ export interface Telemetry { trace: Trace; contextManager: ContextManager; tracerName: string; + propagation: Propagation; } export interface ContextManager { @@ -24,16 +25,34 @@ export interface Context { } export interface Tracer { - startSpan(name: string): Span; + startSpan(name: string, options?: SpanOptions): Span; +} + +export interface SpanOptions { + kind: SpanKind; +} + +export enum SpanKind { + INTERNAL = 0, + SERVER = 1, + CLIENT = 2, + PRODUCER = 3, + CONSUMER = 4, } export interface Span { setAttribute(key: string, value: Attribute): Span; setAttributes(attributes: Attributes): Span; recordException(exception: Exception, time?: Time): void; + spanContext(): SpanContext; end(): void; } +export interface SpanContext { + traceId: string; + spanId: string; +} + export interface Attributes { [attribute: string]: Attribute | undefined; } @@ -74,3 +93,16 @@ interface NameException { export type Time = HighResolutionTime | number | Date; type HighResolutionTime = [number, number]; + +export interface Propagation { + inject(context: Context, carrier: T, setter?: TextMapSetter): void; + extract(context: Context, carrier: T): Context; +} + +interface TextMapSetter { + get(carrier: T, key: string): undefined | string | string[]; +} + +export interface JobDataWithHeaders { + telemetryHeaders?: Record; +} From 379abf50ed66e6cbd6ad34095b8f15cdd5f58443 Mon Sep 17 00:00:00 2001 From: fgozdz Date: Tue, 10 Sep 2024 10:01:18 +0200 Subject: [PATCH 12/26] fix(worker, queue): remove unused import --- src/classes/queue-base.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/classes/queue-base.ts b/src/classes/queue-base.ts index 2368a9727b..14195fc719 100644 --- a/src/classes/queue-base.ts +++ b/src/classes/queue-base.ts @@ -21,7 +21,6 @@ import { Job } from './job'; import { KeysMap, QueueKeys } from './queue-keys'; import { Scripts } from './scripts'; import { TelemetryAttributes } from '../enums'; -import { Context } from 'vm'; /** * @class QueueBase From 6169ea3bb84fdd4ce04df3a6b63c721c0ed2a1a1 Mon Sep 17 00:00:00 2001 From: fgozdz Date: Mon, 16 Sep 2024 16:19:58 +0200 Subject: [PATCH 13/26] feat(queue, worker): documentation and changes to propagation, basic tests for telemetry interface --- src/classes/queue-base.ts | 62 +++--- src/classes/queue.ts | 48 +++-- src/classes/worker.ts | 27 ++- src/enums/telemetry-attributes.ts | 8 + src/interfaces/base-job-options.ts | 5 + src/interfaces/telemetry.ts | 31 ++- src/interfaces/worker-options.ts | 2 +- src/types/job-options.ts | 5 + tests/test_telemetry_interface.ts | 303 +++++++++++++++++++++++++++++ 9 files changed, 411 insertions(+), 80 deletions(-) create mode 100644 tests/test_telemetry_interface.ts diff --git a/src/classes/queue-base.ts b/src/classes/queue-base.ts index 14195fc719..c8a3e5edf5 100644 --- a/src/classes/queue-base.ts +++ b/src/classes/queue-base.ts @@ -6,8 +6,8 @@ import { Tracer, SetSpan, ContextManager, - SpanKind, Propagation, + Context, } from '../interfaces'; import { MinimalQueue } from '../types'; import { @@ -20,7 +20,7 @@ import { RedisConnection } from './redis-connection'; import { Job } from './job'; import { KeysMap, QueueKeys } from './queue-keys'; import { Scripts } from './scripts'; -import { TelemetryAttributes } from '../enums'; +import { TelemetryAttributes, SpanKind } from '../enums'; /** * @class QueueBase @@ -102,6 +102,20 @@ export class QueueBase extends EventEmitter implements MinimalQueue { this.setSpan = opts.telemetry.trace.setSpan; this.contextManager = opts.telemetry.contextManager; this.propagation = opts.telemetry.propagation; + + this.contextManager.getMetadata = (context: Context) => { + const metadata = {}; + this.propagation.inject(context, metadata); + return metadata; + }; + + this.contextManager.fromMetadata = ( + activeContext: Context, + metadata: Record, + ) => { + const context = this.propagation.extract(activeContext, metadata); + return context; + }; } } @@ -206,49 +220,51 @@ export class QueueBase extends EventEmitter implements MinimalQueue { /** * Wraps the code with telemetry and provides span for configuration. * - * @param spanType - type of the span: Producer, Consumer, Internal + * @param spanKind - kind of the span: Producer, Consumer, Internal * @param getSpanName - name of the span * @param callback - code to wrap with telemetry + * @param srcPropagationMedatada - * @returns */ - protected trace( - getSpanType: () => SpanKind, + protected async trace( + spanKind: SpanKind, getSpanName: () => string, callback: ( span?: Span, - telemetryHeaders?: Record, + dstPropagationMetadata?: Record, ) => Promise | T, - activeTelemetryHeaders?: Record, + srcPropagationMetadata?: Record, ) { if (!this.tracer) { return callback(); } const span = this.tracer.startSpan(getSpanName(), { - kind: getSpanType(), - }); - - span.setAttributes({ - [TelemetryAttributes.QueueName]: this.name, + kind: spanKind, }); try { - if (activeTelemetryHeaders) { - const activeContext = this.propagation.extract( - this.contextManager.active(), - activeTelemetryHeaders, - ); + span.setAttributes({ + [TelemetryAttributes.QueueName]: this.name, + }); - return this.contextManager.with(activeContext, () => callback(span)); + let activeContext = this.contextManager.active(); + if (srcPropagationMetadata) { + activeContext = this.contextManager.fromMetadata( + activeContext, + srcPropagationMetadata, + ); } - const telemetryHeaders: Record = {}; + let dstPropagationMetadata: undefined | Record; + if (spanKind === SpanKind.PRODUCER) { + dstPropagationMetadata = this.contextManager.getMetadata(activeContext); + } - this.propagation.inject(this.contextManager.active(), telemetryHeaders); + const messageContext = this.setSpan(activeContext, span); - return this.contextManager.with( - this.setSpan(this.contextManager.active(), span), - () => callback(span, telemetryHeaders), + return await this.contextManager.with(messageContext, () => + callback(span, dstPropagationMetadata), ); } catch (err) { span.recordException(err as Error); diff --git a/src/classes/queue.ts b/src/classes/queue.ts index 6cb860ca60..56b56becfc 100644 --- a/src/classes/queue.ts +++ b/src/classes/queue.ts @@ -12,7 +12,7 @@ import { Job } from './job'; import { QueueGetters } from './queue-getters'; import { Repeat } from './repeat'; import { RedisConnection } from './redis-connection'; -import { TelemetryAttributes } from '../enums'; +import { SpanKind, TelemetryAttributes } from '../enums'; export interface ObliterateOpts { /** @@ -222,11 +222,11 @@ export class Queue< opts?: JobsOptions, ): Promise> { return this.trace>( - () => 3, + SpanKind.PRODUCER, () => `${this.name}.${name} Queue.add`, - async (span, telemetryHeaders) => { - if (telemetryHeaders) { - data = { ...data, telemetryHeaders }; + async (span, srcPropagationMedatada) => { + if (srcPropagationMedatada) { + opts = { ...opts, tm: srcPropagationMedatada }; } if (opts && opts.repeat) { @@ -283,9 +283,9 @@ export class Queue< jobs: { name: NameType; data: DataType; opts?: BulkJobOptions }[], ): Promise[]> { return this.trace[]>( - () => 3, + SpanKind.PRODUCER, () => `${this.name} Queue.addBulk`, - async (span, telemetryHeaders) => { + async (span, srcPropagationMedatada) => { span?.setAttributes({ [TelemetryAttributes.BulkNames]: jobs.map(job => job.name), [TelemetryAttributes.BulkCount]: jobs.length, @@ -295,14 +295,12 @@ export class Queue< this as MinimalQueue, jobs.map(job => ({ name: job.name, - data: { - ...job.data, - ...(span && telemetryHeaders), - }, + data: job.data, opts: { ...this.jobsOpts, ...job.opts, jobId: job.opts?.jobId, + tm: span && srcPropagationMedatada, }, })), ); @@ -323,7 +321,7 @@ export class Queue< */ async pause(): Promise { await this.trace( - () => 3, + SpanKind.PRODUCER, () => `${this.name} Queue.pause`, async () => { await this.scripts.pause(true); @@ -339,7 +337,7 @@ export class Queue< */ async close(): Promise { await this.trace( - () => 3, + SpanKind.PRODUCER, () => `${this.name} Queue.close`, async () => { if (!this.closing) { @@ -360,7 +358,7 @@ export class Queue< */ async resume(): Promise { await this.trace( - () => 3, + SpanKind.PRODUCER, () => `${this.name} Queue.resume`, async () => { await this.scripts.pause(false); @@ -421,7 +419,7 @@ export class Queue< jobId?: string, ): Promise { return this.trace( - () => 3, + SpanKind.PRODUCER, () => `${this.name} ${name} Queue.removeRepeatable`, async () => { const repeat = await this.repeat; @@ -439,7 +437,7 @@ export class Queue< */ async removeDebounceKey(id: string): Promise { return this.trace( - () => 3, + SpanKind.PRODUCER, () => `${this.name} ${id} Queue.removeDebounceKey`, async () => { const client = await this.client; @@ -461,7 +459,7 @@ export class Queue< */ async removeRepeatableByKey(key: string): Promise { return this.trace( - () => 3, + SpanKind.PRODUCER, () => `${this.name} ${key} Queue.removeRepeatableByKey`, async span => { span?.setAttributes({ @@ -487,7 +485,7 @@ export class Queue< */ async remove(jobId: string, { removeChildren = true } = {}): Promise { return this.trace( - () => 3, + SpanKind.PRODUCER, () => `${this.name} ${jobId} Queue.remove`, async span => { span?.setAttributes({ @@ -513,7 +511,7 @@ export class Queue< progress: number | object, ): Promise { await this.trace( - () => 3, + SpanKind.PRODUCER, () => `${this.name} Queue.updateJobProgress`, async span => { span?.setAttributes({ @@ -552,7 +550,7 @@ export class Queue< */ async drain(delayed = false): Promise { await this.trace( - () => 3, + SpanKind.PRODUCER, () => `${this.name} Queue.drain`, async span => { span?.setAttributes({ @@ -587,7 +585,7 @@ export class Queue< | 'failed' = 'completed', ): Promise { return this.trace( - () => 3, + SpanKind.PRODUCER, () => `${this.name} Queue.clean`, async span => { const maxCount = limit || Infinity; @@ -638,7 +636,7 @@ export class Queue< */ async obliterate(opts?: ObliterateOpts): Promise { await this.trace( - () => 3, + SpanKind.PRODUCER, () => `${this.name} Queue.obliterate`, async () => { await this.pause(); @@ -669,7 +667,7 @@ export class Queue< opts: { count?: number; state?: FinishedStatus; timestamp?: number } = {}, ): Promise { await this.trace( - () => 3, + SpanKind.PRODUCER, () => `${this.name} Queue.retryJobs`, async span => { span?.setAttributes({ @@ -698,7 +696,7 @@ export class Queue< */ async promoteJobs(opts: { count?: number } = {}): Promise { await this.trace( - () => 3, + 3, () => `${this.name} Queue.promoteJobs`, async span => { span?.setAttributes({ @@ -720,7 +718,7 @@ export class Queue< */ async trimEvents(maxLength: number): Promise { return this.trace( - () => 3, + SpanKind.PRODUCER, () => `${this.name} Queue.trimEvents`, async span => { span?.setAttributes({ diff --git a/src/classes/worker.ts b/src/classes/worker.ts index c9989bb565..9d5031b862 100644 --- a/src/classes/worker.ts +++ b/src/classes/worker.ts @@ -10,7 +10,6 @@ import { AbortController } from 'node-abort-controller'; import { GetNextJobOptions, IoredisListener, - JobDataWithHeaders, JobJsonRaw, Processor, RedisClient, @@ -36,7 +35,7 @@ import { RATE_LIMIT_ERROR, WaitingChildrenError, } from './errors'; -import { TelemetryAttributes } from '../enums'; +import { SpanKind, TelemetryAttributes } from '../enums'; // 10 seconds is the maximum time a BRPOPLPUSH can block. const maximumBlockTimeout = 10; @@ -405,7 +404,7 @@ export class Worker< async run() { await this.trace( - () => 3, + SpanKind.CONSUMER, () => `${this.name} ${this.id} Worker.run`, async span => { span?.setAttributes({ @@ -534,7 +533,7 @@ export class Worker< */ async getNextJob(token: string, { block = true }: GetNextJobOptions = {}) { return this.trace>( - () => 3, + SpanKind.CONSUMER, () => `${this.name} ${this.id} Worker.getNextJob`, async span => { const nextJob = await this._getNextJob( @@ -614,7 +613,7 @@ export class Worker< */ async rateLimit(expireTimeMs: number): Promise { await this.trace( - () => 3, + SpanKind.CONSUMER, () => `${this.name} ${this.id} Worker.rateLimit`, async span => { span?.setAttributes({ @@ -784,10 +783,10 @@ will never work with more accuracy than 1ms. */ fetchNextCallback = () => true, jobsInProgress: Set<{ job: Job; ts: number }>, ): Promise> { - const { telemetryHeaders } = job.data as JobDataWithHeaders; + const { tm: dstPropagationMedatada } = job.opts; return this.trace>( - () => 3, + SpanKind.CONSUMER, () => `${this.name} ${this.id} Worker.processJob`, async span => { span?.setAttributes({ @@ -863,7 +862,7 @@ will never work with more accuracy than 1ms. */ jobsInProgress.delete(inProgressItem); } }, - telemetryHeaders, + dstPropagationMedatada, ); } @@ -873,7 +872,7 @@ will never work with more accuracy than 1ms. */ */ async pause(doNotWaitActive?: boolean): Promise { await this.trace( - () => 3, + SpanKind.CONSUMER, () => `${this.name} ${this.id} Worker.pause`, async span => { span?.setAttributes({ @@ -902,7 +901,7 @@ will never work with more accuracy than 1ms. */ */ resume(): void { this.trace( - () => 3, + SpanKind.CONSUMER, () => `${this.name} ${this.id} Worker.resume`, span => { span?.setAttributes({ @@ -950,7 +949,7 @@ will never work with more accuracy than 1ms. */ */ async close(force = false): Promise { await this.trace( - () => 3, + SpanKind.CONSUMER, () => `${this.name} ${this.id} Worker.close`, async span => { span?.setAttributes({ @@ -1012,7 +1011,7 @@ will never work with more accuracy than 1ms. */ */ async startStalledCheckTimer(): Promise { await this.trace( - () => 3, + SpanKind.CONSUMER, () => `${this.name} ${this.id} Worker.startStalledCheckTimer`, async span => { span?.setAttributes({ @@ -1119,7 +1118,7 @@ will never work with more accuracy than 1ms. */ protected async extendLocks(jobs: Job[]) { await this.trace( - () => 0, + SpanKind.INTERNAL, () => `${this.name} ${this.id} Worker.extendLocks`, async span => { span?.setAttributes({ @@ -1159,7 +1158,7 @@ will never work with more accuracy than 1ms. */ private async moveStalledJobsToWait() { await this.trace( - () => 0, + SpanKind.INTERNAL, () => `${this.name} ${this.id} Worker.moveStalledJobsToWait`, async span => { const chunkSize = 50; diff --git a/src/enums/telemetry-attributes.ts b/src/enums/telemetry-attributes.ts index 78c80ff658..734aaadb29 100644 --- a/src/enums/telemetry-attributes.ts +++ b/src/enums/telemetry-attributes.ts @@ -23,3 +23,11 @@ export enum TelemetryAttributes { WorkerStalledJobs = 'bullmq.worker.stalled.jobs', WorkerJobsToExtendLocks = 'bullmq.worker.jobs.to.extend.locks', } + +export enum SpanKind { + INTERNAL = 0, + SERVER = 1, + CLIENT = 2, + PRODUCER = 3, + CONSUMER = 4, +} diff --git a/src/interfaces/base-job-options.ts b/src/interfaces/base-job-options.ts index 2d630f8c3e..8db9429656 100644 --- a/src/interfaces/base-job-options.ts +++ b/src/interfaces/base-job-options.ts @@ -111,4 +111,9 @@ export interface BaseJobOptions extends DefaultJobOptions { * Internal property used by repeatable jobs. */ prevMillis?: number; + + /** + * Telemetry propagation + */ + tm?: Record; } diff --git a/src/interfaces/telemetry.ts b/src/interfaces/telemetry.ts index 283be4f0cc..c9a5f74c04 100644 --- a/src/interfaces/telemetry.ts +++ b/src/interfaces/telemetry.ts @@ -1,3 +1,5 @@ +import { SpanKind } from '../enums'; + export interface Telemetry { trace: Trace; contextManager: ContextManager; @@ -6,11 +8,22 @@ export interface Telemetry { } export interface ContextManager { + /** + * Creates a new context and sets it as active for the fn passed as last argument + * + * @param context + * @param fn + */ with any>( context: Context, fn: A, ): ReturnType; active(): Context; + getMetadata(context: Context): Record; + fromMetadata( + activeContext: Context, + metadata: Record, + ): Context; } export interface Trace { @@ -32,14 +45,6 @@ export interface SpanOptions { kind: SpanKind; } -export enum SpanKind { - INTERNAL = 0, - SERVER = 1, - CLIENT = 2, - PRODUCER = 3, - CONSUMER = 4, -} - export interface Span { setAttribute(key: string, value: Attribute): Span; setAttributes(attributes: Attributes): Span; @@ -95,14 +100,6 @@ export type Time = HighResolutionTime | number | Date; type HighResolutionTime = [number, number]; export interface Propagation { - inject(context: Context, carrier: T, setter?: TextMapSetter): void; + inject(context: Context, carrier: T): void; extract(context: Context, carrier: T): Context; } - -interface TextMapSetter { - get(carrier: T, key: string): undefined | string | string[]; -} - -export interface JobDataWithHeaders { - telemetryHeaders?: Record; -} diff --git a/src/interfaces/worker-options.ts b/src/interfaces/worker-options.ts index 39ba799a16..97ac9dfa84 100644 --- a/src/interfaces/worker-options.ts +++ b/src/interfaces/worker-options.ts @@ -147,7 +147,7 @@ export interface WorkerOptions extends QueueBaseOptions { useWorkerThreads?: boolean; /** - * Telemetry client + * Telemetry Addon */ telemetry?: Telemetry; } diff --git a/src/types/job-options.ts b/src/types/job-options.ts index 4b0eea7b78..d85b6851f9 100644 --- a/src/types/job-options.ts +++ b/src/types/job-options.ts @@ -20,6 +20,11 @@ export type JobsOptions = BaseJobOptions & { * If true, removes the job from its parent dependencies when it fails after all attempts. */ removeDependencyOnFailure?: boolean; + + /** + * TelemetryMetadata, provide for context propagation. + */ + tm?: Record; }; /** diff --git a/tests/test_telemetry_interface.ts b/tests/test_telemetry_interface.ts new file mode 100644 index 0000000000..fd766bedb9 --- /dev/null +++ b/tests/test_telemetry_interface.ts @@ -0,0 +1,303 @@ +import { expect } from 'chai'; +import { default as IORedis } from 'ioredis'; +import { after, beforeEach, describe, it, before } from 'mocha'; +import { v4 } from 'uuid'; +import { Queue, Worker, Job } from '../src/classes'; +import { removeAllQueueData } from '../src/utils'; +import { + Telemetry, + Trace, + ContextManager, + Propagation, + Tracer, + Span, + SpanOptions, + Attributes, + Exception, + Time, + SpanContext, + Context, +} from '../src/interfaces'; +import { SpanKind, TelemetryAttributes } from '../src/enums'; +import * as sinon from 'sinon'; + +describe('Telemetry', () => { + type ExtendedException = Exception & { + message: string; + }; + + const redisHost = process.env.REDIS_HOST || 'localhost'; + const prefix = process.env.BULLMQ_TEST_PREFIX || 'bull'; + + class MockTelemetry implements Telemetry { + public trace: Trace; + public contextManager: ContextManager; + public propagation: Propagation; + public tracerName = 'mockTracer'; + + constructor() { + this.trace = new MockTrace(); + this.contextManager = new MockContextManager(); + this.propagation = new MockPropagation(); + } + } + + class MockTrace implements Trace { + getTracer(): Tracer { + return new MockTracer(); + } + + setSpan(context: Context, span: Span): Context { + const newContext = { ...context }; + newContext['getSpan'] = () => span; + return newContext; + } + } + + class MockContextManager implements ContextManager { + private activeContext: Context = {}; + + with any>( + context: Context, + fn: A, + ): ReturnType { + this.activeContext = context; + return fn(); + } + + active(): Context { + return this.activeContext; + } + + getMetadata(context: Context): Record { + const metadata: Record = {}; + Object.keys(context).forEach(key => { + if (key.startsWith('getMetadata_')) { + const value = (context[key] as () => string)(); + metadata[key.replace('getMetadata_', '')] = value; + } + }); + return metadata; + } + + fromMetadata( + activeContext: Context, + metadata: Record, + ): Context { + const newContext = { ...activeContext }; + Object.keys(metadata).forEach(key => { + newContext[`getMetadata_${key}`] = () => metadata[key]; + }); + return newContext; + } + } + + class MockTracer implements Tracer { + startSpan(name: string, options?: SpanOptions): Span { + return new MockSpan(name, options); + } + } + + class MockSpan implements Span { + attributes: Attributes = {}; + name: string; + options: SpanOptions | undefined; + exception: ExtendedException | undefined; + + constructor(name: string, options?: SpanOptions) { + this.name = name; + this.options = options; + } + + setAttribute(key: string, value: any): Span { + this.attributes[key] = value; + return this; + } + + setAttributes(attributes: Attributes): Span { + this.attributes = { ...this.attributes, ...attributes }; + return this; + } + + recordException(exception: ExtendedException, time?: Time): void { + this.exception = exception; + } + + spanContext(): SpanContext { + return { traceId: 'mock-trace-id', spanId: 'mock-span-id' }; + } + + end(): void {} + } + + class MockPropagation implements Propagation { + inject(context: Context, carrier: T): void {} + + extract(context: Context, carrier: T): Context { + const newContext = { ...context }; + newContext['extractedFunction'] = () => {}; + + return newContext; + } + } + + let telemetryClient; + + let queue: Queue; + let queueName: string; + + let connection; + before(async function () { + connection = new IORedis(redisHost, { maxRetriesPerRequest: null }); + }); + + beforeEach(async function () { + queueName = `test-${v4()}`; + telemetryClient = new MockTelemetry(); + queue = new Queue(queueName, { + connection, + prefix, + telemetry: telemetryClient, + }); + }); + + afterEach(async function () { + await queue.close(); + await removeAllQueueData(new IORedis(redisHost), queueName); + }); + + after(async function () { + await connection.quit(); + }); + + describe('Queue.add', () => { + it('should correctly interact with telemetry when adding a job', async () => { + await queue.add('testJob', { foo: 'bar' }); + + const activeContext = telemetryClient.contextManager.active(); + const span = activeContext.getSpan?.() as MockSpan; + + expect(span).to.be.an.instanceOf(MockSpan); + expect(span.name).to.equal(`${queueName}.testJob Queue.add`); + expect(span.options?.kind).to.equal(SpanKind.PRODUCER); + expect(span.attributes[TelemetryAttributes.QueueName]).to.equal( + queueName, + ); + }); + + it('should correctly handle errors and record them in telemetry', async () => { + const addStub = sinon + .stub(queue, 'add') + .rejects(new Error('Simulated error')); + + const span = telemetryClient.trace + .getTracer('testtracer') + .startSpan('Queue.add.error') as MockSpan; + const recordExceptionSpy = sinon.spy(span, 'recordException'); + + const activeContext = telemetryClient.contextManager.active(); + activeContext['getSpan'] = () => span; + + try { + await queue.add('testJob', { foo: 'bar' }); + + expect.fail('Expected an error to be thrown'); + } catch (error) { + span.recordException(error); + + sinon.assert.calledOnce(recordExceptionSpy); + const [exception] = recordExceptionSpy.firstCall.args; + expect(exception?.message).to.equal('Simulated error'); + } finally { + addStub.restore(); + } + }); + }); + + describe('Queue.addBulk', () => { + it('should correctly interact with telemetry when adding multiple jobs', async () => { + const jobs = [ + { name: 'job1', data: { foo: 'bar' } }, + { name: 'job2', data: { baz: 'qux' } }, + ]; + + await queue.addBulk(jobs); + + const activeContext = telemetryClient.contextManager.active(); + const span = activeContext.getSpan?.() as MockSpan; + + expect(span).to.be.an.instanceOf(MockSpan); + expect(span.name).to.equal(`${queueName} Queue.addBulk`); + expect(span.options?.kind).to.equal(SpanKind.PRODUCER); + expect(span.attributes[TelemetryAttributes.BulkNames]).to.deep.equal( + jobs.map(job => job.name), + ); + expect(span.attributes[TelemetryAttributes.BulkCount]).to.equal( + jobs.length, + ); + }); + + it('should correctly handle errors and record them in telemetry for addBulk', async () => { + const jobs = [ + { name: 'job1', data: { foo: 'bar' } }, + { name: 'job2', data: { baz: 'qux' } }, + ]; + + const addBulkStub = sinon + .stub(queue.Job, 'createBulk') + .rejects(new Error('Simulated bulk error')); + + const span = telemetryClient.trace + .getTracer('testtracer') + .startSpan('Queue.addBulk.error') as MockSpan; + const recordExceptionSpy = sinon.spy(span, 'recordException'); + + const activeContext = telemetryClient.contextManager.active(); + activeContext['getSpan'] = () => span; + + try { + await queue.addBulk(jobs); + + expect.fail('Expected an error to be thrown'); + } catch (error) { + span.recordException(error); + + sinon.assert.calledOnce(recordExceptionSpy); + const [exception] = recordExceptionSpy.firstCall.args; + expect(exception?.message).to.equal('Simulated bulk error'); + } finally { + addBulkStub.restore(); + } + }); + }); + + describe('Worker.processJob', async () => { + it('should correctly interact with telemetry when processing a job', async () => { + const worker = new Worker(queueName, async () => 'some result', { + connection, + telemetry: telemetryClient, + }); + await worker.waitUntilReady(); + + const job = await queue.add('testJob', { foo: 'bar' }); + const token = 'some-token'; + + const moveToCompletedStub = sinon.stub(job, 'moveToCompleted').resolves(); + + await worker.processJob(job, token, () => false, new Set()); + + const activeContext = telemetryClient.contextManager.active(); + const span = activeContext.getSpan?.() as MockSpan; + + expect(span).to.be.an.instanceOf(MockSpan); + expect(span.name).to.equal(`${queueName} ${worker.id} Worker.processJob`); + expect(span.options?.kind).to.equal(SpanKind.CONSUMER); + expect(span.attributes[TelemetryAttributes.WorkerId]).to.equal(worker.id); + expect(span.attributes[TelemetryAttributes.WorkerToken]).to.equal(token); + expect(span.attributes[TelemetryAttributes.JobId]).to.equal(job.id); + + moveToCompletedStub.restore(); + await worker.close(); + }); + }); +}); From 908ec13cb40929d343d7b80f7b7d401dfc76ba96 Mon Sep 17 00:00:00 2001 From: fgozdz Date: Mon, 16 Sep 2024 16:29:11 +0200 Subject: [PATCH 14/26] feat(test_telemetry_interface): remove unused imports --- tests/test_telemetry_interface.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_telemetry_interface.ts b/tests/test_telemetry_interface.ts index fd766bedb9..bee12725a6 100644 --- a/tests/test_telemetry_interface.ts +++ b/tests/test_telemetry_interface.ts @@ -2,7 +2,7 @@ import { expect } from 'chai'; import { default as IORedis } from 'ioredis'; import { after, beforeEach, describe, it, before } from 'mocha'; import { v4 } from 'uuid'; -import { Queue, Worker, Job } from '../src/classes'; +import { Queue, Worker } from '../src/classes'; import { removeAllQueueData } from '../src/utils'; import { Telemetry, From 2dab35f16890b7eb22ef0370e2b29bef926ccb87 Mon Sep 17 00:00:00 2001 From: fgozdz Date: Sun, 22 Sep 2024 16:42:53 +0200 Subject: [PATCH 15/26] feat(queue, worker): correct tests and spankind, use property mapping for shorter names --- src/classes/job.ts | 1 + src/classes/queue-base.ts | 38 ++++-------- src/classes/queue.ts | 28 ++++----- src/classes/worker.ts | 12 ++-- src/interfaces/base-job-options.ts | 4 +- src/types/job-options.ts | 10 +-- tests/test_telemetry_interface.ts | 97 ++++++++++++------------------ 7 files changed, 76 insertions(+), 114 deletions(-) diff --git a/src/classes/job.ts b/src/classes/job.ts index 4d335f6c11..bb2b319ac3 100644 --- a/src/classes/job.ts +++ b/src/classes/job.ts @@ -43,6 +43,7 @@ const optsDecodeMap = { idof: 'ignoreDependencyOnFailure', kl: 'keepLogs', rdof: 'removeDependencyOnFailure', + tm: 'telemetryMetadata', }; const optsEncodeMap = invertObject(optsDecodeMap); diff --git a/src/classes/queue-base.ts b/src/classes/queue-base.ts index c8a3e5edf5..6e7420aa23 100644 --- a/src/classes/queue-base.ts +++ b/src/classes/queue-base.ts @@ -42,13 +42,10 @@ export class QueueBase extends EventEmitter implements MinimalQueue { /** * Instance of a telemetry client - * To use it create if statement in a method to observe with start and end of a span + * To use it wrap the code with trace helper * It will check if tracer is provided and if not it will continue as is */ private tracer: Tracer | undefined; - private setSpan: SetSpan | undefined; - protected contextManager: ContextManager | undefined; - protected propagation: Propagation | undefined; /** * @@ -99,23 +96,6 @@ export class QueueBase extends EventEmitter implements MinimalQueue { if (opts?.telemetry) { this.tracer = opts.telemetry.trace.getTracer(opts.telemetry.tracerName); - this.setSpan = opts.telemetry.trace.setSpan; - this.contextManager = opts.telemetry.contextManager; - this.propagation = opts.telemetry.propagation; - - this.contextManager.getMetadata = (context: Context) => { - const metadata = {}; - this.propagation.inject(context, metadata); - return metadata; - }; - - this.contextManager.fromMetadata = ( - activeContext: Context, - metadata: Record, - ) => { - const context = this.propagation.extract(activeContext, metadata); - return context; - }; } } @@ -218,7 +198,7 @@ export class QueueBase extends EventEmitter implements MinimalQueue { } /** - * Wraps the code with telemetry and provides span for configuration. + * Wraps the code with telemetry and provides a span for configuration. * * @param spanKind - kind of the span: Producer, Consumer, Internal * @param getSpanName - name of the span @@ -248,9 +228,9 @@ export class QueueBase extends EventEmitter implements MinimalQueue { [TelemetryAttributes.QueueName]: this.name, }); - let activeContext = this.contextManager.active(); + let activeContext = this.opts.telemetry.contextManager.active(); if (srcPropagationMetadata) { - activeContext = this.contextManager.fromMetadata( + activeContext = this.opts.telemetry.contextManager.fromMetadata( activeContext, srcPropagationMetadata, ); @@ -258,12 +238,16 @@ export class QueueBase extends EventEmitter implements MinimalQueue { let dstPropagationMetadata: undefined | Record; if (spanKind === SpanKind.PRODUCER) { - dstPropagationMetadata = this.contextManager.getMetadata(activeContext); + dstPropagationMetadata = + this.opts.telemetry.contextManager.getMetadata(activeContext); } - const messageContext = this.setSpan(activeContext, span); + const messageContext = this.opts.telemetry.trace.setSpan( + activeContext, + span, + ); - return await this.contextManager.with(messageContext, () => + return await this.opts.telemetry.contextManager.with(messageContext, () => callback(span, dstPropagationMetadata), ); } catch (err) { diff --git a/src/classes/queue.ts b/src/classes/queue.ts index 56b56becfc..ff4eac4389 100644 --- a/src/classes/queue.ts +++ b/src/classes/queue.ts @@ -226,7 +226,7 @@ export class Queue< () => `${this.name}.${name} Queue.add`, async (span, srcPropagationMedatada) => { if (srcPropagationMedatada) { - opts = { ...opts, tm: srcPropagationMedatada }; + opts = { ...opts, telemetryMetadata: srcPropagationMedatada }; } if (opts && opts.repeat) { @@ -321,7 +321,7 @@ export class Queue< */ async pause(): Promise { await this.trace( - SpanKind.PRODUCER, + SpanKind.INTERNAL, () => `${this.name} Queue.pause`, async () => { await this.scripts.pause(true); @@ -337,7 +337,7 @@ export class Queue< */ async close(): Promise { await this.trace( - SpanKind.PRODUCER, + SpanKind.INTERNAL, () => `${this.name} Queue.close`, async () => { if (!this.closing) { @@ -358,7 +358,7 @@ export class Queue< */ async resume(): Promise { await this.trace( - SpanKind.PRODUCER, + SpanKind.INTERNAL, () => `${this.name} Queue.resume`, async () => { await this.scripts.pause(false); @@ -419,7 +419,7 @@ export class Queue< jobId?: string, ): Promise { return this.trace( - SpanKind.PRODUCER, + SpanKind.INTERNAL, () => `${this.name} ${name} Queue.removeRepeatable`, async () => { const repeat = await this.repeat; @@ -437,7 +437,7 @@ export class Queue< */ async removeDebounceKey(id: string): Promise { return this.trace( - SpanKind.PRODUCER, + SpanKind.INTERNAL, () => `${this.name} ${id} Queue.removeDebounceKey`, async () => { const client = await this.client; @@ -459,7 +459,7 @@ export class Queue< */ async removeRepeatableByKey(key: string): Promise { return this.trace( - SpanKind.PRODUCER, + SpanKind.INTERNAL, () => `${this.name} ${key} Queue.removeRepeatableByKey`, async span => { span?.setAttributes({ @@ -485,7 +485,7 @@ export class Queue< */ async remove(jobId: string, { removeChildren = true } = {}): Promise { return this.trace( - SpanKind.PRODUCER, + SpanKind.INTERNAL, () => `${this.name} ${jobId} Queue.remove`, async span => { span?.setAttributes({ @@ -511,7 +511,7 @@ export class Queue< progress: number | object, ): Promise { await this.trace( - SpanKind.PRODUCER, + SpanKind.INTERNAL, () => `${this.name} Queue.updateJobProgress`, async span => { span?.setAttributes({ @@ -550,7 +550,7 @@ export class Queue< */ async drain(delayed = false): Promise { await this.trace( - SpanKind.PRODUCER, + SpanKind.INTERNAL, () => `${this.name} Queue.drain`, async span => { span?.setAttributes({ @@ -585,7 +585,7 @@ export class Queue< | 'failed' = 'completed', ): Promise { return this.trace( - SpanKind.PRODUCER, + SpanKind.INTERNAL, () => `${this.name} Queue.clean`, async span => { const maxCount = limit || Infinity; @@ -636,7 +636,7 @@ export class Queue< */ async obliterate(opts?: ObliterateOpts): Promise { await this.trace( - SpanKind.PRODUCER, + SpanKind.INTERNAL, () => `${this.name} Queue.obliterate`, async () => { await this.pause(); @@ -696,7 +696,7 @@ export class Queue< */ async promoteJobs(opts: { count?: number } = {}): Promise { await this.trace( - 3, + SpanKind.INTERNAL, () => `${this.name} Queue.promoteJobs`, async span => { span?.setAttributes({ @@ -718,7 +718,7 @@ export class Queue< */ async trimEvents(maxLength: number): Promise { return this.trace( - SpanKind.PRODUCER, + SpanKind.INTERNAL, () => `${this.name} Queue.trimEvents`, async span => { span?.setAttributes({ diff --git a/src/classes/worker.ts b/src/classes/worker.ts index 9d5031b862..0ce0b0bbc0 100644 --- a/src/classes/worker.ts +++ b/src/classes/worker.ts @@ -613,7 +613,7 @@ export class Worker< */ async rateLimit(expireTimeMs: number): Promise { await this.trace( - SpanKind.CONSUMER, + SpanKind.INTERNAL, () => `${this.name} ${this.id} Worker.rateLimit`, async span => { span?.setAttributes({ @@ -783,7 +783,7 @@ will never work with more accuracy than 1ms. */ fetchNextCallback = () => true, jobsInProgress: Set<{ job: Job; ts: number }>, ): Promise> { - const { tm: dstPropagationMedatada } = job.opts; + const { telemetryMetadata: dstPropagationMedatada } = job.opts; return this.trace>( SpanKind.CONSUMER, @@ -872,7 +872,7 @@ will never work with more accuracy than 1ms. */ */ async pause(doNotWaitActive?: boolean): Promise { await this.trace( - SpanKind.CONSUMER, + SpanKind.INTERNAL, () => `${this.name} ${this.id} Worker.pause`, async span => { span?.setAttributes({ @@ -901,7 +901,7 @@ will never work with more accuracy than 1ms. */ */ resume(): void { this.trace( - SpanKind.CONSUMER, + SpanKind.INTERNAL, () => `${this.name} ${this.id} Worker.resume`, span => { span?.setAttributes({ @@ -949,7 +949,7 @@ will never work with more accuracy than 1ms. */ */ async close(force = false): Promise { await this.trace( - SpanKind.CONSUMER, + SpanKind.INTERNAL, () => `${this.name} ${this.id} Worker.close`, async span => { span?.setAttributes({ @@ -1011,7 +1011,7 @@ will never work with more accuracy than 1ms. */ */ async startStalledCheckTimer(): Promise { await this.trace( - SpanKind.CONSUMER, + SpanKind.INTERNAL, () => `${this.name} ${this.id} Worker.startStalledCheckTimer`, async span => { span?.setAttributes({ diff --git a/src/interfaces/base-job-options.ts b/src/interfaces/base-job-options.ts index 8db9429656..6a5df49bdb 100644 --- a/src/interfaces/base-job-options.ts +++ b/src/interfaces/base-job-options.ts @@ -113,7 +113,7 @@ export interface BaseJobOptions extends DefaultJobOptions { prevMillis?: number; /** - * Telemetry propagation + * TelemetryMetadata, provide for context propagation. */ - tm?: Record; + telemetryMetadata?: Record; } diff --git a/src/types/job-options.ts b/src/types/job-options.ts index d85b6851f9..9f3bf9be89 100644 --- a/src/types/job-options.ts +++ b/src/types/job-options.ts @@ -20,11 +20,6 @@ export type JobsOptions = BaseJobOptions & { * If true, removes the job from its parent dependencies when it fails after all attempts. */ removeDependencyOnFailure?: boolean; - - /** - * TelemetryMetadata, provide for context propagation. - */ - tm?: Record; }; /** @@ -55,4 +50,9 @@ export type RedisJobOptions = BaseJobOptions & { * If true, removes the job from its parent dependencies when it fails after all attempts. */ rdof?: boolean; + + /** + * TelemetryMetadata, provide for context propagation. + */ + tm?: Record; }; diff --git a/tests/test_telemetry_interface.ts b/tests/test_telemetry_interface.ts index bee12725a6..fb4ba3a0b5 100644 --- a/tests/test_telemetry_interface.ts +++ b/tests/test_telemetry_interface.ts @@ -1,4 +1,4 @@ -import { expect } from 'chai'; +import { expect, assert } from 'chai'; import { default as IORedis } from 'ioredis'; import { after, beforeEach, describe, it, before } from 'mocha'; import { v4 } from 'uuid'; @@ -38,7 +38,6 @@ describe('Telemetry', () => { constructor() { this.trace = new MockTrace(); this.contextManager = new MockContextManager(); - this.propagation = new MockPropagation(); } } @@ -130,17 +129,6 @@ describe('Telemetry', () => { end(): void {} } - class MockPropagation implements Propagation { - inject(context: Context, carrier: T): void {} - - extract(context: Context, carrier: T): Context { - const newContext = { ...context }; - newContext['extractedFunction'] = () => {}; - - return newContext; - } - } - let telemetryClient; let queue: Queue; @@ -186,30 +174,28 @@ describe('Telemetry', () => { }); it('should correctly handle errors and record them in telemetry', async () => { - const addStub = sinon - .stub(queue, 'add') - .rejects(new Error('Simulated error')); - - const span = telemetryClient.trace - .getTracer('testtracer') - .startSpan('Queue.add.error') as MockSpan; - const recordExceptionSpy = sinon.spy(span, 'recordException'); - - const activeContext = telemetryClient.contextManager.active(); - activeContext['getSpan'] = () => span; + const opts = { + repeat: { + endDate: 1, + }, + }; + + const recordExceptionSpy = sinon.spy( + MockSpan.prototype, + 'recordException', + ); try { - await queue.add('testJob', { foo: 'bar' }); - - expect.fail('Expected an error to be thrown'); - } catch (error) { - span.recordException(error); - - sinon.assert.calledOnce(recordExceptionSpy); - const [exception] = recordExceptionSpy.firstCall.args; - expect(exception?.message).to.equal('Simulated error'); + await queue.add('testJob', { someData: 'testData' }, opts); + } catch (e) { + assert(recordExceptionSpy.calledOnce); + const recordedError = recordExceptionSpy.firstCall.args[0]; + assert.equal( + recordedError.message, + 'End date must be greater than current timestamp', + ); } finally { - addStub.restore(); + recordExceptionSpy.restore(); } }); }); @@ -238,35 +224,26 @@ describe('Telemetry', () => { }); it('should correctly handle errors and record them in telemetry for addBulk', async () => { - const jobs = [ - { name: 'job1', data: { foo: 'bar' } }, - { name: 'job2', data: { baz: 'qux' } }, - ]; - - const addBulkStub = sinon - .stub(queue.Job, 'createBulk') - .rejects(new Error('Simulated bulk error')); - - const span = telemetryClient.trace - .getTracer('testtracer') - .startSpan('Queue.addBulk.error') as MockSpan; - const recordExceptionSpy = sinon.spy(span, 'recordException'); - - const activeContext = telemetryClient.contextManager.active(); - activeContext['getSpan'] = () => span; + const recordExceptionSpy = sinon.spy( + MockSpan.prototype, + 'recordException', + ); try { - await queue.addBulk(jobs); - - expect.fail('Expected an error to be thrown'); - } catch (error) { - span.recordException(error); - - sinon.assert.calledOnce(recordExceptionSpy); - const [exception] = recordExceptionSpy.firstCall.args; - expect(exception?.message).to.equal('Simulated bulk error'); + await queue.addBulk([ + { name: 'testJob1', data: { someData: 'testData1' } }, + { + name: 'testJob2', + data: { someData: 'testData2' }, + opts: { jobId: '0' }, + }, + ]); + } catch (e) { + assert(recordExceptionSpy.calledOnce); + const recordedError = recordExceptionSpy.firstCall.args[0]; + assert.equal(recordedError.message, 'Custom Ids cannot be integers'); } finally { - addBulkStub.restore(); + recordExceptionSpy.restore(); } }); }); From 846e3ef1f9e561f29d0354b44a882560d6ec3ae9 Mon Sep 17 00:00:00 2001 From: fgozdz Date: Tue, 24 Sep 2024 00:07:25 +0200 Subject: [PATCH 16/26] feat(worker): minor changes --- src/classes/worker.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/classes/worker.ts b/src/classes/worker.ts index 0ce0b0bbc0..071fc2cc3a 100644 --- a/src/classes/worker.ts +++ b/src/classes/worker.ts @@ -404,7 +404,7 @@ export class Worker< async run() { await this.trace( - SpanKind.CONSUMER, + SpanKind.INTERNAL, () => `${this.name} ${this.id} Worker.run`, async span => { span?.setAttributes({ @@ -533,7 +533,7 @@ export class Worker< */ async getNextJob(token: string, { block = true }: GetNextJobOptions = {}) { return this.trace>( - SpanKind.CONSUMER, + SpanKind.INTERNAL, () => `${this.name} ${this.id} Worker.getNextJob`, async span => { const nextJob = await this._getNextJob( @@ -783,7 +783,7 @@ will never work with more accuracy than 1ms. */ fetchNextCallback = () => true, jobsInProgress: Set<{ job: Job; ts: number }>, ): Promise> { - const { telemetryMetadata: dstPropagationMedatada } = job.opts; + const { telemetryMetadata: srcPropagationMedatada } = job.opts; return this.trace>( SpanKind.CONSUMER, @@ -862,7 +862,7 @@ will never work with more accuracy than 1ms. */ jobsInProgress.delete(inProgressItem); } }, - dstPropagationMedatada, + srcPropagationMedatada, ); } From fc733e90bf45e0aa7dcc81c97f0e5e4725fefca7 Mon Sep 17 00:00:00 2001 From: fgozdz Date: Wed, 9 Oct 2024 04:34:23 +0200 Subject: [PATCH 17/26] feat(queue, worker): distributed tracing --- src/classes/queue-base.ts | 52 +++++++++++---------- src/interfaces/base-job-options.ts | 4 +- src/interfaces/telemetry.ts | 37 +++++++-------- src/types/job-options.ts | 4 +- tests/test_telemetry_interface.ts | 72 +++++++++++++++++++----------- 5 files changed, 90 insertions(+), 79 deletions(-) diff --git a/src/classes/queue-base.ts b/src/classes/queue-base.ts index 6e7420aa23..b97a832fd1 100644 --- a/src/classes/queue-base.ts +++ b/src/classes/queue-base.ts @@ -1,13 +1,10 @@ import { EventEmitter } from 'events'; import { + Carrier, QueueBaseOptions, RedisClient, Span, Tracer, - SetSpan, - ContextManager, - Propagation, - Context, } from '../interfaces'; import { MinimalQueue } from '../types'; import { @@ -209,45 +206,46 @@ export class QueueBase extends EventEmitter implements MinimalQueue { protected async trace( spanKind: SpanKind, getSpanName: () => string, - callback: ( - span?: Span, - dstPropagationMetadata?: Record, - ) => Promise | T, - srcPropagationMetadata?: Record, + callback: (span?: Span, dstPropagationMetadata?: Carrier) => Promise | T, + srcPropagationMetadata?: Carrier, ) { if (!this.tracer) { return callback(); } - const span = this.tracer.startSpan(getSpanName(), { - kind: spanKind, - }); + let currentContext; + if (srcPropagationMetadata) { + currentContext = this.opts.telemetry.contextManager.fromMetadata( + this.opts.telemetry.contextManager.active(), + srcPropagationMetadata, + ); + } + + const span = this.tracer.startSpan( + getSpanName(), + { + kind: spanKind, + }, + currentContext, + ); try { span.setAttributes({ [TelemetryAttributes.QueueName]: this.name, }); - let activeContext = this.opts.telemetry.contextManager.active(); - if (srcPropagationMetadata) { - activeContext = this.opts.telemetry.contextManager.fromMetadata( - activeContext, - srcPropagationMetadata, + let dstPropagationMetadata: undefined | Carrier; + if (spanKind === SpanKind.PRODUCER) { + currentContext = this.opts.telemetry.trace.setSpan( + this.opts.telemetry.contextManager.active(), + span, ); - } - let dstPropagationMetadata: undefined | Record; - if (spanKind === SpanKind.PRODUCER) { dstPropagationMetadata = - this.opts.telemetry.contextManager.getMetadata(activeContext); + this.opts.telemetry.contextManager.getMetadata(currentContext); } - const messageContext = this.opts.telemetry.trace.setSpan( - activeContext, - span, - ); - - return await this.opts.telemetry.contextManager.with(messageContext, () => + return await this.opts.telemetry.contextManager.with(currentContext, () => callback(span, dstPropagationMetadata), ); } catch (err) { diff --git a/src/interfaces/base-job-options.ts b/src/interfaces/base-job-options.ts index 6a5df49bdb..df6770b193 100644 --- a/src/interfaces/base-job-options.ts +++ b/src/interfaces/base-job-options.ts @@ -1,4 +1,4 @@ -import { RepeatOptions, KeepJobs, BackoffOptions } from './'; +import { RepeatOptions, KeepJobs, BackoffOptions, Carrier } from './'; export interface DefaultJobOptions { /** @@ -115,5 +115,5 @@ export interface BaseJobOptions extends DefaultJobOptions { /** * TelemetryMetadata, provide for context propagation. */ - telemetryMetadata?: Record; + telemetryMetadata?: Carrier; } diff --git a/src/interfaces/telemetry.ts b/src/interfaces/telemetry.ts index c9a5f74c04..23e22abab9 100644 --- a/src/interfaces/telemetry.ts +++ b/src/interfaces/telemetry.ts @@ -1,13 +1,12 @@ import { SpanKind } from '../enums'; -export interface Telemetry { +export interface Telemetry { trace: Trace; contextManager: ContextManager; tracerName: string; - propagation: Propagation; } -export interface ContextManager { +export interface ContextManager { /** * Creates a new context and sets it as active for the fn passed as last argument * @@ -19,26 +18,27 @@ export interface ContextManager { fn: A, ): ReturnType; active(): Context; - getMetadata(context: Context): Record; - fromMetadata( - activeContext: Context, - metadata: Record, - ): Context; + getMetadata(context: Context): Carrier; + fromMetadata(activeContext: Context, metadata: Carrier): Context; } -export interface Trace { +export interface Carrier { + traceparent?: string; + tracestate?: string; +} + +export interface Trace { getTracer(name: string, version?: string): Tracer; setSpan: SetSpan; } -export type SetSpan = (context: Context, span: Span) => Context; - -export interface Context { - [key: string]: Function; -} +export type SetSpan = ( + context: Context, + span: Span, +) => Context; -export interface Tracer { - startSpan(name: string, options?: SpanOptions): Span; +export interface Tracer { + startSpan(name: string, options?: SpanOptions, context?: Context): Span; } export interface SpanOptions { @@ -98,8 +98,3 @@ interface NameException { export type Time = HighResolutionTime | number | Date; type HighResolutionTime = [number, number]; - -export interface Propagation { - inject(context: Context, carrier: T): void; - extract(context: Context, carrier: T): Context; -} diff --git a/src/types/job-options.ts b/src/types/job-options.ts index 9f3bf9be89..800b14f565 100644 --- a/src/types/job-options.ts +++ b/src/types/job-options.ts @@ -1,4 +1,4 @@ -import { BaseJobOptions, DebounceOptions } from '../interfaces'; +import { BaseJobOptions, Carrier, DebounceOptions } from '../interfaces'; export type JobsOptions = BaseJobOptions & { /** @@ -54,5 +54,5 @@ export type RedisJobOptions = BaseJobOptions & { /** * TelemetryMetadata, provide for context propagation. */ - tm?: Record; + tm?: Carrier; }; diff --git a/tests/test_telemetry_interface.ts b/tests/test_telemetry_interface.ts index fb4ba3a0b5..4126617630 100644 --- a/tests/test_telemetry_interface.ts +++ b/tests/test_telemetry_interface.ts @@ -8,7 +8,6 @@ import { Telemetry, Trace, ContextManager, - Propagation, Tracer, Span, SpanOptions, @@ -16,10 +15,9 @@ import { Exception, Time, SpanContext, - Context, } from '../src/interfaces'; -import { SpanKind, TelemetryAttributes } from '../src/enums'; import * as sinon from 'sinon'; +import { SpanKind, TelemetryAttributes } from '../src/enums'; describe('Telemetry', () => { type ExtendedException = Exception & { @@ -29,32 +27,31 @@ describe('Telemetry', () => { const redisHost = process.env.REDIS_HOST || 'localhost'; const prefix = process.env.BULLMQ_TEST_PREFIX || 'bull'; - class MockTelemetry implements Telemetry { - public trace: Trace; - public contextManager: ContextManager; - public propagation: Propagation; - public tracerName = 'mockTracer'; + class MockTelemetry implements Telemetry { + public trace: Trace; + public contextManager: ContextManager; + public tracerName: string; - constructor() { + constructor(name: string) { this.trace = new MockTrace(); this.contextManager = new MockContextManager(); + this.tracerName = name; } } - class MockTrace implements Trace { + class MockTrace implements Trace { getTracer(): Tracer { return new MockTracer(); } setSpan(context: Context, span: Span): Context { - const newContext = { ...context }; - newContext['getSpan'] = () => span; - return newContext; + context['getSpan'] = () => span; + return { ...context, getMetadata_span: span['name'] }; } } - class MockContextManager implements ContextManager { - private activeContext: Context = {}; + class MockContextManager implements ContextManager { + private activeContext: Context = {} as Context; with any>( context: Context, @@ -70,10 +67,10 @@ describe('Telemetry', () => { getMetadata(context: Context): Record { const metadata: Record = {}; - Object.keys(context).forEach(key => { + Object.keys(context as object).forEach(key => { if (key.startsWith('getMetadata_')) { - const value = (context[key] as () => string)(); - metadata[key.replace('getMetadata_', '')] = value; + const value = context[key]; + metadata[key] = value; } }); return metadata; @@ -85,7 +82,7 @@ describe('Telemetry', () => { ): Context { const newContext = { ...activeContext }; Object.keys(metadata).forEach(key => { - newContext[`getMetadata_${key}`] = () => metadata[key]; + newContext[key] = () => metadata[key]; }); return newContext; } @@ -141,7 +138,7 @@ describe('Telemetry', () => { beforeEach(async function () { queueName = `test-${v4()}`; - telemetryClient = new MockTelemetry(); + telemetryClient = new MockTelemetry('mockTracer'); queue = new Queue(queueName, { connection, prefix, @@ -164,7 +161,6 @@ describe('Telemetry', () => { const activeContext = telemetryClient.contextManager.active(); const span = activeContext.getSpan?.() as MockSpan; - expect(span).to.be.an.instanceOf(MockSpan); expect(span.name).to.equal(`${queueName}.testJob Queue.add`); expect(span.options?.kind).to.equal(SpanKind.PRODUCER); @@ -211,7 +207,6 @@ describe('Telemetry', () => { const activeContext = telemetryClient.contextManager.active(); const span = activeContext.getSpan?.() as MockSpan; - expect(span).to.be.an.instanceOf(MockSpan); expect(span.name).to.equal(`${queueName} Queue.addBulk`); expect(span.options?.kind).to.equal(SpanKind.PRODUCER); @@ -250,21 +245,23 @@ describe('Telemetry', () => { describe('Worker.processJob', async () => { it('should correctly interact with telemetry when processing a job', async () => { + const job = await queue.add('testJob', { foo: 'bar' }); + const worker = new Worker(queueName, async () => 'some result', { connection, telemetry: telemetryClient, }); + await worker.waitUntilReady(); + const moveToCompletedStub = sinon.stub(job, 'moveToCompleted').resolves(); - const job = await queue.add('testJob', { foo: 'bar' }); - const token = 'some-token'; + const startSpanSpy = sinon.spy(worker.tracer, 'startSpan'); - const moveToCompletedStub = sinon.stub(job, 'moveToCompleted').resolves(); + const token = 'some-token'; await worker.processJob(job, token, () => false, new Set()); - const activeContext = telemetryClient.contextManager.active(); - const span = activeContext.getSpan?.() as MockSpan; + const span = startSpanSpy.returnValues[0] as MockSpan; expect(span).to.be.an.instanceOf(MockSpan); expect(span.name).to.equal(`${queueName} ${worker.id} Worker.processJob`); @@ -276,5 +273,26 @@ describe('Telemetry', () => { moveToCompletedStub.restore(); await worker.close(); }); + + it('should propagate context correctly between queue and worker using telemetry', async () => { + const job = await queue.add('testJob', { foo: 'bar' }); + + const worker = new Worker(queueName, async () => 'some result', { + connection, + telemetry: telemetryClient, + }); + await worker.waitUntilReady(); + + const moveToCompletedStub = sinon.stub(job, 'moveToCompleted').resolves(); + + await worker.processJob(job, 'some-token', () => false, new Set()); + + const workerActiveContext = telemetryClient.contextManager.active(); + const queueActiveContext = telemetryClient.contextManager.active(); + expect(workerActiveContext).to.equal(queueActiveContext); + + moveToCompletedStub.restore(); + await worker.close(); + }); }); }); From 7261e4056b6f2a914111356bda797e0b72da0d9c Mon Sep 17 00:00:00 2001 From: fgozdz Date: Thu, 10 Oct 2024 12:48:34 +0200 Subject: [PATCH 18/26] feat(worker): job error handling, do not trace certain methods until there is a point to --- src/classes/worker.ts | 75 +++++++++++++++++-------------- src/enums/telemetry-attributes.ts | 3 ++ 2 files changed, 45 insertions(+), 33 deletions(-) diff --git a/src/classes/worker.ts b/src/classes/worker.ts index 8fa9c7f20c..ca34f5f6be 100644 --- a/src/classes/worker.ts +++ b/src/classes/worker.ts @@ -817,6 +817,10 @@ will never work with more accuracy than 1ms. */ fetchNextCallback = () => true, jobsInProgress: Set<{ job: Job; ts: number }>, ): Promise> { + if (!job || this.closing || this.paused) { + return; + } + const { telemetryMetadata: srcPropagationMedatada } = job.opts; return this.trace>( @@ -829,10 +833,6 @@ will never work with more accuracy than 1ms. */ [TelemetryAttributes.JobId]: job.id, }); - if (!job || this.closing || this.paused) { - return; - } - const handleCompleted = async (result: ResultType) => { if (!this.connection.closing) { const completed = await job.moveToCompleted( @@ -891,7 +891,15 @@ will never work with more accuracy than 1ms. */ const result = await this.callProcessJob(job, token); return await handleCompleted(result); } catch (err) { - return handleFailed(err); + const failed = await handleFailed(err); + + span?.setAttributes({ + [TelemetryAttributes.JobFinishedTimestamp]: job.finishedOn, + [TelemetryAttributes.JobProcessedTimestamp]: job.processedOn, + [TelemetryAttributes.JobFailedReason]: job.failedReason, + }); + + return failed; } finally { jobsInProgress.delete(inProgressItem); } @@ -934,20 +942,20 @@ will never work with more accuracy than 1ms. */ * Resumes processing of this worker (if paused). */ resume(): void { - this.trace( - SpanKind.INTERNAL, - () => `${this.name} ${this.id} Worker.resume`, - span => { - span?.setAttributes({ - [TelemetryAttributes.WorkerId]: this.id, - }); + if (this.resumeWorker) { + this.trace( + SpanKind.INTERNAL, + () => `${this.name} ${this.id} Worker.resume`, + span => { + span?.setAttributes({ + [TelemetryAttributes.WorkerId]: this.id, + }); - if (this.resumeWorker) { this.resumeWorker(); this.emit('resumed'); - } - }, - ); + }, + ); + } } /** @@ -982,6 +990,10 @@ will never work with more accuracy than 1ms. */ * @returns Promise that resolves when the worker has been closed. */ async close(force = false): Promise { + if (this.closing) { + return this.closing; + } + await this.trace( SpanKind.INTERNAL, () => `${this.name} ${this.id} Worker.close`, @@ -991,9 +1003,6 @@ will never work with more accuracy than 1ms. */ [TelemetryAttributes.WorkerForceClose]: force, }); - if (this.closing) { - return this.closing; - } this.closing = (async () => { this.emit('closing', 'closing queue'); this.abortDelayController?.abort(); @@ -1044,18 +1053,18 @@ will never work with more accuracy than 1ms. */ * @see {@link https://docs.bullmq.io/patterns/manually-fetching-jobs} */ async startStalledCheckTimer(): Promise { - await this.trace( - SpanKind.INTERNAL, - () => `${this.name} ${this.id} Worker.startStalledCheckTimer`, - async span => { - span?.setAttributes({ - [TelemetryAttributes.WorkerId]: this.id, - }); + if (!this.opts.skipStalledCheck) { + clearTimeout(this.stalledCheckTimer); - if (!this.opts.skipStalledCheck) { - clearTimeout(this.stalledCheckTimer); + if (!this.closing) { + await this.trace( + SpanKind.INTERNAL, + () => `${this.name} ${this.id} Worker.startStalledCheckTimer`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.WorkerId]: this.id, + }); - if (!this.closing) { try { await this.checkConnectionError(() => this.moveStalledJobsToWait(), @@ -1066,10 +1075,10 @@ will never work with more accuracy than 1ms. */ } catch (err) { this.emit('error', err); } - } - } - }, - ); + }, + ); + } + } } private startLockExtenderTimer( diff --git a/src/enums/telemetry-attributes.ts b/src/enums/telemetry-attributes.ts index 734aaadb29..088d525ab9 100644 --- a/src/enums/telemetry-attributes.ts +++ b/src/enums/telemetry-attributes.ts @@ -22,6 +22,9 @@ export enum TelemetryAttributes { WorkerForceClose = 'bullmq.worker.force.close', WorkerStalledJobs = 'bullmq.worker.stalled.jobs', WorkerJobsToExtendLocks = 'bullmq.worker.jobs.to.extend.locks', + JobFinishedTimestamp = 'bullmq.job.finished.timestamp', + JobProcessedTimestamp = 'bullmq.job.processed.timestamp', + JobFailedReason = 'bullmq.job.failed.reason', } export enum SpanKind { From 677fcc6e1ab41781fdce78bd9790b1f3a597045a Mon Sep 17 00:00:00 2001 From: fgozdz Date: Fri, 11 Oct 2024 10:33:20 +0200 Subject: [PATCH 19/26] feat(queue-base): add propagation for internal spanKind and setSpan for every SpanKind --- src/classes/queue-base.ts | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/classes/queue-base.ts b/src/classes/queue-base.ts index b97a832fd1..09df692ac1 100644 --- a/src/classes/queue-base.ts +++ b/src/classes/queue-base.ts @@ -213,10 +213,12 @@ export class QueueBase extends EventEmitter implements MinimalQueue { return callback(); } - let currentContext; + const currentContext = this.opts.telemetry.contextManager.active(); + + let parentContext; if (srcPropagationMetadata) { - currentContext = this.opts.telemetry.contextManager.fromMetadata( - this.opts.telemetry.contextManager.active(), + parentContext = this.opts.telemetry.contextManager.fromMetadata( + currentContext, srcPropagationMetadata, ); } @@ -226,7 +228,7 @@ export class QueueBase extends EventEmitter implements MinimalQueue { { kind: spanKind, }, - currentContext, + parentContext, ); try { @@ -234,18 +236,27 @@ export class QueueBase extends EventEmitter implements MinimalQueue { [TelemetryAttributes.QueueName]: this.name, }); + let messageContext; let dstPropagationMetadata: undefined | Carrier; + if (spanKind === SpanKind.PRODUCER) { - currentContext = this.opts.telemetry.trace.setSpan( - this.opts.telemetry.contextManager.active(), + messageContext = this.opts.telemetry.trace.setSpan( + currentContext, span, ); dstPropagationMetadata = - this.opts.telemetry.contextManager.getMetadata(currentContext); + this.opts.telemetry.contextManager.getMetadata(messageContext); + } else if (spanKind === SpanKind.INTERNAL) { + messageContext = this.opts.telemetry.trace.setSpan( + currentContext, + span, + ); + } else if (spanKind === SpanKind.CONSUMER) { + messageContext = this.opts.telemetry.trace.setSpan(parentContext, span); } - return await this.opts.telemetry.contextManager.with(currentContext, () => + return await this.opts.telemetry.contextManager.with(messageContext, () => callback(span, dstPropagationMetadata), ); } catch (err) { From 51a353edd464ee84f968a904bed1de38c561053f Mon Sep 17 00:00:00 2001 From: Manuel Astudillo Date: Mon, 28 Oct 2024 16:22:35 +0100 Subject: [PATCH 20/26] refactor(telemetry): several improvements and small changes --- src/classes/flow-producer.ts | 1 + src/classes/job.ts | 136 ++++++++++++++++++----------- src/classes/queue-base.ts | 39 +++------ src/classes/queue.ts | 32 +++---- src/classes/worker.ts | 97 ++++++++++++-------- src/commands/updateJobOption-1.lua | 26 ++++++ src/enums/telemetry-attributes.ts | 2 + src/interfaces/base-job-options.ts | 4 +- src/interfaces/telemetry.ts | 46 +++------- src/types/job-options.ts | 4 +- src/types/minimal-queue.ts | 1 + 11 files changed, 221 insertions(+), 167 deletions(-) create mode 100644 src/commands/updateJobOption-1.lua diff --git a/src/classes/flow-producer.ts b/src/classes/flow-producer.ts index 5753aaec07..c7d70fa6dc 100644 --- a/src/classes/flow-producer.ts +++ b/src/classes/flow-producer.ts @@ -456,6 +456,7 @@ export class FlowProducer extends EventEmitter { emit: this.emit.bind(this) as any, on: this.on.bind(this) as any, redisVersion: this.connection.redisVersion, + trace: async (): Promise => {}, }; } diff --git a/src/classes/job.ts b/src/classes/job.ts index 3e18bbefc3..f65e188b3f 100644 --- a/src/classes/job.ts +++ b/src/classes/job.ts @@ -34,6 +34,7 @@ import { Backoffs } from './backoffs'; import { Scripts, raw2NextJobData } from './scripts'; import { UnrecoverableError } from './errors/unrecoverable-error'; import type { QueueEvents } from './queue-events'; +import { SpanKind } from '../enums'; const logger = debuglog('bull'); @@ -658,6 +659,28 @@ export class Job< return result; } + private async shouldRetryJob(err: Error): Promise<[boolean, number]> { + if ( + this.attemptsMade + 1 < this.opts.attempts && + !this.discarded && + !(err instanceof UnrecoverableError || err.name == 'UnrecoverableError') + ) { + const opts = this.queue.opts as WorkerOptions; + + const delay = await Backoffs.calculate( + this.opts.backoff, + this.attemptsMade + 1, + err, + this, + opts.settings && opts.settings.backoffStrategy, + ); + + return [delay == -1 ? false : true, delay == -1 ? 0 : delay]; + } else { + return [false, 0]; + } + } + /** * Moves a job to the failed queue. * @@ -674,7 +697,6 @@ export class Job< const client = await this.queue.client; const message = err?.message; - const queue = this.queue; this.failedReason = message; let command: string; @@ -685,32 +707,15 @@ export class Job< // // Check if an automatic retry should be performed // - let moveToFailed = false; - let finishedOn, delay; - if ( - this.attemptsMade + 1 < this.opts.attempts && - !this.discarded && - !(err instanceof UnrecoverableError || err.name == 'UnrecoverableError') - ) { - const opts = queue.opts as WorkerOptions; - - // Check if backoff is needed - delay = await Backoffs.calculate( - this.opts.backoff, - this.attemptsMade + 1, - err, - this, - opts.settings && opts.settings.backoffStrategy, - ); - - if (delay === -1) { - moveToFailed = true; - } else if (delay) { + let finishedOn: number; + const [shouldRetry, retryDelay] = await this.shouldRetryJob(err); + if (shouldRetry) { + if (retryDelay) { const args = this.scripts.moveToDelayedArgs( this.id, Date.now(), token, - delay, + retryDelay, ); (multi).moveToDelayed(args); command = 'moveToDelayed'; @@ -722,11 +727,6 @@ export class Job< command = 'retryJob'; } } else { - // If not, move to failed - moveToFailed = true; - } - - if (moveToFailed) { const args = this.scripts.moveToFailedArgs( this, message, @@ -739,37 +739,67 @@ export class Job< command = 'moveToFinished'; } - const results = await multi.exec(); - const anyError = results.find(result => result[0]); - if (anyError) { - throw new Error( - `Error "moveToFailed" with command ${command}: ${anyError}`, - ); - } + await this.queue.trace>( + SpanKind.INTERNAL, + () => this.getSpanName(command), + async (span, srcPropagationMedatada) => { + if (srcPropagationMedatada) { + (multi).updateJobOption([ + this.toKey(this.id), + 'tm', + srcPropagationMedatada, + ]); + } - const result = results[results.length - 1][1] as number; - if (result < 0) { - throw this.scripts.finishedErrors({ - code: result, - jobId: this.id, - command, - state: 'active', - }); - } + const results = await multi.exec(); + const anyError = results.find(result => result[0]); + if (anyError) { + throw new Error( + `Error "moveToFailed" with command ${command}: ${anyError}`, + ); + } - if (finishedOn && typeof finishedOn === 'number') { - this.finishedOn = finishedOn; - } + const result = results[results.length - 1][1] as number; + if (result < 0) { + throw this.scripts.finishedErrors({ + code: result, + jobId: this.id, + command, + state: 'active', + }); + } - if (delay && typeof delay === 'number') { - this.delay = delay; - } + if (finishedOn && typeof finishedOn === 'number') { + this.finishedOn = finishedOn; + } - this.attemptsMade += 1; + if (retryDelay && typeof retryDelay === 'number') { + this.delay = retryDelay; + } + + this.attemptsMade += 1; + + if (Array.isArray(result)) { + return raw2NextJobData(result); + } + }, + ); + } - if (Array.isArray(result)) { - return raw2NextJobData(result); + private getSpanName(command: string) { + let operation; + switch (command) { + case 'moveToDelayed': + operation = 'delay'; + break; + case 'retryJob': + operation = 'retry'; + break; + case 'moveToFinished': + operation = 'fail'; + break; } + return `${operation} ${this.queue.name}`; } /** diff --git a/src/classes/queue-base.ts b/src/classes/queue-base.ts index 09df692ac1..609f719adf 100644 --- a/src/classes/queue-base.ts +++ b/src/classes/queue-base.ts @@ -1,11 +1,5 @@ import { EventEmitter } from 'events'; -import { - Carrier, - QueueBaseOptions, - RedisClient, - Span, - Tracer, -} from '../interfaces'; +import { QueueBaseOptions, RedisClient, Span, Tracer } from '../interfaces'; import { MinimalQueue } from '../types'; import { delay, @@ -92,7 +86,7 @@ export class QueueBase extends EventEmitter implements MinimalQueue { this.setScripts(); if (opts?.telemetry) { - this.tracer = opts.telemetry.trace.getTracer(opts.telemetry.tracerName); + this.tracer = opts.telemetry.tracer; } } @@ -203,11 +197,11 @@ export class QueueBase extends EventEmitter implements MinimalQueue { * @param srcPropagationMedatada - * @returns */ - protected async trace( + async trace( spanKind: SpanKind, getSpanName: () => string, - callback: (span?: Span, dstPropagationMetadata?: Carrier) => Promise | T, - srcPropagationMetadata?: Carrier, + callback: (span?: Span, dstPropagationMetadata?: string) => Promise | T, + srcPropagationMetadata?: string, ) { if (!this.tracer) { return callback(); @@ -223,8 +217,9 @@ export class QueueBase extends EventEmitter implements MinimalQueue { ); } + const spanName = getSpanName(); const span = this.tracer.startSpan( - getSpanName(), + spanName, { kind: spanKind, }, @@ -237,23 +232,17 @@ export class QueueBase extends EventEmitter implements MinimalQueue { }); let messageContext; - let dstPropagationMetadata: undefined | Carrier; + let dstPropagationMetadata: undefined | string; - if (spanKind === SpanKind.PRODUCER) { - messageContext = this.opts.telemetry.trace.setSpan( - currentContext, - span, - ); + if (spanKind === SpanKind.CONSUMER) { + messageContext = span.setSpanOnContext(parentContext); + } else { + messageContext = span.setSpanOnContext(currentContext); + } + if (callback.length == 2) { dstPropagationMetadata = this.opts.telemetry.contextManager.getMetadata(messageContext); - } else if (spanKind === SpanKind.INTERNAL) { - messageContext = this.opts.telemetry.trace.setSpan( - currentContext, - span, - ); - } else if (spanKind === SpanKind.CONSUMER) { - messageContext = this.opts.telemetry.trace.setSpan(parentContext, span); } return await this.opts.telemetry.contextManager.with(messageContext, () => diff --git a/src/classes/queue.ts b/src/classes/queue.ts index 68070cce64..f12925ed8e 100644 --- a/src/classes/queue.ts +++ b/src/classes/queue.ts @@ -239,7 +239,7 @@ export class Queue< ): Promise> { return this.trace>( SpanKind.PRODUCER, - () => `${this.name}.${name} Queue.add`, + () => `add ${this.name}.${name}`, async (span, srcPropagationMedatada) => { if (srcPropagationMedatada) { opts = { ...opts, telemetryMetadata: srcPropagationMedatada }; @@ -300,7 +300,7 @@ export class Queue< ): Promise[]> { return this.trace[]>( SpanKind.PRODUCER, - () => `${this.name} Queue.addBulk`, + () => `addBulk ${this.name}`, async (span, srcPropagationMedatada) => { span?.setAttributes({ [TelemetryAttributes.BulkNames]: jobs.map(job => job.name), @@ -381,7 +381,7 @@ export class Queue< async pause(): Promise { await this.trace( SpanKind.INTERNAL, - () => `${this.name} Queue.pause`, + () => `pause ${this.name}`, async () => { await this.scripts.pause(true); @@ -397,7 +397,7 @@ export class Queue< async close(): Promise { await this.trace( SpanKind.INTERNAL, - () => `${this.name} Queue.close`, + () => `close ${this.name}`, async () => { if (!this.closing) { if (this._repeat) { @@ -418,7 +418,7 @@ export class Queue< async resume(): Promise { await this.trace( SpanKind.INTERNAL, - () => `${this.name} Queue.resume`, + () => `resume ${this.name}`, async () => { await this.scripts.pause(false); @@ -500,7 +500,7 @@ export class Queue< ): Promise { return this.trace( SpanKind.INTERNAL, - () => `${this.name} ${name} Queue.removeRepeatable`, + () => `.removeRepeatable ${this.name}.${name}`, async () => { const repeat = await this.repeat; const removed = await repeat.removeRepeatable(name, repeatOpts, jobId); @@ -534,7 +534,7 @@ export class Queue< async removeDebounceKey(id: string): Promise { return this.trace( SpanKind.INTERNAL, - () => `${this.name} ${id} Queue.removeDebounceKey`, + () => `removeDebounceKey ${this.name}.${id}`, async () => { const client = await this.client; @@ -569,7 +569,7 @@ export class Queue< async removeRepeatableByKey(key: string): Promise { return this.trace( SpanKind.INTERNAL, - () => `${this.name} ${key} Queue.removeRepeatableByKey`, + () => `removeRepeatableByKey ${this.name}.${key} `, async span => { span?.setAttributes({ [TelemetryAttributes.JobKey]: key, @@ -595,7 +595,7 @@ export class Queue< async remove(jobId: string, { removeChildren = true } = {}): Promise { return this.trace( SpanKind.INTERNAL, - () => `${this.name} ${jobId} Queue.remove`, + () => `remove ${this.name}.${jobId}`, async span => { span?.setAttributes({ [TelemetryAttributes.JobId]: jobId, @@ -621,7 +621,7 @@ export class Queue< ): Promise { await this.trace( SpanKind.INTERNAL, - () => `${this.name} Queue.updateJobProgress`, + () => `updateJobProgress ${this.name}`, async span => { span?.setAttributes({ [TelemetryAttributes.JobId]: jobId, @@ -660,7 +660,7 @@ export class Queue< async drain(delayed = false): Promise { await this.trace( SpanKind.INTERNAL, - () => `${this.name} Queue.drain`, + () => `drain ${this.name}`, async span => { span?.setAttributes({ [TelemetryAttributes.QueueDrainDelay]: delayed, @@ -695,7 +695,7 @@ export class Queue< ): Promise { return this.trace( SpanKind.INTERNAL, - () => `${this.name} Queue.clean`, + () => `clean ${this.name}`, async span => { const maxCount = limit || Infinity; const maxCountPerCall = Math.min(10000, maxCount); @@ -746,7 +746,7 @@ export class Queue< async obliterate(opts?: ObliterateOpts): Promise { await this.trace( SpanKind.INTERNAL, - () => `${this.name} Queue.obliterate`, + () => `obliterate ${this.name}`, async () => { await this.pause(); @@ -777,7 +777,7 @@ export class Queue< ): Promise { await this.trace( SpanKind.PRODUCER, - () => `${this.name} Queue.retryJobs`, + () => `retryJobs ${this.name}`, async span => { span?.setAttributes({ [TelemetryAttributes.QueueOptions]: JSON.stringify(opts), @@ -806,7 +806,7 @@ export class Queue< async promoteJobs(opts: { count?: number } = {}): Promise { await this.trace( SpanKind.INTERNAL, - () => `${this.name} Queue.promoteJobs`, + () => `promoteJobs ${this.name}`, async span => { span?.setAttributes({ [TelemetryAttributes.QueueOptions]: JSON.stringify(opts), @@ -828,7 +828,7 @@ export class Queue< async trimEvents(maxLength: number): Promise { return this.trace( SpanKind.INTERNAL, - () => `${this.name} Queue.trimEvents`, + () => `trimEvents ${this.name}`, async span => { span?.setAttributes({ [TelemetryAttributes.QueueEventMaxLength]: maxLength, diff --git a/src/classes/worker.ts b/src/classes/worker.ts index ca34f5f6be..b8d49684ba 100644 --- a/src/classes/worker.ts +++ b/src/classes/worker.ts @@ -184,7 +184,8 @@ export class Worker< private extendLocksTimer: NodeJS.Timeout | null = null; private limitUntil = 0; private resumeWorker: () => void; - private stalledCheckTimer: NodeJS.Timeout; + + private stalledCheckStopper?: () => void; private waiting: Promise | null = null; private _repeat: Repeat; // To be deprecated in v6 in favor of Job Scheduler @@ -424,11 +425,10 @@ export class Worker< async run() { await this.trace( SpanKind.INTERNAL, - () => `${this.name} ${this.id} Worker.run`, + () => this.getSpanName('run'), async span => { span?.setAttributes({ [TelemetryAttributes.WorkerId]: this.id, - [TelemetryAttributes.WorkerOptions]: JSON.stringify(this.opts), }); if (!this.processFn) { @@ -553,7 +553,7 @@ export class Worker< async getNextJob(token: string, { block = true }: GetNextJobOptions = {}) { return this.trace>( SpanKind.INTERNAL, - () => `${this.name} ${this.id} Worker.getNextJob`, + () => this.getSpanName('getNextJob'), async span => { const nextJob = await this._getNextJob( await this.client, @@ -633,7 +633,7 @@ export class Worker< async rateLimit(expireTimeMs: number): Promise { await this.trace( SpanKind.INTERNAL, - () => `${this.name} ${this.id} Worker.rateLimit`, + () => this.getSpanName('rateLimit'), async span => { span?.setAttributes({ [TelemetryAttributes.WorkerId]: this.id, @@ -825,7 +825,7 @@ will never work with more accuracy than 1ms. */ return this.trace>( SpanKind.CONSUMER, - () => `${this.name} ${this.id} Worker.processJob`, + () => this.getSpanName('process'), async span => { span?.setAttributes({ [TelemetryAttributes.WorkerId]: this.id, @@ -841,6 +841,11 @@ will never work with more accuracy than 1ms. */ fetchNextCallback() && !(this.closing || this.paused), ); this.emit('completed', job, result, 'active'); + + span?.addEvent('job completed', { + [TelemetryAttributes.JobResult]: JSON.stringify(result), + }); + const [jobData, jobId, limitUntil, delayUntil] = completed || []; this.updateDelays(limitUntil, delayUntil); @@ -851,6 +856,7 @@ will never work with more accuracy than 1ms. */ const handleFailed = async (err: Error) => { if (!this.connection.closing) { try { + // Check if the job was manually rate-limited if (err.message == RATE_LIMIT_ERROR) { this.limitUntil = await this.moveLimitedBackToWait(job, token); return; @@ -868,6 +874,10 @@ will never work with more accuracy than 1ms. */ const result = await job.moveToFailed(err, token, true); this.emit('failed', job, err, 'active'); + span?.addEvent('job failed', { + [TelemetryAttributes.JobFailedReason]: err.message, + }); + if (result) { const [jobData, jobId, limitUntil, delayUntil] = result; this.updateDelays(limitUntil, delayUntil); @@ -878,13 +888,15 @@ will never work with more accuracy than 1ms. */ // It probably means that the job has lost the lock before completion // A worker will (or already has) moved the job back // to the waiting list (as stalled) + span?.recordException((err).message); } } }; this.emit('active', job, 'waiting'); - const inProgressItem = { job, ts: Date.now() }; + const processedOn = Date.now(); + const inProgressItem = { job, ts: processedOn }; try { jobsInProgress.add(inProgressItem); @@ -892,15 +904,13 @@ will never work with more accuracy than 1ms. */ return await handleCompleted(result); } catch (err) { const failed = await handleFailed(err); - + return failed; + } finally { span?.setAttributes({ - [TelemetryAttributes.JobFinishedTimestamp]: job.finishedOn, - [TelemetryAttributes.JobProcessedTimestamp]: job.processedOn, - [TelemetryAttributes.JobFailedReason]: job.failedReason, + [TelemetryAttributes.JobFinishedTimestamp]: Date.now(), + [TelemetryAttributes.JobProcessedTimestamp]: processedOn, }); - return failed; - } finally { jobsInProgress.delete(inProgressItem); } }, @@ -908,6 +918,10 @@ will never work with more accuracy than 1ms. */ ); } + private getSpanName(operation: string): string { + return `${operation} ${this.name}.${this.opts.name || this.id}`; + } + /** * * Pauses the processing of this queue only for this worker. @@ -915,7 +929,7 @@ will never work with more accuracy than 1ms. */ async pause(doNotWaitActive?: boolean): Promise { await this.trace( SpanKind.INTERNAL, - () => `${this.name} ${this.id} Worker.pause`, + () => this.getSpanName('pause'), async span => { span?.setAttributes({ [TelemetryAttributes.WorkerId]: this.id, @@ -945,7 +959,7 @@ will never work with more accuracy than 1ms. */ if (this.resumeWorker) { this.trace( SpanKind.INTERNAL, - () => `${this.name} ${this.id} Worker.resume`, + () => this.getSpanName('resume'), span => { span?.setAttributes({ [TelemetryAttributes.WorkerId]: this.id, @@ -996,7 +1010,7 @@ will never work with more accuracy than 1ms. */ await this.trace( SpanKind.INTERNAL, - () => `${this.name} ${this.id} Worker.close`, + () => this.getSpanName('close'), async span => { span?.setAttributes({ [TelemetryAttributes.WorkerId]: this.id, @@ -1029,7 +1043,8 @@ will never work with more accuracy than 1ms. */ } clearTimeout(this.extendLocksTimer); - clearTimeout(this.stalledCheckTimer); + //clearTimeout(this.stalledCheckTimer); + this.stalledCheckStopper?.(); this.closed = true; this.emit('closed'); @@ -1054,33 +1069,42 @@ will never work with more accuracy than 1ms. */ */ async startStalledCheckTimer(): Promise { if (!this.opts.skipStalledCheck) { - clearTimeout(this.stalledCheckTimer); - if (!this.closing) { await this.trace( SpanKind.INTERNAL, - () => `${this.name} ${this.id} Worker.startStalledCheckTimer`, + () => this.getSpanName('startStalledCheckTimer'), async span => { span?.setAttributes({ [TelemetryAttributes.WorkerId]: this.id, }); - try { - await this.checkConnectionError(() => - this.moveStalledJobsToWait(), - ); - this.stalledCheckTimer = setTimeout(async () => { - await this.startStalledCheckTimer(); - }, this.opts.stalledInterval); - } catch (err) { + this.stalledChecker().catch(err => { this.emit('error', err); - } + }); }, ); } } } + private async stalledChecker() { + while (!this.closing) { + try { + await this.checkConnectionError(() => this.moveStalledJobsToWait()); + } catch (err) { + this.emit('error', err); + } + + await new Promise(resolve => { + const timeout = setTimeout(resolve, this.opts.stalledInterval); + this.stalledCheckStopper = () => { + clearTimeout(timeout); + resolve(); + }; + }); + } + } + private startLockExtenderTimer( jobsInProgress: Set<{ job: Job; ts: number }>, ): void { @@ -1162,7 +1186,7 @@ will never work with more accuracy than 1ms. */ protected async extendLocks(jobs: Job[]) { await this.trace( SpanKind.INTERNAL, - () => `${this.name} ${this.id} Worker.extendLocks`, + () => this.getSpanName('extendLocks'), async span => { span?.setAttributes({ [TelemetryAttributes.WorkerId]: this.id, @@ -1202,11 +1226,17 @@ will never work with more accuracy than 1ms. */ private async moveStalledJobsToWait() { await this.trace( SpanKind.INTERNAL, - () => `${this.name} ${this.id} Worker.moveStalledJobsToWait`, + () => this.getSpanName('moveStalledJobsToWait'), async span => { const chunkSize = 50; const [failed, stalled] = await this.scripts.moveStalledJobsToWait(); + span?.setAttributes({ + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerStalledJobs]: stalled, + [TelemetryAttributes.WorkerFailedJobs]: failed, + }); + stalled.forEach((jobId: string) => this.emit('stalled', jobId, 'active'), ); @@ -1227,11 +1257,6 @@ will never work with more accuracy than 1ms. */ } this.notifyFailedJobs(await Promise.all(jobPromises)); - - span?.setAttributes({ - [TelemetryAttributes.WorkerId]: this.id, - [TelemetryAttributes.WorkerStalledJobs]: stalled, - }); }, ); } diff --git a/src/commands/updateJobOption-1.lua b/src/commands/updateJobOption-1.lua new file mode 100644 index 0000000000..03949faf29 --- /dev/null +++ b/src/commands/updateJobOption-1.lua @@ -0,0 +1,26 @@ +--[[ + Update a job option + + Input: + KEYS[1] Job id key + + ARGV[1] field + ARGV[2] value + + Output: + 0 - OK + -1 - Missing job. +]] +local rcall = redis.call + +if rcall("EXISTS", KEYS[1]) == 1 then -- // Make sure job exists + + local opts = rcall("HGET", KEYS[1], "opts") + local jsonOpts = cjson.decode(opts) + jsonOpts[ARGV[1]] = ARGV[2] + + rcall("HSET", KEYS[1], "opts", cjson.encode(jsonOpts)) + return 0 +else + return -1 +end diff --git a/src/enums/telemetry-attributes.ts b/src/enums/telemetry-attributes.ts index 088d525ab9..c5dcb6732b 100644 --- a/src/enums/telemetry-attributes.ts +++ b/src/enums/telemetry-attributes.ts @@ -21,9 +21,11 @@ export enum TelemetryAttributes { WorkerDoNotWaitActive = 'bullmq.worker.do.not.wait.active', WorkerForceClose = 'bullmq.worker.force.close', WorkerStalledJobs = 'bullmq.worker.stalled.jobs', + WorkerFailedJobs = 'bullmq.worker.failed.jobs', WorkerJobsToExtendLocks = 'bullmq.worker.jobs.to.extend.locks', JobFinishedTimestamp = 'bullmq.job.finished.timestamp', JobProcessedTimestamp = 'bullmq.job.processed.timestamp', + JobResult = 'bullmq.job.result', JobFailedReason = 'bullmq.job.failed.reason', } diff --git a/src/interfaces/base-job-options.ts b/src/interfaces/base-job-options.ts index 9d1cd81ccc..bb10f1caa3 100644 --- a/src/interfaces/base-job-options.ts +++ b/src/interfaces/base-job-options.ts @@ -1,4 +1,4 @@ -import { RepeatOptions, KeepJobs, BackoffOptions, Carrier } from './'; +import { RepeatOptions, KeepJobs, BackoffOptions } from './'; export interface DefaultJobOptions { /** @@ -116,5 +116,5 @@ export interface BaseJobOptions extends DefaultJobOptions { /** * TelemetryMetadata, provide for context propagation. */ - telemetryMetadata?: Carrier; + telemetryMetadata?: string; } diff --git a/src/interfaces/telemetry.ts b/src/interfaces/telemetry.ts index 23e22abab9..e01c91ccbb 100644 --- a/src/interfaces/telemetry.ts +++ b/src/interfaces/telemetry.ts @@ -1,9 +1,8 @@ import { SpanKind } from '../enums'; export interface Telemetry { - trace: Trace; + tracer: Tracer; contextManager: ContextManager; - tracerName: string; } export interface ContextManager { @@ -18,25 +17,10 @@ export interface ContextManager { fn: A, ): ReturnType; active(): Context; - getMetadata(context: Context): Carrier; - fromMetadata(activeContext: Context, metadata: Carrier): Context; + getMetadata(context: Context): string; + fromMetadata(activeContext: Context, metadata: string): Context; } -export interface Carrier { - traceparent?: string; - tracestate?: string; -} - -export interface Trace { - getTracer(name: string, version?: string): Tracer; - setSpan: SetSpan; -} - -export type SetSpan = ( - context: Context, - span: Span, -) => Context; - export interface Tracer { startSpan(name: string, options?: SpanOptions, context?: Context): Span; } @@ -45,30 +29,26 @@ export interface SpanOptions { kind: SpanKind; } -export interface Span { - setAttribute(key: string, value: Attribute): Span; - setAttributes(attributes: Attributes): Span; +export interface Span { + setSpanOnContext(ctx: Context): void; + setAttribute(key: string, value: AttributeValue): void; + setAttributes(attributes: Attributes): void; + addEvent(name: string, attributes?: Attributes): void; recordException(exception: Exception, time?: Time): void; - spanContext(): SpanContext; end(): void; } -export interface SpanContext { - traceId: string; - spanId: string; -} - export interface Attributes { - [attribute: string]: Attribute | undefined; + [attribute: string]: AttributeValue | undefined; } -export type Attribute = +export type AttributeValue = | string | number | boolean - | null - | undefined - | (null | undefined | string | number | boolean)[]; + | Array + | Array + | Array; export type Exception = string | ExceptionType; diff --git a/src/types/job-options.ts b/src/types/job-options.ts index 9b195f53c6..fb8b74d264 100644 --- a/src/types/job-options.ts +++ b/src/types/job-options.ts @@ -1,4 +1,4 @@ -import { BaseJobOptions, Carrier, DebounceOptions } from '../interfaces'; +import { BaseJobOptions, DebounceOptions } from '../interfaces'; export type JobsOptions = BaseJobOptions & { /** @@ -60,5 +60,5 @@ export type RedisJobOptions = BaseJobOptions & { /** * TelemetryMetadata, provide for context propagation. */ - tm?: Carrier; + tm?: string; }; diff --git a/src/types/minimal-queue.ts b/src/types/minimal-queue.ts index 2f435c07da..f27f0c1b0d 100644 --- a/src/types/minimal-queue.ts +++ b/src/types/minimal-queue.ts @@ -14,4 +14,5 @@ export type MinimalQueue = Pick< | 'emit' | 'on' | 'redisVersion' + | 'trace' >; From 6742a03442f9cbee1f17ee54bfdeb7bb5eeb8048 Mon Sep 17 00:00:00 2001 From: Manuel Astudillo Date: Tue, 29 Oct 2024 11:25:56 +0100 Subject: [PATCH 21/26] fix(telemetry): fix return type of setSpanOnContext --- src/interfaces/telemetry.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interfaces/telemetry.ts b/src/interfaces/telemetry.ts index e01c91ccbb..975e4c9169 100644 --- a/src/interfaces/telemetry.ts +++ b/src/interfaces/telemetry.ts @@ -30,7 +30,7 @@ export interface SpanOptions { } export interface Span { - setSpanOnContext(ctx: Context): void; + setSpanOnContext(ctx: Context): Context; setAttribute(key: string, value: AttributeValue): void; setAttributes(attributes: Attributes): void; addEvent(name: string, attributes?: Attributes): void; From 167a7dde054c82e4e9511e81bfc015b273a4f39b Mon Sep 17 00:00:00 2001 From: Manuel Astudillo Date: Tue, 29 Oct 2024 11:26:50 +0100 Subject: [PATCH 22/26] fix(job): return result of trace --- src/classes/job.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/classes/job.ts b/src/classes/job.ts index f65e188b3f..ec24a71572 100644 --- a/src/classes/job.ts +++ b/src/classes/job.ts @@ -739,7 +739,7 @@ export class Job< command = 'moveToFinished'; } - await this.queue.trace>( + return this.queue.trace>( SpanKind.INTERNAL, () => this.getSpanName(command), async (span, srcPropagationMedatada) => { From 210d36e0486d152a5adc44eeadc69dde6ceb0a4c Mon Sep 17 00:00:00 2001 From: Manuel Astudillo Date: Tue, 29 Oct 2024 11:27:17 +0100 Subject: [PATCH 23/26] test(telemetry): fix broken tests --- tests/test_telemetry_interface.ts | 73 ++++++++++++++----------------- 1 file changed, 32 insertions(+), 41 deletions(-) diff --git a/tests/test_telemetry_interface.ts b/tests/test_telemetry_interface.ts index 4126617630..718257fdbf 100644 --- a/tests/test_telemetry_interface.ts +++ b/tests/test_telemetry_interface.ts @@ -6,7 +6,6 @@ import { Queue, Worker } from '../src/classes'; import { removeAllQueueData } from '../src/utils'; import { Telemetry, - Trace, ContextManager, Tracer, Span, @@ -14,7 +13,6 @@ import { Attributes, Exception, Time, - SpanContext, } from '../src/interfaces'; import * as sinon from 'sinon'; import { SpanKind, TelemetryAttributes } from '../src/enums'; @@ -28,25 +26,18 @@ describe('Telemetry', () => { const prefix = process.env.BULLMQ_TEST_PREFIX || 'bull'; class MockTelemetry implements Telemetry { - public trace: Trace; + public tracer: Tracer; public contextManager: ContextManager; - public tracerName: string; constructor(name: string) { - this.trace = new MockTrace(); + this.tracer = new MockTracer(); this.contextManager = new MockContextManager(); - this.tracerName = name; } } - class MockTrace implements Trace { - getTracer(): Tracer { - return new MockTracer(); - } - - setSpan(context: Context, span: Span): Context { - context['getSpan'] = () => span; - return { ...context, getMetadata_span: span['name'] }; + class MockTracer implements Tracer { + startSpan(name: string, options?: SpanOptions): Span { + return new MockSpan(name, options); } } @@ -65,7 +56,10 @@ describe('Telemetry', () => { return this.activeContext; } - getMetadata(context: Context): Record { + getMetadata(context: Context): string { + if (!context) { + return ''; + } const metadata: Record = {}; Object.keys(context as object).forEach(key => { if (key.startsWith('getMetadata_')) { @@ -73,27 +67,21 @@ describe('Telemetry', () => { metadata[key] = value; } }); - return metadata; + return JSON.stringify(metadata); } - fromMetadata( - activeContext: Context, - metadata: Record, - ): Context { + fromMetadata(activeContext: Context, metadataString: string): Context { const newContext = { ...activeContext }; - Object.keys(metadata).forEach(key => { - newContext[key] = () => metadata[key]; - }); + if (metadataString) { + const metadata = JSON.parse(metadataString); + Object.keys(metadata).forEach(key => { + newContext[key] = () => metadata[key]; + }); + } return newContext; } } - class MockTracer implements Tracer { - startSpan(name: string, options?: SpanOptions): Span { - return new MockSpan(name, options); - } - } - class MockSpan implements Span { attributes: Attributes = {}; name: string; @@ -105,24 +93,25 @@ describe('Telemetry', () => { this.options = options; } - setAttribute(key: string, value: any): Span { + setSpanOnContext(ctx: any): any { + context['getSpan'] = () => this; + return { ...context, getMetadata_span: this['name'] }; + } + + addEvent(name: string, attributes?: Attributes): void {} + + setAttribute(key: string, value: any): void { this.attributes[key] = value; - return this; } - setAttributes(attributes: Attributes): Span { + setAttributes(attributes: Attributes): void { this.attributes = { ...this.attributes, ...attributes }; - return this; } recordException(exception: ExtendedException, time?: Time): void { this.exception = exception; } - spanContext(): SpanContext { - return { traceId: 'mock-trace-id', spanId: 'mock-span-id' }; - } - end(): void {} } @@ -139,6 +128,7 @@ describe('Telemetry', () => { beforeEach(async function () { queueName = `test-${v4()}`; telemetryClient = new MockTelemetry('mockTracer'); + queue = new Queue(queueName, { connection, prefix, @@ -160,9 +150,10 @@ describe('Telemetry', () => { await queue.add('testJob', { foo: 'bar' }); const activeContext = telemetryClient.contextManager.active(); + const span = activeContext.getSpan?.() as MockSpan; expect(span).to.be.an.instanceOf(MockSpan); - expect(span.name).to.equal(`${queueName}.testJob Queue.add`); + expect(span.name).to.equal(`add ${queueName}.testJob`); expect(span.options?.kind).to.equal(SpanKind.PRODUCER); expect(span.attributes[TelemetryAttributes.QueueName]).to.equal( queueName, @@ -208,7 +199,7 @@ describe('Telemetry', () => { const activeContext = telemetryClient.contextManager.active(); const span = activeContext.getSpan?.() as MockSpan; expect(span).to.be.an.instanceOf(MockSpan); - expect(span.name).to.equal(`${queueName} Queue.addBulk`); + expect(span.name).to.equal(`addBulk ${queueName}`); expect(span.options?.kind).to.equal(SpanKind.PRODUCER); expect(span.attributes[TelemetryAttributes.BulkNames]).to.deep.equal( jobs.map(job => job.name), @@ -255,7 +246,7 @@ describe('Telemetry', () => { await worker.waitUntilReady(); const moveToCompletedStub = sinon.stub(job, 'moveToCompleted').resolves(); - const startSpanSpy = sinon.spy(worker.tracer, 'startSpan'); + const startSpanSpy = sinon.spy(worker['tracer'], 'startSpan'); const token = 'some-token'; @@ -264,7 +255,7 @@ describe('Telemetry', () => { const span = startSpanSpy.returnValues[0] as MockSpan; expect(span).to.be.an.instanceOf(MockSpan); - expect(span.name).to.equal(`${queueName} ${worker.id} Worker.processJob`); + expect(span.name).to.equal(`process ${queueName}.${worker.id}`); expect(span.options?.kind).to.equal(SpanKind.CONSUMER); expect(span.attributes[TelemetryAttributes.WorkerId]).to.equal(worker.id); expect(span.attributes[TelemetryAttributes.WorkerToken]).to.equal(token); From 23924d3247e864b0ae25baeac7d1fe6fcb9b084f Mon Sep 17 00:00:00 2001 From: Manuel Astudillo Date: Tue, 29 Oct 2024 12:03:30 +0100 Subject: [PATCH 24/26] chore(job): rename srcPropagationMetadata to dstPropagationMetadata --- src/classes/job.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/classes/job.ts b/src/classes/job.ts index e4097bb1fb..9c5532cdc2 100644 --- a/src/classes/job.ts +++ b/src/classes/job.ts @@ -744,12 +744,12 @@ export class Job< return this.queue.trace>( SpanKind.INTERNAL, () => this.getSpanName(command), - async (span, srcPropagationMedatada) => { - if (srcPropagationMedatada) { + async (span, dstPropagationMedatadata) => { + if (dstPropagationMedatadata) { (multi).updateJobOption([ this.toKey(this.id), 'tm', - srcPropagationMedatada, + dstPropagationMedatadata, ]); } From 49b2c36759ab9e94cbe86d2b3dab1f2e13dcfe40 Mon Sep 17 00:00:00 2001 From: Manuel Astudillo Date: Wed, 30 Oct 2024 11:17:43 +0100 Subject: [PATCH 25/26] docs(telemetry): initial interface documentation --- src/interfaces/telemetry.ts | 101 ++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/src/interfaces/telemetry.ts b/src/interfaces/telemetry.ts index 975e4c9169..e55c0990dc 100644 --- a/src/interfaces/telemetry.ts +++ b/src/interfaces/telemetry.ts @@ -1,10 +1,37 @@ import { SpanKind } from '../enums'; +/** + * Telemetry interface + * + * This interface allows third-party libraries to integrate their own telemetry + * system. The interface is heavily inspired by OpenTelemetry but it's not + * limited to it. + * + */ export interface Telemetry { + /** + * Tracer instance + * + * The tracer is responsible for creating spans and propagating the context + * across the application. + */ tracer: Tracer; + + /** + * Context manager instance + * + * The context manager is responsible for managing the context and propagating + * it across the application. + */ contextManager: ContextManager; } +/** + * Context manager interface + * + * The context manager is responsible for managing the context and propagating + * it across the application. + */ export interface ContextManager { /** * Creates a new context and sets it as active for the fn passed as last argument @@ -16,12 +43,45 @@ export interface ContextManager { context: Context, fn: A, ): ReturnType; + + /** + * Returns the active context + */ active(): Context; + + /** + * Returns a serialized version of the current context. The metadata + * is the mechanism used to propagate the context across a distributed + * application. + * + * @param context + */ getMetadata(context: Context): string; + + /** + * Creates a new context from a serialized version effectively + * linking the new context to the parent context. + * + * @param activeContext + * @param metadata + */ fromMetadata(activeContext: Context, metadata: string): Context; } +/** + * Tracer interface + * + */ export interface Tracer { + /** + * startSpan creates a new Span with the given name and options on an optional + * context. If the context is not provided, the current active context should be + * used. + * + * @param name + * @param options + * @param context + */ startSpan(name: string, options?: SpanOptions, context?: Context): Span; } @@ -29,12 +89,53 @@ export interface SpanOptions { kind: SpanKind; } +/** + * Span interface + */ export interface Span { + /** + * setSpanOnContext sets the span on the context. This is useful when you want + * to propagate the span across the application. + * + * @param ctx + */ setSpanOnContext(ctx: Context): Context; + + /** + * setAttribute sets an attribute on the span. + * + * @param ctx + */ setAttribute(key: string, value: AttributeValue): void; + + /** + * setAttributes sets multiple attributes on the span. + * + * @param attributes + */ setAttributes(attributes: Attributes): void; + + /** + * addEvent adds an event to the span. + * + * @param name + * @param attributes + */ addEvent(name: string, attributes?: Attributes): void; + + /** + * recordException records an exception on the span. + * + * @param exception + * @param time + */ recordException(exception: Exception, time?: Time): void; + + /** + * end ends the span. + * + * Note: spans must be ended so that they can be exported. + */ end(): void; } From 9b0b3fa5cf5a5a6d1e3301ebc1a805e72074c55b Mon Sep 17 00:00:00 2001 From: Manuel Astudillo Date: Thu, 31 Oct 2024 15:46:04 +0100 Subject: [PATCH 26/26] chore: small improvements to naming of telemetry spans --- src/classes/job.ts | 18 +- src/classes/queue-base.ts | 9 +- src/classes/queue.ts | 121 +++++---- src/classes/worker.ts | 303 ++++++++++++----------- src/commands/moveStalledJobsToWait-9.lua | 2 +- src/enums/telemetry-attributes.ts | 6 +- tests/test_telemetry_interface.ts | 7 +- 7 files changed, 249 insertions(+), 217 deletions(-) diff --git a/src/classes/job.ts b/src/classes/job.ts index 9c5532cdc2..313bc283d0 100644 --- a/src/classes/job.ts +++ b/src/classes/job.ts @@ -743,10 +743,11 @@ export class Job< return this.queue.trace>( SpanKind.INTERNAL, - () => this.getSpanName(command), + this.getSpanOperation(command), + this.queue.name, async (span, dstPropagationMedatadata) => { if (dstPropagationMedatadata) { - (multi).updateJobOption([ + this.scripts.execCommand(multi, 'updateJobOption', [ this.toKey(this.id), 'tm', dstPropagationMedatadata, @@ -788,20 +789,15 @@ export class Job< ); } - private getSpanName(command: string) { - let operation; + private getSpanOperation(command: string) { switch (command) { case 'moveToDelayed': - operation = 'delay'; - break; + return 'delay'; case 'retryJob': - operation = 'retry'; - break; + return 'retry'; case 'moveToFinished': - operation = 'fail'; - break; + return 'fail'; } - return `${operation} ${this.queue.name}`; } /** diff --git a/src/classes/queue-base.ts b/src/classes/queue-base.ts index 609f719adf..6b5ef9d385 100644 --- a/src/classes/queue-base.ts +++ b/src/classes/queue-base.ts @@ -192,14 +192,16 @@ export class QueueBase extends EventEmitter implements MinimalQueue { * Wraps the code with telemetry and provides a span for configuration. * * @param spanKind - kind of the span: Producer, Consumer, Internal - * @param getSpanName - name of the span + * @param operation - operation name (such as add, process, etc) + * @param destination - destination name (normally the queue name) * @param callback - code to wrap with telemetry * @param srcPropagationMedatada - * @returns */ async trace( spanKind: SpanKind, - getSpanName: () => string, + operation: string, + destination: string, callback: (span?: Span, dstPropagationMetadata?: string) => Promise | T, srcPropagationMetadata?: string, ) { @@ -217,7 +219,7 @@ export class QueueBase extends EventEmitter implements MinimalQueue { ); } - const spanName = getSpanName(); + const spanName = `${operation} ${destination}`; const span = this.tracer.startSpan( spanName, { @@ -229,6 +231,7 @@ export class QueueBase extends EventEmitter implements MinimalQueue { try { span.setAttributes({ [TelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.QueueOperation]: operation, }); let messageContext; diff --git a/src/classes/queue.ts b/src/classes/queue.ts index ef38893679..ddbadb9cd5 100644 --- a/src/classes/queue.ts +++ b/src/classes/queue.ts @@ -253,7 +253,8 @@ export class Queue< ): Promise> { return this.trace>( SpanKind.PRODUCER, - () => `add ${this.name}.${name}`, + 'add', + `${this.name}.${name}`, async (span, srcPropagationMedatada) => { if (srcPropagationMedatada) { opts = { ...opts, telemetryMetadata: srcPropagationMedatada }; @@ -314,12 +315,15 @@ export class Queue< ): Promise[]> { return this.trace[]>( SpanKind.PRODUCER, - () => `addBulk ${this.name}`, + 'addBulk', + this.name, async (span, srcPropagationMedatada) => { - span?.setAttributes({ - [TelemetryAttributes.BulkNames]: jobs.map(job => job.name), - [TelemetryAttributes.BulkCount]: jobs.length, - }); + if (span) { + span.setAttributes({ + [TelemetryAttributes.BulkNames]: jobs.map(job => job.name), + [TelemetryAttributes.BulkCount]: jobs.length, + }); + } return await this.Job.createBulk( this as MinimalQueue, @@ -393,15 +397,11 @@ export class Queue< * and in that case it will add it there instead of the wait list. */ async pause(): Promise { - await this.trace( - SpanKind.INTERNAL, - () => `pause ${this.name}`, - async () => { - await this.scripts.pause(true); + await this.trace(SpanKind.INTERNAL, 'pause', this.name, async () => { + await this.scripts.pause(true); - this.emit('paused'); - }, - ); + this.emit('paused'); + }); } /** @@ -409,19 +409,15 @@ export class Queue< * */ async close(): Promise { - await this.trace( - SpanKind.INTERNAL, - () => `close ${this.name}`, - async () => { - if (!this.closing) { - if (this._repeat) { - await this._repeat.close(); - } + await this.trace(SpanKind.INTERNAL, 'close', this.name, async () => { + if (!this.closing) { + if (this._repeat) { + await this._repeat.close(); } + } - await super.close(); - }, - ); + await super.close(); + }); } /** * Resumes the processing of this queue globally. @@ -430,15 +426,11 @@ export class Queue< * queue. */ async resume(): Promise { - await this.trace( - SpanKind.INTERNAL, - () => `resume ${this.name}`, - async () => { - await this.scripts.pause(false); + await this.trace(SpanKind.INTERNAL, 'resume', this.name, async () => { + await this.scripts.pause(false); - this.emit('resumed'); - }, - ); + this.emit('resumed'); + }); } /** @@ -514,8 +506,14 @@ export class Queue< ): Promise { return this.trace( SpanKind.INTERNAL, - () => `.removeRepeatable ${this.name}.${name}`, - async () => { + 'removeRepeatable', + `${this.name}.${name}`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.JobName]: name, + [TelemetryAttributes.JobId]: jobId, + }); + const repeat = await this.repeat; const removed = await repeat.removeRepeatable(name, repeatOpts, jobId); @@ -548,8 +546,13 @@ export class Queue< async removeDebounceKey(id: string): Promise { return this.trace( SpanKind.INTERNAL, - () => `removeDebounceKey ${this.name}.${id}`, - async () => { + 'removeDebounceKey', + `${this.name}`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.JobKey]: id, + }); + const client = await this.client; return await client.del(`${this.keys.de}:${id}`); @@ -563,9 +566,19 @@ export class Queue< * @param id - identifier */ async removeDeduplicationKey(id: string): Promise { - const client = await this.client; + return this.trace( + SpanKind.INTERNAL, + 'removeDeduplicationKey', + `${this.name}`, + async span => { + span?.setAttributes({ + [TelemetryAttributes.DeduplicationKey]: id, + }); - return client.del(`${this.keys.de}:${id}`); + const client = await this.client; + return client.del(`${this.keys.de}:${id}`); + }, + ); } /** @@ -583,7 +596,8 @@ export class Queue< async removeRepeatableByKey(key: string): Promise { return this.trace( SpanKind.INTERNAL, - () => `removeRepeatableByKey ${this.name}.${key} `, + 'removeRepeatableByKey', + `${this.name}`, async span => { span?.setAttributes({ [TelemetryAttributes.JobKey]: key, @@ -609,7 +623,8 @@ export class Queue< async remove(jobId: string, { removeChildren = true } = {}): Promise { return this.trace( SpanKind.INTERNAL, - () => `remove ${this.name}.${jobId}`, + 'remove', + this.name, async span => { span?.setAttributes({ [TelemetryAttributes.JobId]: jobId, @@ -635,7 +650,8 @@ export class Queue< ): Promise { await this.trace( SpanKind.INTERNAL, - () => `updateJobProgress ${this.name}`, + 'updateJobProgress', + this.name, async span => { span?.setAttributes({ [TelemetryAttributes.JobId]: jobId, @@ -674,7 +690,8 @@ export class Queue< async drain(delayed = false): Promise { await this.trace( SpanKind.INTERNAL, - () => `drain ${this.name}`, + 'drain', + this.name, async span => { span?.setAttributes({ [TelemetryAttributes.QueueDrainDelay]: delayed, @@ -709,7 +726,8 @@ export class Queue< ): Promise { return this.trace( SpanKind.INTERNAL, - () => `clean ${this.name}`, + 'clean', + this.name, async span => { const maxCount = limit || Infinity; const maxCountPerCall = Math.min(10000, maxCount); @@ -737,8 +755,7 @@ export class Queue< [TelemetryAttributes.QueueGrace]: grace, [TelemetryAttributes.JobType]: type, [TelemetryAttributes.QueueCleanLimit]: maxCount, - [TelemetryAttributes.JobTimestamp]: timestamp, - [TelemetryAttributes.JobId]: deletedJobsIds, + [TelemetryAttributes.JobIds]: deletedJobsIds, }); return deletedJobsIds; @@ -760,7 +777,8 @@ export class Queue< async obliterate(opts?: ObliterateOpts): Promise { await this.trace( SpanKind.INTERNAL, - () => `obliterate ${this.name}`, + 'obliterate', + this.name, async () => { await this.pause(); @@ -791,7 +809,8 @@ export class Queue< ): Promise { await this.trace( SpanKind.PRODUCER, - () => `retryJobs ${this.name}`, + 'retryJobs', + this.name, async span => { span?.setAttributes({ [TelemetryAttributes.QueueOptions]: JSON.stringify(opts), @@ -820,7 +839,8 @@ export class Queue< async promoteJobs(opts: { count?: number } = {}): Promise { await this.trace( SpanKind.INTERNAL, - () => `promoteJobs ${this.name}`, + 'promoteJobs', + this.name, async span => { span?.setAttributes({ [TelemetryAttributes.QueueOptions]: JSON.stringify(opts), @@ -842,7 +862,8 @@ export class Queue< async trimEvents(maxLength: number): Promise { return this.trace( SpanKind.INTERNAL, - () => `trimEvents ${this.name}`, + 'trimEvents', + this.name, async span => { span?.setAttributes({ [TelemetryAttributes.QueueEventMaxLength]: maxLength, diff --git a/src/classes/worker.ts b/src/classes/worker.ts index b8d49684ba..f354c94253 100644 --- a/src/classes/worker.ts +++ b/src/classes/worker.ts @@ -13,6 +13,7 @@ import { JobJsonRaw, Processor, RedisClient, + Span, WorkerOptions, } from '../interfaces'; import { MinimalQueue } from '../types'; @@ -423,126 +424,116 @@ export class Worker< } async run() { - await this.trace( - SpanKind.INTERNAL, - () => this.getSpanName('run'), - async span => { - span?.setAttributes({ - [TelemetryAttributes.WorkerId]: this.id, - }); + if (!this.processFn) { + throw new Error('No process function is defined.'); + } - if (!this.processFn) { - throw new Error('No process function is defined.'); - } + if (this.running) { + throw new Error('Worker is already running.'); + } - if (this.running) { - throw new Error('Worker is already running.'); - } + try { + this.running = true; - try { - this.running = true; + if (this.closing) { + return; + } - if (this.closing) { - return; - } + await this.startStalledCheckTimer(); - await this.startStalledCheckTimer(); + const jobsInProgress = new Set<{ job: Job; ts: number }>(); + this.startLockExtenderTimer(jobsInProgress); - const jobsInProgress = new Set<{ job: Job; ts: number }>(); - this.startLockExtenderTimer(jobsInProgress); + const asyncFifoQueue = (this.asyncFifoQueue = + new AsyncFifoQueue>()); - const asyncFifoQueue = (this.asyncFifoQueue = - new AsyncFifoQueue>()); - - let tokenPostfix = 0; - - const client = await this.client; - const bclient = await this.blockingConnection.client; - - /** - * This is the main loop in BullMQ. Its goals are to fetch jobs from the queue - * as efficiently as possible, providing concurrency and minimal unnecessary calls - * to Redis. - */ - while (!this.closing) { - let numTotal = asyncFifoQueue.numTotal(); - - /** - * This inner loop tries to fetch jobs concurrently, but if we are waiting for a job - * to arrive at the queue we should not try to fetch more jobs (as it would be pointless) - */ - while ( - !this.waiting && - numTotal < this.opts.concurrency && - (!this.limitUntil || numTotal == 0) - ) { - const token = `${this.id}:${tokenPostfix++}`; - - const fetchedJob = this.retryIfFailed>( - () => this._getNextJob(client, bclient, token, { block: true }), - this.opts.runRetryDelay, - ); - asyncFifoQueue.add(fetchedJob); + let tokenPostfix = 0; - numTotal = asyncFifoQueue.numTotal(); + const client = await this.client; + const bclient = await this.blockingConnection.client; - if (this.waiting && numTotal > 1) { - // We are waiting for jobs but we have others that we could start processing already - break; - } + /** + * This is the main loop in BullMQ. Its goals are to fetch jobs from the queue + * as efficiently as possible, providing concurrency and minimal unnecessary calls + * to Redis. + */ + while (!this.closing) { + let numTotal = asyncFifoQueue.numTotal(); - // We await here so that we fetch jobs in sequence, this is important to avoid unnecessary calls - // to Redis in high concurrency scenarios. - const job = await fetchedJob; + /** + * This inner loop tries to fetch jobs concurrently, but if we are waiting for a job + * to arrive at the queue we should not try to fetch more jobs (as it would be pointless) + */ + while ( + !this.waiting && + numTotal < this.opts.concurrency && + (!this.limitUntil || numTotal == 0) + ) { + const token = `${this.id}:${tokenPostfix++}`; + + const fetchedJob = this.retryIfFailed>( + () => this._getNextJob(client, bclient, token, { block: true }), + this.opts.runRetryDelay, + ); + asyncFifoQueue.add(fetchedJob); - // No more jobs waiting but we have others that could start processing already - if (!job && numTotal > 1) { - break; - } + numTotal = asyncFifoQueue.numTotal(); - // If there are potential jobs to be processed and blockUntil is set, we should exit to avoid waiting - // for processing this job. - if (this.blockUntil) { - break; - } - } + if (this.waiting && numTotal > 1) { + // We are waiting for jobs but we have others that we could start processing already + break; + } - // Since there can be undefined jobs in the queue (when a job fails or queue is empty) - // we iterate until we find a job. - let job: Job | void; - do { - job = await asyncFifoQueue.fetch(); - } while (!job && asyncFifoQueue.numQueued() > 0); - - if (job) { - const token = job.token; - asyncFifoQueue.add( - this.retryIfFailed>( - () => - this.processJob( - >job, - token, - () => asyncFifoQueue.numTotal() <= this.opts.concurrency, - jobsInProgress, - ), - this.opts.runRetryDelay, - ), - ); - } + // We await here so that we fetch jobs in sequence, this is important to avoid unnecessary calls + // to Redis in high concurrency scenarios. + const job = await fetchedJob; + + // No more jobs waiting but we have others that could start processing already + if (!job && numTotal > 1) { + break; } - this.running = false; - return await asyncFifoQueue.waitAll(); - } catch (error) { - this.running = false; - throw error; + // If there are potential jobs to be processed and blockUntil is set, we should exit to avoid waiting + // for processing this job. + if (this.blockUntil) { + break; + } } - }, - ); + + // Since there can be undefined jobs in the queue (when a job fails or queue is empty) + // we iterate until we find a job. + let job: Job | void; + do { + job = await asyncFifoQueue.fetch(); + } while (!job && asyncFifoQueue.numQueued() > 0); + + if (job) { + const token = job.token; + asyncFifoQueue.add( + this.retryIfFailed>( + () => + this.processJob( + >job, + token, + () => asyncFifoQueue.numTotal() <= this.opts.concurrency, + jobsInProgress, + ), + this.opts.runRetryDelay, + ), + ); + } + } + + this.running = false; + return await asyncFifoQueue.waitAll(); + } catch (error) { + this.running = false; + throw error; + } } /** @@ -551,26 +542,29 @@ export class Worker< * @returns a Job or undefined if no job was available in the queue. */ async getNextJob(token: string, { block = true }: GetNextJobOptions = {}) { + const nextJob = await this._getNextJob( + await this.client, + await this.blockingConnection.client, + token, + { block }, + ); + return this.trace>( SpanKind.INTERNAL, - () => this.getSpanName('getNextJob'), + 'getNextJob', + this.name, async span => { - const nextJob = await this._getNextJob( - await this.client, - await this.blockingConnection.client, - token, - { block }, - ); - span?.setAttributes({ [TelemetryAttributes.WorkerId]: this.id, - [TelemetryAttributes.WorkerToken]: token, + [TelemetryAttributes.QueueName]: this.name, + [TelemetryAttributes.WorkerName]: this.opts.name, [TelemetryAttributes.WorkerOptions]: JSON.stringify({ block }), [TelemetryAttributes.JobId]: nextJob?.id, }); return nextJob; }, + nextJob?.opts.telemetryMetadata, ); } @@ -633,7 +627,8 @@ export class Worker< async rateLimit(expireTimeMs: number): Promise { await this.trace( SpanKind.INTERNAL, - () => this.getSpanName('rateLimit'), + 'rateLimit', + this.name, async span => { span?.setAttributes({ [TelemetryAttributes.WorkerId]: this.id, @@ -825,11 +820,12 @@ will never work with more accuracy than 1ms. */ return this.trace>( SpanKind.CONSUMER, - () => this.getSpanName('process'), + 'process', + this.name, async span => { span?.setAttributes({ [TelemetryAttributes.WorkerId]: this.id, - [TelemetryAttributes.WorkerToken]: token, + [TelemetryAttributes.WorkerName]: this.opts.name, [TelemetryAttributes.JobId]: job.id, }); @@ -918,10 +914,6 @@ will never work with more accuracy than 1ms. */ ); } - private getSpanName(operation: string): string { - return `${operation} ${this.name}.${this.opts.name || this.id}`; - } - /** * * Pauses the processing of this queue only for this worker. @@ -929,10 +921,12 @@ will never work with more accuracy than 1ms. */ async pause(doNotWaitActive?: boolean): Promise { await this.trace( SpanKind.INTERNAL, - () => this.getSpanName('pause'), + 'pause', + this.name, async span => { span?.setAttributes({ [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerName]: this.opts.name, [TelemetryAttributes.WorkerDoNotWaitActive]: doNotWaitActive, }); @@ -957,18 +951,15 @@ will never work with more accuracy than 1ms. */ */ resume(): void { if (this.resumeWorker) { - this.trace( - SpanKind.INTERNAL, - () => this.getSpanName('resume'), - span => { - span?.setAttributes({ - [TelemetryAttributes.WorkerId]: this.id, - }); + this.trace(SpanKind.INTERNAL, 'resume', this.name, span => { + span?.setAttributes({ + [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerName]: this.opts.name, + }); - this.resumeWorker(); - this.emit('resumed'); - }, - ); + this.resumeWorker(); + this.emit('resumed'); + }); } } @@ -1010,10 +1001,12 @@ will never work with more accuracy than 1ms. */ await this.trace( SpanKind.INTERNAL, - () => this.getSpanName('close'), + 'close', + this.name, async span => { span?.setAttributes({ [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerName]: this.opts.name, [TelemetryAttributes.WorkerForceClose]: force, }); @@ -1072,10 +1065,12 @@ will never work with more accuracy than 1ms. */ if (!this.closing) { await this.trace( SpanKind.INTERNAL, - () => this.getSpanName('startStalledCheckTimer'), + 'startStalledCheckTimer', + this.name, async span => { span?.setAttributes({ [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerName]: this.opts.name, }); this.stalledChecker().catch(err => { @@ -1186,10 +1181,12 @@ will never work with more accuracy than 1ms. */ protected async extendLocks(jobs: Job[]) { await this.trace( SpanKind.INTERNAL, - () => this.getSpanName('extendLocks'), + 'extendLocks', + this.name, async span => { span?.setAttributes({ [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerName]: this.opts.name, [TelemetryAttributes.WorkerJobsToExtendLocks]: jobs.map( job => job.id, ), @@ -1226,20 +1223,25 @@ will never work with more accuracy than 1ms. */ private async moveStalledJobsToWait() { await this.trace( SpanKind.INTERNAL, - () => this.getSpanName('moveStalledJobsToWait'), + 'moveStalledJobsToWait', + this.name, async span => { const chunkSize = 50; const [failed, stalled] = await this.scripts.moveStalledJobsToWait(); span?.setAttributes({ [TelemetryAttributes.WorkerId]: this.id, + [TelemetryAttributes.WorkerName]: this.opts.name, [TelemetryAttributes.WorkerStalledJobs]: stalled, [TelemetryAttributes.WorkerFailedJobs]: failed, }); - stalled.forEach((jobId: string) => - this.emit('stalled', jobId, 'active'), - ); + stalled.forEach((jobId: string) => { + span?.addEvent('job stalled', { + [TelemetryAttributes.JobId]: jobId, + }); + this.emit('stalled', jobId, 'active'); + }); const jobPromises: Promise>[] = []; for (let i = 0; i < failed.length; i++) { @@ -1261,15 +1263,20 @@ will never work with more accuracy than 1ms. */ ); } - private notifyFailedJobs(failedJobs: Job[]) { - failedJobs.forEach((job: Job) => - this.emit( - 'failed', - job, - new Error('job stalled more than allowable limit'), - 'active', - ), - ); + private notifyFailedJobs( + failedJobs: Job[], + span?: Span, + ) { + const failedReason = 'job stalled more than allowable limit'; + + failedJobs.forEach((job: Job) => { + span?.addEvent('job failed', { + [TelemetryAttributes.JobId]: job.id, + [TelemetryAttributes.JobName]: job.name, + [TelemetryAttributes.JobFailedReason]: failedReason, + }); + this.emit('failed', job, new Error(failedReason), 'active'); + }); } private moveLimitedBackToWait( diff --git a/src/commands/moveStalledJobsToWait-9.lua b/src/commands/moveStalledJobsToWait-9.lua index 0c35adb541..2e6161ebee 100644 --- a/src/commands/moveStalledJobsToWait-9.lua +++ b/src/commands/moveStalledJobsToWait-9.lua @@ -149,7 +149,7 @@ if (#stalling > 0) then table.insert(failed, jobId) else - local target, isPausedOrMaxed= + local target, isPausedOrMaxed = getTargetQueueList(metaKey, activeKey, waitKey, pausedKey) -- Move the job back to the wait queue, to immediately be picked up by a waiting worker. diff --git a/src/enums/telemetry-attributes.ts b/src/enums/telemetry-attributes.ts index c5dcb6732b..3806242e1b 100644 --- a/src/enums/telemetry-attributes.ts +++ b/src/enums/telemetry-attributes.ts @@ -1,21 +1,23 @@ export enum TelemetryAttributes { QueueName = 'bullmq.queue.name', + QueueOperation = 'bullmq.queue.operation', BulkCount = 'bullmq.job.bulk.count', BulkNames = 'bullmq.job.bulk.names', JobName = 'bullmq.job.name', JobId = 'bullmq.job.id', JobKey = 'bullmq.job.key', + JobIds = 'bullmq.job.ids', + DeduplicationKey = 'bullmq.job.deduplication.key', JobOptions = 'bullmq.job.options', JobProgress = 'bullmq.job.progress', QueueDrainDelay = 'bullmq.queue.drain.delay', QueueGrace = 'bullmq.queue.grace', QueueCleanLimit = 'bullmq.queue.clean.limit', JobType = 'bullmq.job.type', - JobTimestamp = 'bullmq.job.timestamp', QueueOptions = 'bullmq.queue.options', QueueEventMaxLength = 'bullmq.queue.event.max.length', WorkerOptions = 'bullmq.worker.options', - WorkerToken = 'bullmq.worker.token', + WorkerName = 'bullmq.worker.name', WorkerId = 'bullmq.worker.id', WorkerRateLimit = 'bullmq.worker.rate.limit', WorkerDoNotWaitActive = 'bullmq.worker.do.not.wait.active', diff --git a/tests/test_telemetry_interface.ts b/tests/test_telemetry_interface.ts index 718257fdbf..125db85216 100644 --- a/tests/test_telemetry_interface.ts +++ b/tests/test_telemetry_interface.ts @@ -241,6 +241,7 @@ describe('Telemetry', () => { const worker = new Worker(queueName, async () => 'some result', { connection, telemetry: telemetryClient, + name: 'testWorker', }); await worker.waitUntilReady(); @@ -255,10 +256,12 @@ describe('Telemetry', () => { const span = startSpanSpy.returnValues[0] as MockSpan; expect(span).to.be.an.instanceOf(MockSpan); - expect(span.name).to.equal(`process ${queueName}.${worker.id}`); + expect(span.name).to.equal(`process ${queueName}`); expect(span.options?.kind).to.equal(SpanKind.CONSUMER); expect(span.attributes[TelemetryAttributes.WorkerId]).to.equal(worker.id); - expect(span.attributes[TelemetryAttributes.WorkerToken]).to.equal(token); + expect(span.attributes[TelemetryAttributes.WorkerName]).to.equal( + 'testWorker', + ); expect(span.attributes[TelemetryAttributes.JobId]).to.equal(job.id); moveToCompletedStub.restore();