From 9993c6ce15dff46a9e60bb6d467dc3d3a9935b14 Mon Sep 17 00:00:00 2001 From: Trent Mick Date: Wed, 1 Dec 2021 16:11:17 -0800 Subject: [PATCH] fix: ensure correct run-context for instrumented aws-sdk client usage (#2472) This fixes aws-sdk (S3, SQS, SNS) instrumentation to ensure the automatically created AWS spans do not spill into user code. This also ensures that any captured APM error for a failed AWS command is a child of the AWS span. Refs: #2430 --- CHANGELOG.asciidoc | 6 ++ lib/instrumentation/modules/aws-sdk/s3.js | 5 -- lib/instrumentation/modules/aws-sdk/sns.js | 33 +++++----- lib/instrumentation/modules/aws-sdk/sqs.js | 31 +++++---- .../modules/aws-sdk/fixtures/sqs.js | 2 +- .../modules/aws-sdk/fixtures/use-s3.js | 22 +++++++ .../modules/aws-sdk/s3.test.js | 66 ++++++++++++++----- .../modules/aws-sdk/sns.test.js | 54 ++++++++++++++- .../modules/aws-sdk/sqs.test.js | 5 ++ 9 files changed, 172 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 04ed117a82..4201910dc5 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -39,6 +39,12 @@ Notes: [float] ===== Bug fixes +* Fixes for run context handling for 'aws-sdk' instrumentation (S3, SQS, SNS) + so that a span created after an AWS client command (in the same tick, in + the command callback, or promise) is not a child of the automatic AWS + span. This change also ensures captured errors from failing client commands + are a child of the AWS span. + * Fix 'http' and 'https' instrumentation for outgoing requests to not have the 'http' span context be active in user code. ({pull}2470[#2470]) diff --git a/lib/instrumentation/modules/aws-sdk/s3.js b/lib/instrumentation/modules/aws-sdk/s3.js index 3278e5d7ff..42b2480eae 100644 --- a/lib/instrumentation/modules/aws-sdk/s3.js +++ b/lib/instrumentation/modules/aws-sdk/s3.js @@ -54,11 +54,6 @@ function instrumentationS3 (orig, origArguments, request, AWS, agent, { version, const ins = agent._instrumentation - // `instrumentationS3` is called *synchronously* for an S3 client method call. - // We must use `ins.createSpan` rather than `ins.startSpan`, otherwise we - // would impact apm.currentSpan in the calling code. For example: - // s3Client.listBuckets({}, function (...) { ... }) - // // The "S3 ListBuckets" span should *not* be apm.currentSpan here. const span = ins.createSpan(name, TYPE, SUBTYPE, opName) if (!span) { return orig.apply(request, origArguments) diff --git a/lib/instrumentation/modules/aws-sdk/sns.js b/lib/instrumentation/modules/aws-sdk/sns.js index 29f8eb5f05..c2a33cc0b0 100644 --- a/lib/instrumentation/modules/aws-sdk/sns.js +++ b/lib/instrumentation/modules/aws-sdk/sns.js @@ -1,4 +1,4 @@ -const constants = require('../../../constants') +'use strict' const TYPE = 'messaging' const SUBTYPE = 'sns' @@ -105,16 +105,13 @@ function shouldIgnoreRequest (request, agent) { } function instrumentationSns (orig, origArguments, request, AWS, agent, { version, enabled }) { - const type = TYPE - const subtype = SUBTYPE - const action = ACTION - if (shouldIgnoreRequest(request, agent)) { return orig.apply(request, origArguments) } + const ins = agent._instrumentation const name = getSpanNameFromRequest(request) - const span = agent.startSpan(name, type, subtype, action) + const span = ins.createSpan(name, TYPE, SUBTYPE, ACTION) if (!span) { return orig.apply(request, origArguments) } @@ -122,19 +119,23 @@ function instrumentationSns (orig, origArguments, request, AWS, agent, { version span.setDestinationContext(getMessageDestinationContextFromRequest(request)) span.setMessageContext(getMessageContextFromRequest(request)) - request.on('complete', function (response) { + const onComplete = function (response) { if (response && response.error) { - const errOpts = { - skipOutcome: true - } - agent.captureError(response.error, errOpts) - span._setOutcomeFromErrorCapture(constants.OUTCOME_FAILURE) + agent.captureError(response.error) } - span.end() - }) - - return orig.apply(request, origArguments) + } + // Bind onComplete to the span's run context so that `captureError` picks + // up the correct currentSpan. + const parentRunContext = ins.currRunContext() + const spanRunContext = parentRunContext.enterSpan(span) + request.on('complete', ins.bindFunctionToRunContext(spanRunContext, onComplete)) + + const cb = origArguments[origArguments.length - 1] + if (typeof cb === 'function') { + origArguments[origArguments.length - 1] = ins.bindFunctionToRunContext(parentRunContext, cb) + } + return ins.withRunContext(spanRunContext, orig, request, ...origArguments) } module.exports = { diff --git a/lib/instrumentation/modules/aws-sdk/sqs.js b/lib/instrumentation/modules/aws-sdk/sqs.js index 06dd838a3a..43ee5cee6c 100644 --- a/lib/instrumentation/modules/aws-sdk/sqs.js +++ b/lib/instrumentation/modules/aws-sdk/sqs.js @@ -1,6 +1,7 @@ 'use strict' + const { URL } = require('url') -const constants = require('../../../constants') + const OPERATIONS_TO_ACTIONS = { deleteMessage: 'delete', deleteMessageBatch: 'delete_batch', @@ -145,11 +146,10 @@ function instrumentationSqs (orig, origArguments, request, AWS, agent, { version return orig.apply(request, origArguments) } - const type = TYPE - const subtype = SUBTYPE + const ins = agent._instrumentation const action = getActionFromRequest(request) const name = getSpanNameFromRequest(request) - const span = agent.startSpan(name, type, subtype, action) + const span = ins.createSpan(name, TYPE, SUBTYPE, action) if (!span) { return orig.apply(request, origArguments) } @@ -157,13 +157,9 @@ function instrumentationSqs (orig, origArguments, request, AWS, agent, { version span.setDestinationContext(getMessageDestinationContextFromRequest(request)) span.setMessageContext(getMessageContextFromRequest(request)) - request.on('complete', function (response) { + const onComplete = function (response) { if (response && response.error) { - const errOpts = { - skipOutcome: true - } - agent.captureError(response.error, errOpts) - span._setOutcomeFromErrorCapture(constants.OUTCOME_FAILURE) + agent.captureError(response.error) } span.end() @@ -171,9 +167,18 @@ function instrumentationSqs (orig, origArguments, request, AWS, agent, { version if (request.operation === 'receiveMessage' && response && response.data) { recordMetrics(getQueueNameFromRequest(request), response.data, agent) } - }) - - return orig.apply(request, origArguments) + } + // Bind onComplete to the span's run context so that `captureError` picks + // up the correct currentSpan. + const parentRunContext = ins.currRunContext() + const spanRunContext = parentRunContext.enterSpan(span) + request.on('complete', ins.bindFunctionToRunContext(spanRunContext, onComplete)) + + const cb = origArguments[origArguments.length - 1] + if (typeof cb === 'function') { + origArguments[origArguments.length - 1] = ins.bindFunctionToRunContext(parentRunContext, cb) + } + return ins.withRunContext(spanRunContext, orig, request, ...origArguments) } module.exports = { diff --git a/test/instrumentation/modules/aws-sdk/fixtures/sqs.js b/test/instrumentation/modules/aws-sdk/fixtures/sqs.js index 5883c1b047..3d38752668 100644 --- a/test/instrumentation/modules/aws-sdk/fixtures/sqs.js +++ b/test/instrumentation/modules/aws-sdk/fixtures/sqs.js @@ -55,7 +55,7 @@ module.exports = { MessageBody: 'Information about current NY Times fiction bestseller for week of 12/11/2016.' }] }, - respnse: ` + response: ` diff --git a/test/instrumentation/modules/aws-sdk/fixtures/use-s3.js b/test/instrumentation/modules/aws-sdk/fixtures/use-s3.js index 15b774cfb1..1c93307da5 100644 --- a/test/instrumentation/modules/aws-sdk/fixtures/use-s3.js +++ b/test/instrumentation/modules/aws-sdk/fixtures/use-s3.js @@ -175,19 +175,41 @@ function useS3 (s3Client, bucketName, cb) { Bucket: bucketName, Key: key }).promise() + assert(apm.currentSpan === null, + 'S3 span should NOT be the currentSpan after .promise()') req.then( function onResolve (data) { log.info({ data }, 'getObject using Promise, resolve') + assert(apm.currentSpan === null, + 'S3 span should NOT be the currentSpan in promise resolve') next() }, function onReject (err) { log.info({ err }, 'getObject using Promise, reject') + assert(apm.currentSpan === null, + 'S3 span should NOT be the currentSpan in promise reject') next(err) } ) }, + // Get a non-existant object to test APM captureError. + function getObjNonExistantObject (_, next) { + const nonExistantKey = key + '-does-not-exist' + s3Client.getObject({ + Bucket: bucketName, + Key: nonExistantKey + }, function (err, data) { + log.info({ err, data }, 'getObject non-existant key, expect error') + if (err) { + next() + } else { + next(new Error(`did not get an error from getObject(${nonExistantKey})`)) + } + }) + }, + // https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#deleteObject-property function deleteTheObj (_, next) { s3Client.deleteObject({ diff --git a/test/instrumentation/modules/aws-sdk/s3.test.js b/test/instrumentation/modules/aws-sdk/s3.test.js index 1266ae5e16..b90d41b6ef 100644 --- a/test/instrumentation/modules/aws-sdk/s3.test.js +++ b/test/instrumentation/modules/aws-sdk/s3.test.js @@ -59,25 +59,25 @@ tape.test('simple S3 usage scenario', function (t) { // First the transaction. t.ok(events[0].transaction, 'got the transaction') const tx = events.shift().transaction - t.equal(events.filter(e => e.span).length, events.length, - 'all remaining events are spans') + const errors = events.filter(e => e.error).map(e => e.error) // Currently HTTP spans under each S3 span are included. Eventually // those will be excluded. Filter those out for now. // https://github.com/elastic/apm-agent-nodejs/issues/2125 - const spans = events.map(e => e.span).filter(e => e.subtype !== 'http') + const spans = events.filter(e => e.span) + .map(e => e.span) + .filter(e => e.subtype !== 'http') // Compare some common fields across all spans. t.equal(spans.filter(s => s.trace_id === tx.trace_id).length, spans.length, 'all spans have the same trace_id') t.equal(spans.filter(s => s.transaction_id === tx.id).length, spans.length, 'all spans have the same transaction_id') - t.equal(spans.filter(s => s.outcome === 'success').length, - spans.length, 'all spans have outcome="success"') t.equal(spans.filter(s => s.sync === false).length, spans.length, 'all spans have sync=false') t.equal(spans.filter(s => s.sample_rate === 1).length, spans.length, 'all spans have sample_rate=1') + const failingSpanId = spans[8].id // index of `getObjNonExistantObject` spans.forEach(s => { // Remove variable and common fields to facilitate t.deepEqual below. delete s.id @@ -86,7 +86,6 @@ tape.test('simple S3 usage scenario', function (t) { delete s.trace_id delete s.timestamp delete s.duration - delete s.outcome delete s.sync delete s.sample_rate }) @@ -105,7 +104,8 @@ tape.test('simple S3 usage scenario', function (t) { service: { name: 's3', type: 'storage' }, cloud: { region: 'us-east-2' } } - } + }, + outcome: 'success' }, 'listAllBuckets produced expected span') t.deepEqual(spans.shift(), { @@ -124,7 +124,8 @@ tape.test('simple S3 usage scenario', function (t) { }, cloud: { region: 'us-east-2' } } - } + }, + outcome: 'success' }, 'createTheBucketIfNecessary produced expected span') t.deepEqual(spans.shift(), { @@ -143,7 +144,8 @@ tape.test('simple S3 usage scenario', function (t) { }, cloud: { region: 'us-east-2' } } - } + }, + outcome: 'success' }, 'waitForBucketToExist produced expected span') t.deepEqual(spans.shift(), { @@ -162,7 +164,8 @@ tape.test('simple S3 usage scenario', function (t) { }, cloud: { region: 'us-east-2' } } - } + }, + outcome: 'success' }, 'createObj produced expected span') t.deepEqual(spans.shift(), { @@ -181,7 +184,8 @@ tape.test('simple S3 usage scenario', function (t) { }, cloud: { region: 'us-east-2' } } - } + }, + outcome: 'success' }, 'waitForObjectToExist produced expected span') t.deepEqual(spans.shift(), { @@ -200,7 +204,8 @@ tape.test('simple S3 usage scenario', function (t) { }, cloud: { region: 'us-east-2' } } - } + }, + outcome: 'success' }, 'getObj produced expected span') t.deepEqual(spans.shift(), { @@ -219,7 +224,8 @@ tape.test('simple S3 usage scenario', function (t) { }, cloud: { region: 'us-east-2' } } - } + }, + outcome: 'success' }, 'getObjConditionalGet produced expected span') t.deepEqual(spans.shift(), { @@ -238,9 +244,35 @@ tape.test('simple S3 usage scenario', function (t) { }, cloud: { region: 'us-east-2' } } - } + }, + outcome: 'success' }, 'getObjUsingPromise produced expected span') + // This is the GetObject to a non-existant-key, so we expect a failure. + t.deepEqual(spans.shift(), { + name: 'S3 GetObject elasticapmtest-bucket-1', + type: 'storage', + subtype: 's3', + action: 'GetObject', + context: { + destination: { + address: LOCALSTACK_HOST, + port: 4566, + service: { + name: 's3', + type: 'storage', + resource: 'elasticapmtest-bucket-1' + }, + cloud: { region: 'us-east-2' } + } + }, + outcome: 'failure' + }, 'getObjNonExistantObject produced expected span') + t.equal(errors.length, 1, 'got 1 error') + t.equal(errors[0].parent_id, failingSpanId, 'error is a child of the failing span from getObjNonExistantObject') + t.equal(errors[0].transaction_id, tx.id, 'error.transaction_id') + t.equal(errors[0].exception.type, 'NoSuchKey', 'error.exception.type') + t.deepEqual(spans.shift(), { name: 'S3 DeleteObject elasticapmtest-bucket-1', type: 'storage', @@ -257,7 +289,8 @@ tape.test('simple S3 usage scenario', function (t) { }, cloud: { region: 'us-east-2' } } - } + }, + outcome: 'success' }, 'deleteTheObj produced expected span') t.deepEqual(spans.shift(), { @@ -276,7 +309,8 @@ tape.test('simple S3 usage scenario', function (t) { }, cloud: { region: 'us-east-2' } } - } + }, + outcome: 'success' }, 'deleteTheBucketIfCreatedIt produced expected span') t.equal(spans.length, 0, 'all spans accounted for') diff --git a/test/instrumentation/modules/aws-sdk/sns.test.js b/test/instrumentation/modules/aws-sdk/sns.test.js index 42849c79fe..a182ef30ea 100644 --- a/test/instrumentation/modules/aws-sdk/sns.test.js +++ b/test/instrumentation/modules/aws-sdk/sns.test.js @@ -1,3 +1,5 @@ +'use strict' + const agent = require('../../../..').start({ serviceName: 'test', secretToken: 'test', @@ -226,7 +228,7 @@ tape.test('AWS SNS: Unit Test Functions', function (test) { }) tape.test('AWS SNS: End to End Test', function (test) { - test.test('API: publish', function (t) { + test.test('API: publish (using promise)', function (t) { const params = { Message: 'this is my test, there are many like it but this one is mine', /* required */ TopicArn: 'arn:aws:sns:us-west-2:111111111111:topic-name' @@ -260,16 +262,62 @@ tape.test('AWS SNS: End to End Test', function (test) { agent.startTransaction('myTransaction') const publishTextPromise = new AWS.SNS({ apiVersion: '2010-03-31' }) .publish(params).promise() + t.ok(agent.currentSpan === null, 'no currentSpan in sync code after sns.publish(...).promise()') // Handle promise's fulfilled/rejected states publishTextPromise.then(function (data) { + t.ok(agent.currentSpan === null, 'no currentSpan in SNS promise resolve') agent.endTransaction() listener.close() }).catch(function (err) { + t.ok(agent.currentSpan === null, 'no currentSpan in SNS promise catch') + t.error(err) + agent.endTransaction() + listener.close() + }) + }) + }) + + test.test('API: publish (using callback)', function (t) { + const params = { + Message: 'this is my test, there are many like it but this one is mine', /* required */ + TopicArn: 'arn:aws:sns:us-west-2:111111111111:topic-name' + } + + const app = createMockServer( + fixtures.publish + ) + const listener = app.listen(0, function () { + const port = listener.address().port + resetAgent(function (data) { + const span = data.spans.filter((span) => span.type === 'messaging').pop() + t.equals(span.name, 'SNS PUBLISH to topic-name', 'span named correctly') + t.equals(span.type, 'messaging', 'span type correctly set') + t.equals(span.subtype, 'sns', 'span subtype set correctly') + t.equals(span.action, 'publish', 'span action set correctly') + t.equals(span.sync, false, 'span.sync is false') + t.equals(span.context.message.queue.name, 'topic-name') + t.equals(span.context.destination.service.resource, 'sns/topic-name') + t.equals(span.context.destination.service.type, 'messaging') + t.equals(span.context.destination.service.name, 'sns') + t.equals(span.context.destination.address, 'localhost') + t.equals(span.context.destination.port, port) + t.equals(span.context.destination.cloud.region, 'us-west-2') + t.end() + }) + + AWS.config.update({ + endpoint: `http://localhost:${port}` + }) + agent.startTransaction('myTransaction') + const sns = new AWS.SNS({ apiVersion: '2010-03-31' }) + sns.publish(params, function (err, _data) { t.error(err) + t.ok(agent.currentSpan === null, 'no currentSpan in sns.promise callback') agent.endTransaction() listener.close() }) + t.ok(agent.currentSpan === null, 'no currentSpan in sync code after sns.publish(...)') }) }) @@ -319,6 +367,10 @@ tape.test('AWS SNS: End to End Test', function (test) { resetAgent(function (data) { const span = data.spans.filter((span) => span.type === 'messaging').pop() t.equals(span.outcome, 'failure', 'error produces outcome=failure span') + t.equal(data.errors.length, 1, 'got 1 error') + const error = data.errors[0] + t.equal(error.parent_id, span.id, 'error is a child of the failing span') + t.equal(error.exception.type, 'NotFound', 'error.exception.type') t.end() }) const port = listener.address().port diff --git a/test/instrumentation/modules/aws-sdk/sqs.test.js b/test/instrumentation/modules/aws-sdk/sqs.test.js index 36b9cd6284..144943a085 100644 --- a/test/instrumentation/modules/aws-sdk/sqs.test.js +++ b/test/instrumentation/modules/aws-sdk/sqs.test.js @@ -212,9 +212,11 @@ tape.test('AWS SQS: End to End Tests', function (test) { const params = getParams('sendMessage', listener.address().port) sqs.sendMessage(params, function (err, data) { t.error(err) + t.ok(agent.currentSpan === null, 'no currentSpan in sqs.sendMessage callback') agent.endTransaction() listener.close() }) + t.ok(agent.currentSpan === null, 'no currentSpan in sync code after sqs.sendMessage') }) }) test.test('API: sendMessageBatch', function (t) { @@ -414,12 +416,15 @@ tape.test('AWS SQS: End to End Tests', function (test) { const sqs = new AWS.SQS({ apiVersion: '2012-11-05' }) const params = getParams('sendMessage', listener.address().port) const request = sqs.sendMessage(params).promise() + t.ok(agent.currentSpan === null, 'no currentSpan in sync code after sqs.sendMessage(...).promise()') request.then( function (data) { + t.ok(agent.currentSpan === null, 'no currentSpan in SQS promise resolve') awsPromiseFinally(agent, listener) }, function (err) { + t.ok(agent.currentSpan === null, 'no currentSpan in SQS promise reject') t.fail(err) awsPromiseFinally(agent, listener) }