Skip to content
This repository has been archived by the owner on Aug 4, 2023. It is now read-only.

feat: truncate payloads according to intake API limits #8

Merged
merged 6 commits into from
Aug 9, 2018
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,17 @@ Streaming configuration:

Data sanitizing configuration:

- `truncateStringsAt` - Maximum size in bytes for strings stored as
- `truncateKeywordsAt` - Maximum size in bytes for strings stored as
Elasticsearch keywords. Strings larger than this will be trucated
(default: `1024` bytes)
- `truncateErrorMessagesAt` - The maximum size in bytes for error
messages. Messages above this length will be truncated. Set to `-1` to
disable truncation. This applies to the following properties:
`error.exception.message` and `error.log.message` (default: `2048`
bytes)
- `truncateSourceLinesAt` - The maximum size in bytes for souce code
lines in stack traces. Lines above this length will be truncated
(default: `1000` bytes)

### Event: `close`

Expand Down
23 changes: 17 additions & 6 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ const eos = require('end-of-stream')
const safeStringify = require('fast-safe-stringify')
const streamToBuffer = require('fast-stream-to-buffer')
const StreamChopper = require('stream-chopper')
const truncate = require('unicode-byte-truncate')
const truncate = require('./lib/truncate')
const pkg = require('./package')

module.exports = Client
Expand Down Expand Up @@ -44,7 +44,7 @@ util.inherits(Client, Writable)
function Client (opts) {
if (!(this instanceof Client)) return new Client(opts)

opts = normalizeOptions(opts)
this._opts = opts = normalizeOptions(opts)

Writable.call(this, opts)

Expand Down Expand Up @@ -102,6 +102,13 @@ Client.prototype._write = function (obj, enc, cb) {
this._chopper.chop(cb)
}
} else {
if ('transaction' in obj) {
truncate.transaction(obj, this._opts)
} else if ('span' in obj) {
truncate.span(obj, this._opts)
} else if ('error' in obj) {
truncate.error(obj, this._opts)
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should these not point to the sub-object, not the base object? ie: obj.transaction?

Also, is there a reason to else-if these? Are we unable to send different types at the same time?

Copy link
Contributor Author

@watson watson Aug 7, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh you're right, it should be truncate.error(obj.error, this._opts) etc. I'll make an update.

The client is an object stream, so each write will be a single object containing either a transaction, a span, or an error. I could use a switch(Object.keys(obj)[0]), but I felt that was less performant than 3 if-statements.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can see a case for sending a span and error at the same time. You wrap an async call in a span from call to callback, and an error occurs, so you report the error and span when the callback is reached. 🤔

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Version 2 of the APM Server intake API currently doesn't support that an object contains more than a single root property - either transaction, span or error.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, that's fine then. Might be worth considering for @elastic/apm-server to support in the future though.

this._received++
this._stream.write(obj, cb)
}
Expand Down Expand Up @@ -228,7 +235,9 @@ function onStream (opts, client, onerror) {
})

// All requests to the APM Server must start with a metadata object
stream.write(safeStringify({metadata: metadata(opts)}) + '\n')
const metadata = getMetadata(opts)
truncate.metadata(metadata, opts)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could do this truncation in the write method too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

stream.write isn't the same write method. Here I write directly to the StreamChopper stream.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason not to just do this through the regular write? Maybe we don't want users sending their own metadata objects?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My primary reason for writing directly to the internal stream was to skip having to go through an extra unnecessary step in the pipeline (performance). But it's definitely the idea that we want to handle writing the metadata on behalf of the user. The api is such that the user doesn't have to concern them selfs with when an HTTP request ends and another begins. So they shouldn't have to know when metadata is written either.

But I'm actually just about to submit a new PR that changes the internal pipeline structure, so if we hold off merging this for a short while it might all change anyway.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another and actually more important reason why I write directly to the internal request stream is that we want to make sure that the metadata is the first object in the new HTTP request. I'm not sure how I can easily guarantee this unless I write it this way 🤔

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR I referred to two comments ago, didn't make such a huge change after all, but as I said in my last comment, I don't think there's any reasonable way around doing it this way. But I might not fully understand the reason for wanting to write the metadata to the client stream instead of the http request stream?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was just thinking it might simplify things to have all the truncation logic in one place, but if sending the metadata at a request start is a concern, then it makes sense to keep it this way. 👍

stream.write(safeStringify({metadata}) + '\n')
}
}

Expand Down Expand Up @@ -262,7 +271,9 @@ function normalizeOptions (opts) {
if (!normalized.serverTimeout && normalized.serverTimeout !== 0) normalized.serverTimeout = 15000
if (!normalized.serverUrl) normalized.serverUrl = 'http://localhost:8200'
if (!normalized.hostname) normalized.hostname = hostname
if (!normalized.truncateStringsAt) normalized.truncateStringsAt = 1024
if (!normalized.truncateKeywordsAt) normalized.truncateKeywordsAt = 1024
if (!normalized.truncateErrorMessagesAt) normalized.truncateErrorMessagesAt = 2048
if (!normalized.truncateSourceLinesAt) normalized.truncateSourceLinesAt = 1000
normalized.keepAlive = normalized.keepAlive !== false

// process
Expand Down Expand Up @@ -294,7 +305,7 @@ function getHeaders (opts) {
return Object.assign(headers, opts.headers)
}

function metadata (opts) {
function getMetadata (opts) {
var payload = {
service: {
name: opts.serviceName,
Expand All @@ -313,7 +324,7 @@ function metadata (opts) {
process: {
pid: process.pid,
ppid: process.ppid,
title: truncate(String(process.title), opts.truncateStringsAt),
title: process.title,
argv: process.argv
},
system: {
Expand Down
130 changes: 130 additions & 0 deletions lib/truncate.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
'use strict'

var truncate = require('unicode-byte-truncate')

exports.metadata = truncMetadata
exports.transaction = truncTransaction
exports.span = truncSpan
exports.error = truncError

function truncMetadata (metadata, opts) {
metadata.process.title = truncate(String(metadata.process.title), opts.truncateKeywordsAt)
}

function truncTransaction (trans, opts) {
trans.name = truncate(String(trans.name), opts.truncateKeywordsAt)
trans.type = truncate(String(trans.type), opts.truncateKeywordsAt)
trans.result = truncate(String(trans.result), opts.truncateKeywordsAt)

// Unless sampled, context will be null
if (trans.sampled) truncContext(trans.context, opts.truncateKeywordsAt)
}

function truncSpan (span, opts) {
span.name = truncate(String(span.name), opts.truncateKeywordsAt)
span.type = truncate(String(span.type), opts.truncateKeywordsAt)
if (span.stacktrace) span.stacktrace = truncFrames(span.stacktrace, opts.truncateSourceLinesAt)
}

function truncError (error, opts) {
if (error.log) {
if (error.log.level) {
error.log.level = truncate(String(error.log.level), opts.truncateKeywordsAt)
}
if (error.log.logger_name) {
error.log.logger_name = truncate(String(error.log.logger_name), opts.truncateKeywordsAt)
}
if (error.log.message && opts.truncateErrorMessagesAt >= 0) {
error.log.message = truncate(String(error.log.message), opts.truncateErrorMessagesAt)
}
if (error.log.param_message) {
error.log.param_message = truncate(String(error.log.param_message), opts.truncateKeywordsAt)
}
if (error.log.stacktrace) {
error.log.stacktrace = truncFrames(error.log.stacktrace, opts.truncateSourceLinesAt)
}
}

if (error.exception) {
if (error.exception.message && opts.truncateErrorMessagesAt >= 0) {
error.exception.message = truncate(String(error.exception.message), opts.truncateErrorMessagesAt)
}
if (error.exception.type) {
error.exception.type = truncate(String(error.exception.type), opts.truncateKeywordsAt)
}
if (error.exception.code) {
error.exception.code = truncate(String(error.exception.code), opts.truncateKeywordsAt)
}
if (error.exception.module) {
error.exception.module = truncate(String(error.exception.module), opts.truncateKeywordsAt)
}
if (error.exception.stacktrace) {
error.exception.stacktrace = truncFrames(error.exception.stacktrace, opts.truncateSourceLinesAt)
}
}

truncContext(error.context, opts.truncateKeywordsAt)
}

function truncContext (context, max) {
if (!context) return

if (context.request) {
if (context.request.method) {
context.request.method = truncate(String(context.request.method), max)
}
if (context.request.url) {
if (context.request.url.protocol) {
context.request.url.protocol = truncate(String(context.request.url.protocol), max)
}
if (context.request.url.hostname) {
context.request.url.hostname = truncate(String(context.request.url.hostname), max)
}
if (context.request.url.port) {
context.request.url.port = truncate(String(context.request.url.port), max)
}
if (context.request.url.pathname) {
context.request.url.pathname = truncate(String(context.request.url.pathname), max)
}
if (context.request.url.search) {
context.request.url.search = truncate(String(context.request.url.search), max)
}
if (context.request.url.hash) {
context.request.url.hash = truncate(String(context.request.url.hash), max)
}
if (context.request.url.raw) {
context.request.url.raw = truncate(String(context.request.url.raw), max)
}
if (context.request.url.full) {
context.request.url.full = truncate(String(context.request.url.full), max)
}
}
}
if (context.user) {
if (context.user.id) {
context.user.id = truncate(String(context.user.id), max)
}
if (context.user.email) {
context.user.email = truncate(String(context.user.email), max)
}
if (context.user.username) {
context.user.username = truncate(String(context.user.username), max)
}
}
}

function truncFrames (frames, max) {
frames.forEach(function (frame, i) {
if (frame.pre_context) frame.pre_context = truncEach(frame.pre_context, max)
if (frame.context_line) frame.context_line = truncate(String(frame.context_line), max)
if (frame.post_context) frame.post_context = truncEach(frame.post_context, max)
})

return frames
}

function truncEach (arr, len) {
return arr.map(function (str) {
return truncate(String(str), len)
})
}
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
},
"scripts": {
"coverage": "nyc report --reporter=text-lcov > coverage.lcov && codecov",
"test": "standard && nyc node test/test.js"
"test": "standard && nyc tape test/*.js"
},
"engines": {
"node": "6 || 8 || 10"
Expand Down
2 changes: 1 addition & 1 deletion test/unref-client.js → test/lib/unref-client.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
'use strict'

const Client = require('../')
const Client = require('../../')

const client = new Client({
serverUrl: process.argv[2],
Expand Down
42 changes: 38 additions & 4 deletions test/utils.js → test/lib/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@ const zlib = require('zlib')
const semver = require('semver')
const pem = require('https-pem')
const ndjson = require('ndjson')
const pkg = require('../package')
const Client = require('../')
const pkg = require('../../package')
const Client = require('../../')

exports.APMServer = APMServer
exports.processReq = processReq
exports.assertReq = assertReq
exports.assertMetadata = assertMetadata
exports.assertEvent = assertEvent
exports.validOpts = validOpts

function APMServer (opts, onreq) {
Expand Down Expand Up @@ -79,11 +80,20 @@ function assertMetadata (t, obj) {
const _process = metadata.process
t.ok(_process.pid > 0)
t.ok(_process.ppid > 0)
t.ok(/(\/node|^node)$/.test(_process.title), `process.title should match /(\\/node|^node)$/ (was: ${_process.title})`)

if (_process.title.length === 1) {
// because of truncation test
t.equal(_process.title, process.title[0])
} else {
const regex = /(\/node|^node)$/
t.ok(regex.test(_process.title), `process.title should match ${regex} (was: ${_process.title})`)
}

t.ok(Array.isArray(_process.argv), 'process.title should be an array')
t.ok(_process.argv.length >= 2, 'process.title should contain at least two elements')
t.ok(/\/node$/.test(_process.argv[0]), `process.argv[0] should match /\\/node$/ (was: ${_process.argv[0]})`)
t.ok(/\/test\/(test|unref-client)\.js$/.test(_process.argv[1]), `process.argv[1] should match /\\/test\\/(test|unref-client)\\.js$/ (was: ${_process.argv[1]})"`)
const regex = /(\/test\/(test|truncate|lib\/unref-client)\.js|node_modules\/\.bin\/tape)$/
t.ok(regex.test(_process.argv[1]), `process.argv[1] should match ${regex} (was: ${_process.argv[1]})"`)
const system = metadata.system
t.ok(typeof system.hostname, 'string')
t.ok(system.hostname.length > 0)
Expand All @@ -94,6 +104,30 @@ function assertMetadata (t, obj) {
}
assertMetadata.asserts = 22

function assertEvent (expect) {
return function (t, obj) {
const key = Object.keys(expect)[0]
const val = expect[key]
switch (key) {
case 'transaction':
if (!('name' in val)) val.name = 'undefined'
if (!('type' in val)) val.type = 'undefined'
if (!('result' in val)) val.result = 'undefined'
break
case 'span':
if (!('name' in val)) val.name = 'undefined'
if (!('type' in val)) val.type = 'undefined'
break
case 'error':
break
default:
t.fail('unexpected event type: ' + key)
}
t.deepEqual(obj, expect)
}
}
assertEvent.asserts = 1

function validOpts (opts) {
return Object.assign({
agentName: 'my-agent-name',
Expand Down
Loading