Merge branch 'api-v2' into truncate

elastic · Aug 9, 2018 · efab51b · efab51b
2 parents ff48598 + c09fb94
commit efab51b
Show file tree

Hide file tree

Showing 13 changed files with 327 additions and 184 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -65,7 +65,7 @@ Once your changes are ready to submit for review:
 ### Testing
 
 For information about how to run the test suite,
-see [TESTING.md](https://github.com/elastic/apm-agent-nodejs/blob/master/TESTING.md).
+see [TESTING.md](TESTING.md).
 
 ### Workflow
 
@@ -79,20 +79,20 @@ should "Squash and merge".
 
 The following is an overview of what's required in order to add support to the agent for automatic instrumentation of an npm package.
 
-1. Add the instrumentation logic to a new file in the [`lib/instrumentation/modules`](https://github.com/elastic/apm-agent-nodejs/tree/master/lib/instrumentation/modules) directory named `<package-name>.js`,
+1. Add the instrumentation logic to a new file in the [`lib/instrumentation/modules`](lib/instrumentation/modules) directory named `<package-name>.js`,
    E.g. `mysql.js` for the `mysql` package
-1. Add the name of the package to the `MODULES` array in [`lib/instrumentation/index.js`](https://github.com/elastic/apm-agent-nodejs/blob/master/lib/instrumentation/index.js)
-1. Add accompanying tests in the [`test/instrumentation/modules`](https://github.com/elastic/apm-agent-nodejs/tree/master/test/instrumentation/modules) directory.
+1. Add the name of the package to the `MODULES` array in [`lib/instrumentation/index.js`](lib/instrumentation/index.js)
+1. Add accompanying tests in the [`test/instrumentation/modules`](test/instrumentation/modules) directory.
    If you only have one test file,
    place it in the root of the `modules` directory and name it the same as the `lib` file.
    If you have more than one test file,
    create a sub-directory with the name of the package and place all test files inside that
    1. If you created a sub-directory under `test/instrumentation/modules`,
-      add it to the `directories` array in [`test/test.js`](https://github.com/elastic/apm-agent-nodejs/blob/master/test/test.js)
-1. List the supported versions of the package in [`docs/compatibility.asciidoc`](https://github.com/elastic/apm-agent-nodejs/blob/master/docs/compatibility.asciidoc)
+      add it to the `directories` array in [`test/test.js`](test/test.js)
+1. List the supported versions of the package in [`docs/compatibility.asciidoc`](docs/compatibility.asciidoc)
 1. We use the [test-all-versions](https://github.com/watson/test-all-versions) module to test the agent against all supported versions of each package we instrument.
-   Add the supported versions and required test commands to the [`.tav.yml`](https://github.com/elastic/apm-agent-nodejs/blob/master/.tav.yml) file
-1. Add the name of the module to one of the TAV groups in both [`.travis.yml`](https://github.com/elastic/apm-agent-nodejs/blob/master/.travis.yml) and [`test/.jenkins_tav.yml`](https://github.com/elastic/apm-agent-nodejs/blob/master/test/.jenkins_tav.yml) for all Node.js versions.
+   Add the supported versions and required test commands to the [`.tav.yml`](.tav.yml) file
+1. Add the name of the module to one of the TAV groups in both [`.travis.yml`](.travis.yml) and [`test/.jenkins_tav.yml`](test/.jenkins_tav.yml) for all Node.js versions.
    To better balance the work required to run each TAV group,
    pick the TAV group that is currently running the fastest.
    Look at the "Dependencies" stage of one of our latest [Travis cron job builds](https://travis-ci.org/elastic/apm-agent-nodejs/builds) for an overview

diff --git a/README.md b/README.md
@@ -45,15 +45,15 @@ npm install elastic-apm-node --save
 ## Contributing
 
 Contributions are welcome,
-but we recommend that you take a moment and read our [contribution guide](https://github.com/elastic/apm-agent-nodejs/blob/master/CONTRIBUTING.md) first.
+but we recommend that you take a moment and read our [contribution guide](CONTRIBUTING.md) first.
 
 To ease development,
 set the environment variable `DEBUG_PAYLOAD=1` to have the agent dump the JSON payload sent to the APM Server to a temporary file on your local harddrive.
 
-Please see the [testing section](CONTRIBUTING.MD#testing) in CONTRIBUTING.md for testing instructions.
+Please see [TESTING.md](TESTING.md) for instructions on how to run the test suite.
 
 ## License
 
-[BSD-2-Clause](https://github.com/elastic/apm-agent-nodejs/blob/master/LICENSE)
+[BSD-2-Clause](LICENSE)
 
 <br>Made with ♥️ and ☕️ by Elastic and our community.
diff --git a/TESTING.md b/TESTING.md
@@ -29,7 +29,7 @@ Arguments:
 
 - `node_version` - Specify major version of Node.js to run test suite on (default: same version as is installed locally)
 - `packages` - Comma separated list of npm package names for which to run [tav](https://github.com/watson/test-all-versions) tests.
-  See [`.tav.yml`](https://github.com/elastic/apm-agent-nodejs/blob/master/.tav.yml) for list of possible names.
+  See [`.tav.yml`](.tav.yml) for list of possible names.
   If used,
   `node_version` must be specified (default: none)
 

diff --git a/docs/agent-api.asciidoc b/docs/agent-api.asciidoc
@@ -535,41 +535,46 @@ Specify the maximum number of spans to capture within a request transaction
 before dropping further spans.
 Setting to `Infinity` means that spans will never be dropped.
 
-[[flush-interval]]
-===== `flushInterval`
+[[api-request-time]]
+===== `apiRequestTime`
 
 * *Type:* Number
 * *Default:* `10`
-* *Env:* `ELASTIC_APM_FLUSH_INTERVAL`
+* *Env:* `ELASTIC_APM_API_REQUEST_TIME`
 
-The agent maintains an in-memory queue to which recorded transactions are added when they end.
-Unless empty,
-this queue is flushed and sent to the APM Server for processing approximately every 10 seconds.
+The agent maintains an open HTTP request to the APM Server that is used to transmit the collected transactions,
+spans,
+and errors to the server.
 
-Use this option to change that interval.
+To avoid issues with intermittent proxies and load balancers,
+the HTTP request is ended and a new one created at regular intervals controlled by this config option.
 The value is expected to be in seconds.
 
-Lowering this interval can reduce memory usage on Node.js applications with a high number of transactions.
-
 [NOTE]
 ====
-The queue is flushed approximately 5 seconds after the first transaction has ended on a newly started Node process.
-
-This ensures that you don't have to wait for the entire `flushInterval` to pass for the first data to be sent to the APM Server.
-From there on the `flushInterval` option is used.
+The HTTP request is ended before the time threshold is reached if enough bytes are sent over it.
+Use the <<api-request-size,`apiRequestSize`>> config option to control the byte threshold.
 ====
 
-[NOTE]
-====
-After each flush of the queue,
-the next flush isn't scheduled until a transaction have ended.
+[[api-request-size]]
+===== `apiRequestSize`
 
-This is done to introduce variance and also ensures that empty queues are not scheduled for flushing.
+* *Type:* Number
+* *Default:* `1048576` (1 MiB)
+* *Env:* `ELASTIC_APM_API_REQUEST_SIZE`
+
+The agent maintains an open HTTP request to the APM Server that is used to transmit the collected transactions,
+spans,
+and errors to the server.
 
-On top of that,
-the actual interval is ajusted by +/- 5% between each flush.
+To avoid issues with intermittent proxies and load balancers,
+the HTTP request is ended and a new one created if its body becomes too large.
+That limit is controlled by this config option.
+The value is expected to be in bytes.
 
-This all helps to ensure that multiple servers started at the same time will not establish connections to the APM Server simultaneously.
+[NOTE]
+====
+The HTTP request is otherwise ended at regular intervals controlled by the <<api-request-time,`apiRequestTime`>> config option.
 ====
 
 [[server-timeout]]
@@ -579,22 +584,11 @@ This all helps to ensure that multiple servers started at the same time will not
 * *Default:* `30`
 * *Env:* `ELASTIC_APM_SERVER_TIMEOUT`
 
-Specify the timeout in seconds when reporting transactions to APM Server.
-
-[[max-queue-size]]
-===== `maxQueueSize`
-
-* *Type:* Number
-* *Default:* `100`
-* *Env:* `ELASTIC_APM_MAX_QUEUE_SIZE`
-
-The agent maintains an in-memory queue to which recorded transactions are added when they end.
-The queue is flushed with regular intervals controlled by the <<flush-interval,`flushInterval`>> config option.
-
-Use the `maxQueueSize` option to force a flush of the queue when it reaches a certain size (number of ended transactions) - even if the `flushInterval` time isn't reached yet.
+Specify the response timeout in seconds when reporting transactions to APM Server.
 
-Set to `-1` to disable,
-in which case only `flushInterval` counts.
+The timeout is applied once the agent has sent the entire request body to the APM Server.
+If the response from the server takes longer than allowed by this timeout,
+the HTTP request is terminated and the TCP socket closed.
 
 [[filter-http-headers]]
 ===== `filterHttpHeaders`
@@ -1150,11 +1144,12 @@ apm.flush(function (err) {
 })
 ----
 
-Manually flush the in-memory transaction queue and send all the transactions to the APM Server.
-The queue is otherwise flushed automatically,
-controlled by the <<flush-interval,`flushInterval`>> and/or <<max-queue-size,`maxQueueSize`>> config options.
+Manually end the active outgoing HTTP request to the APM Server.
+The HTTP request is otherwise ended automatically at regular intervals,
+controlled by the <<api-request-time,`apiRequestTime`>> and <<api-request-size,`apiRequestSize`>> config options.
 
-The callback is called *after* the event has been sent to the APM Server with a possible error argument.
+The callback is called *after* the active HTTP request has ended.
+The callback is called even if no HTTP request is currently active.
 
 [[apm-lambda]]
 ==== `apm.lambda([type, ] handler)`

diff --git a/docs/performance-tuning.asciidoc b/docs/performance-tuning.asciidoc
@@ -36,42 +36,58 @@ require('elastic-apm-node').start({
 ----
 
 [float]
-[[performance-transaction-queue]]
-=== Transaction Queue
+[[performance-apm-server-communication]]
+=== APM Server communication
 
-The agent buffers the collected data using an in-memory queue before sending it to the APM Server.
-The queue is flushed either after a specific <<performance-flush-interval,amount of time>> or when it reaches <<performance-max-queue-size,a certain size>> -
-whichever comes first.
-Lowering these defaults can reduce memory usage,
-but will increase the number of requests to the APM Server.
+The agent uses a persistent outgoing HTTP request to stream data to the APM Server.
+To avoid issues with intermittent proxies and load balancers,
+the HTTP request is ended and a new one created at regular intervals or when the size of the request becomes too big.
+
+There's an overhead involved in each HTTP request:
+Besides sending new HTTP headers,
+the agent needs to re-send certain metadata to the APM Server each time a new HTTP request is made.
+However,
+if allowed by the network,
+the TCP socket is reused between each HTTP request.
 
 [float]
-[[performance-flush-interval]]
-==== Flush Interval
+[[performance-api-request-time]]
+==== Max HTTP request duration
 
-To prevent items from staying in the queue for a long time during low activity,
-the <<flush-interval,`flushInterval`>> setting is used to ensure the queue empties if anything in it is too old.
+By default an HTTP request to the APM Server is ended after a maximum of 10 seconds.
+Using the <<api-request-time,`apiRequestTime`>> config option,
+this time limit can be modified.
 
-Lowering the flush interval will ensure transactions are sent to the APM Server faster,
-but may also result in increased HTTP traffic and cpu usage.
+Lowering the time limit might be necessary if dealing with very aggressive proxies,
+but increasing the time limit means that the combined overhead of these HTTP requests is reduced,
+as headers and metadata don't need to be re-sent that often.
 
 [float]
-[[performance-max-queue-size]]
-==== Max Queue Size
+[[performance-api-request-size]]
+==== Max HTTP request size
 
-The <<max-queue-size,`maxQueueSize`>> controls the maximum number of transactions that may remain in the queue before they must be sent.
+By default an HTTP request to the APM Server is ended after approximately 1 MiB of gzip compressed data has been written to the body.
+Using the <<api-request-size,`apiRequestSize`>> config option,
+this size limit can be modified.
 
-Lowering this will reduce memory consumption,
-however it will increase the number of requests made to the APM Server.
+Lowering the size limit might be necessary if dealing with very aggressive proxies,
+but increasing the size limit means that the combined overhead of these HTTP requests is reduced,
+as headers and metadata don't need to be re-sent that often.
 
 [float]
 [[performance-server-timeout]]
-=== APM Server Timeout
+==== APM Server Timeout
+
+In the event that the APM Server or the connection to the APM Server is slow or unstable,
+the <<server-timeout,`serverTimeout`>> setting can be set to ensure the agent doesn't wait too long for a response.
+
+The agent only allows for a single TCP socket to be opened to the APM Server at any given time.
+This is to avoid the overhead of opening too many sockets.
+If the agent is stuck waiting for a response from the previous HTTP request,
+it might start dropping new data in order to keep its memory footprint low.
 
-In the event that the connection to the APM Server is slow or unstable,
-the <<server-timeout,`serverTimeout`>> setting can be set to ensure connections don't stay open too long.
-If the server timeout is too high,
-it may result in too many socket descriptors being held open.
+Keeping this timeout low
+helps alleviate that problem.
 
 [float]
 [[performance-stack-traces]]

diff --git a/lib/agent.js b/lib/agent.js
@@ -5,7 +5,6 @@ var parseUrl = require('url').parse
 var path = require('path')
 
 var afterAll = require('after-all-results')
-var consoleLogLevel = require('console-log-level')
 var ElasticAPMHttpClient = require('elastic-apm-http-client')
 var isError = require('core-util-is').isError
 var ancestors = require('require-ancestors')
@@ -18,6 +17,7 @@ var Instrumentation = require('./instrumentation')
 var parsers = require('./parsers')
 var stackman = require('./stackman')
 var symbols = require('./symbols')
+var truncate = require('./truncate')
 
 var IncomingMessage = http.IncomingMessage
 var ServerResponse = http.ServerResponse
@@ -37,6 +37,12 @@ function Agent () {
   this._config()
 }
 
+Object.defineProperty(Agent.prototype, 'logger', {
+  get () {
+    return this._conf.logger
+  }
+})
+
 Object.defineProperty(Agent.prototype, 'currentTransaction', {
   get () {
     return this._instrumentation.currentTransaction
@@ -70,22 +76,11 @@ Agent.prototype.buildSpan = function () {
 }
 
 Agent.prototype._config = function (opts) {
-  if (opts && opts.logger) {
-    this.logger = opts.logger
-    delete opts.logger
-  }
-
   this._conf = config(opts)
 
   this._conf.serverHost = this._conf.serverUrl
     ? parseUrl(this._conf.serverUrl).hostname
     : 'localhost'
-
-  if (!this.logger) {
-    this.logger = consoleLogLevel({
-      level: this._conf.logLevel
-    })
-  }
 }
 
 Agent.prototype.isStarted = function () {

diff --git a/lib/config.js b/lib/config.js
@@ -3,6 +3,7 @@
 var fs = require('fs')
 var path = require('path')
 
+var consoleLogLevel = require('console-log-level')
 var normalizeBool = require('normalize-bool')
 var truncate = require('unicode-byte-truncate')
 
@@ -44,10 +45,8 @@ var DEFAULTS = {
   sourceLinesSpanAppFrames: 0,
   sourceLinesSpanLibraryFrames: 0,
   errorMessageMaxLength: 2048,
-  flushInterval: 10, // TODO: Deprecate
   transactionMaxSpans: Infinity,
   transactionSampleRate: 1.0,
-  maxQueueSize: 100, // TODO: Deprecate
   serverTimeout: 30,
   disableInstrumentations: []
 }
@@ -74,8 +73,6 @@ var ENV_TABLE = {
   errorOnAbortedRequests: 'ELASTIC_APM_ERROR_ON_ABORTED_REQUESTS',
   abortedErrorThreshold: 'ELASTIC_APM_ABORTED_ERROR_THRESHOLD',
   instrument: 'ELASTIC_APM_INSTRUMENT',
-  flushInterval: 'ELASTIC_APM_FLUSH_INTERVAL',
-  maxQueueSize: 'ELASTIC_APM_MAX_QUEUE_SIZE',
   asyncHooks: 'ELASTIC_APM_ASYNC_HOOKS',
   sourceLinesErrorAppFrames: 'ELASTIC_APM_SOURCE_LINES_ERROR_APP_FRAMES',
   sourceLinesErrorLibraryFrames: 'ELASTIC_APM_SOURCE_LINES_ERROR_LIBRARY_FRAMES',
@@ -132,6 +129,13 @@ function config (opts) {
   normalizeBools(opts)
   truncateOptions(opts)
 
+  // NOTE: A logger will already exist if a custom logger was given to start()
+  if (typeof opts.logger !== 'function') {
+    opts.logger = consoleLogLevel({
+      level: opts.logLevel
+    })
+  }
+
   return opts
 }
 

diff --git a/lib/instrumentation/index.js b/lib/instrumentation/index.js
@@ -7,6 +7,7 @@ var hook = require('require-in-the-middle')
 var semver = require('semver')
 
 var Transaction = require('./transaction')
+var truncate = require('../truncate')
 var shimmer = require('./shimmer')
 
 var MODULES = [