From 188cde19cba9310943eabcb90dbca0a4b4aed9dd Mon Sep 17 00:00:00 2001 From: Helio Machado <0x2b3bfa0+git@googlemail.com> Date: Wed, 12 Oct 2022 00:36:29 +0200 Subject: [PATCH] Enhance telemetry data and exclude `tensorboard` tracebacks (#1172) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Enrich analytics with container information and options * Add reminder * Avoid stack trace leak on Tensorboard * Make the code self-mindblowing * Pin `is-docker` in `package.json` 🙀 * Update tests * Fix double `.options` * Simplify container * Fix tests, again * Simplify telemetry options * Fix tests, again * Restore name/full differentiation * Revert telemetryData * Fix legacy commands * Surely not meant to be here * Apply suggestions from code review Co-authored-by: Domas Monkus * Unpack yargs in function body * fixup! Unpack yargs in function body * Add a Helpful Comment™ * Fix merge * Apply suggestions from code review Co-authored-by: Domas Monkus --- Dockerfile | 2 ++ bin/cml.js | 28 +++++++++++++++++++++---- bin/cml/asset/publish.js | 9 ++++++-- bin/cml/check/create.js | 6 +++++- bin/cml/comment/create.js | 15 +++++++++---- bin/cml/pr.js | 1 + bin/cml/pr/create.js | 6 +++++- bin/cml/repo/prepare.js | 6 +++++- bin/cml/runner.js | 1 + bin/cml/runner/launch.js | 21 +++++++++++++------ bin/cml/tensorboard/connect.e2e.test.js | 2 +- bin/cml/tensorboard/connect.js | 17 +++++++++------ bin/cml/workflow/rerun.js | 6 +++++- package-lock.json | 20 ++++++++++++++++++ package.json | 1 + src/analytics.e2e.test.js | 5 +++-- src/analytics.js | 9 ++++++-- 17 files changed, 124 insertions(+), 31 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9cc787155..807f18ec9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -116,6 +116,8 @@ WORKDIR ${CML_RUNNER_PATH} # SET SPECIFIC ENVIRONMENT VARIABLES ENV IN_DOCKER=1 ENV RUNNER_ALLOW_RUNASROOT=1 +# Environment variable used by cml to detect it's been installed using the docker image. +ENV _CML_CONTAINER_IMAGE=true # DEFINE ENTRY POINT AND COMMAND # Smart entrypoint understands commands like `bash` or `/bin/sh` but defaults to `cml`; diff --git a/bin/cml.js b/bin/cml.js index 836767332..80df965e1 100755 --- a/bin/cml.js +++ b/bin/cml.js @@ -48,7 +48,6 @@ const setupOpts = (opts) => { const { markdownfile } = opts; opts.markdownFile = markdownfile; - opts.cmlCommand = opts._[0]; opts.cml = new CML(opts); }; @@ -76,9 +75,29 @@ const setupLogger = (opts) => { }); }; -const setupTelemetry = async (opts) => { - const { cml, cmlCommand: action } = opts; - opts.telemetryEvent = await jitsuEventPayload({ action, cml }); +const setupTelemetry = async (opts, yargs) => { + const { cml, _: command } = opts; + + const options = {}; + for (const [name, option] of Object.entries(opts.options)) { + // Skip options with default values (i.e. not explicitly set by users) + if (opts[name] && !yargs.parsed.defaulted[name]) { + switch (option.telemetryData) { + case 'name': + options[name] = null; + break; + case 'full': + options[name] = opts[name]; + break; + } + } + } + + opts.telemetryEvent = await jitsuEventPayload({ + action: command.join(':'), + extra: { options }, + cml + }); }; const runPlugin = async ({ $0: executable, command }) => { @@ -99,6 +118,7 @@ const handleError = (message, error) => { (async () => { setupLogger({ log: 'debug' }); + try { await yargs .env('CML') diff --git a/bin/cml/asset/publish.js b/bin/cml/asset/publish.js index d545c0fe2..09fce8e20 100644 --- a/bin/cml/asset/publish.js +++ b/bin/cml/asset/publish.js @@ -23,7 +23,11 @@ exports.handler = async (opts) => { else await fs.writeFile(file, output); }; -exports.builder = (yargs) => yargs.env('CML_ASSET').options(exports.options); +exports.builder = (yargs) => + yargs + .env('CML_ASSET') + .option('options', { default: exports.options, hidden: true }) + .options(exports.options); exports.options = kebabcaseKeys({ url: { @@ -51,7 +55,8 @@ exports.options = kebabcaseKeys({ }, rmWatermark: { type: 'boolean', - description: 'Avoid CML watermark.' + description: 'Avoid CML watermark.', + telemetryData: 'name' }, mimeType: { type: 'string', diff --git a/bin/cml/check/create.js b/bin/cml/check/create.js index 43637d7b3..89ebcea9b 100755 --- a/bin/cml/check/create.js +++ b/bin/cml/check/create.js @@ -10,7 +10,11 @@ exports.handler = async (opts) => { await cml.checkCreate({ ...opts, report }); }; -exports.builder = (yargs) => yargs.env('CML_CHECK').options(exports.options); +exports.builder = (yargs) => + yargs + .env('CML_CHECK') + .option('options', { default: exports.options, hidden: true }) + .options(exports.options); exports.options = kebabcaseKeys({ token: { diff --git a/bin/cml/comment/create.js b/bin/cml/comment/create.js index 8036dca6f..b325b45d0 100644 --- a/bin/cml/comment/create.js +++ b/bin/cml/comment/create.js @@ -8,7 +8,11 @@ exports.handler = async (opts) => { console.log(await cml.commentCreate(opts)); }; -exports.builder = (yargs) => yargs.env('CML_COMMENT').options(exports.options); +exports.builder = (yargs) => + yargs + .env('CML_COMMENT') + .option('options', { default: exports.options, hidden: true }) + .options(exports.options); exports.options = kebabcaseKeys({ pr: { @@ -30,7 +34,8 @@ exports.options = kebabcaseKeys({ publishUrl: { type: 'string', default: 'https://asset.cml.dev', - description: 'Self-hosted image server URL' + description: 'Self-hosted image server URL', + telemetryData: 'name' }, watch: { type: 'boolean', @@ -44,7 +49,8 @@ exports.options = kebabcaseKeys({ native: { type: 'boolean', description: - "Uses driver's native capabilities to upload assets instead of CML's storage; not available on GitHub" + "Uses driver's native capabilities to upload assets instead of CML's storage; not available on GitHub", + telemetryData: 'name' }, update: { type: 'boolean', @@ -55,6 +61,7 @@ exports.options = kebabcaseKeys({ rmWatermark: { type: 'boolean', description: - 'Avoid watermark; CML needs a watermark to be able to distinguish CML comments from others' + 'Avoid watermark; CML needs a watermark to be able to distinguish CML comments from others', + telemetryData: 'name' } }); diff --git a/bin/cml/pr.js b/bin/cml/pr.js index 65930cbfb..4e9f6b3a3 100644 --- a/bin/cml/pr.js +++ b/bin/cml/pr.js @@ -16,5 +16,6 @@ exports.builder = (yargs) => ]) ) ) + .option('options', { default: options, hidden: true }) .check(({ globpath }) => globpath) .strict(); diff --git a/bin/cml/pr/create.js b/bin/cml/pr/create.js index 3e8274104..dc98eda9c 100755 --- a/bin/cml/pr/create.js +++ b/bin/cml/pr/create.js @@ -15,7 +15,11 @@ exports.handler = async (opts) => { console.log(link); }; -exports.builder = (yargs) => yargs.env('CML_PR').options(exports.options); +exports.builder = (yargs) => + yargs + .env('CML_PR') + .option('options', { default: exports.options, hidden: true }) + .options(exports.options); exports.options = kebabcaseKeys({ md: { diff --git a/bin/cml/repo/prepare.js b/bin/cml/repo/prepare.js index b3218f788..302f75c5c 100644 --- a/bin/cml/repo/prepare.js +++ b/bin/cml/repo/prepare.js @@ -10,7 +10,11 @@ exports.handler = async (opts) => { await cml.ci(opts); }; -exports.builder = (yargs) => yargs.env('CML_REPO').options(exports.options); +exports.builder = (yargs) => + yargs + .env('CML_REPO') + .option('options', { default: exports.options, hidden: true }) + .options(exports.options); exports.options = kebabcaseKeys({ unshallow: { diff --git a/bin/cml/runner.js b/bin/cml/runner.js index f294d4369..c97d0a0c5 100644 --- a/bin/cml/runner.js +++ b/bin/cml/runner.js @@ -16,5 +16,6 @@ exports.builder = (yargs) => ]) ) ) + .option('options', { default: options, hidden: true }) .check(() => process.argv.some((arg) => arg.startsWith('-'))) .strict(); diff --git a/bin/cml/runner/launch.js b/bin/cml/runner/launch.js index 8b568db12..bc9c2e5af 100755 --- a/bin/cml/runner/launch.js +++ b/bin/cml/runner/launch.js @@ -418,7 +418,11 @@ exports.handler = async (opts) => { } }; -exports.builder = (yargs) => yargs.env('CML_RUNNER').options(exports.options); +exports.builder = (yargs) => + yargs + .env('CML_RUNNER') + .option('options', { default: exports.options, hidden: true }) + .options(exports.options); exports.options = kebabcaseKeys({ labels: { @@ -454,13 +458,15 @@ exports.options = kebabcaseKeys({ type: 'boolean', conflicts: ['single', 'reuseIdle'], description: - "Don't launch a new runner if an existing one has the same name or overlapping labels" + "Don't launch a new runner if an existing one has the same name or overlapping labels", + telemetryData: 'name' }, reuseIdle: { type: 'boolean', conflicts: ['reuse', 'single'], description: - "Creates a new runner only if the matching labels don't exist or are already busy" + "Creates a new runner only if the matching labels don't exist or are already busy", + telemetryData: 'name' }, workdir: { type: 'string', @@ -476,7 +482,8 @@ exports.options = kebabcaseKeys({ cloud: { type: 'string', choices: ['aws', 'azure', 'gcp', 'kubernetes'], - description: 'Cloud to deploy the runner' + description: 'Cloud to deploy the runner', + telemetryData: 'full' }, cloudRegion: { type: 'string', @@ -529,12 +536,14 @@ exports.options = kebabcaseKeys({ type: 'number', default: -1, description: - 'Maximum spot instance bidding price in USD. Defaults to the current spot bidding price' + 'Maximum spot instance bidding price in USD. Defaults to the current spot bidding price', + telemetryData: 'name' }, cloudStartupScript: { type: 'string', description: - 'Run the provided Base64-encoded Linux shell script during the instance initialization' + 'Run the provided Base64-encoded Linux shell script during the instance initialization', + telemetryData: 'name' }, cloudAwsSecurityGroup: { type: 'string', diff --git a/bin/cml/tensorboard/connect.e2e.test.js b/bin/cml/tensorboard/connect.e2e.test.js index f4dec0840..1b3f0b40e 100644 --- a/bin/cml/tensorboard/connect.e2e.test.js +++ b/bin/cml/tensorboard/connect.e2e.test.js @@ -34,7 +34,7 @@ describe('tbLink', () => { error = err; } - expect(error.message).toBe(`Tensorboard took too long. ${message}`); + expect(error.message).toBe(`Tensorboard took too long`); }); test('valid url is returned', async () => { diff --git a/bin/cml/tensorboard/connect.js b/bin/cml/tensorboard/connect.js index 809d50809..7e0b073f3 100644 --- a/bin/cml/tensorboard/connect.js +++ b/bin/cml/tensorboard/connect.js @@ -3,6 +3,7 @@ const kebabcaseKeys = require('kebabcase-keys'); const { spawn } = require('child_process'); const { homedir } = require('os'); const tempy = require('tempy'); +const winston = require('winston'); const { exec, watermarkUri, sleep } = require('../../../src/utils'); @@ -28,8 +29,8 @@ const tbLink = async (opts = {}) => { chrono = chrono + chronoStep; } - const error = await fs.readFile(stderror, 'utf8'); - throw new Error(`Tensorboard took too long. ${error}`); + winston.error(await fs.readFile(stderror, 'utf8')); + throw new Error(`Tensorboard took too long`); }; const launchAndWaitLink = async (opts = {}) => { @@ -51,8 +52,8 @@ const launchAndWaitLink = async (opts = {}) => { proc.unref(); proc.on('exit', async (code, signal) => { if (code || signal) { - const error = await fs.readFile(stderrPath, 'utf8'); - throw new Error(`Tensorboard failed with error: ${error}`); + winston.error(await fs.readFile(stderrPath, 'utf8')); + throw new Error(`Tensorboard failed with error ${code || signal}`); } }); @@ -95,7 +96,10 @@ exports.handler = async (opts) => { }; exports.builder = (yargs) => - yargs.env('CML_TENSORBOARD').options(exports.options); + yargs + .env('CML_TENSORBOARD') + .option('options', { default: exports.options, hidden: true }) + .options(exports.options); exports.options = kebabcaseKeys({ credentials: { @@ -136,6 +140,7 @@ exports.options = kebabcaseKeys({ }, rmWatermark: { type: 'boolean', - description: 'Avoid CML watermark' + description: 'Avoid CML watermark', + telemetryData: 'name' } }); diff --git a/bin/cml/workflow/rerun.js b/bin/cml/workflow/rerun.js index 18c5f0936..40639434f 100644 --- a/bin/cml/workflow/rerun.js +++ b/bin/cml/workflow/rerun.js @@ -8,7 +8,11 @@ exports.handler = async (opts) => { await cml.pipelineRerun(opts); }; -exports.builder = (yargs) => yargs.env('CML_WORKFLOW').options(exports.options); +exports.builder = (yargs) => + yargs + .env('CML_WORKFLOW') + .option('options', { default: exports.options, hidden: true }) + .options(exports.options); exports.options = kebabcaseKeys({ id: { diff --git a/package-lock.json b/package-lock.json index b12751de9..41a5ee36b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -26,6 +26,7 @@ "git-url-parse": "^13.1.0", "globby": "^11.0.4", "https-proxy-agent": "^5.0.1", + "is-docker": "^2.2.1", "js-base64": "^3.7.2", "kebabcase-keys": "^1.0.0", "node-fetch": "^2.6.5", @@ -3967,6 +3968,20 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/is-docker": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/is-docker/-/is-docker-2.2.1.tgz", + "integrity": "sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ==", + "bin": { + "is-docker": "cli.js" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/is-extglob": { "version": "2.1.1", "license": "MIT", @@ -10137,6 +10152,11 @@ "is-decimal": { "version": "1.0.4" }, + "is-docker": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/is-docker/-/is-docker-2.2.1.tgz", + "integrity": "sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ==" + }, "is-extglob": { "version": "2.1.1" }, diff --git a/package.json b/package.json index 73d2b48ec..48f03c75d 100644 --- a/package.json +++ b/package.json @@ -84,6 +84,7 @@ "git-url-parse": "^13.1.0", "globby": "^11.0.4", "https-proxy-agent": "^5.0.1", + "is-docker": "2.2.1", "js-base64": "^3.7.2", "kebabcase-keys": "^1.0.0", "node-fetch": "^2.6.5", diff --git a/src/analytics.e2e.test.js b/src/analytics.e2e.test.js index 1a632892b..365bba3ac 100644 --- a/src/analytics.e2e.test.js +++ b/src/analytics.e2e.test.js @@ -8,8 +8,9 @@ describe('analytics tests', () => { const cml = new CML({ repo: REPO, token: TOKEN }); const action = 'test'; const cloud = 'azure'; + const container = 'cml'; const more = { one: 1, two: 2 }; - const extra = { cloud, ...more }; + const extra = { cloud, container, ...more }; const error = 'Ouch!'; const regex = /\d+\.\d+\.\d+/; @@ -25,7 +26,7 @@ describe('analytics tests', () => { expect(pl.backend).toBe(cloud); expect(pl.error).toBe(error); expect(Object.keys(pl.extra).sort()).toEqual( - ['ci'].concat(Object.keys(more)).sort() + ['ci', 'container'].concat(Object.keys(more)).sort() ); if (isCI()) { diff --git a/src/analytics.js b/src/analytics.js index 239651569..bb810f3be 100644 --- a/src/analytics.js +++ b/src/analytics.js @@ -9,6 +9,7 @@ const { scrypt } = require('crypto'); const { v4: uuidv4, v5: uuidv5, parse } = require('uuid'); const { userConfigDir } = require('appdirs'); const winston = require('winston'); +const isDocker = require('is-docker'); const { version: VERSION } = require('../package.json'); const { exec, fileExists, getos } = require('./utils'); @@ -150,7 +151,6 @@ const jitsuEventPayload = async ({ } = {}) => { try { const { cloud: backend = '', ...extraRest } = extra; - extraRest.ci = guessCI(); const osname = OS(); let { release = os.release() } = await getos(); @@ -171,7 +171,12 @@ const jitsuEventPayload = async ({ os_version: release, backend, error, - extra: extraRest + extra: { + ...extraRest, + ci: guessCI(), + container: + process.env._CML_CONTAINER_IMAGE === 'true' ? 'cml' : isDocker() + } }; } catch (err) { return {};