Skip to content

Commit

Permalink
fix(otel): ensure OTEL sends final span when -o opt is used (#6505)
Browse files Browse the repository at this point in the history
* fix(otel): ensure OTEL sends final span when -o opt is used

Before this fix, when the -o/--output option was used, Garden would call
the `shutdown` function early which prevented the final OTEL span to be
emitted.

Now we only run the shutdown logic and (wait for output flush) at the
very end of the command run.

This also means that the `Cli.run()` method no longer has
hard-to-predict side effects, depending on what flags were passed.

* refactor(cli): remove `exitOnAbort` option

This option didn't appear to have any real meaning after we moved the
call to the `shutdown` function to the parent caller (i.e. the
`runCli()` function) as opposed to the `Cli.run()` method so we're
removing it here.

* fix(otel): do not attempt to shutdown if target exporter not set

Before this fix, attempting to shutdown the OTEL SDK would hang if
called before a target exporter is set. The target exporter is set in
the Garden class so this would e.g. happen when running 'garden help'
and cause a delay before the attempt to shutdown OTEL times out.

With this commit we now check whether an exporter is configured, either
via env vars or the ReconfigurableExporter, and only attempt to shut down
if that's the case.

Note that we need to handle this in the enclosing context as opposed to
having the ReconfigurableExporter handle it itself because the exporter
doesn't "know" if a target exporter will be set later or not.
  • Loading branch information
eysi09 authored Oct 7, 2024
1 parent 8cd472d commit 7b96bdb
Show file tree
Hide file tree
Showing 12 changed files with 5,611 additions and 4,897 deletions.
21 changes: 13 additions & 8 deletions cli/src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@ import type { RunOutput } from "@garden-io/core/build/src/cli/cli.js"
import { GardenCli } from "@garden-io/core/build/src/cli/cli.js"
import type { GardenPluginReference } from "@garden-io/core/build/src/plugin/plugin.js"
import { GlobalConfigStore } from "@garden-io/core/build/src/config-store/global.js"
import { getOtelSDK } from "@garden-io/core/build/src/util/open-telemetry/tracing.js"
import { getOtelSDK, isOtelExporterConfigured } from "@garden-io/core/build/src/util/open-telemetry/tracing.js"
import { withContextFromEnv } from "@garden-io/core/build/src/util/open-telemetry/propagation.js"
import { wrapActiveSpan } from "@garden-io/core/build/src/util/open-telemetry/spans.js"
import { InternalError } from "@garden-io/core/build/src/exceptions.js"
import { styles } from "@garden-io/core/build/src/logger/styles.js"
import { gardenEnv, IGNORE_UNCAUGHT_EXCEPTION_VARNAME } from "@garden-io/core/build/src/constants.js"
import { inspect } from "node:util"
import { waitForOutputFlush } from "@garden-io/core/build/src/process.js"

// These plugins are always registered
export const getBundledPlugins = (): GardenPluginReference[] => [
Expand Down Expand Up @@ -85,9 +86,8 @@ if (gardenEnv.GARDEN_IGNORE_UNCAUGHT_EXCEPTION) {
export async function runCli({
args,
cli,
exitOnError = true,
initLogger = true,
}: { args?: string[]; cli?: GardenCli; exitOnError?: boolean; initLogger?: boolean } = {}) {
}: { args?: string[]; cli?: GardenCli; initLogger?: boolean } = {}) {
let code = 0
let result: RunOutput | undefined = undefined

Expand All @@ -104,7 +104,7 @@ export async function runCli({
}

// Note: We slice off the binary/script name from argv.
const results = await cli.run({ args: args || [], exitOnError })
const results = await cli.run({ args: args || [] })

return results
})
Expand All @@ -119,10 +119,14 @@ export async function runCli({
await globalConfigStore.delete("activeProcesses", String(cli.processRecord.pid))
}

try {
await Promise.race([getOtelSDK().shutdown(), new Promise((resolve) => setTimeout(resolve, 3000))])
} catch (err) {
logUnexpectedError(err, "OTEL shutdown failed")
// Calling "shutdown" will hang if the command exits before OTEL is set up. This will happen if an
// exporter is NOT set via the OTEL_ env var AND if Garden exits before it sets an exporter.
if (isOtelExporterConfigured()) {
try {
await Promise.race([getOtelSDK().shutdown(), new Promise((resolve) => setTimeout(resolve, 3000))])
} catch (err) {
logUnexpectedError(err, "OTEL shutdown failed")
}
}

if (ignoredUncaughtExceptions) {
Expand All @@ -134,6 +138,7 @@ export async function runCli({
code = 1
}

await waitForOutputFlush()
await shutdown(code)
}

Expand Down
4 changes: 1 addition & 3 deletions cli/test/unit/src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ describe("runCli", () => {
const projectRoot = resolve(testRoot, "test-projects", "bundled-projects")
const { cli, result } = await runCli({
args: ["tools", "--root", projectRoot],
exitOnError: false,
initLogger: false,
})

Expand Down Expand Up @@ -67,7 +66,6 @@ describe("runCli", () => {
const { result } = await runCli({
args: [cmd.name, "--root", projectRootA],
cli,
exitOnError: false,
initLogger: false,
})

Expand All @@ -89,7 +87,7 @@ describe("runCli", () => {
const cmd = new TestCommand()
cli.addCommand(cmd)

await runCli({ args: [cmd.name, "--root", projectRootA], cli, exitOnError: false, initLogger: false })
await runCli({ args: [cmd.name, "--root", projectRootA], cli, initLogger: false })

const allProcesses = Object.values(await globalConfigStore.get("activeProcesses"))
const record = find(allProcesses, (p) => p.command)
Expand Down
5 changes: 3 additions & 2 deletions core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
"@opentelemetry/instrumentation-http": "^0.52.0",
"@opentelemetry/otlp-exporter-base": "^0.47.0",
"@opentelemetry/resources": "^1.26.0",
"@opentelemetry/sdk-node": "^0.47.0",
"@opentelemetry/sdk-node": "^0.53.0",
"@opentelemetry/sdk-trace-base": "^1.21.0",
"@opentelemetry/semantic-conventions": "^1.21.0",
"@scg82/exit-hook": "^3.4.1",
Expand Down Expand Up @@ -274,4 +274,5 @@
"fsevents": "^2.3.3"
},
"gitHead": "b0647221a4d2ff06952bae58000b104215aed922"
}
}

23 changes: 7 additions & 16 deletions core/src/cli/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { resolve, join } from "path"
import fsExtra from "fs-extra"
import { getBuiltinCommands } from "../commands/commands.js"
import { getCloudDistributionName } from "../util/cloud.js"
import { shutdown, getPackageVersion } from "../util/util.js"
import { getPackageVersion } from "../util/util.js"
import type { Command, CommandResult, BuiltinArgs } from "../commands/base.js"
import { CommandGroup } from "../commands/base.js"
import type { GardenError } from "../exceptions.js"
Expand Down Expand Up @@ -59,7 +59,7 @@ import type { Log } from "../logger/log-entry.js"
import { dedent } from "../util/string.js"
import type { GardenProcess } from "../config-store/global.js"
import { GlobalConfigStore } from "../config-store/global.js"
import { registerProcess, waitForOutputFlush } from "../process.js"
import { registerProcess } from "../process.js"
import { uuidv4 } from "../util/random.js"
import { withSessionContext } from "../util/open-telemetry/context.js"
import { wrapActiveSpan } from "../util/open-telemetry/spans.js"
Expand Down Expand Up @@ -409,12 +409,10 @@ ${renderCommands(commands)}

async run({
args,
exitOnError,
processRecord,
cwd,
}: {
args: string[]
exitOnError: boolean
processRecord?: GardenProcess
cwd?: string
}): Promise<RunOutput> {
Expand All @@ -423,14 +421,8 @@ ${renderCommands(commands)}
const errors: (GardenError | Error)[] = []

async function done(abortCode: number, consoleOutput: string, result: any = {}) {
if (exitOnError) {
// eslint-disable-next-line no-console
console.log(consoleOutput)
await waitForOutputFlush()
await shutdown(abortCode)
} else {
await waitForOutputFlush()
}
// eslint-disable-next-line no-console
console.log(consoleOutput)

return { argv, code: abortCode, errors, result, consoleOutput }
}
Expand Down Expand Up @@ -484,9 +476,9 @@ ${renderCommands(commands)}

if (projectConfig) {
const customCommands = await this.getCustomCommands(log, workingDir)
const picked = pickCommand(customCommands, argv._)
command = picked.command
matchedPath = picked.matchedPath
const pickedCommand = pickCommand(customCommands, argv._)
command = pickedCommand.command
matchedPath = pickedCommand.matchedPath
}
} catch (error) {
return done(1, toGardenError(error).explain("Failed to get custom commands"))
Expand Down Expand Up @@ -635,7 +627,6 @@ ${renderCommands(commands)}
let code = 0
if (gardenErrors.length > 0) {
renderCommandErrors(logger, gardenErrors)
await waitForOutputFlush()
code = commandResult.exitCode || 1
}

Expand Down
1 change: 0 additions & 1 deletion core/src/commands/custom.ts
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,6 @@ export class CustomCommandWrapper extends Command {

const res = await cli.run({
args: gardenCommand,
exitOnError: false,
cwd: garden.projectRoot,
})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/

import type { ExportResult } from "@opentelemetry/core"
import { type ExportResult } from "@opentelemetry/core"
import type { ReadableSpan, SpanExporter } from "@opentelemetry/sdk-trace-base"
import type { Deferred } from "../../util.js"
import { defer } from "../../util.js"
Expand Down Expand Up @@ -107,4 +107,8 @@ export class ReconfigurableExporter implements SpanExporter {
})
}
}

public hasTargetExporter() {
return !!this.targetExporter
}
}
16 changes: 13 additions & 3 deletions core/src/util/open-telemetry/tracing.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ import type { OTLPExporterNodeConfigBase } from "@opentelemetry/otlp-exporter-ba
import { NoOpExporter } from "./exporters/no-op-exporter.js"

export const tracer = opentelemetry.api.trace.getTracer("garden")

export const reconfigurableExporter = new ReconfigurableExporter()
const hasOtelEnvConfiguration = !!process.env.OTEL_TRACES_EXPORTER

// Singleton we initialize either when we get the SDK the first time
// or when we call `initTracing` explicitly
Expand All @@ -39,6 +39,18 @@ export const getOtelSDK: () => opentelemetry.NodeSDK = () => {
}
}

/**
* Used to check if OTEL has been properly configured, either via the OTEL_ env vars or by
* setting a target exporter for the ReconfigurableExporter.
*
* This is needed because attempting to shut down OTEL will hang if no target exporter has been set.
*
* @returns boolean
*/
export function isOtelExporterConfigured() {
return hasOtelEnvConfiguration || reconfigurableExporter.hasTargetExporter()
}

/**
* Initializes the tracing and auto-instrumentations.
* Should be called as early as possible in the application initialization
Expand All @@ -54,8 +66,6 @@ export function initTracing(): opentelemetry.NodeSDK {
process.env.OTEL_SDK_DISABLED = "true"
}

const hasOtelEnvConfiguration = !!process.env.OTEL_TRACES_EXPORTER

otelSDK = new opentelemetry.NodeSDK({
serviceName: "garden-cli",
instrumentations: [
Expand Down
1 change: 1 addition & 0 deletions core/src/util/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ export async function shutdown(code?: number) {
// eslint-disable-next-line no-console
console.log(getDefaultProfiler().report())
}

gracefulExit(code)
}
}
Expand Down
6 changes: 3 additions & 3 deletions core/test/unit/src/cli/analytics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ describe("cli analytics", () => {
const command = new TestCommand()
cli.addCommand(command)

await cli.run({ args: ["test-command"], exitOnError: false })
await cli.run({ args: ["test-command"] })

expect(scope.done()).to.not.throw
})
Expand Down Expand Up @@ -95,7 +95,7 @@ describe("cli analytics", () => {
const command = new TestCommand()
cli.addCommand(command)

await cli.run({ args: ["test-command"], exitOnError: false })
await cli.run({ args: ["test-command"] })

expect(scope.done()).to.not.throw
})
Expand All @@ -110,7 +110,7 @@ describe("cli analytics", () => {

cli.addCommand(command)

await cli.run({ args: ["test-command"], exitOnError: false })
await cli.run({ args: ["test-command"] })

expect(scope.isDone()).to.equal(false)
})
Expand Down
Loading

0 comments on commit 7b96bdb

Please sign in to comment.