Skip to content

Commit

Permalink
feat(k8s): introduce new flag waitForJobs to wait for k8s jobs (#4611)
Browse files Browse the repository at this point in the history
* feat(kubernetes): wait for kubernetes jobs

* feat(k8s): introduce waitForJobs flag to control jobs behavior

* chore: update docs

* test: fix broken tests

* chore: address pr comments
  • Loading branch information
shumailxyz authored Jun 30, 2023
1 parent 0a1d369 commit 6eae3a6
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 6 deletions.
8 changes: 8 additions & 0 deletions core/src/plugins/kubernetes/kubernetes-type/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ export interface KubernetesDeployActionSpec extends KubernetesTypeCommonDeploySp
defaultTarget?: KubernetesTargetResourceSpec
sync?: KubernetesDeploySyncSpec
localMode?: KubernetesLocalModeSpec
waitForJobs?: boolean
}

export type KubernetesDeployActionConfig = DeployActionConfig<"kubernetes", KubernetesDeployActionSpec>
Expand Down Expand Up @@ -84,6 +85,13 @@ export const kubernetesCommonDeploySpecKeys = () => ({
namespace: namespaceNameSchema(),
portForwards: portForwardsSchema(),
timeout: k8sDeploymentTimeoutSchema(),
// TODO-0.14: flip this to true and change default behavior to
// wait for the jobs
waitForJobs: joi
.boolean()
.optional()
.default(false)
.description("Wait until the jobs have been completed. Garden will wait for as long as `timeout`."),
})

export const kubernetesDeploySchema = () =>
Expand Down
2 changes: 2 additions & 0 deletions core/src/plugins/kubernetes/kubernetes-type/handlers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ export const kubernetesDeploy: DeployActionHandler<"deploy", KubernetesDeployAct
resources: namespaceManifests,
log,
timeoutSec: action.getConfig("timeout"),
waitForJobs: spec.waitForJobs,
})
}

Expand Down Expand Up @@ -284,6 +285,7 @@ export const kubernetesDeploy: DeployActionHandler<"deploy", KubernetesDeployAct
resources: preparedManifests,
log,
timeoutSec: action.getConfig("timeout"),
waitForJobs: spec.waitForJobs,
})
}

Expand Down
54 changes: 48 additions & 6 deletions core/src/plugins/kubernetes/status/status.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/

import Bluebird from "bluebird"
import { diffString } from "json-diff"
import { DeploymentError } from "../../../exceptions"
import { PluginContext } from "../../../plugin-context"
import { KubeApi } from "../api"
import { getAppNamespace } from "../namespace"
import Bluebird from "bluebird"
import { KubernetesResource, KubernetesServerResource, BaseResource, KubernetesWorkload } from "../types"
import { zip, isArray, isPlainObject, pickBy, mapValues, flatten, cloneDeep, omit, isEqual, keyBy } from "lodash"
import { KubernetesProvider, KubernetesPluginContext } from "../config"
Expand All @@ -25,6 +25,7 @@ import {
V1Service,
V1Container,
KubernetesObject,
V1Job,
} from "@kubernetes/client-node"
import dedent = require("dedent")
import { getPods, getResourceKey, hashManifest } from "../util"
Expand All @@ -51,6 +52,7 @@ export interface StatusHandlerParams<T extends BaseResource | KubernetesObject =
resource: KubernetesServerResource<T>
log: Log
resourceVersion?: number
waitForJobs?: boolean
}

interface StatusHandler<T extends BaseResource | KubernetesObject = BaseResource> {
Expand Down Expand Up @@ -111,6 +113,37 @@ const objHandlers: { [kind: string]: StatusHandler } = {

return { state: "ready", resource }
},

/**
 * Status handler for Kubernetes Jobs.
 *
 * Resolution order:
 *  1. "unhealthy" if the Job carries a `Failed` condition, or if the number of failed pods has
 *     reached `spec.backoffLimit` (including an explicit limit of 0, i.e. no retries).
 *  2. "deploying" if some, but not yet all, of the requested `spec.completions` have succeeded.
 *  3. "ready" if at least one pod has succeeded (and case 2 did not apply).
 *  4. Otherwise nothing has succeeded or failed yet: report "deploying" when `waitForJobs` is set,
 *     so that the caller keeps polling, and "ready" when it is not.
 *
 * @param resource the Job as returned by the API server; `spec` and `status` may be partially unset
 * @param waitForJobs whether a Job that has not completed yet should be treated as still deploying
 */
Job: async ({ resource, waitForJobs }: StatusHandlerParams<V1Job>) => {
  // Normalise the counters once. `status` (and its fields) may be missing on a freshly created Job,
  // so every access must be null-safe.
  const failedCount = resource.status?.failed ?? 0
  const succeededCount = resource.status?.succeeded ?? 0
  const backoffLimit = resource.spec?.backoffLimit
  const completions = resource.spec?.completions

  // The Job controller sets a `Failed` condition when it gives up on the Job (backoff limit
  // exceeded, deadline exceeded, ...). This is the authoritative failure signal.
  const hasFailedCondition = (resource.status?.conditions ?? []).some(
    (condition) => condition.type === "Failed" && condition.status === "True"
  )

  // Compare against null/undefined explicitly: `backoffLimit: 0` is a valid setting meaning
  // "do not retry", and must not be skipped by a truthiness check.
  const backoffLimitReached = failedCount > 0 && backoffLimit != null && failedCount >= backoffLimit

  if (hasFailedCondition || backoffLimitReached) {
    // job has failed
    return { state: "unhealthy", resource }
  }

  if (completions && succeededCount > 0 && succeededCount < completions) {
    // job is not yet completed
    return { state: "deploying", resource }
  }

  if (succeededCount > 0) {
    // job has succeeded
    return { state: "ready", resource }
  }

  // wait for job only if waitForJobs is set, otherwise
  // mark it as ready and proceed.
  if (waitForJobs) {
    return { state: "deploying", resource }
  } else {
    return { state: "ready", resource }
  }
},
}

/**
Expand All @@ -120,14 +153,21 @@ export async function checkResourceStatuses(
api: KubeApi,
namespace: string,
manifests: KubernetesResource[],
log: Log
log: Log,
waitForJobs?: boolean
): Promise<ResourceStatus[]> {
return Bluebird.map(manifests, async (manifest) => {
return checkResourceStatus(api, namespace, manifest, log)
return checkResourceStatus(api, namespace, manifest, log, waitForJobs)
})
}

export async function checkResourceStatus(api: KubeApi, namespace: string, manifest: KubernetesResource, log: Log) {
export async function checkResourceStatus(
api: KubeApi,
namespace: string,
manifest: KubernetesResource,
log: Log,
waitForJobs?: boolean
) {
const handler = objHandlers[manifest.kind]

if (manifest.metadata?.namespace) {
Expand All @@ -150,7 +190,7 @@ export async function checkResourceStatus(api: KubeApi, namespace: string, manif

let status: ResourceStatus
if (handler) {
status = await handler({ api, namespace, resource, log, resourceVersion })
status = await handler({ api, namespace, resource, log, resourceVersion, waitForJobs })
} else {
// if there is no explicit handler to check the status, we assume there's no rollout phase to wait for
status = { state: "ready", resource: manifest }
Expand All @@ -167,6 +207,7 @@ interface WaitParams {
resources: KubernetesResource[]
log: Log
timeoutSec: number
waitForJobs?: boolean
}

/**
Expand All @@ -180,6 +221,7 @@ export async function waitForResources({
resources,
log,
timeoutSec,
waitForJobs,
}: WaitParams) {
let loops = 0
const startTime = new Date().getTime()
Expand Down Expand Up @@ -217,7 +259,7 @@ export async function waitForResources({
await sleep(2000 + 500 * loops)
loops += 1

const statuses = await checkResourceStatuses(api, namespace, Object.values(pendingResources), log)
const statuses = await checkResourceStatuses(api, namespace, Object.values(pendingResources), log, waitForJobs)

for (const status of statuses) {
const key = getResourceKey(status.resource)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ describe("configureKubernetesModule", () => {
dependencies: [],
timeout: DEFAULT_BUILD_TIMEOUT_SEC,
},
waitForJobs: false,
dependencies: [],
files: [],
manifests: [
Expand Down
13 changes: 13 additions & 0 deletions docs/reference/action-types/Deploy/kubernetes.md
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,9 @@ spec:
# The maximum duration (in seconds) to wait for resources to deploy and become healthy.
timeout: 300

# Wait for any Kubernetes Jobs in the deployed resources to complete before the deploy is considered ready. Garden
# waits for at most `timeout` seconds.
waitForJobs: false

# Specify a default resource in the deployment to use for syncs, local mode, and for the `garden exec` command.
#
# Specify either `kind` and `name`, or a `podSelector`. The resource should be one of the resources deployed by this
Expand Down Expand Up @@ -818,6 +821,16 @@ The maximum duration (in seconds) to wait for resources to deploy and become hea
| -------- | ------- | -------- |
| `number` | `300` | No |

### `spec.waitForJobs`

[spec](#spec) > waitForJobs

Wait for any Kubernetes Jobs in the deployed resources to complete before the deploy is considered ready. Garden waits for at most `timeout` seconds.

| Type | Default | Required |
| --------- | ------- | -------- |
| `boolean` | `false` | No |

### `spec.defaultTarget`

[spec](#spec) > defaultTarget
Expand Down
11 changes: 11 additions & 0 deletions docs/reference/module-types/kubernetes.md
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,9 @@ portForwards:
# The maximum duration (in seconds) to wait for resources to deploy and become healthy.
timeout: 300

# Wait for any Kubernetes Jobs in the deployed resources to complete before the deploy is considered ready. Garden
# waits for at most `timeout` seconds.
waitForJobs: false

# The names of any services that this service depends on at runtime, and the names of any tasks that should be
# executed before this service is deployed.
dependencies: []
Expand Down Expand Up @@ -995,6 +998,14 @@ The maximum duration (in seconds) to wait for resources to deploy and become hea
| -------- | ------- | -------- |
| `number` | `300` | No |

### `waitForJobs`

Wait for any Kubernetes Jobs in the deployed resources to complete before the deploy is considered ready. Garden waits for at most `timeout` seconds.

| Type | Default | Required |
| --------- | ------- | -------- |
| `boolean` | `false` | No |

### `dependencies[]`

The names of any services that this service depends on at runtime, and the names of any tasks that should be executed before this service is deployed.
Expand Down

0 comments on commit 6eae3a6

Please sign in to comment.