Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(k8s): introduce new flag waitForJobs to wait for k8 jobs #4611

Merged
merged 5 commits into from
Jun 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions core/src/plugins/kubernetes/kubernetes-type/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ export interface KubernetesDeployActionSpec extends KubernetesTypeCommonDeploySp
defaultTarget?: KubernetesTargetResourceSpec
sync?: KubernetesDeploySyncSpec
localMode?: KubernetesLocalModeSpec
waitForJobs?: boolean
}

export type KubernetesDeployActionConfig = DeployActionConfig<"kubernetes", KubernetesDeployActionSpec>
Expand Down Expand Up @@ -84,6 +85,13 @@ export const kubernetesCommonDeploySpecKeys = () => ({
namespace: namespaceNameSchema(),
portForwards: portForwardsSchema(),
timeout: k8sDeploymentTimeoutSchema(),
// TODO-0.14: flip this to true and change default behavior to
// wait for the jobs
waitForJobs: joi
shumailxyz marked this conversation as resolved.
Show resolved Hide resolved
.boolean()
.optional()
.default(false)
.description("Wait until the jobs have been completed. Garden will wait for as long as `timeout`."),
})

export const kubernetesDeploySchema = () =>
Expand Down
2 changes: 2 additions & 0 deletions core/src/plugins/kubernetes/kubernetes-type/handlers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ export const kubernetesDeploy: DeployActionHandler<"deploy", KubernetesDeployAct
resources: namespaceManifests,
log,
timeoutSec: action.getConfig("timeout"),
waitForJobs: spec.waitForJobs,
})
}

Expand Down Expand Up @@ -284,6 +285,7 @@ export const kubernetesDeploy: DeployActionHandler<"deploy", KubernetesDeployAct
resources: preparedManifests,
log,
timeoutSec: action.getConfig("timeout"),
waitForJobs: spec.waitForJobs,
})
}

Expand Down
54 changes: 48 additions & 6 deletions core/src/plugins/kubernetes/status/status.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/

import Bluebird from "bluebird"
import { diffString } from "json-diff"
import { DeploymentError } from "../../../exceptions"
import { PluginContext } from "../../../plugin-context"
import { KubeApi } from "../api"
import { getAppNamespace } from "../namespace"
import Bluebird from "bluebird"
import { KubernetesResource, KubernetesServerResource, BaseResource, KubernetesWorkload } from "../types"
import { zip, isArray, isPlainObject, pickBy, mapValues, flatten, cloneDeep, omit, isEqual, keyBy } from "lodash"
import { KubernetesProvider, KubernetesPluginContext } from "../config"
Expand All @@ -25,6 +25,7 @@ import {
V1Service,
V1Container,
KubernetesObject,
V1Job,
} from "@kubernetes/client-node"
import dedent = require("dedent")
import { getPods, getResourceKey, hashManifest } from "../util"
Expand All @@ -51,6 +52,7 @@ export interface StatusHandlerParams<T extends BaseResource | KubernetesObject =
resource: KubernetesServerResource<T>
log: Log
resourceVersion?: number
waitForJobs?: boolean
}

interface StatusHandler<T extends BaseResource | KubernetesObject = BaseResource> {
Expand Down Expand Up @@ -111,6 +113,37 @@ const objHandlers: { [kind: string]: StatusHandler } = {

return { state: "ready", resource }
},

/**
 * Status handler for Job resources.
 *
 * Resolves to:
 * - "unhealthy" when the Job carries a `Failed` condition, or when the number of failed pods has
 *   reached the configured `backoffLimit` (including a `backoffLimit` of 0),
 * - "deploying" when some, but not yet all, of the requested completions have succeeded,
 * - "ready" when at least one pod has succeeded and the completions check above did not apply,
 * - otherwise "deploying" if `waitForJobs` is set, and "ready" if it is not.
 */
Job: async ({ resource, waitForJobs }: StatusHandlerParams<V1Job>) => {
  // Counters may be absent (or null) before the Job controller has reported anything; treat that as zero.
  const failedCount = resource.status?.failed ?? 0
  const succeededCount = resource.status?.succeeded ?? 0
  const backoffLimit = resource.spec?.backoffLimit
  const completions = resource.spec?.completions

  // A `Failed` condition with status "True" means Kubernetes has given up on the Job, regardless of
  // how the failed-pod counter compares to the backoff limit (e.g. when a deadline was exceeded).
  const hasFailedCondition = (resource.status?.conditions ?? []).some(
    (condition) => condition.type === "Failed" && condition.status === "True"
  )
  // Compare against null/undefined explicitly: a backoffLimit of 0 is valid and means "no retries".
  const backoffExhausted = failedCount > 0 && backoffLimit != null && failedCount >= backoffLimit

  if (hasFailedCondition || backoffExhausted) {
    // job has failed
    return { state: "unhealthy", resource }
  }

  // NOTE(review): this branch reports "deploying" even when waitForJobs is unset, so a partially
  // completed Job is still waited on. Kept as-is to preserve existing behavior; confirm intent.
  if (completions && succeededCount > 0 && succeededCount < completions) {
    // job is not yet completed
    return { state: "deploying", resource }
  }

  if (succeededCount > 0) {
    // job has succeeded
    return { state: "ready", resource }
  }

  // Nothing has succeeded or failed yet: only keep waiting when waitForJobs is set,
  // otherwise mark the Job as ready and proceed.
  if (waitForJobs) {
    return { state: "deploying", resource }
  }
  return { state: "ready", resource }
},
}

/**
Expand All @@ -120,14 +153,21 @@ export async function checkResourceStatuses(
api: KubeApi,
namespace: string,
manifests: KubernetesResource[],
log: Log
log: Log,
waitForJobs?: boolean
): Promise<ResourceStatus[]> {
return Bluebird.map(manifests, async (manifest) => {
return checkResourceStatus(api, namespace, manifest, log)
return checkResourceStatus(api, namespace, manifest, log, waitForJobs)
})
}

export async function checkResourceStatus(api: KubeApi, namespace: string, manifest: KubernetesResource, log: Log) {
export async function checkResourceStatus(
api: KubeApi,
namespace: string,
manifest: KubernetesResource,
log: Log,
waitForJobs?: boolean
) {
const handler = objHandlers[manifest.kind]

if (manifest.metadata?.namespace) {
Expand All @@ -150,7 +190,7 @@ export async function checkResourceStatus(api: KubeApi, namespace: string, manif

let status: ResourceStatus
if (handler) {
status = await handler({ api, namespace, resource, log, resourceVersion })
status = await handler({ api, namespace, resource, log, resourceVersion, waitForJobs })
} else {
// if there is no explicit handler to check the status, we assume there's no rollout phase to wait for
status = { state: "ready", resource: manifest }
Expand All @@ -167,6 +207,7 @@ interface WaitParams {
resources: KubernetesResource[]
log: Log
timeoutSec: number
waitForJobs?: boolean
}

/**
Expand All @@ -180,6 +221,7 @@ export async function waitForResources({
resources,
log,
timeoutSec,
waitForJobs,
}: WaitParams) {
let loops = 0
const startTime = new Date().getTime()
Expand Down Expand Up @@ -217,7 +259,7 @@ export async function waitForResources({
await sleep(2000 + 500 * loops)
loops += 1

const statuses = await checkResourceStatuses(api, namespace, Object.values(pendingResources), log)
const statuses = await checkResourceStatuses(api, namespace, Object.values(pendingResources), log, waitForJobs)

for (const status of statuses) {
const key = getResourceKey(status.resource)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ describe("configureKubernetesModule", () => {
dependencies: [],
timeout: DEFAULT_BUILD_TIMEOUT_SEC,
},
waitForJobs: false,
dependencies: [],
files: [],
manifests: [
Expand Down
13 changes: 13 additions & 0 deletions docs/reference/action-types/Deploy/kubernetes.md
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,9 @@ spec:
# The maximum duration (in seconds) to wait for resources to deploy and become healthy.
timeout: 300

# Wait until any Kubernetes Jobs among the deployed resources have completed before treating the deployment as
# ready. Garden waits for at most `timeout` seconds.
waitForJobs: false

# Specify a default resource in the deployment to use for syncs, local mode, and for the `garden exec` command.
#
# Specify either `kind` and `name`, or a `podSelector`. The resource should be one of the resources deployed by this
Expand Down Expand Up @@ -818,6 +821,16 @@ The maximum duration (in seconds) to wait for resources to deploy and become hea
| -------- | ------- | -------- |
| `number` | `300` | No |

### `spec.waitForJobs`

[spec](#spec) > waitForJobs

Wait until any Kubernetes Jobs among the deployed resources have completed before treating the deployment as ready. Garden waits for at most `timeout` seconds.

| Type | Default | Required |
| --------- | ------- | -------- |
| `boolean` | `false` | No |

### `spec.defaultTarget`

[spec](#spec) > defaultTarget
Expand Down
11 changes: 11 additions & 0 deletions docs/reference/module-types/kubernetes.md
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,9 @@ portForwards:
# The maximum duration (in seconds) to wait for resources to deploy and become healthy.
timeout: 300

# Wait until any Kubernetes Jobs among the deployed resources have completed before treating the deployment as
# ready. Garden waits for at most `timeout` seconds.
waitForJobs: false

# The names of any services that this service depends on at runtime, and the names of any tasks that should be
# executed before this service is deployed.
dependencies: []
Expand Down Expand Up @@ -995,6 +998,14 @@ The maximum duration (in seconds) to wait for resources to deploy and become hea
| -------- | ------- | -------- |
| `number` | `300` | No |

### `waitForJobs`

Wait until any Kubernetes Jobs among the deployed resources have completed before treating the deployment as ready. Garden waits for at most `timeout` seconds.

| Type | Default | Required |
| --------- | ------- | -------- |
| `boolean` | `false` | No |

### `dependencies[]`

The names of any services that this service depends on at runtime, and the names of any tasks that should be executed before this service is deployed.
Expand Down