From e7af72f88ee68f5202a05d0d29f59d49fbf5674d Mon Sep 17 00:00:00 2001 From: Eric Weber Date: Thu, 16 Jun 2022 17:22:55 +0000 Subject: [PATCH 1/7] backport of commit ca3c5856d5a59bc98b20a795b2bbf9cfc5ff6e24 --- api/tasks.go | 19 +++++++++++++------ .../taskrunner/plugin_supervisor_hook.go | 6 +++--- command/agent/job_endpoint.go | 1 + nomad/structs/csi.go | 13 +++++++++---- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/api/tasks.go b/api/tasks.go index 6cdb44da3a3..f2339dd6fb6 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -1032,14 +1032,17 @@ type TaskCSIPluginConfig struct { // CSIPluginType instructs Nomad on how to handle processing a plugin Type CSIPluginType `mapstructure:"type" hcl:"type,optional"` - // MountDir is the destination that nomad should mount in its CSI - // directory for the plugin. It will then expect a file called CSISocketName - // to be created by the plugin, and will provide references into - // "MountDir/CSIIntermediaryDirname/VolumeName/AllocID for mounts. - // - // Default is /csi. + // MountDir is the directory (within its container) in which the plugin creates a + // socket (called CSISocketName) for communication with Nomad. Default is /csi. MountDir string `mapstructure:"mount_dir" hcl:"mount_dir,optional"` + // StagePublishDir is the base directory (within its container) in which the plugin + // mounts volumes being staged and bind mounts volumes being published. + // e.g. staging_target_path = {StagePublishDir}/staging/{volume-id}/{usage-mode} + // e.g. target_path = {StagePublishDir}/per-alloc/{alloc-id}/{volume-id}/{usage-mode} + // Default is /local/csi. + StagePublishDir string `mapstructure:"stage_publish_dir" hcl:"stage_publish_dir,optional"` + // HealthTimeout is the time after which the CSI plugin tasks will be killed // if the CSI Plugin is not healthy. 
HealthTimeout time.Duration `mapstructure:"health_timeout" hcl:"health_timeout,optional"` @@ -1050,6 +1053,10 @@ func (t *TaskCSIPluginConfig) Canonicalize() { t.MountDir = "/csi" } + if t.StagePublishDir == "" { + t.StagePublishDir = filepath.Join("/local", "csi") + } + if t.HealthTimeout == 0 { t.HealthTimeout = 30 * time.Second } diff --git a/client/allocrunner/taskrunner/plugin_supervisor_hook.go b/client/allocrunner/taskrunner/plugin_supervisor_hook.go index 4696bc53f38..35a3878034f 100644 --- a/client/allocrunner/taskrunner/plugin_supervisor_hook.go +++ b/client/allocrunner/taskrunner/plugin_supervisor_hook.go @@ -157,12 +157,12 @@ func (h *csiPluginSupervisorHook) Prestart(ctx context.Context, } // where the staging and per-alloc directories will be mounted volumeStagingMounts := &drivers.MountConfig{ - // TODO(tgross): add this TaskPath to the CSIPluginConfig as well - TaskPath: "/local/csi", + TaskPath: h.task.CSIPluginConfig.StagePublishDir, HostPath: h.mountPoint, Readonly: false, PropagationMode: "bidirectional", } + h.logger.Info("", "volumeStagingMounts", volumeStagingMounts) // TODO: Remove this before merge. 
// devices from the host devMount := &drivers.MountConfig{ TaskPath: "/dev", @@ -360,7 +360,7 @@ func (h *csiPluginSupervisorHook) registerPlugin(client csi.CSIPlugin, socketPat Options: map[string]string{ "Provider": info.Name, // vendor name "MountPoint": h.mountPoint, - "ContainerMountPoint": "/local/csi", + "ContainerMountPoint": h.task.CSIPluginConfig.StagePublishDir, }, } } diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index 97a57884f9d..2c10adef423 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -1263,6 +1263,7 @@ func ApiCSIPluginConfigToStructsCSIPluginConfig(apiConfig *api.TaskCSIPluginConf sc.ID = apiConfig.ID sc.Type = structs.CSIPluginType(apiConfig.Type) sc.MountDir = apiConfig.MountDir + sc.StagePublishDir = apiConfig.StagePublishDir sc.HealthTimeout = apiConfig.HealthTimeout return sc } diff --git a/nomad/structs/csi.go b/nomad/structs/csi.go index eea20b597d6..79e4ba35f9f 100644 --- a/nomad/structs/csi.go +++ b/nomad/structs/csi.go @@ -62,12 +62,17 @@ type TaskCSIPluginConfig struct { // Type instructs Nomad on how to handle processing a plugin Type CSIPluginType - // MountDir is the destination that nomad should mount in its CSI - // directory for the plugin. It will then expect a file called CSISocketName - // to be created by the plugin, and will provide references into - // "MountDir/CSIIntermediaryDirname/{VolumeName}/{AllocID} for mounts. + // MountDir is the directory (within its container) in which the plugin creates a + // socket (called CSISocketName) for communication with Nomad. Default is /csi. MountDir string + // StagePublishDir is the base directory (within its container) in which the plugin + // mounts volumes being staged and bind mount volumes being published. + // e.g. staging_target_path = {StagePublishDir}/staging/{volume-id}/{usage-mode} + // e.g. target_path = {StagePublishDir}/per-alloc/{alloc-id}/{volume-id}/{usage-mode} + // Default is /local/csi. 
+ StagePublishDir string + // HealthTimeout is the time after which the CSI plugin tasks will be killed // if the CSI Plugin is not healthy. HealthTimeout time.Duration `mapstructure:"health_timeout" hcl:"health_timeout,optional"` From 0691d1faa7b0b0821c9a77e29ebf12afd98a3217 Mon Sep 17 00:00:00 2001 From: Eric Weber Date: Thu, 16 Jun 2022 20:38:10 +0000 Subject: [PATCH 2/7] backport of commit c8010c44987ef1b52bf8db127acb3db5612a427b --- website/content/docs/concepts/plugins/csi.mdx | 13 +++++++++---- .../content/docs/job-specification/csi_plugin.mdx | 13 +++++++++---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/website/content/docs/concepts/plugins/csi.mdx b/website/content/docs/concepts/plugins/csi.mdx index fa22319593d..c572e662229 100644 --- a/website/content/docs/concepts/plugins/csi.mdx +++ b/website/content/docs/concepts/plugins/csi.mdx @@ -38,9 +38,10 @@ A CSI plugin task requires the [`csi_plugin`][csi_plugin] block: ```hcl csi_plugin { - id = "csi-hostpath" - type = "monolith" - mount_dir = "/csi" + id = "csi-hostpath" + type = "monolith" + mount_dir = "/csi" + stage_publish_dir = "/local/csi" } ``` @@ -73,7 +74,11 @@ Nomad exposes a Unix domain socket named `csi.sock` inside each CSI plugin task, and communicates over the gRPC protocol expected by the CSI specification. The `mount_dir` field tells Nomad where the plugin expects to find the socket file. The path to this socket is exposed in -the container as the `CSI_ENDPOINT` environment variable. +the container as the `CSI_ENDPOINT` environment variable. In +addition, the `stage_publish_dir` field tells Nomad where the plugin +wants to be instructed to mount volumes for staging and/or publishing. +This field is generally not required and, like `mount_dir`, only +affects the plugin container's internal view of the file system. 
### Plugin Lifecycle and State diff --git a/website/content/docs/job-specification/csi_plugin.mdx b/website/content/docs/job-specification/csi_plugin.mdx index 4048943e3b1..9593a6e1e89 100644 --- a/website/content/docs/job-specification/csi_plugin.mdx +++ b/website/content/docs/job-specification/csi_plugin.mdx @@ -17,10 +17,11 @@ to claim [volumes][csi_volumes]. ```hcl csi_plugin { - id = "csi-hostpath" - type = "monolith" - mount_dir = "/csi" - health_timeout = "30s" + id = "csi-hostpath" + type = "monolith" + mount_dir = "/csi" + stage_publish_dir = "/local/csi" + health_timeout = "30s" } ``` @@ -44,6 +45,10 @@ csi_plugin { container where the plugin will expect a Unix domain socket for bidirectional communication with Nomad. +- `stage_publish_dir` `(string: )` - The base directory + path inside the container where the plugin will be instructed to + stage and publish volumes. + - `health_timeout` `(duration: )` - The duration that the plugin supervisor will wait before restarting an unhealthy CSI plugin. Must be a duration value such as `30s` or `2m`. From 9bc140650bce857fed40dc2ff5bcb156f520b969 Mon Sep 17 00:00:00 2001 From: Eric Weber Date: Thu, 16 Jun 2022 21:14:03 +0000 Subject: [PATCH 3/7] backport of commit 1eab7f51e9a37694d7c93335ac80ba9fbb9690a9 --- client/allocrunner/taskrunner/plugin_supervisor_hook.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/allocrunner/taskrunner/plugin_supervisor_hook.go b/client/allocrunner/taskrunner/plugin_supervisor_hook.go index 35a3878034f..b182f4f9d05 100644 --- a/client/allocrunner/taskrunner/plugin_supervisor_hook.go +++ b/client/allocrunner/taskrunner/plugin_supervisor_hook.go @@ -81,7 +81,7 @@ var _ interfaces.TaskStopHook = &csiPluginSupervisorHook{} // Per-allocation directories of unix domain sockets used to communicate // with the CSI plugin. Nomad creates the directory and the plugin creates // the socket file. 
This directory is bind-mounted to the -// csi_plugin.mount_config dir in the plugin task. +// csi_plugin.mount_dir in the plugin task. // // {plugin-type}/{plugin-id}/ // staging/ From 0eee542a39cdf024059f35513b1ae9f4196d2a19 Mon Sep 17 00:00:00 2001 From: Eric Weber Date: Thu, 16 Jun 2022 22:00:01 +0000 Subject: [PATCH 4/7] backport of commit 3448143d065bddac239a41591be6f4c31776c89d --- .../allocrunner/taskrunner/plugin_supervisor_hook.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/client/allocrunner/taskrunner/plugin_supervisor_hook.go b/client/allocrunner/taskrunner/plugin_supervisor_hook.go index b182f4f9d05..7346b4e02fc 100644 --- a/client/allocrunner/taskrunner/plugin_supervisor_hook.go +++ b/client/allocrunner/taskrunner/plugin_supervisor_hook.go @@ -103,6 +103,16 @@ func newCSIPluginSupervisorHook(config *csiPluginSupervisorHookConfig) *csiPlugi socketMountPoint := filepath.Join(config.clientStateDirPath, "csi", "plugins", config.runner.Alloc().ID) + // In v1.3.0, Nomad started instructing CSI plugins to stage and publish + // within /csi/local. Plugins deployed after the introduction of + // StagePublishDir default to StagePublishDir = /csi/local. However, + // plugins deployed between v1.3.0 and the introduction of + // StagePublishDir have StagePublishDir = "". Default to /csi/local here + // to avoid breaking plugins that aren't redeployed. 
+ if task.CSIPluginConfig.StagePublishDir == "" { + task.CSIPluginConfig.StagePublishDir = filepath.Join("/local", "csi") + } + if task.CSIPluginConfig.HealthTimeout == 0 { task.CSIPluginConfig.HealthTimeout = 30 * time.Second } From 1e596b9d9a39d781cb0d781d88816cb23de79228 Mon Sep 17 00:00:00 2001 From: Eric Weber Date: Mon, 25 Jul 2022 20:00:31 +0000 Subject: [PATCH 5/7] backport of commit 27e95c1830fd6b37383370fcf893062daa2d12a5 --- client/allocrunner/taskrunner/plugin_supervisor_hook.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/client/allocrunner/taskrunner/plugin_supervisor_hook.go b/client/allocrunner/taskrunner/plugin_supervisor_hook.go index 7346b4e02fc..fabe0435fb0 100644 --- a/client/allocrunner/taskrunner/plugin_supervisor_hook.go +++ b/client/allocrunner/taskrunner/plugin_supervisor_hook.go @@ -103,16 +103,16 @@ func newCSIPluginSupervisorHook(config *csiPluginSupervisorHookConfig) *csiPlugi socketMountPoint := filepath.Join(config.clientStateDirPath, "csi", "plugins", config.runner.Alloc().ID) - // In v1.3.0, Nomad started instructing CSI plugins to stage and publish + // In v1.3.0, Nomad started instructing CSI plugins to stage and publish // within /csi/local. Plugins deployed after the introduction of - // StagePublishDir default to StagePublishDir = /csi/local. However, - // plugins deployed between v1.3.0 and the introduction of + // StagePublishDir default to StagePublishDir = /csi/local. However, + // plugins deployed between v1.3.0 and the introduction of // StagePublishDir have StagePublishDir = "". Default to /csi/local here // to avoid breaking plugins that aren't redeployed. 
if task.CSIPluginConfig.StagePublishDir == "" { task.CSIPluginConfig.StagePublishDir = filepath.Join("/local", "csi") } - + if task.CSIPluginConfig.HealthTimeout == 0 { task.CSIPluginConfig.HealthTimeout = 30 * time.Second } From 9b1b3e59b266e8af2491e6669380890bafbb6cac Mon Sep 17 00:00:00 2001 From: Eric Weber Date: Mon, 1 Aug 2022 23:00:57 +0000 Subject: [PATCH 6/7] backport of commit b5d29a3f95d223b1ddb4bed793251f7dc6ea4e70 --- api/tasks.go | 12 +++++------ .../taskrunner/plugin_supervisor_hook.go | 15 +++++++------- command/agent/job_endpoint.go | 2 +- nomad/structs/csi.go | 8 ++++---- website/content/docs/concepts/plugins/csi.mdx | 18 ++++++++--------- .../docs/job-specification/csi_plugin.mdx | 20 ++++++++++--------- 6 files changed, 38 insertions(+), 37 deletions(-) diff --git a/api/tasks.go b/api/tasks.go index f2339dd6fb6..98957ecc397 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -1036,12 +1036,12 @@ type TaskCSIPluginConfig struct { // socket (called CSISocketName) for communication with Nomad. Default is /csi. MountDir string `mapstructure:"mount_dir" hcl:"mount_dir,optional"` - // StagePublishDir is the base directory (within its container) in which the plugin + // StagePublishBaseDir is the base directory (within its container) in which the plugin // mounts volumes being staged and bind mounts volumes being published. - // e.g. staging_target_path = {StagePublishDir}/staging/{volume-id}/{usage-mode} - // e.g. target_path = {StagePublishDir}/per-alloc/{alloc-id}/{volume-id}/{usage-mode} + // e.g. staging_target_path = {StagePublishBaseDir}/staging/{volume-id}/{usage-mode} + // e.g. target_path = {StagePublishBaseDir}/per-alloc/{alloc-id}/{volume-id}/{usage-mode} // Default is /local/csi. 
- StagePublishDir string `mapstructure:"stage_publish_dir" hcl:"stage_publish_dir,optional"` + StagePublishBaseDir string `mapstructure:"stage_publish_base_dir" hcl:"stage_publish_base_dir,optional"` // HealthTimeout is the time after which the CSI plugin tasks will be killed // if the CSI Plugin is not healthy. @@ -1053,8 +1053,8 @@ func (t *TaskCSIPluginConfig) Canonicalize() { t.MountDir = "/csi" } - if t.StagePublishDir == "" { - t.StagePublishDir = filepath.Join("/local", "csi") + if t.StagePublishBaseDir == "" { + t.StagePublishBaseDir = filepath.Join("/local", "csi") } if t.HealthTimeout == 0 { diff --git a/client/allocrunner/taskrunner/plugin_supervisor_hook.go b/client/allocrunner/taskrunner/plugin_supervisor_hook.go index fabe0435fb0..03e52e66b09 100644 --- a/client/allocrunner/taskrunner/plugin_supervisor_hook.go +++ b/client/allocrunner/taskrunner/plugin_supervisor_hook.go @@ -104,13 +104,13 @@ func newCSIPluginSupervisorHook(config *csiPluginSupervisorHookConfig) *csiPlugi "plugins", config.runner.Alloc().ID) // In v1.3.0, Nomad started instructing CSI plugins to stage and publish - // within /csi/local. Plugins deployed after the introduction of - // StagePublishDir default to StagePublishDir = /csi/local. However, + // within /local/csi. Plugins deployed after the introduction of + // StagePublishBaseDir default to StagePublishBaseDir = /local/csi. However, // plugins deployed between v1.3.0 and the introduction of - // StagePublishDir have StagePublishDir = "". Default to /csi/local here + // StagePublishBaseDir have StagePublishBaseDir = "". Default to /local/csi here // to avoid breaking plugins that aren't redeployed. 
- if task.CSIPluginConfig.StagePublishDir == "" { - task.CSIPluginConfig.StagePublishDir = filepath.Join("/local", "csi") + if task.CSIPluginConfig.StagePublishBaseDir == "" { + task.CSIPluginConfig.StagePublishBaseDir = filepath.Join("/local", "csi") } if task.CSIPluginConfig.HealthTimeout == 0 { @@ -167,12 +167,11 @@ func (h *csiPluginSupervisorHook) Prestart(ctx context.Context, } // where the staging and per-alloc directories will be mounted volumeStagingMounts := &drivers.MountConfig{ - TaskPath: h.task.CSIPluginConfig.StagePublishDir, + TaskPath: h.task.CSIPluginConfig.StagePublishBaseDir, HostPath: h.mountPoint, Readonly: false, PropagationMode: "bidirectional", } - h.logger.Info("", "volumeStagingMounts", volumeStagingMounts) // TODO: Remove this before merge. // devices from the host devMount := &drivers.MountConfig{ TaskPath: "/dev", @@ -370,7 +369,7 @@ func (h *csiPluginSupervisorHook) registerPlugin(client csi.CSIPlugin, socketPat Options: map[string]string{ "Provider": info.Name, // vendor name "MountPoint": h.mountPoint, - "ContainerMountPoint": h.task.CSIPluginConfig.StagePublishDir, + "ContainerMountPoint": h.task.CSIPluginConfig.StagePublishBaseDir, }, } } diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index 2c10adef423..4f72f75e6fd 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -1263,7 +1263,7 @@ func ApiCSIPluginConfigToStructsCSIPluginConfig(apiConfig *api.TaskCSIPluginConf sc.ID = apiConfig.ID sc.Type = structs.CSIPluginType(apiConfig.Type) sc.MountDir = apiConfig.MountDir - sc.StagePublishDir = apiConfig.StagePublishDir + sc.StagePublishBaseDir = apiConfig.StagePublishBaseDir sc.HealthTimeout = apiConfig.HealthTimeout return sc } diff --git a/nomad/structs/csi.go b/nomad/structs/csi.go index 79e4ba35f9f..75601706b31 100644 --- a/nomad/structs/csi.go +++ b/nomad/structs/csi.go @@ -66,12 +66,12 @@ type TaskCSIPluginConfig struct { // socket (called CSISocketName) for communication 
with Nomad. Default is /csi. MountDir string - // StagePublishDir is the base directory (within its container) in which the plugin + // StagePublishBaseDir is the base directory (within its container) in which the plugin // mounts volumes being staged and bind mount volumes being published. - // e.g. staging_target_path = {StagePublishDir}/staging/{volume-id}/{usage-mode} - // e.g. target_path = {StagePublishDir}/per-alloc/{alloc-id}/{volume-id}/{usage-mode} + // e.g. staging_target_path = {StagePublishBaseDir}/staging/{volume-id}/{usage-mode} + // e.g. target_path = {StagePublishBaseDir}/per-alloc/{alloc-id}/{volume-id}/{usage-mode} // Default is /local/csi. - StagePublishDir string + StagePublishBaseDir string // HealthTimeout is the time after which the CSI plugin tasks will be killed // if the CSI Plugin is not healthy. diff --git a/website/content/docs/concepts/plugins/csi.mdx b/website/content/docs/concepts/plugins/csi.mdx index c572e662229..f711600fe96 100644 --- a/website/content/docs/concepts/plugins/csi.mdx +++ b/website/content/docs/concepts/plugins/csi.mdx @@ -38,10 +38,10 @@ A CSI plugin task requires the [`csi_plugin`][csi_plugin] block: ```hcl csi_plugin { - id = "csi-hostpath" - type = "monolith" - mount_dir = "/csi" - stage_publish_dir = "/local/csi" + id = "csi-hostpath" + type = "monolith" + mount_dir = "/csi" + stage_publish_base_dir = "/local/csi" } ``` @@ -74,11 +74,11 @@ Nomad exposes a Unix domain socket named `csi.sock` inside each CSI plugin task, and communicates over the gRPC protocol expected by the CSI specification. The `mount_dir` field tells Nomad where the plugin expects to find the socket file. The path to this socket is exposed in -the container as the `CSI_ENDPOINT` environment variable. In -addition, the `stage_publish_dir` field tells Nomad where the plugin -wants to be instructed to mount volumes for staging and/or publishing. 
-This field is generally not required and, like `mount_dir`, only -affects the plugin container's internal view of the file system. +the container as the `CSI_ENDPOINT` environment variable. + +Some plugins also require the `stage_publish_base_dir` field, which +tells Nomad where to instruct the plugin to mount volumes for staging +and/or publishing. ### Plugin Lifecycle and State diff --git a/website/content/docs/job-specification/csi_plugin.mdx b/website/content/docs/job-specification/csi_plugin.mdx index 9593a6e1e89..04a314a634d 100644 --- a/website/content/docs/job-specification/csi_plugin.mdx +++ b/website/content/docs/job-specification/csi_plugin.mdx @@ -17,11 +17,11 @@ to claim [volumes][csi_volumes]. ```hcl csi_plugin { - id = "csi-hostpath" - type = "monolith" - mount_dir = "/csi" - stage_publish_dir = "/local/csi" - health_timeout = "30s" + id = "csi-hostpath" + type = "monolith" + mount_dir = "/csi" + stage_publish_base_dir = "/local/csi" + health_timeout = "30s" } ``` @@ -41,13 +41,15 @@ csi_plugin { `node` at the same time, and these are called `monolith` plugins. Refer to your CSI plugin's documentation. -- `mount_dir` `(string: )` - The directory path inside the +- `mount_dir` `(string: )` - The directory path inside the container where the plugin will expect a Unix domain socket for - bidirectional communication with Nomad. + bidirectional communication with Nomad. This field is typically not + required. Refer to your CSI plugin's documentation for details. -- `stage_publish_dir` `(string: )` - The base directory +- `stage_publish_base_dir` `(string: )` - The base directory path inside the container where the plugin will be instructed to - stage and publish volumes. + stage and publish volumes. This field is typically not required. + Refer to your CSI plugin's documentation for details. 
- `health_timeout` `(duration: )` - The duration that the plugin supervisor will wait before restarting an unhealthy From 94c97c6532347957c5d9cc7d7cf7956abd2f5e40 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Tue, 2 Aug 2022 13:02:11 +0000 Subject: [PATCH 7/7] backport of commit 64d48fc3644da6af3908eb0a98c39a0063280bff --- .changelog/13919.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .changelog/13919.txt diff --git a/.changelog/13919.txt b/.changelog/13919.txt new file mode 100644 index 00000000000..559d948d863 --- /dev/null +++ b/.changelog/13919.txt @@ -0,0 +1,3 @@ +```release-note:improvement +csi: Add `stage_publish_base_dir` field to `csi_plugin` block to support plugins that require a specific staging/publishing directory for mounts +```