Skip to content

Commit

Permalink
Merge #771
Browse files Browse the repository at this point in the history
771: feat(nexus): implement set ANA state for an NVMf-published Nexus r=jonathan-teh a=jonathan-teh

Implement set_ana_state in nexus_bdev::Nexus and expose that over
gRPC and the client CLI.

Fixes CAS-756, #758

Co-authored-by: Jonathan Teh <[email protected]>
  • Loading branch information
mayastor-bors and jonathan-teh committed Mar 11, 2021
2 parents 8772a94 + 1a924d3 commit 41e988f
Show file tree
Hide file tree
Showing 12 changed files with 442 additions and 22 deletions.
44 changes: 38 additions & 6 deletions doc/run.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ It will also need at least 512 2 MB Hugepages configured.

> Learn more about hugepages: [parts 1][hugepages-lwn-one], [2][hugepages-lwn-two],
> [3][hugepages-lwn-three], [4][hugepages-lwn-four], [5][hugepages-lwn-five].
In NixOS:

```nix
Expand Down Expand Up @@ -78,7 +78,7 @@ In order to use the full feature set of Mayastor, some or all of the following c
+ `nvme_loop`: NVMe Loop Device support

To load these on NixOS:

```nix
# /etc/nixos/configuration.nix
boot.kernelModules = [
Expand All @@ -91,7 +91,40 @@ In order to use the full feature set of Mayastor, some or all of the following c
```bash
modprobe nbd nvmet nvmet_rdma nvme_fabrics nvme_tcp nvme_rdma nvme_loop
```
* An NVMe device. (Typically via PCI-E through an standard slot or [M.2][m-dot-2] port)
* For Asymmetric Namespace Access (ANA) support (early preview), the following kernel build configuration enabled:
+ `CONFIG_NVME_MULTIPATH`: enables support for multipath access to NVMe subsystems

This is usually already enabled in distribution kernels, at least for RHEL/CentOS 8.2, Ubuntu 20.04 LTS, and SUSE Linux Enterprise 15.2.

On some distributions such as RHEL 8, the feature must be enabled manually:

```sh
# /etc/modprobe.d/nvme-multipath
options nvme_core multipath=1
```

followed by reloading the `nvme-core` module or rebooting.

To build this on NixOS:

```nix
# /etc/nixos/configuration.nix
boot.kernelPackages = pkgs.linuxPackages;
boot.kernelPatches = [ {
name = "nvme-multipath";
patch = null;
extraConfig = ''
NVME_MULTIPATH y
'';
} ];
```

followed by:

```sh
sudo nixos-rebuild boot
```
* An NVMe device. (Typically via PCI-E through a standard slot or [M.2][m-dot-2] port)
* A version of [`nix`][nix-install] configured as in the [build guide.][doc-build]

## Running binaries directly
Expand Down Expand Up @@ -158,7 +191,7 @@ Why these parameters?
- `--privileged` to allow controlling memory policies.

> **TODO:** We can use [control groups][control-groups] for this!
- `-v /dev:/dev:rw` is needed to get access to any raw device you might want to consume as local
- `-v /dev:/dev:rw` is needed to get access to any raw device you might want to consume as local
storage and huge pages
- `-v /dev/shm:/dev/shm:rw` is needed for a circular buffer that can trace any IO operations
as they happen
Expand All @@ -184,7 +217,7 @@ nixpkgs.overlays = [
];
systemd.services.mayastor = {
wantedBy = [ "multi-user.target" ];
wantedBy = [ "multi-user.target" ];
after = [ "network.target" ];
description = "A cloud native declarative data plane.";
serviceConfig = {
Expand Down Expand Up @@ -273,4 +306,3 @@ production Mayastor deployment and operation instructions.
[lxd]: https://linuxcontainers.org/
[libvirtd]: https://libvirt.org/index.html
[terraform-readme]: ./terraform/readme.adoc
[aarch64-branch]:
55 changes: 55 additions & 0 deletions mayastor/src/bdev/nexus/nexus_bdev.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use std::{

use futures::{channel::oneshot, future::join_all};
use nix::errno::Errno;
use rpc::mayastor::NvmeAnaState;
use serde::Serialize;
use snafu::{ResultExt, Snafu};
use tonic::{Code, Status};
Expand Down Expand Up @@ -106,6 +107,8 @@ pub enum Error {
AlreadyShared { name: String },
#[snafu(display("The nexus {} has not been shared", name))]
NotShared { name: String },
#[snafu(display("The nexus {} has not been shared over NVMf", name))]
NotSharedNvmf { name: String },
#[snafu(display("Failed to share nexus over NBD {}", name))]
ShareNbdNexus { source: NbdError, name: String },
#[snafu(display("Failed to share iscsi nexus {}", name))]
Expand Down Expand Up @@ -233,6 +236,8 @@ pub enum Error {
},
#[snafu(display("Invalid ShareProtocol value {}", sp_value))]
InvalidShareProtocol { sp_value: i32 },
#[snafu(display("Invalid NvmeAnaState value {}", ana_value))]
InvalidNvmeAnaState { ana_value: i32 },
#[snafu(display("Failed to create nexus {}", name))]
NexusCreate { name: String },
#[snafu(display("Failed to destroy nexus {}", name))]
Expand All @@ -252,6 +257,16 @@ pub enum Error {
FailedGetHandle,
#[snafu(display("Failed to create snapshot on nexus {}", name))]
FailedCreateSnapshot { name: String, source: CoreError },
#[snafu(display("NVMf subsystem error: {}", e))]
SubsysNvmfError { e: String },
}

impl From<subsys::NvmfError> for Error {
fn from(error: subsys::NvmfError) -> Self {
Error::SubsysNvmfError {
e: error.to_string(),
}
}
}

impl From<Error> for tonic::Status {
Expand All @@ -272,6 +287,9 @@ impl From<Error> for tonic::Status {
Error::NotShared {
..
} => Status::invalid_argument(e.to_string()),
Error::NotSharedNvmf {
..
} => Status::invalid_argument(e.to_string()),
Error::CreateChild {
..
} => Status::invalid_argument(e.to_string()),
Expand Down Expand Up @@ -630,6 +648,43 @@ impl Nexus {
Ok(())
}

/// get ANA state of the NVMe subsystem
pub async fn get_ana_state(&self) -> Result<NvmeAnaState, Error> {
if let Some(Protocol::Nvmf) = self.shared() {
if let Some(subsystem) = NvmfSubsystem::nqn_lookup(&self.name) {
let ana_state = subsystem.get_ana_state().await? as i32;
return NvmeAnaState::from_i32(ana_state).ok_or({
Error::InvalidNvmeAnaState {
ana_value: ana_state,
}
});
}
}

Err(Error::NotSharedNvmf {
name: self.name.clone(),
})
}

/// set ANA state of the NVMe subsystem
pub async fn set_ana_state(
&self,
ana_state: NvmeAnaState,
) -> Result<(), Error> {
if let Some(Protocol::Nvmf) = self.shared() {
if let Some(subsystem) = NvmfSubsystem::nqn_lookup(&self.name) {
subsystem.pause().await?;
let res = subsystem.set_ana_state(ana_state as u32).await;
subsystem.resume().await?;
return Ok(res?);
}
}

Err(Error::NotSharedNvmf {
name: self.name.clone(),
})
}

/// register the bdev with SPDK and set the callbacks for io channel
/// creation. Once this function is called, the device is visible and can
/// be used for IO.
Expand Down
91 changes: 91 additions & 0 deletions mayastor/src/bin/mayastor-client/nexus_cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,24 @@ pub fn subcommands<'a, 'b>() -> App<'a, 'b> {
.help("uuid for the nexus"),
);

let ana_state = SubCommand::with_name("ana_state")
.about("get or set the NVMe ANA state of the nexus")
.arg(
Arg::with_name("uuid")
.required(true)
.index(1)
.help("uuid for the nexus"),
)
.arg(
Arg::with_name("state")
.required(false)
.index(2)
.possible_value("optimized")
.possible_value("non_optimized")
.possible_value("inaccessible")
.help("NVMe ANA state of the nexus"),
);

let add = SubCommand::with_name("add")
.about("add a child")
.arg(
Expand Down Expand Up @@ -122,6 +140,7 @@ pub fn subcommands<'a, 'b>() -> App<'a, 'b> {
.subcommand(add)
.subcommand(remove)
.subcommand(unpublish)
.subcommand(ana_state)
.subcommand(list)
.subcommand(children)
.subcommand(nexus_child_cli::subcommands())
Expand All @@ -138,6 +157,7 @@ pub async fn handler(
("children", Some(args)) => nexus_children(ctx, &args).await,
("publish", Some(args)) => nexus_publish(ctx, &args).await,
("unpublish", Some(args)) => nexus_unpublish(ctx, &args).await,
("ana_state", Some(args)) => nexus_nvme_ana_state(ctx, &args).await,
("add", Some(args)) => nexus_add(ctx, &args).await,
("remove", Some(args)) => nexus_remove(ctx, &args).await,
("child", Some(args)) => nexus_child_cli::handler(ctx, args).await,
Expand Down Expand Up @@ -321,6 +341,66 @@ async fn nexus_unpublish(
Ok(())
}

/// Entry point of the `ana_state` subcommand.
///
/// With only a nexus uuid the current ANA state is fetched and printed;
/// when a non-empty `state` argument is also given, the ANA state is set
/// to that value instead.
async fn nexus_nvme_ana_state(
    ctx: Context,
    matches: &ArgMatches<'_>,
) -> Result<(), Status> {
    let uuid = matches.value_of("uuid").unwrap().to_string();
    match matches.value_of("state") {
        Some(state) if !state.is_empty() => {
            nexus_set_nvme_ana_state(ctx, uuid, state.to_string()).await
        }
        // A missing (or empty) state means "get".
        _ => nexus_get_nvme_ana_state(ctx, uuid).await,
    }
}

/// Fetch the NVMe ANA state of nexus `uuid` over gRPC and print it as its
/// CLI display string.
async fn nexus_get_nvme_ana_state(
    mut ctx: Context,
    uuid: String,
) -> Result<(), Status> {
    ctx.v2(&format!("Getting NVMe ANA state for nexus {}", uuid));

    let request = rpc::GetNvmeAnaStateRequest {
        uuid,
    };
    let reply = ctx.client.get_nvme_ana_state(request).await?;

    let state = reply.get_ref().ana_state;
    ctx.v1(ana_state_idx_to_str(state));
    Ok(())
}

/// Parse `ana_state_str` into an `rpc::NvmeAnaState` and ask the server to
/// apply it to nexus `uuid`.
///
/// A string that cannot be parsed is rejected locally with an error
/// `Status`, before any request is sent.
async fn nexus_set_nvme_ana_state(
    mut ctx: Context,
    uuid: String,
    ana_state_str: String,
) -> Result<(), Status> {
    let ana_state =
        ana_state_str.parse::<rpc::NvmeAnaState>().map_err(|_| {
            Status::new(
                Code::Internal,
                "Invalid value of NVMe ANA state".to_owned(),
            )
        })?;

    ctx.v2(&format!(
        "Setting NVMe ANA state for nexus {} to {:?}",
        uuid, ana_state
    ));

    let request = rpc::SetNvmeAnaStateRequest {
        uuid: uuid.clone(),
        ana_state: ana_state.into(),
    };
    ctx.client.set_nvme_ana_state(request).await?;

    ctx.v1(&format!(
        "Set NVMe ANA state for nexus {} to {:?}",
        uuid, ana_state
    ));
    Ok(())
}

async fn nexus_add(
mut ctx: Context,
matches: &ArgMatches<'_>,
Expand Down Expand Up @@ -363,6 +443,17 @@ async fn nexus_remove(
Ok(())
}

fn ana_state_idx_to_str(idx: i32) -> &'static str {
match rpc::NvmeAnaState::from_i32(idx).unwrap() {
rpc::NvmeAnaState::NvmeAnaInvalidState => "invalid",
rpc::NvmeAnaState::NvmeAnaOptimizedState => "optimized",
rpc::NvmeAnaState::NvmeAnaNonOptimizedState => "non_optimized",
rpc::NvmeAnaState::NvmeAnaInaccessibleState => "inaccessible",
rpc::NvmeAnaState::NvmeAnaPersistentLossState => "persistent_loss",
rpc::NvmeAnaState::NvmeAnaChangeState => "change",
}
}

fn nexus_state_to_str(idx: i32) -> &'static str {
match rpc::NexusState::from_i32(idx).unwrap() {
rpc::NexusState::NexusUnknown => "unknown",
Expand Down
21 changes: 21 additions & 0 deletions mayastor/src/bin/mayastor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,33 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let free_pages: u32 = sysfs::parse_value(&hugepage_path, "free_hugepages")?;
let nr_pages: u32 = sysfs::parse_value(&hugepage_path, "nr_hugepages")?;
let uring_supported = uring::kernel_support();
let nvme_core_path = Path::new("/sys/module/nvme_core/parameters");
let nvme_mp: String =
match sysfs::parse_value::<String>(&nvme_core_path, "multipath") {
Ok(s) => match s.as_str() {
"Y" => "yes".to_string(),
"N" => "disabled".to_string(),
u => format!("unknown value {}", u),
},
Err(e) => {
if e.kind() == std::io::ErrorKind::NotFound {
if nvme_core_path.exists() {
"not built".to_string()
} else {
"nvme not loaded".to_string()
}
} else {
format!("unknown error: {}", e)
}
}
};

info!("Starting Mayastor ..");
info!(
"kernel io_uring support: {}",
if uring_supported { "yes" } else { "no" }
);
info!("kernel nvme initiator multipath support: {}", nvme_mp);
info!("free_pages: {} nr_pages: {}", free_pages, nr_pages);

let grpc_endpoint = grpc::endpoint(args.grpc_endpoint.clone());
Expand Down
46 changes: 46 additions & 0 deletions mayastor/src/grpc/mayastor_grpc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,52 @@ impl mayastor_server::Mayastor for MayastorSvc {
.await
}

/// gRPC handler: report the NVMe ANA state of the nexus named in the
/// request.
#[instrument(level = "debug", err)]
async fn get_nvme_ana_state(
    &self,
    request: Request<GetNvmeAnaStateRequest>,
) -> GrpcResult<GetNvmeAnaStateReply> {
    let args = request.into_inner();
    // `args` is moved into the future below; keep a copy for logging.
    let uuid = args.uuid.clone();
    debug!("Getting NVMe ANA state for nexus {} ...", uuid);

    // NOTE(review): `locally!` is defined outside this view; presumably it
    // runs the future and returns early on error — confirm against its
    // definition.
    let ana_state = locally! { async move {
        let nexus = nexus_lookup(&args.uuid)?;
        nexus.get_ana_state().await
    }};

    info!("Got nexus {} NVMe ANA state {:?}", uuid, ana_state);
    let reply = GetNvmeAnaStateReply {
        ana_state: ana_state as i32,
    };
    Ok(Response::new(reply))
}

#[instrument(level = "debug", err)]
async fn set_nvme_ana_state(
&self,
request: Request<SetNvmeAnaStateRequest>,
) -> GrpcResult<Null> {
let args = request.into_inner();
let uuid = args.uuid.clone();
debug!("Setting NVMe ANA state for nexus {} ...", uuid);

let ana_state = match NvmeAnaState::from_i32(args.ana_state) {
Some(ana_state) => ana_state,
None => {
return Err(nexus_bdev::Error::InvalidNvmeAnaState {
ana_value: args.ana_state as i32,
}
.into());
}
};

locally! { async move {
nexus_lookup(&args.uuid)?.set_ana_state(ana_state).await
}};

info!("Set nexus {} NVMe ANA state {:?}", uuid, ana_state);
Ok(Response::new(Null {}))
}

#[instrument(level = "debug", err)]
async fn child_operation(
&self,
Expand Down
Loading

0 comments on commit 41e988f

Please sign in to comment.