From 48145b62563a9ae1ad631d6b576c6b9a798fcbec Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Thu, 22 Apr 2021 17:04:07 +0900 Subject: [PATCH 1/7] nvme: fix controller ioctl through ns_head In multipath case, we should consider namespace attachment with controllers in a subsystem when we find out the live controller for the namespace. This patch manually reverted the commit 3557a4409701 ("nvme: don't bother to look up a namespace for controller ioctls") with few more updates to nvme_ns_head_chr_ioctl which has been newly updated. Fixes: 3557a4409701 ("nvme: don't bother to look up a namespace for controller ioctls") Cc: Christoph Hellwig Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 22 ------------- drivers/nvme/host/ioctl.c | 65 ++++++++++++++++++++++++--------------- drivers/nvme/host/nvme.h | 1 - 3 files changed, 41 insertions(+), 47 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b6f7815fa2390..c1c196459d798 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1999,28 +1999,6 @@ static const struct block_device_operations nvme_bdev_ops = { .pr_ops = &nvme_pr_ops, }; -#ifdef CONFIG_NVME_MULTIPATH -struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys) -{ - struct nvme_ctrl *ctrl; - int ret; - - ret = mutex_lock_killable(&nvme_subsystems_lock); - if (ret) - return ERR_PTR(ret); - list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { - if (ctrl->state == NVME_CTRL_LIVE) - goto found; - } - mutex_unlock(&nvme_subsystems_lock); - return ERR_PTR(-EWOULDBLOCK); -found: - nvme_get_ctrl(ctrl); - mutex_unlock(&nvme_subsystems_lock); - return ctrl; -} -#endif /* CONFIG_NVME_MULTIPATH */ - static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) { unsigned long timeout = diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 502f8e4a2a1f0..9557ead02de10 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -370,41 +370,45 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } #ifdef CONFIG_NVME_MULTIPATH -static int nvme_ns_head_ctrl_ioctl(struct nvme_ns_head *head, - unsigned int cmd, void __user *argp) +static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, + void __user *argp, struct nvme_ns_head *head, int srcu_idx) { - struct nvme_ctrl *ctrl = nvme_find_get_live_ctrl(head->subsys); + struct nvme_ctrl *ctrl = ns->ctrl; int ret; - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); - ret = nvme_ctrl_ioctl(ctrl, cmd, argp); - nvme_put_ctrl(ctrl); - return ret; -} + nvme_get_ctrl(ns->ctrl); + nvme_put_ns_from_disk(head, srcu_idx); + ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp); -static int nvme_ns_head_ns_ioctl(struct nvme_ns_head *head, - unsigned int cmd, void __user *argp) -{ - int srcu_idx = srcu_read_lock(&head->srcu); - struct nvme_ns *ns = nvme_find_path(head); - int ret = -EWOULDBLOCK; - - if (ns) - ret = nvme_ns_ioctl(ns, cmd, argp); - srcu_read_unlock(&head->srcu, srcu_idx); + nvme_put_ctrl(ctrl); return ret; } int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { - struct nvme_ns_head *head = bdev->bd_disk->private_data; + struct nvme_ns_head *head = NULL; void __user *argp = (void __user *)arg; + struct nvme_ns *ns; + int srcu_idx, ret; + + ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); + if (unlikely(!ns)) + return -EWOULDBLOCK; + /* + * Handle ioctls that apply to the controller instead of the namespace + * seperately and drop the ns SRCU reference early. This avoids a + * deadlock when deleting namespaces using the passthrough interface. + */ if (is_ctrl_ioctl(cmd)) - return nvme_ns_head_ctrl_ioctl(head, cmd, argp); - return nvme_ns_head_ns_ioctl(head, cmd, argp); + ret = nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); + else { + ret = nvme_ns_ioctl(ns, cmd, argp); + nvme_put_ns_from_disk(head, srcu_idx); + } + + return ret; } long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, @@ -414,10 +418,23 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); void __user *argp = (void __user *)arg; + struct nvme_ns *ns; + int srcu_idx, ret; + + srcu_idx = srcu_read_lock(&head->srcu); + ns = nvme_find_path(head); + if (!ns) { + srcu_read_unlock(&head->srcu, srcu_idx); + return -EWOULDBLOCK; + } if (is_ctrl_ioctl(cmd)) - return nvme_ns_head_ctrl_ioctl(head, cmd, argp); - return nvme_ns_head_ns_ioctl(head, cmd, argp); + return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx); + + ret = nvme_ns_ioctl(ns, cmd, argp); + nvme_put_ns_from_disk(head, srcu_idx); + + return ret; } #endif /* CONFIG_NVME_MULTIPATH */ diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 773dde5b231da..c1e086a0bc3f6 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -664,7 +664,6 @@ struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk, void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx); bool nvme_tryget_ns_head(struct nvme_ns_head *head); void nvme_put_ns_head(struct nvme_ns_head *head); -struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys); int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device, const struct file_operations *fops, struct module *owner); void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device); From 4c74d1f80381996027bacc4f6c554948ef9bf374 Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Tue, 27 Apr 2021 12:17:46 +0530 Subject: [PATCH 2/7] nvme: add nvme_get_ns helper Add a helper to avoid opencoding ns->kref increment. Decrement is already done via nvme_put_ns helper. Signed-off-by: Kanchan Joshi Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c1c196459d798..f2d1f2d699b8e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -576,6 +576,11 @@ static void nvme_free_ns(struct kref *kref) kfree(ns); } +static inline bool nvme_get_ns(struct nvme_ns *ns) +{ + return kref_get_unless_zero(&ns->kref); +} + void nvme_put_ns(struct nvme_ns *ns) { kref_put(&ns->kref, nvme_free_ns); @@ -1494,7 +1499,7 @@ static int nvme_ns_open(struct nvme_ns *ns) /* should never be called due to GENHD_FL_HIDDEN */ if (WARN_ON_ONCE(nvme_ns_head_multipath(ns->head))) goto fail; - if (!kref_get_unless_zero(&ns->kref)) + if (!nvme_get_ns(ns)) goto fail; if (!try_module_get(ns->ctrl->ops->module)) goto fail_put_ns; @@ -3582,7 +3587,7 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) { if (ns->head->ns_id == nsid) { - if (!kref_get_unless_zero(&ns->kref)) + if (!nvme_get_ns(ns)) continue; ret = ns; break; From 51ad06cd698cb9ff280a769ed8d57210a1d2266d Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Tue, 27 Apr 2021 12:17:47 +0530 Subject: [PATCH 3/7] nvme: avoid memset for passthrough requests nvme_clear_nvme_request() clears the nvme_command, which is unncessary for passthrough requests as nvme_command is overwritten immediately. Move clearing part from this helper to the caller, so that double memset for passthrough requests is avoided. Signed-off-by: Kanchan Joshi Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f2d1f2d699b8e..61e122cecc2a5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -589,9 +589,6 @@ EXPORT_SYMBOL_NS_GPL(nvme_put_ns, NVME_TARGET_PASSTHRU); static inline void nvme_clear_nvme_request(struct request *req) { - struct nvme_command *cmd = nvme_req(req)->cmd; - - memset(cmd, 0, sizeof(*cmd)); nvme_req(req)->retries = 0; nvme_req(req)->flags = 0; req->rq_flags |= RQF_DONTPREP; @@ -903,8 +900,10 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req) struct nvme_command *cmd = nvme_req(req)->cmd; blk_status_t ret = BLK_STS_OK; - if (!(req->rq_flags & RQF_DONTPREP)) + if (!(req->rq_flags & RQF_DONTPREP)) { nvme_clear_nvme_request(req); + memset(cmd, 0, sizeof(*cmd)); + } switch (req_op(req)) { case REQ_OP_DRV_IN: From a97157440e1e69c35d7804d3b72da0c626ef28e6 Mon Sep 17 00:00:00 2001 From: Tao Chiu Date: Mon, 26 Apr 2021 10:53:10 +0800 Subject: [PATCH 4/7] nvme: move the fabrics queue ready check routines to core queue_rq() in pci only checks if the dispatched queue (nvmeq) is ready, e.g. not being suspended. Since nvme_alloc_admin_tags() in reset flow restarts the admin queue, users are able to submit admin commands to a controller before reset_work() completes. Commands submitted under this condition may interfere with commands that performs identify, IO queue setup in reset_work(), and may result in a hang described in the following patch. As seen in the fabrics, user commands are prevented from being executed under inproper controller states. We may reuse this logic to maintain a clear admin queue during reset_work(). Signed-off-by: Tao Chiu Signed-off-by: Cody Wong Reviewed-by: Leon Chien Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 60 +++++++++++++++++++++++++++++++++++++ drivers/nvme/host/fabrics.c | 57 ----------------------------------- drivers/nvme/host/fabrics.h | 13 -------- drivers/nvme/host/fc.c | 4 +-- drivers/nvme/host/nvme.h | 15 ++++++++++ drivers/nvme/host/rdma.c | 4 +-- drivers/nvme/host/tcp.c | 4 +-- drivers/nvme/target/loop.c | 4 +-- 8 files changed, 83 insertions(+), 78 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 61e122cecc2a5..522c9b229f80e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -639,6 +639,66 @@ static struct request *nvme_alloc_request_qid(struct request_queue *q, return req; } +/* + * For something we're not in a state to send to the device the default action + * is to busy it and retry it after the controller state is recovered. However, + * if the controller is deleting or if anything is marked for failfast or + * nvme multipath it is immediately failed. + * + * Note: commands used to initialize the controller will be marked for failfast. + * Note: nvme cli/ioctl commands are marked for failfast. + */ +blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl, + struct request *rq) +{ + if (ctrl->state != NVME_CTRL_DELETING_NOIO && + ctrl->state != NVME_CTRL_DEAD && + !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) && + !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) + return BLK_STS_RESOURCE; + return nvme_host_path_error(rq); +} +EXPORT_SYMBOL_GPL(nvme_fail_nonready_command); + +bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, + bool queue_live) +{ + struct nvme_request *req = nvme_req(rq); + + /* + * currently we have a problem sending passthru commands + * on the admin_q if the controller is not LIVE because we can't + * make sure that they are going out after the admin connect, + * controller enable and/or other commands in the initialization + * sequence. until the controller will be LIVE, fail with + * BLK_STS_RESOURCE so that they will be rescheduled. + */ + if (rq->q == ctrl->admin_q && (req->flags & NVME_REQ_USERCMD)) + return false; + + if (ctrl->ops->flags & NVME_F_FABRICS) { + /* + * Only allow commands on a live queue, except for the connect + * command, which is require to set the queue live in the + * appropinquate states. + */ + switch (ctrl->state) { + case NVME_CTRL_CONNECTING: + if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) && + req->cmd->fabrics.fctype == nvme_fabrics_type_connect) + return true; + break; + default: + break; + case NVME_CTRL_DEAD: + return false; + } + } + + return queue_live; +} +EXPORT_SYMBOL_GPL(__nvme_check_ready); + static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable) { struct nvme_command c; diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 13c2747e3d00d..a2bb7fc63a735 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -533,63 +533,6 @@ static struct nvmf_transport_ops *nvmf_lookup_transport( return NULL; } -/* - * For something we're not in a state to send to the device the default action - * is to busy it and retry it after the controller state is recovered. However, - * if the controller is deleting or if anything is marked for failfast or - * nvme multipath it is immediately failed. - * - * Note: commands used to initialize the controller will be marked for failfast. - * Note: nvme cli/ioctl commands are marked for failfast. - */ -blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl, - struct request *rq) -{ - if (ctrl->state != NVME_CTRL_DELETING_NOIO && - ctrl->state != NVME_CTRL_DEAD && - !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) && - !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) - return BLK_STS_RESOURCE; - return nvme_host_path_error(rq); -} -EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command); - -bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, - bool queue_live) -{ - struct nvme_request *req = nvme_req(rq); - - /* - * currently we have a problem sending passthru commands - * on the admin_q if the controller is not LIVE because we can't - * make sure that they are going out after the admin connect, - * controller enable and/or other commands in the initialization - * sequence. until the controller will be LIVE, fail with - * BLK_STS_RESOURCE so that they will be rescheduled. - */ - if (rq->q == ctrl->admin_q && (req->flags & NVME_REQ_USERCMD)) - return false; - - /* - * Only allow commands on a live queue, except for the connect command, - * which is require to set the queue live in the appropinquate states. - */ - switch (ctrl->state) { - case NVME_CTRL_CONNECTING: - if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) && - req->cmd->fabrics.fctype == nvme_fabrics_type_connect) - return true; - break; - default: - break; - case NVME_CTRL_DEAD: - return false; - } - - return queue_live; -} -EXPORT_SYMBOL_GPL(__nvmf_check_ready); - static const match_table_t opt_tokens = { { NVMF_OPT_TRANSPORT, "transport=%s" }, { NVMF_OPT_TRADDR, "traddr=%s" }, diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 888b108d87a40..d7f7974dc2082 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -184,20 +184,7 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops); void nvmf_free_options(struct nvmf_ctrl_options *opts); int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); -blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl, - struct request *rq); -bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, - bool queue_live); bool nvmf_ip_options_match(struct nvme_ctrl *ctrl, struct nvmf_ctrl_options *opts); -static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, - bool queue_live) -{ - if (likely(ctrl->state == NVME_CTRL_LIVE || - ctrl->state == NVME_CTRL_DELETING)) - return true; - return __nvmf_check_ready(ctrl, rq, queue_live); -} - #endif /* _NVME_FABRICS_H */ diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 9b9b7be0f412c..d9ab9e7871d0f 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2766,8 +2766,8 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, blk_status_t ret; if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE || - !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) - return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq); + !nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) + return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq); ret = nvme_setup_cmd(ns, rq); if (ret) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index c1e086a0bc3f6..05f31a2c64bb2 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -638,6 +638,21 @@ struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, blk_mq_req_flags_t flags); void nvme_cleanup_cmd(struct request *req); blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req); +blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl, + struct request *req); +bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, + bool queue_live); + +static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, + bool queue_live) +{ + if (likely(ctrl->state == NVME_CTRL_LIVE)) + return true; + if (ctrl->ops->flags & NVME_F_FABRICS && + ctrl->state == NVME_CTRL_DELETING) + return true; + return __nvme_check_ready(ctrl, rq, queue_live); +} int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buf, unsigned bufflen); int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 660c774fa9e16..37943dc4c2c11 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2050,8 +2050,8 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, WARN_ON_ONCE(rq->tag < 0); - if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) - return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq); + if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) + return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq); dev = queue->device->dev; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 75435cdb156ca..0222e23f5936d 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2338,8 +2338,8 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx, bool queue_ready = test_bit(NVME_TCP_Q_LIVE, &queue->flags); blk_status_t ret; - if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) - return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq); + if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) + return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq); ret = nvme_tcp_setup_cmd_pdu(ns, rq); if (unlikely(ret)) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 6665da3b634fa..74b3b150e1a57 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -138,8 +138,8 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, bool queue_ready = test_bit(NVME_LOOP_Q_LIVE, &queue->flags); blk_status_t ret; - if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready)) - return nvmf_fail_nonready_command(&queue->ctrl->ctrl, req); + if (!nvme_check_ready(&queue->ctrl->ctrl, req, queue_ready)) + return nvme_fail_nonready_command(&queue->ctrl->ctrl, req); ret = nvme_setup_cmd(ns, req); if (ret) From d4060d2be1132596154f31f4d57976bd103e969d Mon Sep 17 00:00:00 2001 From: Tao Chiu Date: Mon, 26 Apr 2021 10:53:55 +0800 Subject: [PATCH 5/7] nvme-pci: fix controller reset hang when racing with nvme_timeout reset_work() in nvme-pci may hang forever in the following scenario: 1) A reset caused by a command timeout occurs due to a controller being temporarily irresponsive. 2) nvme_reset_work() restarts admin queue at nvme_alloc_admin_tags(). At the same time, a user-submitted admin command is queued and waiting for completion. Then, reset_work() changes its state to CONNECTING, and submits an identify command. 3) However, the controller does still not respond to any command, causing a timeout being fired at the user-submitted command. Unfortunately, nvme_timeout() does not see the completion on cq, and any timeout that takes place under CONNECTING state causes a controller shutdown. 4) Normally, the identify command in reset_work() would be canceled with SC_HOST_ABORTED by nvme_dev_disable(), then reset_work can tear down the controller accordingly. But the controller happens to return online and respond the identify command before nvme_dev_disable() should have been reaped it off. 5) reset_work() continues to setup_io_queues() as it observes no error in init_identify(). However, the admin queue has already been quiesced in dev_disable(). Thus, any following commands would be blocked forever in blk_execute_rq(). This can be fixed by restricting usercmd commands when controller is not in a LIVE state in nvme_queue_rq(), as what has been done previously in fabrics. ``` nvme_reset_work(): | nvme_alloc_admin_tags() | | nvme_submit_user_cmd(): nvme_init_identify(): | ... __nvme_submit_sync_cmd(): | ... | ... ---------------------------------------> nvme_timeout(): (Controller starts reponding commands) | nvme_dev_disable(, true): nvme_setup_io_queues(): | __nvme_submit_sync_cmd(): | (hung in blk_execute_rq | since run_hw_queue sees | queue quiesced) | ``` Signed-off-by: Tao Chiu Signed-off-by: Cody Wong Reviewed-by: Leon Chien Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 09d4c5f99fc30..a29b170701fc6 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -933,6 +933,9 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags))) return BLK_STS_IOERR; + if (!nvme_check_ready(&dev->ctrl, req, true)) + return nvme_fail_nonready_command(&dev->ctrl, req); + ret = nvme_setup_cmd(ns, req); if (ret) return ret; From ce86dad222e9074d3ec174ec81cb463a770331b5 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mon, 3 May 2021 19:03:03 +0200 Subject: [PATCH 6/7] nvme-multipath: reset bdev to ns head when failover When a request finally completes in end_io() after it has failed over, the bdev pointer can be stale and thus the system can crash. Set the bdev back to ns head, so the request is map to an active path when resubmitted. Signed-off-by: Daniel Wagner Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 0d0de3433f377..0551796517e61 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -70,6 +70,7 @@ void nvme_failover_req(struct request *req) struct nvme_ns *ns = req->q->queuedata; u16 status = nvme_req(req)->status & 0x7ff; unsigned long flags; + struct bio *bio; nvme_mpath_clear_current_path(ns); @@ -84,6 +85,8 @@ void nvme_failover_req(struct request *req) } spin_lock_irqsave(&ns->head->requeue_lock, flags); + for (bio = req->bio; bio; bio = bio->bi_next) + bio_set_dev(bio, ns->head->disk->part0); blk_steal_bios(&ns->head->requeue_list, req); spin_unlock_irqrestore(&ns->head->requeue_lock, flags); From 4a20342572f66c5b20a1ee680f5ac0a13703748f Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 28 Apr 2021 21:25:58 -0700 Subject: [PATCH 7/7] nvmet: remove unsupported command noise Nothing can stop a host from submitting invalid commands. The target just needs to respond with an appropriate status, but that's not a target error. Demote invalid command messages to the debug level so these events don't spam the kernel logs. Reported-by: Yi Zhang Signed-off-by: Keith Busch Reviewed-by: Klaus Jensen Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index d2a26ff3f7b31..e7a367cf6d367 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -307,7 +307,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) case NVME_LOG_ANA: return nvmet_execute_get_log_page_ana(req); } - pr_err("unhandled lid %d on qid %d\n", + pr_debug("unhandled lid %d on qid %d\n", req->cmd->get_log_page.lid, req->sq->qid); req->error_loc = offsetof(struct nvme_get_log_page_command, lid); nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR); @@ -659,7 +659,7 @@ static void nvmet_execute_identify(struct nvmet_req *req) return nvmet_execute_identify_desclist(req); } - pr_err("unhandled identify cns %d on qid %d\n", + pr_debug("unhandled identify cns %d on qid %d\n", req->cmd->identify.cns, req->sq->qid); req->error_loc = offsetof(struct nvme_identify, cns); nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR); @@ -977,7 +977,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req) return 0; } - pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode, + pr_debug("unhandled cmd %d on qid %d\n", cmd->common.opcode, req->sq->qid); req->error_loc = offsetof(struct nvme_common_command, opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;