From cf00b33058b196b4db928419dde68993b15a975b Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 15 Aug 2022 16:40:43 +0100 Subject: [PATCH 01/13] cxl/mbox: Add a check on input payload size A bug in the LSA code resulted in transfers slightly larger than the mailbox size. Let us make it easier to catch similar issues in future by adding a low level check. Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20220815154044.24733-2-Jonathan.Cameron@huawei.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 16176b9278b4e1..0c90f13870a439 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -174,7 +174,7 @@ int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in, }; int rc; - if (out_size > cxlds->payload_size) + if (in_size > cxlds->payload_size || out_size > cxlds->payload_size) return -E2BIG; rc = cxlds->mbox_send(cxlds, &mbox_cmd); From 2816e24b0510e0c185c0c46acff1ce7aa4c4443f Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Thu, 18 Aug 2022 17:42:10 +0100 Subject: [PATCH 02/13] cxl/region: Fix null pointer dereference due to pass through decoder commit Not all decoders have a commit callback. The CXL specification allows a host bridge with a single root port to have no explicit HDM decoders. Currently the region driver assumes there are none. As such the CXL core creates a special pass through decoder instance without a commit callback. Prior to this patch, the ->commit() callback was called unconditionally. Thus a configuration with 1 Host Bridge, 1 Root Port, 1 switch with multiple downstream ports below which there are multiple CXL type 3 devices results in a situation where committing the region causes a null pointer dereference. Reported-by: Bobo WL Fixes: 176baefb2eb5 ("cxl/hdm: Commit decoder state to hardware") Signed-off-by: Jonathan Cameron Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/20220818164210.2084-1-Jonathan.Cameron@huawei.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 40114801697845..c49d9a5f1091ca 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -174,7 +174,8 @@ static int cxl_region_decode_commit(struct cxl_region *cxlr) iter = to_cxl_port(iter->dev.parent)) { cxl_rr = cxl_rr_load(iter, cxlr); cxld = cxl_rr->decoder; - rc = cxld->commit(cxld); + if (cxld->commit) + rc = cxld->commit(cxld); if (rc) break; } From f010c75c05299ecd65adfd31a7841eea3476ce1f Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 15 Aug 2022 16:40:44 +0100 Subject: [PATCH 03/13] cxl/pmem: Fix failure to account for 8 byte header for writes to the device LSA. Writes to the device must include an offset and size as defined in CXL 2.0 8.2.9.5.2.4 Set LSA (Opcode 4103h) Fixes tag is non obvious as this code has been through several reworks and variable names + wasn't in use until the addition of the region code. Due to a bug in QEMU CXL emulation this overrun resulted in QEMU crashing. Reported-by: Bobo WL Signed-off-by: Jonathan Cameron Fixes: 60b8f17215de ("cxl/pmem: Translate NVDIMM label commands to CXL label commands") Link: https://lore.kernel.org/r/20220815154044.24733-3-Jonathan.Cameron@huawei.com Signed-off-by: Dan Williams --- drivers/cxl/pmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 7dc0a2fa1a6b61..115a7b79f34396 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -107,7 +107,7 @@ static int cxl_pmem_get_config_size(struct cxl_dev_state *cxlds, *cmd = (struct nd_cmd_get_config_size) { .config_size = cxlds->lsa_size, - .max_xfer = cxlds->payload_size, + .max_xfer = cxlds->payload_size - sizeof(struct cxl_mbox_set_lsa), }; return 0; From 24f0692bfd41fd207d99c993a5785c3426762046 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 20 Oct 2022 16:54:55 -0700 Subject: [PATCH 04/13] ACPI: NUMA: Add CXL CFMWS 'nodes' to the possible nodes set The ACPI CEDT.CFMWS indicates a range of possible address where new CXL regions can appear. Each range is associated with a QTG id (QoS Throttling Group id). For each range + QTG pair that is not covered by a proximity domain in the SRAT, Linux creates a new NUMA node. However, the commit that added the new ranges missed updating the node_possible mask which causes memory_group_register() to fail. Add the new nodes to the nodes_possible mask. Cc: Fixes: fd49f99c1809 ("ACPI: NUMA: Add a node and memblk for each CFMWS not in SRAT") Cc: Alison Schofield Cc: Rafael J. Wysocki Reported-by: Vishal Verma Tested-by: Vishal Verma Acked-by: Rafael J. Wysocki Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/166631003537.1167078.9373680312035292395.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/acpi/numa/srat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c index 3b818ab186be89..1f4fc5f8a819d3 100644 --- a/drivers/acpi/numa/srat.c +++ b/drivers/acpi/numa/srat.c @@ -327,6 +327,7 @@ static int __init acpi_parse_cfmws(union acpi_subtable_headers *header, pr_warn("ACPI NUMA: Failed to add memblk for CFMWS node %d [mem %#llx-%#llx]\n", node, start, end); } + node_set(node, numa_nodes_parsed); /* Set the next available fake_pxm value */ (*fake_pxm)++; From 71ee71d7adcba648077997a29a91158d20c40b09 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Tue, 1 Nov 2022 01:41:00 -0600 Subject: [PATCH 05/13] cxl/region: Fix decoder allocation crash When an intermediate port's decoders have been exhausted by existing regions, and creating a new region with the port in question in it's hierarchical path is attempted, cxl_port_attach_region() fails to find a port decoder (as would be expected), and drops into the failure / cleanup path. However, during cleanup of the region reference, a sanity check attempts to dereference the decoder, which in the above case didn't exist. This causes a NULL pointer dereference BUG. To fix this, refactor the decoder allocation and de-allocation into helper routines, and in this 'free' routine, check that the decoder, @cxld, is valid before attempting any operations on it. Cc: Suggested-by: Dan Williams Signed-off-by: Vishal Verma Reviewed-by: Dave Jiang Fixes: 384e624bb211 ("cxl/region: Attach endpoint decoders") Link: https://lore.kernel.org/r/20221101074100.1732003-1-vishal.l.verma@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 67 ++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index c49d9a5f1091ca..bb6f4fc84a3fff 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -687,18 +687,27 @@ static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port, return cxl_rr; } -static void free_region_ref(struct cxl_region_ref *cxl_rr) +static void cxl_rr_free_decoder(struct cxl_region_ref *cxl_rr) { - struct cxl_port *port = cxl_rr->port; struct cxl_region *cxlr = cxl_rr->region; struct cxl_decoder *cxld = cxl_rr->decoder; + if (!cxld) + return; + dev_WARN_ONCE(&cxlr->dev, cxld->region != cxlr, "region mismatch\n"); if (cxld->region == cxlr) { cxld->region = NULL; put_device(&cxlr->dev); } +} +static void free_region_ref(struct cxl_region_ref *cxl_rr) +{ + struct cxl_port *port = cxl_rr->port; + struct cxl_region *cxlr = cxl_rr->region; + + cxl_rr_free_decoder(cxl_rr); xa_erase(&port->regions, (unsigned long)cxlr); xa_destroy(&cxl_rr->endpoints); kfree(cxl_rr); @@ -729,6 +738,33 @@ static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr, return 0; } +static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, + struct cxl_region_ref *cxl_rr) +{ + struct cxl_decoder *cxld; + + if (port == cxled_to_port(cxled)) + cxld = &cxled->cxld; + else + cxld = cxl_region_find_decoder(port, cxlr); + if (!cxld) { + dev_dbg(&cxlr->dev, "%s: no decoder available\n", + dev_name(&port->dev)); + return -EBUSY; + } + + if (cxld->region) { + dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n", + dev_name(&port->dev), dev_name(&cxld->dev), + dev_name(&cxld->region->dev)); + return -EBUSY; + } + + cxl_rr->decoder = cxld; + return 0; +} + /** * cxl_port_attach_region() - track a region's interest in a port by endpoint * @port: port to add a new region reference 'struct cxl_region_ref' @@ -795,12 +831,6 @@ static int cxl_port_attach_region(struct cxl_port *port, cxl_rr->nr_targets++; nr_targets_inc = true; } - - /* - * The decoder for @cxlr was allocated when the region was first - * attached to @port. - */ - cxld = cxl_rr->decoder; } else { cxl_rr = alloc_region_ref(port, cxlr); if (IS_ERR(cxl_rr)) { @@ -811,26 +841,11 @@ static int cxl_port_attach_region(struct cxl_port *port, } nr_targets_inc = true; - if (port == cxled_to_port(cxled)) - cxld = &cxled->cxld; - else - cxld = cxl_region_find_decoder(port, cxlr); - if (!cxld) { - dev_dbg(&cxlr->dev, "%s: no decoder available\n", - dev_name(&port->dev)); - goto out_erase; - } - - if (cxld->region) { - dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n", - dev_name(&port->dev), dev_name(&cxld->dev), - dev_name(&cxld->region->dev)); - rc = -EBUSY; + rc = cxl_rr_alloc_decoder(port, cxlr, cxled, cxl_rr); + if (rc) goto out_erase; - } - - cxl_rr->decoder = cxld; } + cxld = cxl_rr->decoder; rc = cxl_rr_ep_add(cxl_rr, cxled); if (rc) { From 4f1aa35f1fb7d51b125487c835982af792697ecb Mon Sep 17 00:00:00 2001 From: Yu Zhe Date: Tue, 27 Sep 2022 15:02:47 +0800 Subject: [PATCH 06/13] cxl/pmem: Use size_add() against integer overflow "struct_size() + n" may cause a integer overflow, use size_add() to handle it. Signed-off-by: Yu Zhe Link: https://lore.kernel.org/r/20220927070247.23148-1-yuzhe@nfschina.com Signed-off-by: Dan Williams --- drivers/cxl/pmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 115a7b79f34396..0bac05d804bc5f 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -148,7 +148,7 @@ static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds, return -EINVAL; /* 4-byte status follows the input data in the payload */ - if (struct_size(cmd, in_buf, cmd->in_length) + 4 > buf_len) + if (size_add(struct_size(cmd, in_buf, cmd->in_length), 4) > buf_len) return -EINVAL; set_lsa = From a90accb358ae33ea982a35595573f7a045993f8b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Nov 2022 17:30:24 -0700 Subject: [PATCH 07/13] cxl/region: Fix region HPA ordering validation Some regions may not have any address space allocated. Skip them when validating HPA order otherwise a crash like the following may result: devm_cxl_add_region: cxl_acpi cxl_acpi.0: decoder3.4: created region9 BUG: kernel NULL pointer dereference, address: 0000000000000000 [..] RIP: 0010:store_targetN+0x655/0x1740 [cxl_core] [..] Call Trace: kernfs_fop_write_iter+0x144/0x200 vfs_write+0x24a/0x4d0 ksys_write+0x69/0xf0 do_syscall_64+0x3a/0x90 store_targetN+0x655/0x1740: alloc_region_ref at drivers/cxl/core/region.c:676 (inlined by) cxl_port_attach_region at drivers/cxl/core/region.c:850 (inlined by) cxl_region_attach at drivers/cxl/core/region.c:1290 (inlined by) attach_target at drivers/cxl/core/region.c:1410 (inlined by) store_targetN at drivers/cxl/core/region.c:1453 Cc: Fixes: 384e624bb211 ("cxl/region: Attach endpoint decoders") Reviewed-by: Vishal Verma Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/166752182461.947915.497032805239915067.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index bb6f4fc84a3fff..d26ca7a6beaeeb 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -658,6 +658,9 @@ static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port, xa_for_each(&port->regions, index, iter) { struct cxl_region_params *ip = &iter->region->params; + if (!ip->res) + continue; + if (ip->res->start > p->res->start) { dev_dbg(&cxlr->dev, "%s: HPA order violation %s:%pr vs %pr\n", From 0d9e734018d70cecf79e2e4c6082167160a0f13f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Nov 2022 17:30:30 -0700 Subject: [PATCH 08/13] cxl/region: Fix cxl_region leak, cleanup targets at region delete When a region is deleted any targets that have been previously assigned to that region hold references to it. Trigger those references to drop by detaching all targets at unregister_region() time. Otherwise that region object will leak as userspace has lost the ability to detach targets once region sysfs is torn down. Cc: Fixes: b9686e8c8e39 ("cxl/region: Enable the assignment of endpoint decoders to regions") Reviewed-by: Dave Jiang Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/166752183055.947915.17681995648556534844.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index d26ca7a6beaeeb..c52465e09f268a 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1557,8 +1557,19 @@ static struct cxl_region *to_cxl_region(struct device *dev) static void unregister_region(void *dev) { struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_region_params *p = &cxlr->params; + int i; device_del(dev); + + /* + * Now that region sysfs is shutdown, the parameter block is now + * read-only, so no need to hold the region rwsem to access the + * region parameters. + */ + for (i = 0; i < p->interleave_ways; i++) + detach_target(cxlr, i); + cxl_region_iomem_release(cxlr); put_device(dev); } From 4d07ae22e79ebc2d7528bbc69daa53b86981cb3a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Nov 2022 17:30:36 -0700 Subject: [PATCH 09/13] cxl/pmem: Fix cxl_pmem_region and cxl_memdev leak When a cxl_nvdimm object goes through a ->remove() event (device physically removed, nvdimm-bridge disabled, or nvdimm device disabled), then any associated regions must also be disabled. As highlighted by the cxl-create-region.sh test [1], a single device may host multiple regions, but the driver was only tracking one region at a time. This leads to a situation where only the last enabled region per nvdimm device is cleaned up properly. Other regions are leaked, and this also causes cxl_memdev reference leaks. Fix the tracking by allowing cxl_nvdimm objects to track multiple region associations. Cc: Link: https://github.com/pmem/ndctl/blob/main/test/cxl-create-region.sh [1] Reported-by: Vishal Verma Fixes: 04ad63f086d1 ("cxl/region: Introduce cxl_pmem_region objects") Reviewed-by: Dave Jiang Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/166752183647.947915.2045230911503793901.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/pmem.c | 2 + drivers/cxl/cxl.h | 2 +- drivers/cxl/pmem.c | 101 ++++++++++++++++++++++++++-------------- 3 files changed, 68 insertions(+), 37 deletions(-) diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index 1d12a8206444ef..36aa5070d90241 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -188,6 +188,7 @@ static void cxl_nvdimm_release(struct device *dev) { struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev); + xa_destroy(&cxl_nvd->pmem_regions); kfree(cxl_nvd); } @@ -230,6 +231,7 @@ static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd) dev = &cxl_nvd->dev; cxl_nvd->cxlmd = cxlmd; + xa_init(&cxl_nvd->pmem_regions); device_initialize(dev); lockdep_set_class(&dev->mutex, &cxl_nvdimm_key); device_set_pm_not_required(dev); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index f680450f0b16c8..1164ad49f3d3a7 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -423,7 +423,7 @@ struct cxl_nvdimm { struct device dev; struct cxl_memdev *cxlmd; struct cxl_nvdimm_bridge *bridge; - struct cxl_pmem_region *region; + struct xarray pmem_regions; }; struct cxl_pmem_region_mapping { diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 0bac05d804bc5f..4c627d67281a19 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -30,17 +30,20 @@ static void unregister_nvdimm(void *nvdimm) struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge; struct cxl_pmem_region *cxlr_pmem; + unsigned long index; device_lock(&cxl_nvb->dev); - cxlr_pmem = cxl_nvd->region; dev_set_drvdata(&cxl_nvd->dev, NULL); - cxl_nvd->region = NULL; - device_unlock(&cxl_nvb->dev); + xa_for_each(&cxl_nvd->pmem_regions, index, cxlr_pmem) { + get_device(&cxlr_pmem->dev); + device_unlock(&cxl_nvb->dev); - if (cxlr_pmem) { device_release_driver(&cxlr_pmem->dev); put_device(&cxlr_pmem->dev); + + device_lock(&cxl_nvb->dev); } + device_unlock(&cxl_nvb->dev); nvdimm_delete(nvdimm); cxl_nvd->bridge = NULL; @@ -366,25 +369,49 @@ static int match_cxl_nvdimm(struct device *dev, void *data) static void unregister_nvdimm_region(void *nd_region) { - struct cxl_nvdimm_bridge *cxl_nvb; - struct cxl_pmem_region *cxlr_pmem; + nvdimm_region_delete(nd_region); +} + +static int cxl_nvdimm_add_region(struct cxl_nvdimm *cxl_nvd, + struct cxl_pmem_region *cxlr_pmem) +{ + int rc; + + rc = xa_insert(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem, + cxlr_pmem, GFP_KERNEL); + if (rc) + return rc; + + get_device(&cxlr_pmem->dev); + return 0; +} + +static void cxl_nvdimm_del_region(struct cxl_nvdimm *cxl_nvd, + struct cxl_pmem_region *cxlr_pmem) +{ + /* + * It is possible this is called without a corresponding + * cxl_nvdimm_add_region for @cxlr_pmem + */ + cxlr_pmem = xa_erase(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem); + if (cxlr_pmem) + put_device(&cxlr_pmem->dev); +} + +static void release_mappings(void *data) +{ int i; + struct cxl_pmem_region *cxlr_pmem = data; + struct cxl_nvdimm_bridge *cxl_nvb = cxlr_pmem->bridge; - cxlr_pmem = nd_region_provider_data(nd_region); - cxl_nvb = cxlr_pmem->bridge; device_lock(&cxl_nvb->dev); for (i = 0; i < cxlr_pmem->nr_mappings; i++) { struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; struct cxl_nvdimm *cxl_nvd = m->cxl_nvd; - if (cxl_nvd->region) { - put_device(&cxlr_pmem->dev); - cxl_nvd->region = NULL; - } + cxl_nvdimm_del_region(cxl_nvd, cxlr_pmem); } device_unlock(&cxl_nvb->dev); - - nvdimm_region_delete(nd_region); } static void cxlr_pmem_remove_resource(void *res) @@ -422,7 +449,7 @@ static int cxl_pmem_region_probe(struct device *dev) if (!cxl_nvb->nvdimm_bus) { dev_dbg(dev, "nvdimm bus not found\n"); rc = -ENXIO; - goto err; + goto out_nvb; } memset(&mappings, 0, sizeof(mappings)); @@ -431,7 +458,7 @@ static int cxl_pmem_region_probe(struct device *dev) res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL); if (!res) { rc = -ENOMEM; - goto err; + goto out_nvb; } res->name = "Persistent Memory"; @@ -442,11 +469,11 @@ static int cxl_pmem_region_probe(struct device *dev) rc = insert_resource(&iomem_resource, res); if (rc) - goto err; + goto out_nvb; rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res); if (rc) - goto err; + goto out_nvb; ndr_desc.res = res; ndr_desc.provider_data = cxlr_pmem; @@ -462,7 +489,7 @@ static int cxl_pmem_region_probe(struct device *dev) nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL); if (!nd_set) { rc = -ENOMEM; - goto err; + goto out_nvb; } ndr_desc.memregion = cxlr->id; @@ -472,9 +499,13 @@ static int cxl_pmem_region_probe(struct device *dev) info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL); if (!info) { rc = -ENOMEM; - goto err; + goto out_nvb; } + rc = devm_add_action_or_reset(dev, release_mappings, cxlr_pmem); + if (rc) + goto out_nvd; + for (i = 0; i < cxlr_pmem->nr_mappings; i++) { struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; struct cxl_memdev *cxlmd = m->cxlmd; @@ -486,7 +517,7 @@ static int cxl_pmem_region_probe(struct device *dev) dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i, dev_name(&cxlmd->dev)); rc = -ENODEV; - goto err; + goto out_nvd; } /* safe to drop ref now with bridge lock held */ @@ -498,10 +529,17 @@ static int cxl_pmem_region_probe(struct device *dev) dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i, dev_name(&cxlmd->dev)); rc = -ENODEV; - goto err; + goto out_nvd; } - cxl_nvd->region = cxlr_pmem; - get_device(&cxlr_pmem->dev); + + /* + * Pin the region per nvdimm device as those may be released + * out-of-order with respect to the region, and a single nvdimm + * maybe associated with multiple regions + */ + rc = cxl_nvdimm_add_region(cxl_nvd, cxlr_pmem); + if (rc) + goto out_nvd; m->cxl_nvd = cxl_nvd; mappings[i] = (struct nd_mapping_desc) { .nvdimm = nvdimm, @@ -527,27 +565,18 @@ static int cxl_pmem_region_probe(struct device *dev) nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc); if (!cxlr_pmem->nd_region) { rc = -ENOMEM; - goto err; + goto out_nvd; } rc = devm_add_action_or_reset(dev, unregister_nvdimm_region, cxlr_pmem->nd_region); -out: +out_nvd: kfree(info); +out_nvb: device_unlock(&cxl_nvb->dev); put_device(&cxl_nvb->dev); return rc; - -err: - dev_dbg(dev, "failed to create nvdimm region\n"); - for (i--; i >= 0; i--) { - nvdimm = mappings[i].nvdimm; - cxl_nvd = nvdimm_provider_data(nvdimm); - put_device(&cxl_nvd->region->dev); - cxl_nvd->region = NULL; - } - goto out; } static struct cxl_driver cxl_pmem_region_driver = { From 86e86c3cb63325c12ea99fbce2cc5bafba86bb40 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Nov 2022 17:30:42 -0700 Subject: [PATCH 10/13] tools/testing/cxl: Fix some error exits Fix a few typos where 'goto err_port' was used rather than the object specific cleanup. Reviewed-by: Dave Jiang Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/166752184255.947915.16163477849330181425.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- tools/testing/cxl/test/cxl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index a072b2d3e726aa..133e4c73d37028 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -695,7 +695,7 @@ static __init int cxl_test_init(void) pdev = platform_device_alloc("cxl_switch_uport", i); if (!pdev) - goto err_port; + goto err_uport; pdev->dev.parent = &root_port->dev; rc = platform_device_add(pdev); @@ -713,7 +713,7 @@ static __init int cxl_test_init(void) pdev = platform_device_alloc("cxl_switch_dport", i); if (!pdev) - goto err_port; + goto err_dport; pdev->dev.parent = &uport->dev; rc = platform_device_add(pdev); From e41c8452b9b204689e68756a3836d1d37b617ad5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Nov 2022 17:30:48 -0700 Subject: [PATCH 11/13] tools/testing/cxl: Add a single-port host-bridge regression config Jonathan reports that region creation fails when a single-port host-bridge connects to a multi-port switch. Mock up that configuration so a fix can be tested and regression tested going forward. Reported-by: Bobo WL Reported-by: Jonathan Cameron Link: http://lore.kernel.org/r/20221010172057.00001559@huawei.com Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/166752184838.947915.2167957540894293891.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- tools/testing/cxl/test/cxl.c | 297 ++++++++++++++++++++++++++++++++--- 1 file changed, 278 insertions(+), 19 deletions(-) diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 133e4c73d37028..7edce12fd2ce58 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -12,30 +12,62 @@ #include "mock.h" #define NR_CXL_HOST_BRIDGES 2 +#define NR_CXL_SINGLE_HOST 1 #define NR_CXL_ROOT_PORTS 2 #define NR_CXL_SWITCH_PORTS 2 #define NR_CXL_PORT_DECODERS 8 static struct platform_device *cxl_acpi; static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES]; -static struct platform_device - *cxl_root_port[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS]; -static struct platform_device - *cxl_switch_uport[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS]; -static struct platform_device - *cxl_switch_dport[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS * - NR_CXL_SWITCH_PORTS]; -struct platform_device - *cxl_mem[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS * NR_CXL_SWITCH_PORTS]; +#define NR_MULTI_ROOT (NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS) +static struct platform_device *cxl_root_port[NR_MULTI_ROOT]; +static struct platform_device *cxl_switch_uport[NR_MULTI_ROOT]; +#define NR_MEM_MULTI \ + (NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS * NR_CXL_SWITCH_PORTS) +static struct platform_device *cxl_switch_dport[NR_MEM_MULTI]; + +static struct platform_device *cxl_hb_single[NR_CXL_SINGLE_HOST]; +static struct platform_device *cxl_root_single[NR_CXL_SINGLE_HOST]; +static struct platform_device *cxl_swu_single[NR_CXL_SINGLE_HOST]; +#define NR_MEM_SINGLE (NR_CXL_SINGLE_HOST * NR_CXL_SWITCH_PORTS) +static struct platform_device *cxl_swd_single[NR_MEM_SINGLE]; + +struct platform_device *cxl_mem[NR_MEM_MULTI]; +struct platform_device *cxl_mem_single[NR_MEM_SINGLE]; + + +static inline bool is_multi_bridge(struct device *dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cxl_host_bridge); i++) + if (&cxl_host_bridge[i]->dev == dev) + return true; + return false; +} + +static inline bool is_single_bridge(struct device *dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cxl_hb_single); i++) + if (&cxl_hb_single[i]->dev == dev) + return true; + return false; +} static struct acpi_device acpi0017_mock; -static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES] = { +static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST] = { [0] = { .handle = &host_bridge[0], }, [1] = { .handle = &host_bridge[1], }, + [2] = { + .handle = &host_bridge[2], + }, + }; static bool is_mock_dev(struct device *dev) @@ -45,6 +77,9 @@ static bool is_mock_dev(struct device *dev) for (i = 0; i < ARRAY_SIZE(cxl_mem); i++) if (dev == &cxl_mem[i]->dev) return true; + for (i = 0; i < ARRAY_SIZE(cxl_mem_single); i++) + if (dev == &cxl_mem_single[i]->dev) + return true; if (dev == &cxl_acpi->dev) return true; return false; @@ -66,7 +101,7 @@ static bool is_mock_adev(struct acpi_device *adev) static struct { struct acpi_table_cedt cedt; - struct acpi_cedt_chbs chbs[NR_CXL_HOST_BRIDGES]; + struct acpi_cedt_chbs chbs[NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST]; struct { struct acpi_cedt_cfmws cfmws; u32 target[1]; @@ -83,6 +118,10 @@ static struct { struct acpi_cedt_cfmws cfmws; u32 target[2]; } cfmws3; + struct { + struct acpi_cedt_cfmws cfmws; + u32 target[1]; + } cfmws4; } __packed mock_cedt = { .cedt = { .header = { @@ -107,6 +146,14 @@ static struct { .uid = 1, .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, }, + .chbs[2] = { + .header = { + .type = ACPI_CEDT_TYPE_CHBS, + .length = sizeof(mock_cedt.chbs[0]), + }, + .uid = 2, + .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, + }, .cfmws0 = { .cfmws = { .header = { @@ -167,13 +214,29 @@ static struct { }, .target = { 0, 1, }, }, + .cfmws4 = { + .cfmws = { + .header = { + .type = ACPI_CEDT_TYPE_CFMWS, + .length = sizeof(mock_cedt.cfmws4), + }, + .interleave_ways = 0, + .granularity = 4, + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + ACPI_CEDT_CFMWS_RESTRICT_PMEM, + .qtg_id = 4, + .window_size = SZ_256M * 4UL, + }, + .target = { 2 }, + }, }; -struct acpi_cedt_cfmws *mock_cfmws[4] = { +struct acpi_cedt_cfmws *mock_cfmws[] = { [0] = &mock_cedt.cfmws0.cfmws, [1] = &mock_cedt.cfmws1.cfmws, [2] = &mock_cedt.cfmws2.cfmws, [3] = &mock_cedt.cfmws3.cfmws, + [4] = &mock_cedt.cfmws4.cfmws, }; struct cxl_mock_res { @@ -304,6 +367,9 @@ static bool is_mock_bridge(struct device *dev) for (i = 0; i < ARRAY_SIZE(cxl_host_bridge); i++) if (dev == &cxl_host_bridge[i]->dev) return true; + for (i = 0; i < ARRAY_SIZE(cxl_hb_single); i++) + if (dev == &cxl_hb_single[i]->dev) + return true; return false; } @@ -326,6 +392,18 @@ static bool is_mock_port(struct device *dev) if (dev == &cxl_switch_dport[i]->dev) return true; + for (i = 0; i < ARRAY_SIZE(cxl_root_single); i++) + if (dev == &cxl_root_single[i]->dev) + return true; + + for (i = 0; i < ARRAY_SIZE(cxl_swu_single); i++) + if (dev == &cxl_swu_single[i]->dev) + return true; + + for (i = 0; i < ARRAY_SIZE(cxl_swd_single); i++) + if (dev == &cxl_swd_single[i]->dev) + return true; + if (is_cxl_memdev(dev)) return is_mock_dev(dev->parent); @@ -561,11 +639,31 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) int i, array_size; if (port->depth == 1) { - array_size = ARRAY_SIZE(cxl_root_port); - array = cxl_root_port; + if (is_multi_bridge(port->uport)) { + array_size = ARRAY_SIZE(cxl_root_port); + array = cxl_root_port; + } else if (is_single_bridge(port->uport)) { + array_size = ARRAY_SIZE(cxl_root_single); + array = cxl_root_single; + } else { + dev_dbg(&port->dev, "%s: unknown bridge type\n", + dev_name(port->uport)); + return -ENXIO; + } } else if (port->depth == 2) { - array_size = ARRAY_SIZE(cxl_switch_dport); - array = cxl_switch_dport; + struct cxl_port *parent = to_cxl_port(port->dev.parent); + + if (is_multi_bridge(parent->uport)) { + array_size = ARRAY_SIZE(cxl_switch_dport); + array = cxl_switch_dport; + } else if (is_single_bridge(parent->uport)) { + array_size = ARRAY_SIZE(cxl_swd_single); + array = cxl_swd_single; + } else { + dev_dbg(&port->dev, "%s: unknown bridge type\n", + dev_name(port->uport)); + return -ENXIO; + } } else { dev_WARN_ONCE(&port->dev, 1, "unexpected depth %d\n", port->depth); @@ -576,8 +674,12 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) struct platform_device *pdev = array[i]; struct cxl_dport *dport; - if (pdev->dev.parent != port->uport) + if (pdev->dev.parent != port->uport) { + dev_dbg(&port->dev, "%s: mismatch parent %s\n", + dev_name(port->uport), + dev_name(pdev->dev.parent)); continue; + } dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id, CXL_RESOURCE_NONE); @@ -627,6 +729,157 @@ static void mock_companion(struct acpi_device *adev, struct device *dev) #define SZ_512G (SZ_64G * 8) #endif +static __init int cxl_single_init(void) +{ + int i, rc; + + for (i = 0; i < ARRAY_SIZE(cxl_hb_single); i++) { + struct acpi_device *adev = + &host_bridge[NR_CXL_HOST_BRIDGES + i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_host_bridge", + NR_CXL_HOST_BRIDGES + i); + if (!pdev) + goto err_bridge; + + mock_companion(adev, &pdev->dev); + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_bridge; + } + + cxl_hb_single[i] = pdev; + rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj, + "physical_node"); + if (rc) + goto err_bridge; + } + + for (i = 0; i < ARRAY_SIZE(cxl_root_single); i++) { + struct platform_device *bridge = + cxl_hb_single[i % ARRAY_SIZE(cxl_hb_single)]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_root_port", + NR_MULTI_ROOT + i); + if (!pdev) + goto err_port; + pdev->dev.parent = &bridge->dev; + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_port; + } + cxl_root_single[i] = pdev; + } + + for (i = 0; i < ARRAY_SIZE(cxl_swu_single); i++) { + struct platform_device *root_port = cxl_root_single[i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_switch_uport", + NR_MULTI_ROOT + i); + if (!pdev) + goto err_uport; + pdev->dev.parent = &root_port->dev; + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_uport; + } + cxl_swu_single[i] = pdev; + } + + for (i = 0; i < ARRAY_SIZE(cxl_swd_single); i++) { + struct platform_device *uport = + cxl_swu_single[i % ARRAY_SIZE(cxl_swu_single)]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_switch_dport", + i + NR_MEM_MULTI); + if (!pdev) + goto err_dport; + pdev->dev.parent = &uport->dev; + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_dport; + } + cxl_swd_single[i] = pdev; + } + + for (i = 0; i < ARRAY_SIZE(cxl_mem_single); i++) { + struct platform_device *dport = cxl_swd_single[i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_mem", NR_MEM_MULTI + i); + if (!pdev) + goto err_mem; + pdev->dev.parent = &dport->dev; + set_dev_node(&pdev->dev, i % 2); + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_mem; + } + cxl_mem_single[i] = pdev; + } + + return 0; + +err_mem: + for (i = ARRAY_SIZE(cxl_mem_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_mem_single[i]); +err_dport: + for (i = ARRAY_SIZE(cxl_swd_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_swd_single[i]); +err_uport: + for (i = ARRAY_SIZE(cxl_swu_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_swu_single[i]); +err_port: + for (i = ARRAY_SIZE(cxl_root_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_root_single[i]); +err_bridge: + for (i = ARRAY_SIZE(cxl_hb_single) - 1; i >= 0; i--) { + struct platform_device *pdev = cxl_hb_single[i]; + + if (!pdev) + continue; + sysfs_remove_link(&pdev->dev.kobj, "physical_node"); + platform_device_unregister(cxl_hb_single[i]); + } + + return rc; +} + +static void cxl_single_exit(void) +{ + int i; + + for (i = ARRAY_SIZE(cxl_mem_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_mem_single[i]); + for (i = ARRAY_SIZE(cxl_swd_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_swd_single[i]); + for (i = ARRAY_SIZE(cxl_swu_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_swu_single[i]); + for (i = ARRAY_SIZE(cxl_root_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_root_single[i]); + for (i = ARRAY_SIZE(cxl_hb_single) - 1; i >= 0; i--) { + struct platform_device *pdev = cxl_hb_single[i]; + + if (!pdev) + continue; + sysfs_remove_link(&pdev->dev.kobj, "physical_node"); + platform_device_unregister(cxl_hb_single[i]); + } +} + static __init int cxl_test_init(void) { int rc, i; @@ -724,7 +977,6 @@ static __init int cxl_test_init(void) cxl_switch_dport[i] = pdev; } - BUILD_BUG_ON(ARRAY_SIZE(cxl_mem) != ARRAY_SIZE(cxl_switch_dport)); for (i = 0; i < ARRAY_SIZE(cxl_mem); i++) { struct platform_device *dport = cxl_switch_dport[i]; struct platform_device *pdev; @@ -743,9 +995,13 @@ static __init int cxl_test_init(void) cxl_mem[i] = pdev; } + rc = cxl_single_init(); + if (rc) + goto err_mem; + cxl_acpi = platform_device_alloc("cxl_acpi", 0); if (!cxl_acpi) - goto err_mem; + goto err_single; mock_companion(&acpi0017_mock, &cxl_acpi->dev); acpi0017_mock.dev.bus = &platform_bus_type; @@ -758,6 +1014,8 @@ static __init int cxl_test_init(void) err_add: platform_device_put(cxl_acpi); +err_single: + cxl_single_exit(); err_mem: for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) platform_device_unregister(cxl_mem[i]); @@ -793,6 +1051,7 @@ static __exit void cxl_test_exit(void) int i; platform_device_unregister(cxl_acpi); + cxl_single_exit(); for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) platform_device_unregister(cxl_mem[i]); for (i = ARRAY_SIZE(cxl_switch_dport) - 1; i >= 0; i--) From e4f6dfa9ef756a3934a4caf618b1e86e9e8e21d0 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Nov 2022 17:30:54 -0700 Subject: [PATCH 12/13] cxl/region: Fix 'distance' calculation with passthrough ports When programming port decode targets, the algorithm wants to ensure that two devices are compatible to be programmed as peers beneath a given port. A compatible peer is a target that shares the same dport, and where that target's interleave position also routes it to the same dport. Compatibility is determined by the device's interleave position being >= to distance. For example, if a given dport can only map every Nth position then positions less than N away from the last target programmed are incompatible. The @distance for the host-bridge's cxl_port in a simple dual-ported host-bridge configuration with 2 direct-attached devices is 1, i.e. An x2 region divided by 2 dports to reach 2 region targets. An x4 region under an x2 host-bridge would need 2 intervening switches where the @distance at the host bridge level is 2 (x4 region divided by 2 switches to reach 4 devices). However, the distance between peers underneath a single ported host-bridge is always zero because there is no limit to the number of devices that can be mapped. In other words, there are no decoders to program in a passthrough, all descendants are mapped and distance only starts matters for the intervening descendant ports of the passthrough port. Add tracking for the number of dports mapped to a port, and use that to detect the passthrough case for calculating @distance. Cc: Reported-by: Bobo WL Reported-by: Jonathan Cameron Link: http://lore.kernel.org/r/20221010172057.00001559@huawei.com Fixes: 27b3f8d13830 ("cxl/region: Program target lists") Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/166752185440.947915.6617495912508299445.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/port.c | 11 +++++++++-- drivers/cxl/core/region.c | 9 ++++++++- drivers/cxl/cxl.h | 2 ++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index bffde862de0bfc..e7556864ea808b 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -811,6 +811,7 @@ static struct cxl_dport *find_dport(struct cxl_port *port, int id) static int add_dport(struct cxl_port *port, struct cxl_dport *new) { struct cxl_dport *dup; + int rc; device_lock_assert(&port->dev); dup = find_dport(port, new->port_id); @@ -821,8 +822,14 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new) dev_name(dup->dport)); return -EBUSY; } - return xa_insert(&port->dports, (unsigned long)new->dport, new, - GFP_KERNEL); + + rc = xa_insert(&port->dports, (unsigned long)new->dport, new, + GFP_KERNEL); + if (rc) + return rc; + + port->nr_dports++; + return 0; } /* diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index c52465e09f268a..c0253de749458e 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -990,7 +990,14 @@ static int cxl_port_setup_targets(struct cxl_port *port, if (cxl_rr->nr_targets_set) { int i, distance; - distance = p->nr_targets / cxl_rr->nr_targets; + /* + * Passthrough ports impose no distance requirements between + * peers + */ + if (port->nr_dports == 1) + distance = 0; + else + distance = p->nr_targets / cxl_rr->nr_targets; for (i = 0; i < cxl_rr->nr_targets_set; i++) if (ep->dport == cxlsd->target[i]) { rc = check_last_peer(cxled, ep, cxl_rr, diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 1164ad49f3d3a7..ac75554b5d763d 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -457,6 +457,7 @@ struct cxl_pmem_region { * @regions: cxl_region_ref instances, regions mapped by this port * @parent_dport: dport that points to this port in the parent * @decoder_ida: allocator for decoder ids + * @nr_dports: number of entries in @dports * @hdm_end: track last allocated HDM decoder instance for allocation ordering * @commit_end: cursor to track highest committed decoder for commit ordering * @component_reg_phys: component register capability base address (optional) @@ -475,6 +476,7 @@ struct cxl_port { struct xarray regions; struct cxl_dport *parent_dport; struct ida decoder_ida; + int nr_dports; int hdm_end; int commit_end; resource_size_t component_reg_phys; From 8f401ec1c8975eabfe4c089de91cbe058deabf71 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Nov 2022 17:31:00 -0700 Subject: [PATCH 13/13] cxl/region: Recycle region ids At region creation time the next region-id is atomically cached so that there is predictability of region device names. If that region is destroyed and then a new one is created the region id increments. That ends up looking like a memory leak, or is otherwise surprising that identifiers roll forward even after destroying all previously created regions. Try to reuse rather than free old region ids at region release time. While this fixes a cosmetic issue, the needlessly advancing memory region-id gives the appearance of a memory leak, hence the "Fixes" tag, but no "Cc: stable" tag. Cc: Ben Widawsky Cc: Jonathan Cameron Fixes: 779dd20cfb56 ("cxl/region: Add region creation support") Reviewed-by: Dave Jiang Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/166752186062.947915.13200195701224993317.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index c0253de749458e..f9ae5ad284ffb0 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1534,9 +1534,24 @@ static const struct attribute_group *region_groups[] = { static void cxl_region_release(struct device *dev) { + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent); struct cxl_region *cxlr = to_cxl_region(dev); + int id = atomic_read(&cxlrd->region_id); + + /* + * Try to reuse the recently idled id rather than the cached + * next id to prevent the region id space from increasing + * unnecessarily. + */ + if (cxlr->id < id) + if (atomic_try_cmpxchg(&cxlrd->region_id, &id, cxlr->id)) { + memregion_free(id); + goto out; + } memregion_free(cxlr->id); +out: + put_device(dev->parent); kfree(cxlr); } @@ -1598,6 +1613,11 @@ static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int i device_initialize(dev); lockdep_set_class(&dev->mutex, &cxl_region_key); dev->parent = &cxlrd->cxlsd.cxld.dev; + /* + * Keep root decoder pinned through cxl_region_release to fixup + * region id allocations + */ + get_device(dev->parent); device_set_pm_not_required(dev); dev->bus = &cxl_bus_type; dev->type = &cxl_region_type;