diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 54a61276b..333133b5c 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2016-2018 Arm Limited
+// Copyright (C) 2016-2020 Arm Limited
 // CMN-600 Coherent Mesh Network PMU driver
 
 #include <linux/acpi.h>
@@ -64,15 +64,17 @@
 #define CMN__PMEVCNT0_INPUT_SEL_XP	0x04
 #define CMN__PMEVCNT0_INPUT_SEL_DEV	0x10
 #define CMN__PMEVCNT0_GLOBAL_NUM	GENMASK_ULL(18, 16)
-#define CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(n)	((n) * 4) //XXX: mysterious +1 because of combined counters; see arm_cmn_event_add()
-#define CMN__PMEVCNT_PAIRED(n)		BIT(4 + (n) + 1)
+#define CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(n)	(((n) + 1) * 4)
+#define CMN__PMEVCNT_PAIRED(n)		BIT(4 + (n) + 1) //XXX: combined counters; see arm_cmn_event_add()
 #define CMN__PMEVCNT23_COMBINED		BIT(2)
 #define CMN__PMEVCNT01_COMBINED		BIT(1)
 #define CMN_DTM_PMU_CONFIG_PMU_EN	BIT(0)
 
 #define CMN_DTM_PMEVCNT			0x220
+#define CMN_DTM_PMEVCNTSR		0x240
+
 #define CMN_DTM_NUM_COUNTERS		4
 
 /* The DTC node is where the magic happens */
@@ -87,6 +89,9 @@
 #define CMN_DT_PMEVCNT(n)	(CMN_PMU_OFFSET + _CMN_DT_CNT_REG(n))
 #define CMN_DT_PMCCNTR		(CMN_PMU_OFFSET + 0x40)
 
+#define CMN_DT_PMEVCNTSR(n)	(CMN_PMU_OFFSET + 0x50 + _CMN_DT_CNT_REG(n))
+#define CMN_DT_PMCCNTRSR	(CMN_PMU_OFFSET + 0x90)
+
 #define CMN_DT_PMCR		(CMN_PMU_OFFSET + 0x100)
 #define CMN_DT_PMCR_PMU_EN	BIT(0)
 #define CMN_DT_PMCR_CNTR_RST	BIT(5)
@@ -95,14 +100,40 @@
 #define CMN_DT_PMOVSR		(CMN_PMU_OFFSET + 0x118)
 #define CMN_DT_PMOVSR_CLR	(CMN_PMU_OFFSET + 0x120)
 
-#define CMN_DT_PMSSR		0x128
+#define CMN_DT_PMSSR		(CMN_PMU_OFFSET + 0x128)
 #define CMN_DT_PMSSR_SS_STATUS(n)	BIT(n)
 
-#define CMN_DT_PMSRR		0x130
+#define CMN_DT_PMSRR		(CMN_PMU_OFFSET + 0x130)
 #define CMN_DT_PMSRR_SS_REQ	BIT(0)
 
-//TODO: Is it worth probing dt_dbg_id.num_pmucntr?
 #define CMN_DT_NUM_COUNTERS	8
+#define CMN_MAX_DTCS		4
+
+/* The configuration master does one other useful thing */
+#define CMN_CFGM_PERIPH_ID_2	0x0010
+#define CMN_CFGM_PID2_REVISION	GENMASK(7, 4)
+
+/* Event attributes */
+#define CMN_CONFIG_TYPE		GENMASK(15, 0)
+#define CMN_CONFIG_EVENTID	GENMASK(23, 16)
+#define CMN_CONFIG_OCCUPID	GENMASK(27, 24)
+#define CMN_CONFIG_BYNODEID	BIT(31)
+#define CMN_CONFIG_NODEID	GENMASK_ULL(47, 32)
+
+#define CMN_EVENT_TYPE(event)		FIELD_GET(CMN_CONFIG_TYPE, (event)->attr.config)
+#define CMN_EVENT_EVENTID(event)	FIELD_GET(CMN_CONFIG_EVENTID, (event)->attr.config)
+#define CMN_EVENT_OCCUPID(event)	FIELD_GET(CMN_CONFIG_OCCUPID, (event)->attr.config)
+#define CMN_EVENT_BYNODEID(event)	FIELD_GET(CMN_CONFIG_BYNODEID, (event)->attr.config)
+#define CMN_EVENT_NODEID(event)		FIELD_GET(CMN_CONFIG_NODEID, (event)->attr.config)
+
+enum cmn_revision {
+	CMN_R1P0,
+	CMN_R1P1,
+	CMN_R1P2,
+	CMN_R1P3,
+	CMN_R2P0,
+	CMN_R3P0,
+};
 
 enum cmn_node_type {
 	CMN_TYPE_INVALID,
@@ -116,31 +147,16 @@ enum cmn_node_type {
 	CMN_TYPE_RNI = 0xa,
 	CMN_TYPE_RND = 0xd,
 	CMN_TYPE_RNSAM = 0xf,
-	/* CML nodes, only CMN-600r1 onwards */
 	CMN_TYPE_CXRA = 0x100,
 	CMN_TYPE_CXHA = 0x101,
 	CMN_TYPE_CXLA = 0x102,
 };
 
-struct arm_cmn {
-	struct device *dev;
-	void __iomem *base;
-	void __iomem *dtc_base;
-
-	u8 mesh_x;
-	u8 mesh_y;
-	u16 num_xps;
-	u16 num_dns;
-	struct arm_cmn_node *xps;
-	struct arm_cmn_node *dns;
-
-	u32 dns_present;
-};
-
 struct arm_cmn_node {
 	void __iomem *pmu_base;
 	u16 id, logid;
 	enum cmn_node_type type;
+	int dtc_idx; //for XPs
 
 	union {
 		u8 event[4];
 		u32 event_sel;
@@ -154,24 +170,49 @@ struct arm_cmn_node {
 		s8 input_sel[4];
 		u32 pmu_config_high;
 	};
+
+	DECLARE_BITMAP(counter_used_mask, CMN_DTM_NUM_COUNTERS);
 };
 
 struct arm_cmn_dtc {
 	struct arm_cmn *cmn;
 	void __iomem *base;
 
-	struct hlist_node cpuhp_node;
-	int cpu;
+	unsigned int irq;
+	unsigned int xp_idx;
+
+	/* Add 1 for the cycle counter, which is only available on the master DTC */
+	DECLARE_BITMAP(counter_used_mask, CMN_DT_NUM_COUNTERS + 1);
+	struct perf_event *counters[CMN_DT_NUM_COUNTERS];
+};
+
+struct arm_cmn {
+	struct device *dev;
+	void __iomem *base;
+
+	enum cmn_revision rev;
+	u8 mesh_x;
+	u8 mesh_y;
+	u16 num_xps;
+	u16 num_dns;
+	struct arm_cmn_node *xps;
+	struct arm_cmn_node *dns;
 
-	/* One of these is not like the others... */
-	struct perf_event *counters[CMN_DT_NUM_COUNTERS + 1];
+	struct arm_cmn_dtc *dtc[CMN_MAX_DTCS];
+	/* The cycle counter is a special case unique to DTC0 */
+	struct perf_event *cycles;
+
+	int cpu;
+	struct hlist_node cpuhp_node;
 
 	struct pmu pmu;
 };
 
-#define to_cmn_dtc(x)	container_of(x, struct arm_cmn_dtc, pmu)
+#define to_cmn(p)	container_of(p, struct arm_cmn, pmu)
+#define to_pmu_dtc(p)	(to_cmn(p)->dtc[0])
+#define to_event_dtc(e)	(to_cmn((e)->pmu)->dtc[(e)->/*XXX*/])
 
-/* Keep track of our dynamic hotplug state */
-static enum cpuhp_state arm_cmn_cpuhp_state;
+#define for_each_dtc(cmn, dtc, i) \
+	for (i = 0; i < CMN_MAX_DTCS && ((dtc) = (cmn)->dtc[i]); i++)
 
 struct arm_cmn_event_attr {
 	struct device_attribute attr;
@@ -181,22 +222,14 @@ struct arm_cmn_event_attr {
 	u8 occupid;
 };
 
-/* By remapping CML node types to fit, we can keep everything in one u32 */
-static u32 arm_cmn_node_type_to_bit(enum cmn_node_type type)
-{
-	if (type & 0x100)
-		type ^= 0x110;
-	return 1U << type;
-}
-
-static bool arm_cmn_has_node(struct arm_cmn *cmn, enum cmn_node_type type)
+static bool arm_cmn_has_dn(struct arm_cmn *cmn, enum cmn_node_type type)
 {
-	return cmn->dns_present & arm_cmn_node_type_to_bit(type);
-}
+	int i;
 
-static void arm_cmn_set_node(struct arm_cmn *cmn, enum cmn_node_type type)
-{
-	cmn->dns_present |= arm_cmn_node_type_to_bit(type);
+	for (i = 0; i < cmn->num_dns; i++)
+		if (cmn->dns[i].type == type)
+			return true;
+	return false;
 }
 
 static int arm_cmn_xyidbits(struct arm_cmn *cmn)
@@ -228,7 +261,11 @@ static ssize_t arm_cmn_event_show(struct device *dev,
 	struct arm_cmn_event_attr *eattr;
 
 	eattr = container_of(attr, typeof(*eattr), attr);
-	return snprintf(buf, PAGE_SIZE, "type=0x%x,eventid=0x%x,occupid=0x%x\n",
+
+	if (eattr->type == CMN_TYPE_DTC)
+		return snprintf(buf, PAGE_SIZE, "type=0x%x\n", eattr->type);
+
+	return snprintf(buf, PAGE_SIZE, "type=0x%x,eventid=0x%x,occupid=0x%x,bynodeid=1,nodeid=?\n",
 			eattr->type, eattr->eventid, eattr->occupid);
 }
 
@@ -237,20 +274,26 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
 					     struct attribute *attr, int unused)
 {
 	struct device *dev = kobj_to_dev(kobj);
-	struct arm_cmn_dtc *dtc = to_cmn_dtc(dev_get_drvdata(dev));
+	struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev));
 	struct arm_cmn_event_attr *eattr;
 
 	eattr = container_of(attr, typeof(*eattr), attr.attr);
-	if (arm_cmn_has_node(dtc->cmn, eattr->type))
-		return attr->mode;
 
-	//TODO: munge these at probe time, or do we want to preserve topology?
-	if (eattr->type == CMN_TYPE_RNI && arm_cmn_has_node(dtc->cmn, CMN_TYPE_RND))
+	/* We can't not have a cycle counter or XPs */
+	if (eattr->type == CMN_TYPE_DTC || eattr->type == CMN_TYPE_XP)
 		return attr->mode;
 
-	//TODO: Revision-specific event differences. Can we probe the revision reliably?
+	if (!arm_cmn_has_dn(cmn, eattr->type))
+		return 0;
 
-	return 0;
+	/* Revision-specific differences */
+	//TODO: check what else matters here. r0px can probably be considered not to exist
+	if (cmn->rev < CMN_R1P2) {
+		if (eattr->type == CMN_TYPE_HNF && eattr->eventid == 0x1b)
+			return 0;
+	}
+
+	return attr->mode;
 }
 
 #define _CMN_EVENT_DVM(_name, _event, _occup)	\
@@ -292,6 +335,7 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
 static struct attribute *arm_cmn_event_attrs[] = {
 	CMN_EVENT_DTC(cycles),
 
+	//XXX: urgh, apparently these conflict with either HN-I events on the equivalent selector, or possibly any HN-I events at all. FFS, documentation...
 	CMN_EVENT_DVM(rxreq_dvmop, 0x01),
 	CMN_EVENT_DVM(rxreq_dvmsync, 0x02),
 	CMN_EVENT_DVM(rxreq_dvmop_vmid_filtered, 0x03),
@@ -301,7 +345,7 @@ static struct attribute *arm_cmn_event_attrs[] = {
 	_CMN_EVENT_DVM(rxreq_trk_occupancy_dvmsync, 0x05, 2),
 
 	CMN_EVENT_HNF(cache_miss, 0x01),
-	CMN_EVENT_HNF(l3_sf_cache_access, 0x02),
+	CMN_EVENT_HNF(slc_sf_cache_access, 0x02),
 	CMN_EVENT_HNF(cache_fill, 0x03),
 	CMN_EVENT_HNF(pocq_retry, 0x04),
 	CMN_EVENT_HNF(pocq_reqs_recvd, 0x05),
@@ -309,8 +353,8 @@ static struct attribute *arm_cmn_event_attrs[] = {
 	CMN_EVENT_HNF(sf_evictions, 0x07),
 	CMN_EVENT_HNF(dir_snoops_sent, 0x08),
 	CMN_EVENT_HNF(brd_snoops_sent, 0x09),
-	CMN_EVENT_HNF(l3_eviction, 0x0a),
-	CMN_EVENT_HNF(l3_fill_invalid_way, 0x0b),
+	CMN_EVENT_HNF(slc_eviction, 0x0a),
+	CMN_EVENT_HNF(slc_fill_invalid_way, 0x0b),
 	CMN_EVENT_HNF(mc_retries, 0x0c),
 	CMN_EVENT_HNF(mc_reqs, 0x0d),
 	CMN_EVENT_HNF(qos_hh_retry, 0x0e),
@@ -330,12 +374,11 @@ static struct attribute *arm_cmn_event_attrs[] = {
 	CMN_EVENT_HNF(snp_sent, 0x18),
 	CMN_EVENT_HNF(sfbi_dir_snp_sent, 0x19),
 	CMN_EVENT_HNF(sfbi_brd_snp_sent, 0x1a),
-	CMN_EVENT_HNF(sfbi_intv, 0x1b), //TODO: r0 only, RESERVED in r1p0
-	CMN_EVENT_HNF(snp_sent_untrk, 0x1b), //TODO: r1p2 onwards
+	CMN_EVENT_HNF(snp_sent_untrk, 0x1b), /* only r1p2 onwards */
 	CMN_EVENT_HNF(intv_dirty, 0x1c),
 	CMN_EVENT_HNF(stash_snp_sent, 0x1d),
 	CMN_EVENT_HNF(stash_data_pull, 0x1e),
-	CMN_EVENT_HNF(snp_fwded, 0x1f), //TODO: r1 only
+	CMN_EVENT_HNF(snp_fwded, 0x1f),
 
 	//TODO: HN-I bandwidth events HNI_{RXDAT,TXDAT,TXREQ_TOTAL} on XP
 	CMN_EVENT_HNI(rrt_rd_occ_cnt_ovfl, 0x20),
@@ -364,7 +407,6 @@ static struct attribute *arm_cmn_event_attrs[] = {
 
 	//TODO: watchpoint events
 	//TODO: SBSX bandwidth events SBSX_{RXDAT,TXDAT,TXREQ_TOTAL} on XP
-	//TODO: no actual SBSX events on r0, these are all r1 only
 	CMN_EVENT_SBSX(rd_req, 0x01),
 	CMN_EVENT_SBSX(wr_req, 0x02),
 	CMN_EVENT_SBSX(cmo_req, 0x03),
@@ -404,7 +446,7 @@ static struct attribute *arm_cmn_event_attrs[] = {
 	CMN_EVENT_RNID(rdb_hybrid, 0x13),
 	CMN_EVENT_RNID(rdb_ord, 0x14),
 
-	//TODO: CML events, now that they appear at least vaguely documented
+	//TODO: CML events, now that they appear at least vaguely documented?
 
 	NULL
 };
 
@@ -419,32 +461,30 @@ static ssize_t arm_cmn_format_show(struct device *dev,
 {
 	struct dev_ext_attribute *eattr = container_of(attr, typeof(*eattr), attr);
+	unsigned long field = (unsigned long)eattr->var;
+	int lo = __ffs(field), hi = __fls(field);
+
+	if (lo == hi)
+		return snprintf(buf, PAGE_SIZE, "config:%d\n", lo);
 
-	return snprintf(buf, PAGE_SIZE, "%s\n", (char *)eattr->var);
+	return snprintf(buf, PAGE_SIZE, "config:%d-%d\n", lo, hi);
 }
 
 #define CMN_FORMAT_ATTR(_name, _var)				\
 	(&((struct dev_ext_attribute[]) {{			\
 		.attr = __ATTR(_name, 0444, arm_cmn_format_show, NULL),	\
-		.var = _var,					\
+		.var = (void *)_var,				\
 	}})[0].attr.attr)
 
-//TODO: make this better
+//TODO: make this even better
 static struct attribute *arm_cmn_format_attrs[] = {
-	CMN_FORMAT_ATTR(nodeid, "config:0-15"),
-	CMN_FORMAT_ATTR(x, "config:5-6"), //XXX: needs to be dynamic
-	CMN_FORMAT_ATTR(y, "config:3-4"), //XXX: needs to be dynamic
-	CMN_FORMAT_ATTR(port, "config:2"),
-	CMN_FORMAT_ATTR(devid, "config:0-1"),
-	CMN_FORMAT_ATTR(type, "config:16-31"),
-	CMN_FORMAT_ATTR(eventid, "config:32-39"),
-	CMN_FORMAT_ATTR(occupid, "config:40-43"),
+	CMN_FORMAT_ATTR(type, CMN_CONFIG_TYPE),
+	CMN_FORMAT_ATTR(eventid, CMN_CONFIG_EVENTID),
+	CMN_FORMAT_ATTR(occupid, CMN_CONFIG_OCCUPID),
+	CMN_FORMAT_ATTR(bynodeid, CMN_CONFIG_BYNODEID),
+	CMN_FORMAT_ATTR(nodeid, CMN_CONFIG_NODEID),
 	NULL
 };
 
-#define CMN_EVENT_TYPE(event)		(((event)->attr.config >> 16) & 0xffff)
-#define CMN_EVENT_NODEID(event)		(((event)->attr.config >> 0) & 0xffff)
-#define CMN_EVENT_EVENTID(event)	(((event)->attr.config >> 32) & 0xff)
-#define CMN_EVENT_OCCUPID(event)	(((event)->attr.config >> 40) & 0xf)
 
 static const struct attribute_group arm_cmn_format_attrs_group = {
 	.name = "format",
@@ -454,9 +494,9 @@ static ssize_t arm_cmn_cpumask_show(struct device *dev,
 				    struct device_attribute *attr, char *buf)
 {
-	struct arm_cmn_dtc *dtc = to_cmn_dtc(dev_get_drvdata(dev));
+	struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev));
 
-	return cpumap_print_to_pagebuf(true, buf, cpumask_of(dtc->cpu));
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(cmn->cpu));
 }
 
 static struct device_attribute arm_cmn_cpumask_attr =
@@ -471,16 +511,112 @@ static struct attribute_group arm_cmn_cpumask_attr_group = {
 	.attrs = arm_cmn_cpumask_attrs,
 };
 
+static int arm_cmn_dtc_domain_attr_idx(const struct attribute *attr)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, typeof(*eattr), attr.attr);
+	return (unsigned long)eattr->var;
+}
+
+static ssize_t arm_cmn_dtc_domain_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev));
+	unsigned long map = 0;
+	int i, idx = arm_cmn_dtc_domain_attr_idx(&attr->attr);
+
+	for (i = 0; i < cmn->num_xps; i++)
+		if (cmn->xps[i].dtc_idx == idx)
+			map |= 1UL << i;
+
+	return bitmap_print_to_pagebuf(false, buf, &map, BITS_PER_LONG);
+}
+
+static ssize_t arm_cmn_dtc_domain_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev));
+	unsigned long map, new_map, mask, diff, n, s, e, w;
+	int i, ret, idx = arm_cmn_dtc_domain_attr_idx(&attr->attr);
+
+	ret = bitmap_parse(buf, count, &new_map, BITS_PER_LONG);
+	if (ret < 0)
+		return ret;
+
+	/* Sanity checks: a domain contains its DTC and is contiguous */
+	map = 1UL << cmn->dtc[idx]->xp_idx;
+	if (!(new_map & map))
+		return -EINVAL;
+
+	n = (1UL << (cmn->mesh_x * (cmn->mesh_y - 1))) - 1;
+	s = n >> cmn->mesh_x;
+	e = (1UL << cmn->mesh_x) - 1;
+	for (i = 0; i < cmn->mesh_y; i++)
+		e |= e >> cmn->mesh_x;
+	w = e >> 1;
+	/* Grow the accepted map outwards from the DTC's XP, one hop at a time */
+	do {
+		mask = ((map & w) << 1) | ((map & e) >> 1) |
+		       ((map & s) << cmn->mesh_x) | ((map & n) >> cmn->mesh_x);
+		diff = new_map & mask;
+		if (!diff)
+			return -EINVAL;
+		map |= diff;
+		new_map &= ~diff;
+	} while (new_map);
+
+	for_each_set_bit(i, &map, BITS_PER_LONG)
+		cmn->xps[i].dtc_idx = idx;
+
+	return count;
+}
+
+static umode_t arm_cmn_dtc_domain_is_visible(struct kobject *kobj,
+					     struct attribute *attr,
+					     int unused)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev));
+	int idx = arm_cmn_dtc_domain_attr_idx(attr);
+
+	if (cmn->dtc[idx])
+		return attr->mode;
+
+	return 0;
+}
+
+#define CMN_DTC_DOMAIN_ATTR(_idx)				\
+	(&((struct dev_ext_attribute[]) {{			\
+		.attr = __ATTR(dtc_domain_##_idx, 0644, arm_cmn_dtc_domain_show, arm_cmn_dtc_domain_store), \
+		.var = (void *)_idx,				\
+	}})[0].attr.attr)
+
+static struct attribute *arm_cmn_dtc_domain_attrs[] = {
+	CMN_DTC_DOMAIN_ATTR(0),
+	CMN_DTC_DOMAIN_ATTR(1),
+	CMN_DTC_DOMAIN_ATTR(2),
+	CMN_DTC_DOMAIN_ATTR(3),
+	NULL
+};
+
+static struct attribute_group arm_cmn_dtc_domain_attr_group = {
+	.attrs = arm_cmn_dtc_domain_attrs,
+	.is_visible = arm_cmn_dtc_domain_is_visible,
+};
+
 static const struct attribute_group *arm_cmn_attr_groups[] = {
 	&arm_cmn_event_attrs_group,
 	&arm_cmn_format_attrs_group,
 	&arm_cmn_cpumask_attr_group,
+	&arm_cmn_dtc_domain_attr_group,
 	NULL
 };
 
 static void arm_cmn_pmu_enable(struct pmu *pmu)
 {
-	struct arm_cmn_dtc *dtc = to_cmn_dtc(pmu);
+	struct arm_cmn_dtc *dtc = to_pmu_dtc(pmu);
 
 	/*
 	 * Empirically, it seems we have to toggle dt_dtc_ctl.dt_en with
 	 * dt_pmcr.pmuen already set, which seems a little bit backwards,
@@ -492,38 +628,38 @@
 static void arm_cmn_pmu_disable(struct pmu *pmu)
 {
-	struct arm_cmn_dtc *dtc = to_cmn_dtc(pmu);
+	struct arm_cmn_dtc *dtc = to_pmu_dtc(pmu);
 
 	writel_relaxed(0, dtc->base + CMN_DT_DTC_CTL);
 }
 
 static u64 arm_cmn_read_ctr(struct perf_event *event)
 {
-	struct arm_cmn_dtc *dtc = to_cmn_dtc(event->pmu);
+	struct arm_cmn *cmn = to_cmn(event->pmu);
 	struct arm_cmn_node *xp = (void *)event->hw.event_base;
+	struct arm_cmn_dtc *dtc = cmn->dtc[xp ? xp->dtc_idx : 0];
 	int dtc_idx = event->hw.idx;
 	int dtm_idx = event->hw.event_base_rdpmc;
-	u32 lo, hi, old;
+	u32 val;
+	u64 count;
 
 	if (dtc_idx == CMN_DT_NUM_COUNTERS)
 		return readq_relaxed(dtc->base + CMN_DT_PMCCNTR);
 
-	//TODO: snapshots would be handy for this (and probably the only way to handle multiple DTMs)
-	hi = readl_relaxed(dtc->base + CMN_DT_PMEVCNT(dtc_idx));
-	do {
-		old = hi;
-		//XXX: combined DTM counters
-		lo = readl_relaxed(xp->pmu_base + CMN_DTM_PMEVCNT + dtm_idx * 2);
-		hi = readl_relaxed(dtc->base + CMN_DT_PMEVCNT(dtc_idx));
-	} while (hi != old);
+	val = readl_relaxed(dtc->base + CMN_DT_PMEVCNT(dtc_idx));
+	count = (u64)val << 32;
+	//XXX: combined DTM counters
+	val = readl_relaxed(xp->pmu_base + CMN_DTM_PMEVCNT + dtm_idx * 2);
+	count += val;
 
-	return (u64)hi << 32 | lo;
+	return count;
 }
 
 static void arm_cmn_write_ctr(struct perf_event *event, u64 val)
 {
-	struct arm_cmn_dtc *dtc = to_cmn_dtc(event->pmu);
+	struct arm_cmn *cmn = to_cmn(event->pmu);
 	struct arm_cmn_node *xp = (void *)event->hw.event_base;
+	struct arm_cmn_dtc *dtc = cmn->dtc[xp ? xp->dtc_idx : 0];
 	int dtc_idx = event->hw.idx;
 	int dtm_idx = event->hw.event_base_rdpmc;
@@ -536,12 +672,6 @@ static void arm_cmn_write_ctr(struct perf_event *event, u64 val)
 	}
 }
 
-static void arm_cmn_init_ctr(struct perf_event *event)
-{
-	local64_set(&event->hw.prev_count, 0);
-	arm_cmn_write_ctr(event, 0);
-}
-
 static void arm_cmn_event_read(struct perf_event *event)
 {
 	local64_t *hw_prev = &event->hw.prev_count;
@@ -560,13 +690,28 @@ static void arm_cmn_event_read(struct perf_event *event)
 	local64_add((new - prev) & mask, &event->count);
 }
 
+/* Start counters at half their range, so overflow interrupts get maximum slack */
+static void arm_cmn_event_set_period(struct perf_event *event)
+{
+	u64 val;
+
+	if (event->hw.idx == CMN_DT_NUM_COUNTERS)
+		val = ((1ULL << 40) - 1) >> 1;	/* the cycle counter is 40 bits wide */
+	else
+		val = (~0ULL) >> 1;
+
+	local64_set(&event->hw.prev_count, val);
+	arm_cmn_write_ctr(event, val);
+}
+
 static void arm_cmn_event_start(struct perf_event *event, int flags)
 {
-	struct arm_cmn_dtc *dtc = to_cmn_dtc(event->pmu);
+	struct arm_cmn_dtc *dtc = to_pmu_dtc(event->pmu);
 	struct arm_cmn_node *node = (void *)event->hw.config_base;
 
 	if (flags & PERF_EF_RELOAD)
 		arm_cmn_write_ctr(event, local64_read(&event->hw.prev_count));
+	else
+		arm_cmn_event_set_period(event);
 
 	if (!node) {
 		writel_relaxed(CMN_DT_TRACE_CONTROL_CC_ENABLE, dtc->base + CMN_DT_TRACE_CONTROL);
@@ -580,7 +725,7 @@
 static void arm_cmn_event_stop(struct perf_event *event, int flags)
 {
-	struct arm_cmn_dtc *dtc = to_cmn_dtc(event->pmu);
+	struct arm_cmn_dtc *dtc = to_pmu_dtc(event->pmu);
 	struct arm_cmn_node *node = (void *)event->hw.config_base;
 
 	if (!node) {
@@ -599,8 +744,7 @@
 static int arm_cmn_event_init(struct perf_event *event)
 {
-	struct arm_cmn_dtc *dtc = to_cmn_dtc(event->pmu);
-	struct arm_cmn *cmn = dtc->cmn;
+	struct arm_cmn *cmn = to_cmn(event->pmu);
 	enum cmn_node_type type;
 	int i, bits, x, y, dev, port;
 	u16 nodeid;
@@ -611,15 +755,7 @@
 	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
 		return -EINVAL;
 
-	if (event->attr.exclude_user ||
-	    event->attr.exclude_kernel ||
-	    event->attr.exclude_hv ||
-	    event->attr.exclude_idle ||
-	    event->attr.exclude_host ||
-	    event->attr.exclude_guest)
-		return -EINVAL;
-
-	event->cpu = dtc->cpu;
+	event->cpu = cmn->cpu;
 	if (event->cpu < 0)
 		return -EINVAL;
 
@@ -667,17 +803,19 @@
 static int arm_cmn_event_add(struct perf_event *event, int flags)
 {
-	struct arm_cmn_dtc *dtc = to_cmn_dtc(event->pmu);
-	struct arm_cmn_node *node, *xp;
+	struct arm_cmn *cmn = to_cmn(event->pmu);
+	struct arm_cmn_node *node, *xp = (void *)event->hw.event_base;
+	struct arm_cmn_dtc *dtc = cmn->dtc[xp ? xp->dtc_idx : 0];
 	int dtc_idx, dtm_idx;
 
 	if (CMN_EVENT_TYPE(event) == CMN_TYPE_DTC) {
-		if (dtc->counters[CMN_DT_NUM_COUNTERS])
+		if (test_and_set_bit(CMN_DT_NUM_COUNTERS,
+				     cmn->dtc[0]->counter_used_mask))
			return -ENOSPC;
-		dtc->counters[CMN_DT_NUM_COUNTERS] = event;
+		cmn->cycles = event;
 		event->hw.idx = CMN_DT_NUM_COUNTERS;
 		event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
-		arm_cmn_init_ctr(event);
+
 		if (flags & PERF_EF_START)
 			arm_cmn_event_start(event, 0);
 		return 0;
@@ -686,20 +824,20 @@
 
 	/* Grab a free global counter first... */
 	//TODO: we should be able to count the same event on multiple nodes at once, but the config magic to specify that will probably be fiddly...
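+	/* Take the DTC counter first; if no DTM counter is free below, we need only release it again */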
 	for (dtc_idx = 0; dtc_idx < CMN_DT_NUM_COUNTERS; dtc_idx++)
-		if (!dtc->counters[dtc_idx])
+		if (!test_and_set_bit(dtc_idx, dtc->counter_used_mask))
 			goto found_global;
 	return -ENOSPC;
 found_global:
 	/* ...then a local counter to feed it. */
-	xp = (void *)event->hw.event_base;
 	//XXX: since we can't make 16-bit register accesses, using individual DTM counters
 	//     will be a massive pain. Combining them into 32-bit counters which we can then
 	//     write individually saves a bunch of headaches in the beginning. Thus we consider
 	//     only counters 0 and 2 for allocation and accesses, but silently translate to
 	//     counters 1 and 3 respectively when pairing the overflow with the DTC counter.
 	for (dtm_idx = 0; dtm_idx < CMN_DTM_NUM_COUNTERS; dtm_idx += 2)
-		if (!(xp->pmu_config_low & CMN__PMEVCNT_PAIRED(dtm_idx)))
+		if (!test_and_set_bit(dtm_idx, xp->counter_used_mask))
 			goto found_local;
+	clear_bit(dtc_idx, dtc->counter_used_mask);
 	return -ENOSPC;
 found_local:
 	/* Go go go! */
@@ -707,7 +845,6 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
 	event->hw.idx = dtc_idx;
 	event->hw.event_base_rdpmc = dtm_idx;
 	event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
-	arm_cmn_init_ctr(event);
 
 	node = (void *)event->hw.config_base;
 	if (node->type != CMN_TYPE_XP) {
@@ -716,8 +853,8 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
 	}
 
 	xp->input_sel[dtm_idx] = event->hw.config + dtm_idx;
-	xp->pmu_config_low &= ~(CMN__PMEVCNT0_GLOBAL_NUM << CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(dtc_idx));
-	xp->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, dtc_idx) << CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(dtc_idx);
+	xp->pmu_config_low &= ~(CMN__PMEVCNT0_GLOBAL_NUM << CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(dtm_idx));
+	xp->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, dtc_idx) << CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(dtm_idx);
 	xp->pmu_config_low |= CMN__PMEVCNT_PAIRED(dtm_idx);
 	writeq_relaxed((u64)xp->pmu_config_high << 32 | xp->pmu_config_low,
 		       xp->pmu_base + CMN_DTM_PMU_CONFIG);
@@ -729,262 +866,342 @@
 static void arm_cmn_event_del(struct perf_event *event, int flags)
 {
-	struct arm_cmn_dtc *dtc = to_cmn_dtc(event->pmu);
+	struct arm_cmn *cmn = to_cmn(event->pmu);
 	struct arm_cmn_node *xp = (void *)event->hw.event_base;
+	struct arm_cmn_dtc *dtc = cmn->dtc[xp ? xp->dtc_idx : 0];
 
 	arm_cmn_event_stop(event, PERF_EF_UPDATE);
 
-	dtc->counters[event->hw.idx] = NULL;
+	if (event->hw.idx == CMN_DT_NUM_COUNTERS) {
+		clear_bit(CMN_DT_NUM_COUNTERS, cmn->dtc[0]->counter_used_mask);
+		cmn->cycles = NULL;
+	} else {
+		dtc->counters[event->hw.idx] = NULL;
+		clear_bit(event->hw.idx, dtc->counter_used_mask);
+	}
 
 	if (!xp)
 		return;
 
 	xp->pmu_config_low &= ~CMN__PMEVCNT_PAIRED(event->hw.event_base_rdpmc);
 	writel_relaxed(xp->pmu_config_low, xp->pmu_base + CMN_DTM_PMU_CONFIG);
+	clear_bit(event->hw.event_base_rdpmc, xp->counter_used_mask);
 }
 
 static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 {
-	struct arm_cmn_dtc *dtc;
+	struct arm_cmn *cmn;
 	unsigned int target;
 
-	dtc = hlist_entry_safe(node, struct arm_cmn_dtc, cpuhp_node);
-	if (cpu != dtc->cpu)
+	cmn = hlist_entry_safe(node, struct arm_cmn, cpuhp_node);
+	if (cpu != cmn->cpu)
 		return 0;
 
 	target = cpumask_any_but(cpu_online_mask, cpu);
 	if (target >= nr_cpu_ids)
 		return 0;
 
-	perf_pmu_migrate_context(&dtc->pmu, cpu, target);
-	dtc->cpu = target;
+	perf_pmu_migrate_context(&cmn->pmu, cpu, target);
+	cmn->cpu = target;
 	return 0;
 }
 
-static irqreturn_t arm_cmn_irq_handler(int irq, void *dev_id)
+static irqreturn_t arm_cmn_handle_dtc_irq(int irq, void *dev_id)
 {
-	irqreturn_t res = IRQ_NONE;
+	struct arm_cmn_dtc *dtc = dev_id;
+	struct perf_event *event = NULL;
+	irqreturn_t ret = IRQ_NONE;
+	unsigned long overflow;
+	int i;
+	u32 val;
+
+	val = readl_relaxed(dtc->base + CMN_DT_PMCR);
+	val &= ~CMN_DT_PMCR_PMU_EN;
+	writel_relaxed(val, dtc->base + CMN_DT_PMCR);
 
-	//TODO: actually enable interrupt
+	/* Bit CMN_DT_NUM_COUNTERS of PMOVSR is the cycle counter's overflow */
+	overflow = readq_relaxed(dtc->base + CMN_DT_PMOVSR);
+	for_each_set_bit(i, &overflow, CMN_DT_NUM_COUNTERS + 1) {
+		if (i == CMN_DT_NUM_COUNTERS)
+			event = dtc->cmn->cycles;
+		else
+			event = dtc->counters[i];
 
-	return res;
+		arm_cmn_event_read(event);
+		arm_cmn_event_set_period(event);
+		ret = IRQ_HANDLED;
+	}
+	writeq_relaxed(overflow, dtc->base + CMN_DT_PMOVSR_CLR);
+
+	val |= CMN_DT_PMCR_PMU_EN;
+	writel_relaxed(val, dtc->base + CMN_DT_PMCR);
+
+	return ret;
 }
 
-static int arm_cmn_init_pmu(struct arm_cmn *cmn)
+static irqreturn_t arm_cmn_handle_irq(int irq, void *dev_id)
 {
-	struct platform_device *pdev = to_platform_device(cmn->dev);
+	struct arm_cmn *cmn = dev_id;
 	struct arm_cmn_dtc *dtc;
-	unsigned long long value;
-	acpi_handle handle;
-	acpi_status status;
-	const char *name;
-	int irq, err;
+	irqreturn_t ret = IRQ_NONE;
+	int i;
 
-	if (!cmn->dtc_base) {
-		dev_err(cmn->dev, "no DTC found\n");
-		return -ENODEV;
+	for_each_dtc(cmn, dtc, i)
+		if (dtc->irq == irq)
+			ret |= arm_cmn_handle_dtc_irq(irq, dtc);
+
+	return ret;
+}
+
+/* We can reasonably accommodate DTCs of the same CMN sharing the same IRQ */
+static int arm_cmn_init_irqs(struct arm_cmn *cmn)
+{
+	struct arm_cmn_dtc *dtc;
+	irq_handler_t handler;
+	unsigned int irqs[CMN_MAX_DTCS];
+	int i, j, num_irqs = 0;
+	bool shared;
+
+	for_each_dtc(cmn, dtc, j) {
+		for (i = 0; i < num_irqs; i++)
+			if (dtc->irq == irqs[i])
+				goto next;
+		irqs[num_irqs++] = dtc->irq;
+next:
+		; /* isn't C great? */
 	}
+	shared = (num_irqs < j);
+	handler = shared ? arm_cmn_handle_irq : arm_cmn_handle_dtc_irq;
+
+	for (i = 0; i < num_irqs; i++) {
+		/* If all DTCs have distinct IRQs, irqs[] is in DTC order */
+		void *dev_id = shared ? (void *)cmn : (void *)cmn->dtc[i];
+		int err = devm_request_irq(cmn->dev, irqs[i], handler,
+					   IRQF_NOBALANCING | IRQF_NO_THREAD,
+					   dev_name(cmn->dev), dev_id);
+		if (err)
+			return err;
+
+		err = irq_set_affinity_hint(irqs[i], cpumask_of(cmn->cpu));
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static int arm_cmn_init_dtc(struct arm_cmn *cmn, int idx)
+{
+	struct arm_cmn_node *dn = (struct arm_cmn_node *)cmn->dtc[idx];
+	struct arm_cmn_dtc *dtc;
+	int irq;
+
+	if (idx > 0)
+		dev_warn(cmn->dev,
+			 "you must update the dtc_domain_%u bitmap for events to be counted correctly\n",
+			 idx);
+
 	dtc = devm_kzalloc(cmn->dev, sizeof(*dtc), GFP_KERNEL);
 	if (!dtc)
 		return -ENOMEM;
 
-	irq = platform_get_irq(pdev, 0);
-	if (irq <= 0) {
-		dev_err(cmn->dev, "missing IRQ for DTC\n");
-		return -EINVAL;
+	irq = platform_get_irq(to_platform_device(cmn->dev), idx);
+	if (irq < 0) {
+		dev_err(cmn->dev, "missing IRQ for DTC %d\n", idx);
+		return irq;
 	}
-	err = devm_request_irq(cmn->dev, irq, arm_cmn_irq_handler,
-			       IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_SHARED,
-			       dev_name(cmn->dev), dtc);
-	if (err)
-		return err;
+	dtc->cmn = cmn;
+	dtc->irq = irq;
+	dtc->base = dn->pmu_base - CMN_PMU_OFFSET;
+	cmn->dtc[idx] = dtc;
 
-	platform_set_drvdata(pdev, dtc);
+	/* We do at least know that a DTC's XP must be in that DTC's domain */
+	dtc->xp_idx = arm_cmn_node_to_xp(cmn, dn->id);
+	cmn->xps[dtc->xp_idx].dtc_idx = idx;
 
-	dtc->cmn = cmn;
-	dtc->base = cmn->dtc_base;
-	dtc->cpu = get_cpu();
-	dtc->pmu = (struct pmu) {
-		.attr_groups = arm_cmn_attr_groups,
-		.task_ctx_nr = perf_invalid_context,
-		.pmu_enable = arm_cmn_pmu_enable,
-		.pmu_disable = arm_cmn_pmu_disable,
-		.event_init = arm_cmn_event_init,
-		.add = arm_cmn_event_add,
-		.del = arm_cmn_event_del,
-		.start = arm_cmn_event_start,
-		.stop = arm_cmn_event_stop,
-		.read = arm_cmn_event_read,
-	};
+	writel_relaxed(0, dtc->base + CMN_DT_PMCR);
+	writel_relaxed(0x1ff, dtc->base + CMN_DT_PMOVSR_CLR);
+	//TODO: anything else to reset?
 
-	handle = ACPI_HANDLE(cmn->dev);
-	if (handle) {
-		status = acpi_evaluate_integer(handle, METHOD_NAME__UID, NULL,
-					       &value);
-		if (ACPI_FAILURE(status)) {
-			dev_err(cmn->dev,
-				"Failed to evaluate _UID (0x%x)\n", status);
-			return -ENODEV;
-		}
+	writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN,
		       dtc->base + CMN_DT_PMCR);
 
-		name = devm_kasprintf(cmn->dev, GFP_KERNEL, "arm_cmn_%d",
-				      (unsigned int)value);
-	} else {
-		/* FIXME: multiple instance if no ACPI? */
-		name = "arm_cmn";
-	}
+	return 0;
+}
 
-	cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CMN_ONLINE,
-					 &dtc->cpuhp_node);
-	put_cpu();
+/* During discovery, we just need to collect the DTCs sorted by logical ID */
+static void arm_cmn_add_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn)
+{
+	struct arm_cmn_dtc *dtc;
+	void *tmp = dn;
+	int i;
 
-	writel_relaxed(CMN_DT_PMCR_PMU_EN, dtc->base + CMN_DT_PMCR);
+	/* cmn->dtc[] holds node pointers until arm_cmn_init_dtc() replaces them */
+	for_each_dtc(cmn, dtc, i)
+		if (((struct arm_cmn_node *)dtc)->logid > dn->logid)
+			break;
 
-	err = perf_pmu_register(&dtc->pmu, name, -1);
-	if (err)
-		cpuhp_state_remove_instance(arm_cmn_cpuhp_state,
-					    &dtc->cpuhp_node);
+	for (; i < CMN_MAX_DTCS && tmp; i++)
+		swap(tmp, cmn->dtc[i]);
+}
 
-	return err;
+static void arm_cmn_init_node_info(struct arm_cmn_node *node, void __iomem *region)
+{
+	u64 reg = readq(region + CMN_NODE_INFO);
+
+	node->type = FIELD_GET(CMN_NI_NODE_TYPE, reg);
+	node->id = FIELD_GET(CMN_NI_NODE_ID, reg);
+	node->logid = FIELD_GET(CMN_NI_LOGICAL_ID, reg);
+
+	node->pmu_base = region + CMN_PMU_OFFSET;
+}
+
+static void arm_cmn_dump_node(struct arm_cmn *cmn, struct arm_cmn_node *node,
+			      int level)
+{
+	dev_dbg(cmn->dev, "node%*c%#06hx%*ctype:%-#6hx id:%-4hd off:%#lx\n",
+		(level * 2) + 1, ' ', node->id, 5 - (level * 2), ' ',
+		node->type, node->logid,
+		(node->pmu_base - cmn->base) - CMN_PMU_OFFSET);
 }
 
-static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset, int lvl)
+static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 {
 	void __iomem *region = cmn->base + rgn_offset;
-	struct arm_cmn_node *node;
-	unsigned int node_ptr;
-	u16 node_type, node_id, node_logid, child_count, child_poff;
+	struct arm_cmn_node cfg, *dn;
+	u16 child_count, child_poff;
 	u64 reg;
-	int i, ret;
+	int i, j;
 
-	if (lvl > 2)
-		return -EINVAL;
+	arm_cmn_init_node_info(&cfg, region);
+	arm_cmn_dump_node(cmn, &cfg, 0);
+	if (cfg.type != CMN_TYPE_CFG)
+		return -ENODEV;
 
-	reg = readq(region + CMN_NODE_INFO);
-	node_type = FIELD_GET(CMN_NI_NODE_TYPE, reg);
-	node_id = FIELD_GET(CMN_NI_NODE_ID, reg);
-	node_logid = FIELD_GET(CMN_NI_LOGICAL_ID, reg);
-
-	/*
-	 * The DevID field is always zero in node_info registers for
-	 * some reason; decode it from the node pointer instead.
-	 */
-	node_ptr = FIELD_GET(CMN_ADDR_NODE_PTR, rgn_offset);
-	node_id |= CMN_NODE_PTR_DEVID(node_ptr);
+	reg = readl(region + CMN_CFGM_PERIPH_ID_2);
+	cmn->rev = FIELD_GET(CMN_CFGM_PID2_REVISION, reg);
+	dev_dbg(cmn->dev, "periph_id_2 revision: %d\n", cmn->rev);
 
 	reg = readq(region + CMN_CHILD_INFO);
 	child_count = FIELD_GET(CMN_CI_CHILD_COUNT, reg);
 	child_poff = FIELD_GET(CMN_CI_CHILD_PTR_OFFSET, reg);
 
-	arm_cmn_set_node(cmn, node_type);
-	dev_dbg(cmn->dev, "node%*c%#06hx%*ctype:%-#6hx id:%-4hd off:%#x\n",
-		(lvl * 2) + 1, ' ', node_id, 5 - (lvl * 2), ' ',
-		node_type, node_logid, rgn_offset);
-
-	switch (node_type) {
-	case CMN_TYPE_CFG:
-		cmn->xps = devm_kcalloc(cmn->dev, child_count,
-					sizeof(*cmn->xps), GFP_KERNEL);
-		if (!cmn->xps)
-			return -ENOMEM;
-
-		cmn->num_xps = child_count;
-		goto wrangle_children;
-	case CMN_TYPE_XP:
-		node = devm_kcalloc(cmn->dev, cmn->num_dns + child_count,
-				    sizeof(*cmn->dns), GFP_KERNEL);
-		if (!node)
-			return -ENOMEM;
-
-		if (cmn->dns) {
-			memcpy(node, cmn->dns, cmn->num_dns * sizeof(*cmn->dns));
-			devm_kfree(cmn->dev, cmn->dns);
-		}
-		cmn->dns = node;
-
-		node = &cmn->xps[node_logid];
-		node->pmu_config_low = CMN__PMEVCNT23_COMBINED | CMN__PMEVCNT01_COMBINED | CMN_DTM_PMU_CONFIG_PMU_EN;
-		break;
-	case CMN_TYPE_DTC:
-		if (node_logid > 0) {
-			dev_warn(cmn->dev,
-				 "multiple DTCs not supported; events outside domain 0 will not be counted correctly\n");
-			return 0;
-		}
-		cmn->dtc_base = region;
-		return 0;
-	/* These guys have PMU events */
-	case CMN_TYPE_DVM:
-	case CMN_TYPE_HNI:
-	case CMN_TYPE_HNF:
-	case CMN_TYPE_SBSX:
-	case CMN_TYPE_RNI:
-	case CMN_TYPE_RND:
-	case CMN_TYPE_CXRA:
-	case CMN_TYPE_CXHA:
-		if (node_type == CMN_TYPE_RND) {
-			/* RND uses the same event type with RNI */
-			node_type = CMN_TYPE_RNI;
-		}
-		node = &cmn->dns[cmn->num_dns++];
-		break;
-	/* Nothing to see here */
-	case CMN_TYPE_RNSAM:
-	case CMN_TYPE_CXLA:
-		return 0;
-	default:
-		dev_err(cmn->dev, "invalid node type: 0x%hx\n", node_type);
-		return -ENODEV;
-	}
+	cmn->num_xps = child_count;
+	cmn->xps = devm_kcalloc(cmn->dev, cmn->num_xps,
+				sizeof(*cmn->xps), GFP_KERNEL);
+	if (!cmn->xps)
+		return -ENOMEM;
 
-	node->pmu_base = region + CMN_PMU_OFFSET;
-	node->type = node_type;
-	node->logid = node_logid;
-	node->id = node_id;
+	/* Pass 1: visit the XPs, enumerate their children */
+	for (i = 0; i < cmn->num_xps; i++) {
+		void __iomem *xp_region;
+		struct arm_cmn_node xp = { // XXX: do better
+			.pmu_config_low = CMN__PMEVCNT23_COMBINED | CMN__PMEVCNT01_COMBINED | CMN_DTM_PMU_CONFIG_PMU_EN,
+		};
 
-wrangle_children:
-	for (i = 0; i < child_count; i++) {
 		reg = readq(region + child_poff + i * 8);
+		xp_region = cmn->base + (reg & CMN_CHILD_NODE_ADDR);
+		arm_cmn_init_node_info(&xp, xp_region);
+		reg = readq(xp_region + CMN_CHILD_INFO);
+		child_count = FIELD_GET(CMN_CI_CHILD_COUNT, reg);
+		cmn->num_dns += child_count;
+
+		/*
+		 * Thanks to the order in which XP logical IDs seem to be
+		 * assigned, we can handily infer the mesh X dimension by
+		 * looking out for the XP at (0,1) without needing to know
+		 * the exact node ID format, which we can later derive.
+		 */
+		if (xp.id == (1 << 3))
+			cmn->mesh_x = xp.logid;
+		/*
+		 * ...and by storing the XPs sorted by logical ID, we can
+		 * also use that relationship in reverse to look up the
+		 * relevant XP by index based solely on a DN's node ID.
+		 */
+		cmn->xps[xp.logid] = xp;
+	}
+	/*
+	 * If mesh_x wasn't set above then we never saw a node at (0,1), so
+	 * we must have an Nx1 configuration.
+	 */
+	if (!cmn->mesh_x)
+		cmn->mesh_x = cmn->num_xps;
+	cmn->mesh_y = cmn->num_xps / cmn->mesh_x;
 
-		/*
-		 * Don't even try to touch anything external, since in general
-		 * we haven't a clue how to power up arbitrary CHI requesters.
-		 * As of CMN-600r1 these could only be RN-SAMs or CXLAs,
-		 * neither of which have any PMU events anyway.
-		 * (Actually, CXLAs do seem to have grown some events in r1p2,
-		 * but they don't go to regular XP DTMs, and they depend on
-		 * secure configuration which we can't easily deal with)
-		 */
-		if (reg & CMN_CHILD_NODE_EXTERNAL) {
-			dev_dbg(cmn->dev, "ignoring external node %llx\n", reg);
-			continue;
-		}
+	cmn->dns = devm_kcalloc(cmn->dev, cmn->num_dns,
+				sizeof(*cmn->dns), GFP_KERNEL);
+	if (!cmn->dns)
+		return -ENOMEM;
 
-		/* Recursion is strictly bounded at 2: CFG->XPs->devices */
-		ret = arm_cmn_discover(cmn, reg & CMN_CHILD_NODE_ADDR, lvl + 1);
-		if (ret)
-			return ret;
-	}
+	/* Pass 2: now we can actually visit the children */
+	dn = cmn->dns;
+	for (i = 0; i < cmn->num_xps; i++) {
+		void __iomem *xp_region = cmn->xps[i].pmu_base - CMN_PMU_OFFSET;
+
+		reg = readq(xp_region + CMN_CHILD_INFO);
+		child_count = FIELD_GET(CMN_CI_CHILD_COUNT, reg);
+		child_poff = FIELD_GET(CMN_CI_CHILD_PTR_OFFSET, reg);
+
+		arm_cmn_dump_node(cmn, &cmn->xps[i], 1);
+		for (j = 0; j < child_count; j++) {
+			void __iomem *dn_region;
+
+			reg = readq(xp_region + child_poff + j * 8);
+			/*
+			 * Don't even try to touch anything external, since in general
+			 * we haven't a clue how to power up arbitrary CHI requesters.
+			 * As of CMN-600r1 these could only be RN-SAMs or CXLAs,
+			 * neither of which have any PMU events anyway.
+			 * (Actually, CXLAs do seem to have grown some events in r1p2,
+			 * but they don't go to regular XP DTMs, and they depend on
+			 * secure configuration which we can't easily deal with)
+			 */
+			if (reg & CMN_CHILD_NODE_EXTERNAL) {
+				dev_dbg(cmn->dev, "ignoring external node %llx\n", reg);
+				continue;
+			}
 
-	return 0;
-}
+			dn_region = cmn->base + (reg & CMN_CHILD_NODE_ADDR);
+			arm_cmn_init_node_info(dn, dn_region);
+			arm_cmn_dump_node(cmn, dn, 2);
+
+			switch (dn->type) {
+			case CMN_TYPE_DTC:
+				arm_cmn_add_dtc(cmn, dn);
+				dn++;
+				break;
+			/* These guys have PMU events */
+			case CMN_TYPE_DVM:
+			case CMN_TYPE_HNI:
+			case CMN_TYPE_HNF:
+			case CMN_TYPE_SBSX:
+			case CMN_TYPE_RNI:
+			case CMN_TYPE_CXRA:
+			case CMN_TYPE_CXHA:
+				dn++;
+				break;
+			/* To the PMU, RN-Ds don't add anything over RN-Is, so group them together */
+			case CMN_TYPE_RND:
+				dn->type = CMN_TYPE_RNI;
+				dn++;
+				break;
+			/* Nothing to see here */
+			case CMN_TYPE_RNSAM:
+			case CMN_TYPE_CXLA:
+				break;
+			/* Something has gone horribly wrong */
+			default:
+				dev_err(cmn->dev, "invalid device node type: 0x%hx\n", dn->type);
+				return -ENODEV;
+			}
+		}
+	}
+
+	/* Correct for any nodes we skipped */
+	cmn->num_dns = dn - cmn->dns;
 
-/*
- * Thanks to the order in which XP logical IDs seem to be assigned, we can
- * handily infer the mesh X dimension by counting the number of XPs at Y=0
- * without needing to know the exact node ID format, which we can then derive.
- */
-static void arm_cmn_mesh_fixup(struct arm_cmn *cmn)
-{
-	int i;
-
-	for (i = 1; i < cmn->num_xps; i++)
-		if (cmn->xps[i].id & (1 << 3))
-			break;
-	cmn->mesh_x = i;
-	cmn->mesh_y = cmn->num_xps / i;
 	dev_dbg(cmn->dev, "mesh %dx%d, ID width %d\n",
 		cmn->mesh_x, cmn->mesh_y, arm_cmn_xyidbits(cmn));
+
+	return 0;
 }
 
 /* There is no guarantee that this will work... */
@@ -1053,14 +1270,18 @@ static int arm_cmn_of_probe(struct platform_device *pdev, struct arm_cmn *cmn)
 
 static int arm_cmn_probe(struct platform_device *pdev)
 {
+	struct arm_cmn_dtc *dtc;
 	struct arm_cmn *cmn;
-	int err, rootnode;
+	const char *name;
+	static atomic_t id;
+	int i, err, rootnode;
 
 	cmn = devm_kzalloc(&pdev->dev, sizeof(*cmn), GFP_KERNEL);
 	if (!cmn)
 		return -ENOMEM;
 
 	cmn->dev = &pdev->dev;
+	platform_set_drvdata(pdev, cmn);
 
 	if (has_acpi_companion(cmn->dev))
 		rootnode = arm_cmn_acpi_probe(pdev, cmn);
@@ -1069,30 +1290,73 @@ static int arm_cmn_probe(struct platform_device *pdev)
 	if (rootnode < 0)
 		return rootnode;
 
-	err = arm_cmn_discover(cmn, rootnode, 0);
+	err = arm_cmn_discover(cmn, rootnode);
 	if (err)
 		return err;
 
-	arm_cmn_mesh_fixup(cmn);
+	for_each_dtc(cmn, dtc, i) {
+		err = arm_cmn_init_dtc(cmn, i);
+		if (err)
+			return err;
+	}
 
-	return arm_cmn_init_pmu(cmn);
+	/* Pick our CPU before the IRQ affinity hints refer to it */
+	cmn->cpu = raw_smp_processor_id();
+
+	err = arm_cmn_init_irqs(cmn);
+	if (err)
+		return err;
+
+	cmn->pmu = (struct pmu) {
+		.module = THIS_MODULE,
+		.attr_groups = arm_cmn_attr_groups,
+		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+		.task_ctx_nr = perf_invalid_context,
+		.pmu_enable = arm_cmn_pmu_enable,
+		.pmu_disable = arm_cmn_pmu_disable,
+		.event_init = arm_cmn_event_init,
+		.add = arm_cmn_event_add,
+		.del = arm_cmn_event_del,
+		.start = arm_cmn_event_start,
+		.stop = arm_cmn_event_stop,
+		.read = arm_cmn_event_read,
+	};
+
+	if (atomic_fetch_inc(&id) == 0) {
+		name = "arm_cmn";
+	} else {
+		name = devm_kasprintf(cmn->dev, GFP_KERNEL, "arm_cmn_%d", atomic_read(&id));
+		if (!name)
+			return -ENOMEM;
+	}
+
+	err = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_CMN_ONLINE,
+				       &cmn->cpuhp_node);
+	if (err)
+		return err;
+
+	err = perf_pmu_register(&cmn->pmu, name, -1);
+	if (err)
+		cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_CMN_ONLINE,
+					    &cmn->cpuhp_node);
+	return err;
 }
 
 static int arm_cmn_remove(struct platform_device *pdev)
 {
-	struct arm_cmn_dtc *dtc = platform_get_drvdata(pdev);
+	struct arm_cmn *cmn = platform_get_drvdata(pdev);
 
-	//TODO: What's the neatest way to find the DTCs and clean them up?
-	cpuhp_state_remove_instance(arm_cmn_cpuhp_state,
-				    &dtc->cpuhp_node);
+	perf_pmu_unregister(&cmn->pmu);
+	cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_CMN_ONLINE,
+				    &cmn->cpuhp_node);
 	return 0;
 }
 
+#ifdef CONFIG_OF
 static const struct of_device_id arm_cmn_of_match[] = {
 	{ .compatible = "arm,cmn-600", },
 	{}
 };
 MODULE_DEVICE_TABLE(of, arm_cmn_of_match);
+#endif
 
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id arm_cmn_acpi_match[] = {
@@ -1105,7 +1369,7 @@ MODULE_DEVICE_TABLE(acpi, arm_cmn_acpi_match);
 
 static struct platform_driver arm_cmn_driver = {
 	.driver = {
 		.name = "arm-cmn",
-		.of_match_table = arm_cmn_of_match,
+		.of_match_table = of_match_ptr(arm_cmn_of_match),
 		.acpi_match_table = ACPI_PTR(arm_cmn_acpi_match),
 	},
 	.probe = arm_cmn_probe,
@@ -1117,24 +1381,21 @@
 static int __init arm_cmn_init(void)
 {
 	int ret;
 
 	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CMN_ONLINE,
-				      "perf/arm/cmn:online", NULL,
-				      arm_cmn_pmu_offline_cpu);
-	if (ret < 0)
+				      "perf/arm/cmn:online",
+				      NULL, arm_cmn_pmu_offline_cpu);
+	if (ret)
 		return ret;
 
-	arm_cmn_cpuhp_state = ret;
-
 	ret = platform_driver_register(&arm_cmn_driver);
 	if (ret)
-		cpuhp_remove_multi_state(arm_cmn_cpuhp_state);
-
+		cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CMN_ONLINE);
 	return ret;
 }
 
 static void __exit arm_cmn_exit(void)
 {
 	platform_driver_unregister(&arm_cmn_driver);
-	cpuhp_remove_multi_state(arm_cmn_cpuhp_state);
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CMN_ONLINE);
 }
 
 module_init(arm_cmn_init);
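
Usage sketch (editor's note, not part of the patch): with the format attributes above, an event is described by packing the CMN_CONFIG_* fields into perf_event_attr::config. The minimal userspace example below assumes the PMU registered as "arm_cmn", that HN-F is type 0x5 as in the full node-type enum, that event 0x1 is hnf_cache_miss per the table above, and uses a made-up node ID of 0x68; the real PMU type value must be read from /sys/bus/event_source/devices/arm_cmn/type, and the CPU to open on from the cpumask attribute.

	/* perf_event_open() sketch for one CMN event -- illustrative values only */
	#include <linux/perf_event.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int main(void)
	{
		struct perf_event_attr attr;
		uint64_t config = 0;
		int fd;

		/* Pack the fields exactly as CMN_CONFIG_* lays them out */
		config |= 0x5;				/* type[15:0]: HN-F (assumed enum value) */
		config |= (uint64_t)0x1 << 16;		/* eventid[23:16]: hnf_cache_miss */
		config |= (uint64_t)1 << 31;		/* bynodeid[31]: count one node only */
		config |= (uint64_t)0x68 << 32;		/* nodeid[47:32]: placeholder ID */

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = 24;		/* placeholder: read the dynamic type from sysfs */
		attr.config = config;

		/* System-wide counting on CPU 0; use the CPU from the PMU's cpumask */
		fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
		if (fd < 0) {
			perror("perf_event_open");
			return 1;
		}
		/* ... read() a u64 count from fd when done ... */
		return 0;
	}

The equivalent perf tool spelling, once the format attributes are in place, would be arm_cmn/type=0x5,eventid=0x1,bynodeid=1,nodeid=0x68/ with the same caveats about the illustrative type and node ID values.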