From 5c85d2e1bdcdc8b94a8a7be34717eb69a4ba867e Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Tue, 16 Feb 2021 17:22:48 -0800 Subject: [PATCH] NOUPSTREAM: arm64/ras: Treat single-bit ECC error in the MMU TC RAM as fatal A transient single-bit ECC error in the MMU TC RAM might lead to stale translation in the L2 TLB. Treat this condition as an uncontainable uncorrected error. Signed-off-by: Tyler Baicar Signed-off-by: Bobo --- arch/arm64/include/asm/ras.h | 3 +++ arch/arm64/kernel/ras.c | 16 ++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/arch/arm64/include/asm/ras.h b/arch/arm64/include/asm/ras.h index 36bfff4ad8b2..122a04ca103b 100644 --- a/arch/arm64/include/asm/ras.h +++ b/arch/arm64/include/asm/ras.h @@ -25,6 +25,9 @@ #define ERR_FR_8B_CEC BIT(1) #define ERR_FR_16B_CEC BIT(2) +#define ARM_N1_MISC0_UNIT_MASK 0xf +#define ARM_N1_UNIT_L2_TLB 0x2 + struct ras_ext_regs { u64 err_fr; u64 err_ctlr; diff --git a/arch/arm64/kernel/ras.c b/arch/arm64/kernel/ras.c index e146579a0d01..1960c0d1cb4b 100644 --- a/arch/arm64/kernel/ras.c +++ b/arch/arm64/kernel/ras.c @@ -52,6 +52,22 @@ void arch_arm_ras_report_error(void) regs.err_misc1); } + /* + * Workaround for ARM N1 errata where transient single-bit + * error in the MMU TC RAM might cause corruption. Treat + * these CEs as uncontainable UEs. + */ + if (i == 0 && + ((regs.err_status >> ERR_STATUS_CE_SHIFT) & ERR_STATUS_CE_MASK) == 0x2 && + (regs.err_misc0 & ARM_N1_MISC0_UNIT_MASK) == ARM_N1_UNIT_L2_TLB) { + pr_err("Upgrading L2 TLB CE to UC\n"); + regs.err_status &= ~(ERR_STATUS_CE_MASK); + regs.err_status |= ERR_STATUS_UE; + regs.err_status &= ~(ERR_STATUS_UET_MASK); + pr_err("CPU%u: Upgraded ERR%uSTATUS: 0x%llx\n", cpu_num, i, + regs.err_status); + } + trace_arm_ras_ext_event(0, cpu_num, &regs); /*