Skip to content

Commit

Permalink
powerpc/mm: Fixup tlbie vs mtpidr/mtlpidr ordering issue on POWER9
Browse files Browse the repository at this point in the history
commit 047e657 upstream.

On POWER9, under some circumstances, a broadcast TLB invalidation will
fail to invalidate the ERAT cache on some threads when there are
parallel mtpidr/mtlpidr happening on other threads of the same core.
This can cause stores to continue to go to a page after it's unmapped.

The workaround is to force an ERAT flush using a PID=0 or LPID=0 tlbie
flush. This additional TLB flush causes the ERAT cache to be
invalidated. Since we are using PID=0 or LPID=0, we don't get
filtered out by the TLB snoop filtering logic.

We still need to follow this up with another tlbie to take care of
the store vs tlbie ordering issue explained in commit:
a5d4b58 ("powerpc/mm: Fixup tlbie vs store ordering issue on
POWER9"). The presence of ERAT cache implies we can still get new
stores and they may miss store queue marking flush.

Cc: [email protected]
Signed-off-by: Aneesh Kumar K.V <[email protected]>
Signed-off-by: Michael Ellerman <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
[sandipan: Backported to v4.19]
Signed-off-by: Sandipan Das <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
  • Loading branch information
kvaneesh authored and gregkh committed Nov 10, 2019
1 parent 06e8438 commit ec199b2
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 22 deletions.
3 changes: 2 additions & 1 deletion arch/powerpc/include/asm/cputable.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTR_P9_TM_XER_SO_BUG LONG_ASM_CONST(0x0000200000000000)
#define CPU_FTR_P9_TLBIE_STQ_BUG LONG_ASM_CONST(0x0000400000000000)
#define CPU_FTR_P9_TIDR LONG_ASM_CONST(0x0000800000000000)
/* POWER9: a broadcast tlbie may fail to invalidate the ERAT on some threads */
#define CPU_FTR_P9_TLBIE_ERAT_BUG LONG_ASM_CONST(0x0001000000000000)

#ifndef __ASSEMBLY__

Expand Down Expand Up @@ -460,7 +461,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TIDR)
CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR)
#define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
#define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
#define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
Expand Down
2 changes: 2 additions & 0 deletions arch/powerpc/kernel/dt_cpu_ftrs.c
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,8 @@ static __init void update_tlbie_feature_flag(unsigned long pvr)
WARN_ONCE(1, "Unknown PVR");
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
}

cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_ERAT_BUG;
}
}

Expand Down
42 changes: 32 additions & 10 deletions arch/powerpc/kvm/book3s_hv_rm_mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,37 @@ static inline int is_mmio_hpte(unsigned long v, unsigned long r)
(HPTE_R_KEY_HI | HPTE_R_KEY_LO));
}

/*
 * Issue the extra tlbie sequences needed after an LPID-tagged invalidation
 * to work around two POWER9 errata. Each workaround is gated on its own CPU
 * feature bit and the two are applied in this fixed order.
 *
 * @rb_value: RB operand reused for the follow-up tlbie of the STQ workaround.
 * @lpid: passed as the second operand of that follow-up tlbie.
 *
 * Only the additional ptesync + tlbie pairs are issued here; no tlbsync is
 * executed in this function.
 */
static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid)
{

if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
/* Radix flush for a hash guest */

unsigned long rb,rs,prs,r,ric;

rb = PPC_BIT(52); /* IS = 2 */
rs = 0; /* lpid = 0 */
prs = 0; /* partition scoped */
r = 1; /* radix format */
ric = 0; /* RIC_FLUSH_TLB */

/*
 * Need the extra ptesync to make sure we don't
 * re-order the tlbie
 */
asm volatile("ptesync": : :"memory");
/*
 * Asm operands are numbered %0=rb, %1=r, %2=prs, %3=ric, %4=rs, so
 * the macro receives them in the order rb, rs, ric, prs, r.
 * NOTE(review): r, prs and ric use "i" constraints, which presumably
 * requires the compiler to see them as constants -- confirm.
 */
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs),
"i"(ric), "r"(rs) : "memory");
}

if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
/*
 * Store vs tlbie ordering workaround: ptesync, then one more tlbie
 * using the caller-supplied RB value and LPID.
 */
asm volatile("ptesync": : :"memory");
asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
"r" (rb_value), "r" (lpid));
}
}

static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
long npages, int global, bool need_sync)
{
Expand All @@ -452,16 +483,7 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
"r" (rbvalues[i]), "r" (kvm->arch.lpid));
}

if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
/*
* Need the extra ptesync to make sure we don't
* re-order the tlbie
*/
asm volatile("ptesync": : :"memory");
asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
"r" (rbvalues[0]), "r" (kvm->arch.lpid));
}

fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid);
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
} else {
if (need_sync)
Expand Down
29 changes: 26 additions & 3 deletions arch/powerpc/mm/hash_native_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,31 @@ static inline unsigned long ___tlbie(unsigned long vpn, int psize,
return va;
}

static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize)
static inline void fixup_tlbie_vpn(unsigned long vpn, int psize,
int apsize, int ssize)
{
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
/* Radix flush for a hash guest */

unsigned long rb,rs,prs,r,ric;

rb = PPC_BIT(52); /* IS = 2 */
rs = 0; /* lpid = 0 */
prs = 0; /* partition scoped */
r = 1; /* radix format */
ric = 0; /* RIC_FLUSH_TLB */

/*
* Need the extra ptesync to make sure we don't
* re-order the tlbie
*/
asm volatile("ptesync": : :"memory");
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs),
"i"(ric), "r"(rs) : "memory");
}


if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
/* Need the extra ptesync to ensure we don't reorder tlbie*/
asm volatile("ptesync": : :"memory");
Expand Down Expand Up @@ -287,7 +310,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize,
asm volatile("ptesync": : :"memory");
} else {
__tlbie(vpn, psize, apsize, ssize);
fixup_tlbie(vpn, psize, apsize, ssize);
fixup_tlbie_vpn(vpn, psize, apsize, ssize);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
if (lock_tlbie && !use_local)
Expand Down Expand Up @@ -860,7 +883,7 @@ static void native_flush_hash_range(unsigned long number, int local)
/*
* Just do one more with the last used values.
*/
fixup_tlbie(vpn, psize, psize, ssize);
fixup_tlbie_vpn(vpn, psize, psize, ssize);
asm volatile("eieio; tlbsync; ptesync":::"memory");

if (lock_tlbie)
Expand Down
80 changes: 72 additions & 8 deletions arch/powerpc/mm/tlb-radix.c
Original file line number Diff line number Diff line change
Expand Up @@ -215,21 +215,82 @@ static inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static inline void fixup_tlbie(void)

static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
unsigned long ap)
{
unsigned long pid = 0;
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
}

if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
}
}

/*
 * POWER9 errata fixups to be issued after a run of __tlbie_va() calls.
 *
 * @va:  address reused for the follow-up tlbie of the STQ workaround
 * @pid: PID used for that follow-up tlbie
 * @ap:  actual-page-size encoding forwarded to __tlbie_va()
 *
 * Each workaround is preceded by its own ptesync. No tlbsync is issued
 * here.
 */
static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
unsigned long ap)
{
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
/* ERAT workaround: a PID=0 flush (not a per-VA one) for the range case. */
asm volatile("ptesync": : :"memory");
__tlbie_pid(0, RIC_FLUSH_TLB);
}

if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
/* Store vs tlbie ordering workaround: repeat one tlbie for va/pid. */
asm volatile("ptesync": : :"memory");
__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
}
}

/*
 * POWER9 errata fixups to be issued after a PID-wide tlbie.
 *
 * @pid: PID used for the follow-up tlbie of the STQ workaround.
 *
 * Each workaround is preceded by its own ptesync. No tlbsync is issued
 * here.
 */
static inline void fixup_tlbie_pid(unsigned long pid)
{
/*
 * We can use any address for the invalidation, pick one which is
 * probably unused as an optimisation.
 */
unsigned long va = ((1UL << 52) - 1);

if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
/* ERAT workaround: additional flush using PID=0. */
asm volatile("ptesync": : :"memory");
__tlbie_pid(0, RIC_FLUSH_TLB);
}

if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
/* Store vs tlbie ordering workaround: one tlbie on the dummy address. */
asm volatile("ptesync": : :"memory");
__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
}
}


/*
 * POWER9 errata fixups to be issued after a single __tlbie_lpid_va().
 *
 * @va:   address reused for both follow-up invalidations
 * @lpid: LPID used for the follow-up tlbie of the STQ workaround
 * @ap:   actual-page-size encoding forwarded to __tlbie_lpid_va()
 *
 * Each workaround is preceded by its own ptesync. No tlbsync is issued
 * here.
 */
static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
unsigned long ap)
{
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
/* ERAT workaround: same address, but with LPID=0. */
asm volatile("ptesync": : :"memory");
__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
}

if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
/* Store vs tlbie ordering workaround: repeat the tlbie for va/lpid. */
asm volatile("ptesync": : :"memory");
__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
}
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
/*
* We can use any address for the invalidation, pick one which is
* probably unused as an optimisation.
*/
unsigned long va = ((1UL << 52) - 1);

if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_lpid(0, RIC_FLUSH_TLB);
}

if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
Expand Down Expand Up @@ -277,15 +338,16 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
switch (ric) {
case RIC_FLUSH_TLB:
__tlbie_pid(pid, RIC_FLUSH_TLB);
fixup_tlbie_pid(pid);
break;
case RIC_FLUSH_PWC:
__tlbie_pid(pid, RIC_FLUSH_PWC);
break;
case RIC_FLUSH_ALL:
default:
__tlbie_pid(pid, RIC_FLUSH_ALL);
fixup_tlbie_pid(pid);
}
fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

Expand Down Expand Up @@ -329,15 +391,16 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
switch (ric) {
case RIC_FLUSH_TLB:
__tlbie_lpid(lpid, RIC_FLUSH_TLB);
fixup_tlbie_lpid(lpid);
break;
case RIC_FLUSH_PWC:
__tlbie_lpid(lpid, RIC_FLUSH_PWC);
break;
case RIC_FLUSH_ALL:
default:
__tlbie_lpid(lpid, RIC_FLUSH_ALL);
fixup_tlbie_lpid(lpid);
}
fixup_tlbie_lpid(lpid);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

Expand Down Expand Up @@ -410,6 +473,8 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,

for (addr = start; addr < end; addr += page_size)
__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);

fixup_tlbie_va_range(addr - page_size, pid, ap);
}

static inline void _tlbie_va(unsigned long va, unsigned long pid,
Expand All @@ -419,7 +484,7 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,

asm volatile("ptesync": : :"memory");
__tlbie_va(va, pid, ap, ric);
fixup_tlbie();
fixup_tlbie_va(va, pid, ap);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

Expand All @@ -430,7 +495,7 @@ static inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,

asm volatile("ptesync": : :"memory");
__tlbie_lpid_va(va, lpid, ap, ric);
fixup_tlbie_lpid(lpid);
fixup_tlbie_lpid_va(va, lpid, ap);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

Expand All @@ -442,7 +507,6 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
if (also_pwc)
__tlbie_pid(pid, RIC_FLUSH_PWC);
__tlbie_va_range(start, end, pid, page_size, psize);
fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

Expand Down Expand Up @@ -773,7 +837,7 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
if (gflush)
__tlbie_va_range(gstart, gend, pid,
PUD_SIZE, MMU_PAGE_1G);
fixup_tlbie();

asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
}
Expand Down

0 comments on commit ec199b2

Please sign in to comment.