From f845034db70aecfb0214a73c605c4c9e14a75895 Mon Sep 17 00:00:00 2001
From: James Morse
Date: Tue, 29 Jan 2019 18:48:58 +0000
Subject: [PATCH] BACKPORT: mm/memory-failure: Add memory_failure_queue_kick()

The GHES code calls memory_failure_queue() from IRQ context to schedule
work on the current CPU so that memory_failure() can sleep.

For synchronous memory errors, the arch code needs to know that any
signals memory_failure() will trigger are pending before it returns to
user-space, possibly when exiting from the IRQ.

Add a helper to kick the memory failure queue, to ensure the scheduled
work has happened. This has to be called from process context, so the
caller may have been migrated away from the original CPU. Pass the CPU
the work was queued on.

Change memory_failure_work_func() to permit being called on the 'wrong'
CPU.

This patch is needed because Quicksilver firmware-first error handling
uses the SDEI notification type for communication between trusted
firmware and the OS. It adds the NMI and SDEI functionality needed so
that the SDEI path in the kernel through APEI acts as an NMI and is
properly wired up to the APEI interfaces.

Backported from: https://patchwork.kernel.org/patch/10786963/ Signed-off-by: James Morse Signed-off-by: Tyler Baicar --- include/linux/mm.h | 1 + mm/memory-failure.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 37da62c21..240104e42 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2745,6 +2745,7 @@ enum mf_flags { }; extern int memory_failure(unsigned long pfn, int flags); extern void memory_failure_queue(unsigned long pfn, int flags); +extern void memory_failure_queue_kick(int cpu); extern int unpoison_memory(unsigned long pfn); extern int get_hwpoison_page(struct page *page); #define put_hwpoison_page(page) put_page(page) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 9d142b9b8..2bb844a28 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1359,7 +1359,7 @@ static void memory_failure_work_func(struct work_struct *work) unsigned long proc_flags; int gotten; - mf_cpu = this_cpu_ptr(&memory_failure_cpu); + mf_cpu = container_of(work, struct memory_failure_cpu, work); for (;;) { spin_lock_irqsave(&mf_cpu->lock, proc_flags); gotten = kfifo_get(&mf_cpu->fifo, &entry); @@ -1373,6 +1373,19 @@ static void memory_failure_work_func(struct work_struct *work) } } +/* + * Process memory_failure work queued on the specified CPU. + * Used to avoid return-to-userspace racing with the memory_failure workqueue. + */ +void memory_failure_queue_kick(int cpu) +{ + struct memory_failure_cpu *mf_cpu; + + mf_cpu = &per_cpu(memory_failure_cpu, cpu); + cancel_work_sync(&mf_cpu->work); + memory_failure_work_func(&mf_cpu->work); +} + static int __init memory_failure_init(void) { struct memory_failure_cpu *mf_cpu;