Pull request #468 (Closed): wants to merge 26 commits

Changes from 1 commit. Commits in this pull request:
c57cdfe
Replace bitmap PID implementation with IDR API implementation
gs0510 Aug 21, 2017
68c4af5
idr_preload returns in a non-interruptible context, while
Aug 25, 2017
9cb5ff7
Add locking around the idr_remove calls in the out_free
Aug 25, 2017
ec07a32
The function free_pid is called with irqs and preemption disabled,
Aug 27, 2017
ec9ae10
GFP_KERNEL can sleep, and cannot be used from an atomic context.
Aug 27, 2017
d7885c2
If a task exits before procfs is mounted, proc_flush_task_mnt will
Aug 28, 2017
ca41e8f
Ensure PID allocation starts at 1 during bootup, and wraps back around
Aug 28, 2017
bd03215
The strange looking do { ... } while loop in find_ge_pid turns
Aug 28, 2017
546e83a
x86/asm/64: Clear AC on NMI entries
amluto Aug 8, 2017
b64dade
x86/smpboot: Unbreak CPU0 hotplug
vittyvk Aug 3, 2017
bb90296
x86/cpufeature, kvm/svm: Rename (shorten) the new "virtualized VMSAVE…
suryasaimadhu Aug 1, 2017
fb21e77
x86: Mark various structures and functions as 'static'
Aug 10, 2017
38aa83e
x86/mtrr: Prevent CPU hotplug lock recursion
KAGA-KOKO Aug 15, 2017
39d8752
x86: Fix norandmaps/ADDR_NO_RANDOMIZE
oleg-nesterov Aug 15, 2017
5adf3f6
x86/elf: Remove the unnecessary ADDR_NO_RANDOMIZE checks
oleg-nesterov Aug 15, 2017
52db166
x86/boot/64/clang: Use fixup_pointer() to access 'next_early_pgt'
ramosian-glider Aug 16, 2017
bd711e6
x86: Constify attribute_group structures
ArvindYadavCs Jul 20, 2017
93c9bf4
Sanitize 'move_pages()' permission checks
torvalds Aug 20, 2017
d0e11c3
Linux 4.13-rc6
torvalds Aug 20, 2017
57d5003
Replace bitmap PID implementation with IDR API implementation
gs0510 Aug 21, 2017
395168a
idr_preload returns in a non-interruptible context, while
Aug 25, 2017
55c21c5
Add locking around the idr_remove calls in the out_free
Aug 25, 2017
6efcefe
The function free_pid is called with irqs and preemption disabled,
Aug 27, 2017
fde89ce
GFP_KERNEL can sleep, and cannot be used from an atomic context.
Aug 27, 2017
5dcb605
Merge branch 'master' into master
gs0510 Aug 29, 2017
4c49fe7
Merge pull request #2 from rikvanriel/master
gs0510 Aug 29, 2017
x86/smpboot: Unbreak CPU0 hotplug
A hang on CPU0 onlining after a preceding offlining is observed. Trace
shows that CPU0 is stuck in check_tsc_sync_target() waiting for source
CPU to run check_tsc_sync_source() but this never happens. Source CPU,
in its turn, is stuck on synchronize_sched() which is called from
native_cpu_up() -> do_boot_cpu() -> unregister_nmi_handler().

So it's a classic ABBA deadlock, due to the use of synchronize_sched() in
unregister_nmi_handler().

Fix the bug by moving unregister_nmi_handler() from do_boot_cpu() to
native_cpu_up() after cpu onlining is done.

Signed-off-by: Vitaly Kuznetsov <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
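To make the mutual wait above concrete, here is a minimal userspace sketch in plain C with pthreads. It is only an analogy, not kernel code, and every name in it is invented for illustration: the "source" thread spins in a stand-in for synchronize_sched() that, in this model, cannot complete until the target reports itself online, while the "target" thread refuses to report itself online until the source starts the TSC-sync handshake, so neither side makes progress.

/*
 * Minimal userspace analogy of the hang (invented names, not kernel code):
 * the source thread waits for the target to come online, the target waits
 * for the source to start the handshake.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static atomic_bool target_online;   /* target sets this once fully up         */
static atomic_bool sync_started;    /* source sets this to start the TSC sync */

static void *source_cpu(void *arg)
{
    (void)arg;
    /* Stand-in for unregister_nmi_handler() -> synchronize_sched(): in this
     * model it cannot return until the target CPU is online. */
    while (!atomic_load(&target_online))
        ;   /* spin */
    /* Only now would check_tsc_sync_source() run. */
    atomic_store(&sync_started, true);
    return NULL;
}

static void *target_cpu(void *arg)
{
    (void)arg;
    /* Stand-in for check_tsc_sync_target(): wait for the source to start
     * the handshake before declaring the CPU online. */
    while (!atomic_load(&sync_started))
        ;   /* spin */
    atomic_store(&target_online, true);
    return NULL;
}

int main(void)
{
    pthread_t src, tgt;

    pthread_create(&src, NULL, source_cpu, NULL);
    pthread_create(&tgt, NULL, target_cpu, NULL);

    sleep(2);   /* give both threads a chance to (not) finish */
    if (!atomic_load(&target_online))
        puts("deadlock: each thread is waiting for the other");
    return 0;
}

Built with cc -pthread, the sketch reports the deadlock after the timeout. Reordering the source so that it only enters its blocking wait after the handshake has started resolves the hang, which is the analogue of deferring unregister_nmi_handler() until native_cpu_up() has finished onlining the CPU.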
vittyvk authored and gs0510 committed Aug 29, 2017

Verified: this commit was created on GitHub.com and signed with GitHub’s verified signature (the key has expired).
commit b64dadec53709b061d732a2f2f246f6f0e85eef2
30 changes: 17 additions & 13 deletions arch/x86/kernel/smpboot.c
@@ -971,15 +971,15 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
  * Returns zero if CPU booted OK, else error code from
  * ->wakeup_secondary_cpu.
  */
-static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
+static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
+		       int *cpu0_nmi_registered)
 {
 	volatile u32 *trampoline_status =
 		(volatile u32 *) __va(real_mode_header->trampoline_status);
 	/* start_ip had better be page-aligned! */
 	unsigned long start_ip = real_mode_header->trampoline_start;
 
 	unsigned long boot_error = 0;
-	int cpu0_nmi_registered = 0;
 	unsigned long timeout;
 
 	idle->thread.sp = (unsigned long)task_pt_regs(idle);
@@ -1035,7 +1035,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 		boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
 	else
 		boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
-						     &cpu0_nmi_registered);
+						     cpu0_nmi_registered);
 
 	if (!boot_error) {
 		/*
@@ -1080,21 +1080,16 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 		 */
 		smpboot_restore_warm_reset_vector();
 	}
-	/*
-	 * Clean up the nmi handler. Do this after the callin and callout sync
-	 * to avoid impact of possible long unregister time.
-	 */
-	if (cpu0_nmi_registered)
-		unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");
 
 	return boot_error;
 }
 
 int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
 	int apicid = apic->cpu_present_to_apicid(cpu);
+	int cpu0_nmi_registered = 0;
 	unsigned long flags;
-	int err;
+	int err, ret = 0;
 
 	WARN_ON(irqs_disabled());
 
@@ -1131,10 +1126,11 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 
 	common_cpu_up(cpu, tidle);
 
-	err = do_boot_cpu(apicid, cpu, tidle);
+	err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered);
 	if (err) {
 		pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
-		return -EIO;
+		ret = -EIO;
+		goto unreg_nmi;
 	}
 
 	/*
@@ -1150,7 +1146,15 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 		touch_nmi_watchdog();
 	}
 
-	return 0;
+unreg_nmi:
+	/*
+	 * Clean up the nmi handler. Do this after the callin and callout sync
+	 * to avoid impact of possible long unregister time.
+	 */
+	if (cpu0_nmi_registered)
+		unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");
+
+	return ret;
 }
 
 /**
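A side note on the shape of the fix: the reworked native_cpu_up() uses the common goto-based cleanup idiom, with the deep helper reporting through an out-parameter whether the NMI handler was registered, and every exit path in the caller funnelling through one cleanup label. A standalone sketch of that idiom (invented names, not the kernel code) looks like this:

#include <stdbool.h>
#include <stdio.h>

static void unregister_handler(void) { puts("handler unregistered"); }

/* Deep helper: may register a handler as a side effect and reports that
 * fact through the out-parameter, precisely so the caller can clean up. */
static int boot_cpu(bool *handler_registered)
{
    *handler_registered = true;   /* pretend we had to register it */
    return -1;                    /* pretend the boot then failed  */
}

static int cpu_up(void)
{
    bool handler_registered = false;
    int err, ret = 0;

    err = boot_cpu(&handler_registered);
    if (err) {
        fprintf(stderr, "boot failed (%d)\n", err);
        ret = -1;
        goto unreg;               /* do not return directly; cleanup still runs */
    }

    /* ... wait for the CPU to come fully online ... */

unreg:
    /* Single cleanup point, reached on success and failure alike, and only
     * after everything above has finished. */
    if (handler_registered)
        unregister_handler();
    return ret;
}

int main(void)
{
    return cpu_up() ? 1 : 0;
}

The design point, as in the patch, is that the potentially slow unregister step runs exactly once, in the outer function, after the callin/callout synchronisation, rather than inside the helper that did the registration.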