s390: add support for virtually mapped kernel stacks
With virtually mapped kernel stacks the kernel stack overflow detection
is now fault based: every stack has a guard page in the vmalloc space.
The panic_stack is renamed to nodat_stack and is used for all functions
that need to run without DAT, e.g. memcpy_real or do_start_kdump.

The main effect is a reduction in the kernel image size: with vmap
stacks the old-style overflow checking, which adds two instructions per
function, is not needed anymore. Result from bloat-o-meter:

add/remove: 20/1 grow/shrink: 13/26854 up/down: 2198/-216240 (-214042)

In regard to performance the fork micro-benchmark takes a hit of a few
microseconds; allocating four pages in vmalloc space is more expensive
compared to an order-2 page allocation. With real workloads I could not
find a noticeable difference.

Acked-by: Heiko Carstens <[email protected]>
Signed-off-by: Martin Schwidefsky <[email protected]>
Martin Schwidefsky committed Oct 9, 2018
1 parent ff340d2 commit ce3dc44
Showing 16 changed files with 225 additions and 88 deletions.
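
For orientation before the per-file hunks, here is a small standalone program (an editor's illustration, not part of the commit) that works out the stack geometry the new STACK_* macros describe. It assumes the usual 4 KiB s390 page size and the 160-byte 64-bit stack frame overhead; PT_REGS_SIZE is a placeholder for the real sizeof(struct pt_regs).

#include <stdio.h>

/* Assumed values; in the kernel they come from asm/page.h and asm/ptrace.h. */
#define PAGE_SIZE            4096UL
#define STACK_ORDER          2                          /* four pages per stack */
#define STACK_SIZE           (PAGE_SIZE << STACK_ORDER) /* 16 KiB */
#define STACK_FRAME_OVERHEAD 160UL                      /* s390x ABI save area */
#define PT_REGS_SIZE         336UL                      /* placeholder for sizeof(struct pt_regs) */
#define STACK_INIT_OFFSET    (STACK_SIZE - STACK_FRAME_OVERHEAD - PT_REGS_SIZE)

int main(void)
{
	/* The initial stack pointer leaves room for one stack frame plus
	 * a struct pt_regs at the top of each 16 KiB stack. */
	printf("STACK_SIZE        = %lu\n", STACK_SIZE);
	printf("STACK_INIT_OFFSET = %lu\n", STACK_INIT_OFFSET);
	return 0;
}
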
2 changes: 2 additions & 0 deletions arch/s390/Kconfig
@@ -125,6 +125,7 @@ config S390
select HAVE_ARCH_SOFT_DIRTY
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_ARCH_VMAP_STACK
select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
select HAVE_CMPXCHG_DOUBLE
select HAVE_CMPXCHG_LOCAL
@@ -649,6 +650,7 @@ config PACK_STACK

config CHECK_STACK
def_bool y
depends on !VMAP_STACK
prompt "Detect kernel stack overflow"
help
This option enables the compiler option -mstack-guard and
4 changes: 2 additions & 2 deletions arch/s390/include/asm/lowcore.h
@@ -102,9 +102,9 @@ struct lowcore {
__u64 current_task; /* 0x0338 */
__u64 kernel_stack; /* 0x0340 */

/* Interrupt, panic and restart stack. */
/* Interrupt, DAT-off and restart stack. */
__u64 async_stack; /* 0x0348 */
__u64 panic_stack; /* 0x0350 */
__u64 nodat_stack; /* 0x0350 */
__u64 restart_stack; /* 0x0358 */

/* Restart function and parameter. */
8 changes: 8 additions & 0 deletions arch/s390/include/asm/processor.h
@@ -162,6 +162,14 @@ struct thread_struct {

typedef struct thread_struct thread_struct;

/*
* General size of a stack
*/
#define STACK_ORDER 2
#define STACK_SIZE (PAGE_SIZE << STACK_ORDER)
#define STACK_INIT_OFFSET \
(STACK_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs))

/*
* Stack layout of a C stack frame.
*/
3 changes: 0 additions & 3 deletions arch/s390/include/asm/thread_info.h
@@ -14,10 +14,7 @@
* Size of kernel stack for each process
*/
#define THREAD_SIZE_ORDER 2
#define ASYNC_ORDER 2

#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
#define ASYNC_SIZE (PAGE_SIZE << ASYNC_ORDER)

#ifndef __ASSEMBLY__
#include <asm/lowcore.h>
2 changes: 1 addition & 1 deletion arch/s390/kernel/asm-offsets.c
@@ -159,7 +159,7 @@ int main(void)
OFFSET(__LC_CURRENT, lowcore, current_task);
OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack);
OFFSET(__LC_ASYNC_STACK, lowcore, async_stack);
OFFSET(__LC_PANIC_STACK, lowcore, panic_stack);
OFFSET(__LC_NODAT_STACK, lowcore, nodat_stack);
OFFSET(__LC_RESTART_STACK, lowcore, restart_stack);
OFFSET(__LC_RESTART_FN, lowcore, restart_fn);
OFFSET(__LC_RESTART_DATA, lowcore, restart_data);
2 changes: 1 addition & 1 deletion arch/s390/kernel/base.S
@@ -18,7 +18,7 @@

ENTRY(s390_base_mcck_handler)
basr %r13,0
0: lg %r15,__LC_PANIC_STACK # load panic stack
0: lg %r15,__LC_NODAT_STACK # load panic stack
aghi %r15,-STACK_FRAME_OVERHEAD
larl %r1,s390_base_mcck_handler_fn
lg %r9,0(%r1)
6 changes: 3 additions & 3 deletions arch/s390/kernel/dumpstack.c
@@ -77,11 +77,11 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task,
frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
#ifdef CONFIG_CHECK_STACK
sp = __dump_trace(func, data, sp,
S390_lowcore.panic_stack + frame_size - PAGE_SIZE,
S390_lowcore.panic_stack + frame_size);
S390_lowcore.nodat_stack + frame_size - STACK_SIZE,
S390_lowcore.nodat_stack + frame_size);
#endif
sp = __dump_trace(func, data, sp,
S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
S390_lowcore.async_stack + frame_size - STACK_SIZE,
S390_lowcore.async_stack + frame_size);
task = task ?: current;
__dump_trace(func, data, sp,
53 changes: 36 additions & 17 deletions arch/s390/kernel/entry.S
@@ -85,14 +85,34 @@ _LPP_OFFSET = __LC_LPP
#endif
.endm

.macro CHECK_STACK stacksize,savearea
.macro CHECK_STACK savearea
#ifdef CONFIG_CHECK_STACK
tml %r15,\stacksize - CONFIG_STACK_GUARD
tml %r15,STACK_SIZE - CONFIG_STACK_GUARD
lghi %r14,\savearea
jz stack_overflow
#endif
.endm

.macro CHECK_VMAP_STACK savearea,oklabel
#ifdef CONFIG_VMAP_STACK
lgr %r14,%r15
nill %r14,0x10000 - STACK_SIZE
oill %r14,STACK_INIT
clg %r14,__LC_KERNEL_STACK
je \oklabel
clg %r14,__LC_ASYNC_STACK
je \oklabel
clg %r14,__LC_NODAT_STACK
je \oklabel
clg %r14,__LC_RESTART_STACK
je \oklabel
lghi %r14,\savearea
j stack_overflow
#else
j \oklabel
#endif
.endm

.macro SWITCH_ASYNC savearea,timer
tmhh %r8,0x0001 # interrupting from user ?
jnz 1f
@@ -104,11 +124,11 @@ _LPP_OFFSET = __LC_LPP
brasl %r14,cleanup_critical
tmhh %r8,0x0001 # retest problem state after cleanup
jnz 1f
0: lg %r14,__LC_ASYNC_STACK # are we already on the async stack?
0: lg %r14,__LC_ASYNC_STACK # are we already on the target stack?
slgr %r14,%r15
srag %r14,%r14,STACK_SHIFT
jnz 2f
CHECK_STACK 1<<STACK_SHIFT,\savearea
CHECK_STACK \savearea
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 3f
1: UPDATE_VTIME %r14,%r15,\timer
@@ -600,9 +620,10 @@ ENTRY(pgm_check_handler)
jnz 1f # -> enabled, can't be a double fault
tm __LC_PGM_ILC+3,0x80 # check for per exception
jnz .Lpgm_svcper # -> single stepped svc
1: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
1: CHECK_STACK __LC_SAVE_AREA_SYNC
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 4f
# CHECK_VMAP_STACK branches to stack_overflow or 4f
CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f
2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
lg %r15,__LC_KERNEL_STACK
@@ -1136,7 +1157,8 @@ ENTRY(mcck_int_handler)
jnz 4f
TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID
jno .Lmcck_panic
4: SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER
4: ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off
SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER
.Lmcck_skip:
lghi %r14,__LC_GPREGS_SAVE_AREA+64
stmg %r0,%r7,__PT_R0(%r11)
@@ -1163,7 +1185,6 @@
xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
la %r11,STACK_FRAME_OVERHEAD(%r1)
lgr %r15,%r1
ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off
TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING
jno .Lmcck_return
TRACE_IRQS_OFF
@@ -1182,7 +1203,7 @@
lpswe __LC_RETURN_MCCK_PSW

.Lmcck_panic:
lg %r15,__LC_PANIC_STACK
lg %r15,__LC_NODAT_STACK
la %r11,STACK_FRAME_OVERHEAD(%r15)
j .Lmcck_skip

@@ -1193,12 +1214,10 @@
ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40
stg %r15,__LC_SAVE_AREA_RESTART
lg %r15,__LC_RESTART_STACK
aghi %r15,-__PT_SIZE # create pt_regs on stack
xc 0(__PT_SIZE,%r15),0(%r15)
stmg %r0,%r14,__PT_R0(%r15)
mvc __PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
mvc __PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw
aghi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack
xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW
xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
lg %r1,__LC_RESTART_FN # load fn, parm & source cpu
lg %r2,__LC_RESTART_DATA
@@ -1216,14 +1235,14 @@

.section .kprobes.text, "ax"

#ifdef CONFIG_CHECK_STACK
#if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK)
/*
* The synchronous or the asynchronous stack overflowed. We are dead.
* No need to properly save the registers, we are going to panic anyway.
* Setup a pt_regs so that show_trace can provide a good call trace.
*/
stack_overflow:
lg %r15,__LC_PANIC_STACK # change to panic stack
lg %r15,__LC_NODAT_STACK # change to panic stack
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
stmg %r8,%r9,__PT_PSW(%r11)
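
The CHECK_VMAP_STACK macro added above is the heart of the new overflow handling on the entry paths. A rough C rendering of what it computes (illustration only; it assumes STACK_INIT in the assembly carries the same value as STACK_INIT_OFFSET):

/* Round the interrupted %r15 down to its 16 KiB stack base, rebuild the
 * initial stack pointer of that stack, and accept it only if it matches
 * one of the four known per-CPU stacks; otherwise the entry code branches
 * to stack_overflow. */
static int vmap_stack_ok(unsigned long r15, struct lowcore *lc)
{
	unsigned long top = (r15 & ~(STACK_SIZE - 1UL)) | STACK_INIT_OFFSET;

	return top == lc->kernel_stack || top == lc->async_stack ||
	       top == lc->nodat_stack  || top == lc->restart_stack;
}

Both the old CHECK_STACK test and this check funnel into the same stack_overflow routine, which is why the #ifdef around it now also covers CONFIG_VMAP_STACK.
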
3 changes: 3 additions & 0 deletions arch/s390/kernel/entry.h
@@ -86,4 +86,7 @@ DECLARE_PER_CPU(u64, mt_cycles[8]);
void gs_load_bc_cb(struct pt_regs *regs);
void set_fs_fixup(void);

unsigned long stack_alloc(void);
void stack_free(unsigned long stack);

#endif /* _ENTRY_H */
4 changes: 1 addition & 3 deletions arch/s390/kernel/head64.S
@@ -36,9 +36,7 @@ ENTRY(startup_continue)
#
larl %r14,init_task
stg %r14,__LC_CURRENT
larl %r15,init_thread_union+THREAD_SIZE
stg %r15,__LC_KERNEL_STACK # set end of kernel stack
aghi %r15,-STACK_FRAME_OVERHEAD
larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD
#
# Early setup functions that may not rely on an initialized bss section,
# like moving the initrd. Returns with an initialized bss section.
2 changes: 1 addition & 1 deletion arch/s390/kernel/irq.c
@@ -171,7 +171,7 @@ void do_softirq_own_stack(void)
old = current_stack_pointer();
/* Check against async. stack address range. */
new = S390_lowcore.async_stack;
if (((new - old) >> (PAGE_SHIFT + THREAD_SIZE_ORDER)) != 0) {
if (((new - old) >> (PAGE_SHIFT + STACK_ORDER)) != 0) {
CALL_ON_STACK(__do_softirq, new, 0);
} else {
/* We are already on the async stack. */
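
The change here only tracks the renamed size constant, since the async stack is now STACK_SIZE (16 KiB) rather than ASYNC_SIZE. In C terms the test reads (sketch only):

/* Any stack pointer more than one stack size below the async stack anchor
 * is treated as not being on the async stack, so __do_softirq gets switched
 * over via CALL_ON_STACK; an sp above the anchor wraps around as unsigned
 * and forces the switch as well. */
static int needs_async_stack_switch(unsigned long sp, unsigned long async_stack)
{
	return ((async_stack - sp) >> (PAGE_SHIFT + STACK_ORDER)) != 0;
}
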
17 changes: 13 additions & 4 deletions arch/s390/kernel/machine_kexec.c
@@ -142,18 +142,27 @@ static noinline void __machine_kdump(void *image)
}
#endif

static unsigned long do_start_kdump(unsigned long addr)
{
struct kimage *image = (struct kimage *) addr;
int (*start_kdump)(int) = (void *)image->start;
int rc;

__arch_local_irq_stnsm(0xfb); /* disable DAT */
rc = start_kdump(0);
__arch_local_irq_stosm(0x04); /* enable DAT */
return rc;
}

/*
* Check if kdump checksums are valid: We call purgatory with parameter "0"
*/
static bool kdump_csum_valid(struct kimage *image)
{
#ifdef CONFIG_CRASH_DUMP
int (*start_kdump)(int) = (void *)image->start;
int rc;

__arch_local_irq_stnsm(0xfb); /* disable DAT */
rc = start_kdump(0);
__arch_local_irq_stosm(0x04); /* enable DAT */
rc = CALL_ON_STACK(do_start_kdump, S390_lowcore.nodat_stack, 1, image);
return rc == 0;
#else
return false;
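
The new do_start_kdump() wrapper and the stack switch exist because start_kdump() runs with DAT disabled, and with DAT off only identity-mapped real storage is addressable: a vmalloc'ed stack would be unreachable. A commented sketch of the resulting call, mirroring the hunk above:

/* Sketch of the new checksum path: run the DAT-off wrapper on the
 * linear-mapped nodat_stack instead of the caller's (possibly vmalloc'ed)
 * kernel stack. */
static bool example_kdump_csum_valid(struct kimage *image)
{
	return CALL_ON_STACK(do_start_kdump, S390_lowcore.nodat_stack,
			     1, image) == 0;
}
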
89 changes: 80 additions & 9 deletions arch/s390/kernel/setup.c
@@ -49,6 +49,7 @@
#include <linux/crash_dump.h>
#include <linux/memory.h>
#include <linux/compat.h>
#include <linux/start_kernel.h>

#include <asm/ipl.h>
#include <asm/facility.h>
@@ -303,6 +304,78 @@ early_param("vmalloc", parse_vmalloc);

void *restart_stack __section(.data);

unsigned long stack_alloc(void)
{
#ifdef CONFIG_VMAP_STACK
return (unsigned long)
__vmalloc_node_range(STACK_SIZE, STACK_SIZE,
VMALLOC_START, VMALLOC_END,
THREADINFO_GFP,
PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
#else
return __get_free_pages(GFP_KERNEL, STACK_ORDER);
#endif
}

void stack_free(unsigned long stack)
{
#ifdef CONFIG_VMAP_STACK
vfree((void *) stack);
#else
free_pages(stack, STACK_ORDER);
#endif
}
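
A minimal usage sketch for the two helpers just added (a hypothetical caller; the same +/- STACK_INIT_OFFSET conversion between a stack's base address and its initial stack pointer is used by async_stack_realloc() and arch_call_rest_init() below):

/* Install a fresh 16 KiB stack in some anchor slot, e.g. a lowcore stack
 * pointer, and release it again later. */
static int example_install_stack(unsigned long *anchor)
{
	unsigned long stack = stack_alloc();

	if (!stack)
		return -ENOMEM;
	*anchor = stack + STACK_INIT_OFFSET;	/* initial stack pointer */
	return 0;
}

static void example_release_stack(unsigned long *anchor)
{
	stack_free(*anchor - STACK_INIT_OFFSET);	/* back to the base address */
	*anchor = 0;
}
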

int __init arch_early_irq_init(void)
{
unsigned long stack;

stack = __get_free_pages(GFP_KERNEL, STACK_ORDER);
if (!stack)
panic("Couldn't allocate async stack");
S390_lowcore.async_stack = stack + STACK_INIT_OFFSET;
return 0;
}

static int __init async_stack_realloc(void)
{
unsigned long old, new;

old = S390_lowcore.async_stack - STACK_INIT_OFFSET;
new = stack_alloc();
if (!new)
panic("Couldn't allocate async stack");
S390_lowcore.async_stack = new + STACK_INIT_OFFSET;
free_pages(old, STACK_ORDER);
return 0;
}
early_initcall(async_stack_realloc);

void __init arch_call_rest_init(void)
{
struct stack_frame *frame;
unsigned long stack;

stack = stack_alloc();
if (!stack)
panic("Couldn't allocate kernel stack");
current->stack = (void *) stack;
#ifdef CONFIG_VMAP_STACK
current->stack_vm_area = (void *) stack;
#endif
set_task_stack_end_magic(current);
stack += STACK_INIT_OFFSET;
S390_lowcore.kernel_stack = stack;
frame = (struct stack_frame *) stack;
memset(frame, 0, sizeof(*frame));
/* Branch to rest_init on the new stack, never returns */
asm volatile(
" la 15,0(%[_frame])\n"
" jg rest_init\n"
: : [_frame] "a" (frame));
}

static void __init setup_lowcore(void)
{
struct lowcore *lc;
@@ -329,14 +402,8 @@ static void __init setup_lowcore(void)
PSW_MASK_DAT | PSW_MASK_MCHECK;
lc->io_new_psw.addr = (unsigned long) io_int_handler;
lc->clock_comparator = clock_comparator_max;
lc->kernel_stack = ((unsigned long) &init_thread_union)
lc->nodat_stack = ((unsigned long) &init_thread_union)
+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
lc->async_stack = (unsigned long)
memblock_virt_alloc(ASYNC_SIZE, ASYNC_SIZE)
+ ASYNC_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
lc->panic_stack = (unsigned long)
memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE)
+ PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
lc->current_task = (unsigned long)&init_task;
lc->lpp = LPP_MAGIC;
lc->machine_flags = S390_lowcore.machine_flags;
@@ -357,8 +424,12 @@ static void __init setup_lowcore(void)
lc->last_update_timer = S390_lowcore.last_update_timer;
lc->last_update_clock = S390_lowcore.last_update_clock;

restart_stack = memblock_virt_alloc(ASYNC_SIZE, ASYNC_SIZE);
restart_stack += ASYNC_SIZE;
/*
* Allocate the global restart stack which is the same for
* all CPUs in case *one* of them does a PSW restart.
*/
restart_stack = memblock_virt_alloc(STACK_SIZE, STACK_SIZE);
restart_stack += STACK_INIT_OFFSET;

/*
* Set up PSW restart to call ipl.c:do_restart(). Copy the relevant