Skip to content

Commit

Permalink
[mono][interp] Align simd types to 16 bytes by default (#81219)
Browse files Browse the repository at this point in the history
* [mono][interp] Remove unused method

* [mono][interp] Optimize code just in case

* [mono][interp] Align simd types to 16 bytes by default

All interp vars (args, IL locals and other allocated vars) are now aligned to 16-byte offsets.

* [mono][interp] Add svar arg to MINT_NEWOBJ_SLOW_UNOPT

The assumption that the return offset is identical to the param offset for this opcode no longer holds. Set the param offset explicitly, separately from the return offset, similar to #81017

* [mono][interp] Disable assertion on hot path

* [mono][interp] Remove some duplicate code
  • Loading branch information
BrzVlad authored Feb 6, 2023
1 parent 2ca0229 commit 287621c
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 65 deletions.
1 change: 1 addition & 0 deletions src/mono/mono/mini/interp/interp-internals.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#define MINT_STACK_SLOT_SIZE (sizeof (stackval))
// This alignment provides us with straightforward support for Vector128
#define MINT_STACK_ALIGNMENT (2 * MINT_STACK_SLOT_SIZE)
#define MINT_SIMD_ALIGNMENT (MINT_STACK_ALIGNMENT)

#define INTERP_STACK_SIZE (1024*1024)
#define INTERP_REDZONE_SIZE (8*1024)
Expand Down
32 changes: 19 additions & 13 deletions src/mono/mono/mini/interp/interp.c
Original file line number Diff line number Diff line change
Expand Up @@ -3693,10 +3693,10 @@ static MONO_NEVER_INLINE int
interp_newobj_slow_unopt (InterpFrame *frame, InterpMethod *cmethod, const guint16* ip, MonoError *error)
{
char *locals = (char*)frame->stack;
int call_args_offset = ip [1];
guint16 param_size = ip [3];
guint16 ret_size = ip [4];
int start_call_args_offset = call_args_offset;
int return_offset = ip [1];
int start_param_offset = ip [2];
guint16 param_size = ip [4];
guint16 ret_size = ip [5];
gpointer this_ptr;

// Should only be called in unoptimized code. This opcode moves the params around
Expand All @@ -3707,13 +3707,19 @@ interp_newobj_slow_unopt (InterpFrame *frame, InterpMethod *cmethod, const guint

MonoClass *newobj_class = cmethod->method->klass;

call_args_offset = ALIGN_TO (call_args_offset + ret_size, MINT_STACK_ALIGNMENT);
int call_args_offset = ALIGN_TO (return_offset + ret_size, MINT_STACK_ALIGNMENT);
// We allocate space on the stack for return value and for this pointer, that is passed to ctor
if (param_size)
memmove (locals + call_args_offset + MINT_STACK_SLOT_SIZE, locals + start_call_args_offset, param_size);
if (param_size) {
int param_offset;
if (ip [6]) // Check if first arg is simd type, which requires realigning param area
param_offset = ALIGN_TO (call_args_offset + MINT_STACK_SLOT_SIZE, MINT_SIMD_ALIGNMENT);
else
param_offset = call_args_offset + MINT_STACK_SLOT_SIZE;
memmove (locals + param_offset, locals + start_param_offset, param_size);
}

if (is_vt) {
this_ptr = locals + start_call_args_offset;
this_ptr = locals + return_offset;
memset (this_ptr, 0, ret_size);
} else {
// FIXME push/pop LMF
Expand All @@ -3724,7 +3730,7 @@ interp_newobj_slow_unopt (InterpFrame *frame, InterpMethod *cmethod, const guint

this_ptr = mono_object_new_checked (newobj_class, error);
return_val_if_nok (error, -1);
LOCAL_VAR (start_call_args_offset, gpointer) = this_ptr; // return value
LOCAL_VAR (return_offset, gpointer) = this_ptr; // return value
}
LOCAL_VAR (call_args_offset, gpointer) = this_ptr;
return call_args_offset;
Expand Down Expand Up @@ -4270,7 +4276,7 @@ mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClause
reinit_frame (child_frame, frame, cmethod, locals + return_offset, locals + call_args_offset);
frame = child_frame;
}
g_assert (((gsize)frame->stack % MINT_STACK_ALIGNMENT) == 0);
g_assert_checked (((gsize)frame->stack % MINT_STACK_ALIGNMENT) == 0);

MonoException *call_ex;
if (method_entry (context, frame,
Expand Down Expand Up @@ -5773,16 +5779,16 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
goto call;
}
MINT_IN_CASE(MINT_NEWOBJ_SLOW_UNOPT) {
return_offset = ip [1];
cmethod = (InterpMethod*)frame->imethod->data_items [ip [2]];
cmethod = (InterpMethod*)frame->imethod->data_items [ip [3]];
int offset = interp_newobj_slow_unopt (frame, cmethod, ip, error);
if (offset == -1) {
MonoException *exc = interp_error_convert_to_exception (frame, error, ip);
g_assert (exc);
THROW_EX (exc, ip);
}
return_offset = 0; // unused, ctor has void return
call_args_offset = offset;
ip += 5;
ip += 7;
goto call;
}
MINT_IN_CASE(MINT_INTRINS_SPAN_CTOR) {
Expand Down
2 changes: 1 addition & 1 deletion src/mono/mono/mini/interp/mintops.def
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ OPDEF(MINT_JMP, "jmp", 2, 0, 0, MintOpMethodToken)

OPDEF(MINT_ENDFILTER, "endfilter", 2, 0, 1, MintOpNoArgs)

OPDEF(MINT_NEWOBJ_SLOW_UNOPT, "newobj_slow_unopt", 5, 1, 0, MintOpMethodToken)
OPDEF(MINT_NEWOBJ_SLOW_UNOPT, "newobj_slow_unopt", 7, 1, 1, MintOpMethodToken)
OPDEF(MINT_NEWOBJ_STRING_UNOPT, "newobj_string_unopt", 4, 1, 0, MintOpMethodToken)
OPDEF(MINT_NEWOBJ_SLOW, "newobj_slow", 4, 1, 1, MintOpMethodToken)
OPDEF(MINT_NEWOBJ_ARRAY, "newobj_array", 5, 1, 1, MintOpMethodToken)
Expand Down
138 changes: 87 additions & 51 deletions src/mono/mono/mini/interp/transform.c
Original file line number Diff line number Diff line change
Expand Up @@ -490,15 +490,21 @@ create_interp_local_explicit (TransformData *td, MonoType *type, int size)
td->locals_capacity = 2;
td->locals = (InterpLocal*) g_realloc (td->locals, td->locals_capacity * sizeof (InterpLocal));
}
td->locals [td->locals_size].type = type;
td->locals [td->locals_size].mt = mint_type (type);
td->locals [td->locals_size].flags = 0;
td->locals [td->locals_size].indirects = 0;
td->locals [td->locals_size].offset = -1;
td->locals [td->locals_size].size = size;
td->locals [td->locals_size].live_start = -1;
td->locals [td->locals_size].bb_index = -1;
td->locals [td->locals_size].def = NULL;
int mt = mint_type (type);
InterpLocal *local = &td->locals [td->locals_size];

local->type = type;
local->mt = mt;
local->flags = 0;
if (mt == MINT_TYPE_VT && m_class_is_simd_type (mono_class_from_mono_type_internal (type)))
local->flags |= INTERP_LOCAL_FLAG_SIMD;
local->indirects = 0;
local->offset = -1;
local->size = size;
local->live_start = -1;
local->bb_index = -1;
local->def = NULL;

td->locals_size++;
return td->locals_size - 1;

Expand All @@ -520,11 +526,6 @@ create_interp_stack_local (TransformData *td, StackInfo *sp, int type_size)
int local = create_interp_local_explicit (td, get_type_from_stack (sp->type, sp->klass), type_size);

td->locals [local].flags |= INTERP_LOCAL_FLAG_EXECUTION_STACK;
if (!td->optimized) {
td->locals [local].stack_offset = sp->offset;
// Additional space that is allocated for the frame, when we don't run the var offset allocator
ENSURE_STACK_SIZE(td, sp->offset + sp->size);
}
sp->local = local;
}

Expand All @@ -543,12 +544,20 @@ static void
push_type_explicit (TransformData *td, int type, MonoClass *k, int type_size)
{
ensure_stack (td, 1);
td->sp->type = GINT_TO_UINT8 (type);
td->sp->klass = k;
td->sp->flags = 0;
td->sp->offset = get_tos_offset (td);
td->sp->size = ALIGN_TO (type_size, MINT_STACK_SLOT_SIZE);
create_interp_stack_local (td, td->sp, type_size);
StackInfo *sp = td->sp;
sp->type = GINT_TO_UINT8 (type);
sp->klass = k;
sp->flags = 0;
sp->size = ALIGN_TO (type_size, MINT_STACK_SLOT_SIZE);
create_interp_stack_local (td, sp, type_size);
if (!td->optimized) {
sp->offset = get_tos_offset (td);
if (td->locals [sp->local].flags & INTERP_LOCAL_FLAG_SIMD)
sp->offset = ALIGN_TO (sp->offset, MINT_SIMD_ALIGNMENT);
td->locals [sp->local].stack_offset = sp->offset;
// Additional space that is allocated for the frame, when we don't run the var offset allocator
ENSURE_STACK_SIZE(td, sp->offset + sp->size);
}
td->sp++;
}

Expand All @@ -557,11 +566,12 @@ push_var (TransformData *td, int var_index)
{
InterpLocal *var = &td->locals [var_index];
ensure_stack (td, 1);
td->sp->type = GINT_TO_UINT8 (stack_type [var->mt]);
td->sp->klass = mono_class_from_mono_type_internal (var->type);
td->sp->flags = 0;
td->sp->local = var_index;
td->sp->size = ALIGN_TO (var->size, MINT_STACK_SLOT_SIZE);
StackInfo *sp = td->sp;
sp->type = GINT_TO_UINT8 (stack_type [var->mt]);
sp->klass = mono_class_from_mono_type_internal (var->type);
sp->flags = 0;
sp->local = var_index;
sp->size = ALIGN_TO (var->size, MINT_STACK_SLOT_SIZE);
td->sp++;
}

Expand All @@ -585,17 +595,13 @@ push_var (TransformData *td, int var_index)
(s)->klass = k; \
} while (0)

static void
set_type_and_local (TransformData *td, StackInfo *sp, MonoClass *klass, int type)
{
SET_TYPE (sp, type, klass);
create_interp_stack_local (td, sp, MINT_STACK_SLOT_SIZE);
}

static void
set_simple_type_and_local (TransformData *td, StackInfo *sp, int type)
{
set_type_and_local (td, sp, NULL, type);
SET_SIMPLE_TYPE (sp, type);
create_interp_stack_local (td, sp, MINT_STACK_SLOT_SIZE);
if (!td->optimized)
td->locals [sp->local].stack_offset = sp->offset;
}

static void
Expand Down Expand Up @@ -1465,6 +1471,9 @@ alloc_var_offset (TransformData *td, int local, gint32 *ptos)
offset = *ptos;
size = td->locals [local].size;

if (td->locals [local].flags & INTERP_LOCAL_FLAG_SIMD)
offset = ALIGN_TO (offset, MINT_SIMD_ALIGNMENT);

td->locals [local].offset = offset;

*ptos = ALIGN_TO (offset + size, MINT_STACK_SLOT_SIZE);
Expand Down Expand Up @@ -3524,7 +3533,11 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
td->sp -= num_args;
guint32 params_stack_size = get_stack_size (td->sp, num_args);
// Used only by unoptimized code
int param_offset = get_tos_offset (td);
int param_offset;
if (num_args)
param_offset = td->sp [0].offset;
else
param_offset = get_tos_offset (td);

int *call_args = create_call_args (td, num_args);

Expand Down Expand Up @@ -4114,6 +4127,26 @@ interp_emit_memory_barrier (TransformData *td, int kind)
goto exit; \
} while (0)

/*
 * interp_type_size:
 *
 *   Compute the size and the required alignment of an interpreter var of
 * the given TYPE, whose interp stack type is MT.
 *
 * For MINT_TYPE_VT the size is the one reported by mono_type_size (). The
 * alignment is MINT_SIMD_ALIGNMENT for simd classes and MINT_STACK_SLOT_SIZE
 * for every other valuetype. Any other MT gets a nominal size and alignment
 * of one stack slot.
 *
 * The alignment is stored in ALIGN_P, the size is returned.
 */
static int
interp_type_size (MonoType *type, int mt, int *align_p)
{
	if (mt != MINT_TYPE_VT) {
		// Non-valuetype vars take up a single stack slot
		*align_p = MINT_STACK_SLOT_SIZE;
		return MINT_STACK_SLOT_SIZE; // not really
	}

	int vt_align;
	int vt_size = mono_type_size (type, &vt_align);

	// The alignment reported above is not used, we pick it based on whether
	// the class is a simd type (mono_type_size should report the alignment)
	MonoClass *klass = mono_class_from_mono_type_internal (type);
	vt_align = m_class_is_simd_type (klass) ? MINT_SIMD_ALIGNMENT : MINT_STACK_SLOT_SIZE;
	g_assert (vt_align <= MINT_STACK_ALIGNMENT);

	*align_p = vt_align;
	return vt_size;
}

static void
interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMethodSignature *sig, MonoMethodHeader *header, MonoError *error)
{
Expand Down Expand Up @@ -4141,19 +4174,15 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet
type = mono_method_signature_internal (td->method)->params [i - sig->hasthis];
int mt = mint_type (type);
td->locals [i].type = type;
td->locals [i].offset = offset;
td->locals [i].flags = INTERP_LOCAL_FLAG_GLOBAL;
td->locals [i].indirects = 0;
td->locals [i].mt = mt;
td->locals [i].def = NULL;
if (mt == MINT_TYPE_VT) {
size = mono_type_size (type, &align);
td->locals [i].size = size;
offset += ALIGN_TO (size, MINT_STACK_SLOT_SIZE);
} else {
td->locals [i].size = MINT_STACK_SLOT_SIZE; // not really
offset += MINT_STACK_SLOT_SIZE;
}
size = interp_type_size (type, mt, &align);
td->locals [i].size = size;
offset = ALIGN_TO (offset, align);
td->locals [i].offset = offset;
offset += size;
}
offset = ALIGN_TO (offset, MINT_STACK_ALIGNMENT);

Expand All @@ -4167,21 +4196,19 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet
return;
}
}
offset += align - 1;
offset &= ~(align - 1);
int mt = mint_type (header->locals [i]);
size = interp_type_size (header->locals [i], mt, &align);
offset = ALIGN_TO (offset, align);
imethod->local_offsets [i] = offset;
td->locals [index].type = header->locals [i];
td->locals [index].offset = offset;
td->locals [index].flags = INTERP_LOCAL_FLAG_GLOBAL;
td->locals [index].indirects = 0;
td->locals [index].mt = mint_type (header->locals [i]);
td->locals [index].def = NULL;
if (td->locals [index].mt == MINT_TYPE_VT)
td->locals [index].size = size;
else
td->locals [index].size = MINT_STACK_SLOT_SIZE; // not really
td->locals [index].size = size;
// Every local takes a MINT_STACK_SLOT_SIZE so IL locals have same behavior as execution locals
offset += ALIGN_TO (size, MINT_STACK_SLOT_SIZE);
offset += size;
}
offset = ALIGN_TO (offset, MINT_STACK_ALIGNMENT);

Expand Down Expand Up @@ -6023,12 +6050,21 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
} else if (!td->optimized) {
int tos = get_tos_offset (td);
td->sp -= csignature->param_count;
int param_size = tos - get_tos_offset (td);
int param_offset = get_tos_offset (td);
int param_size = tos - param_offset;

td->cbb->contains_call_instruction = TRUE;
interp_add_ins (td, MINT_NEWOBJ_SLOW_UNOPT);
interp_ins_set_sreg (td->last_ins, MINT_CALL_ARGS_SREG);
init_last_ins_call (td);
td->last_ins->info.call_info->call_offset = param_offset;
td->last_ins->data [0] = get_data_item_index_imethod (td, mono_interp_get_imethod (m));
td->last_ins->data [1] = param_size;
if (csignature->param_count > 0) {
// Instruct opcode to also align params if necessary
if (td->locals [td->sp [0].local].flags & INTERP_LOCAL_FLAG_SIMD)
td->last_ins->data [3] = 1;
}

gboolean is_vt = m_class_is_valuetype (klass);
if (is_vt) {
Expand Down
2 changes: 2 additions & 0 deletions src/mono/mono/mini/interp/transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

#define INTERP_LOCAL_FLAG_UNKNOWN_USE 32
#define INTERP_LOCAL_FLAG_LOCAL_ONLY 64
// For now, we use this flag to avoid adding an align field to InterpLocal
#define INTERP_LOCAL_FLAG_SIMD 128

typedef struct _InterpInst InterpInst;
typedef struct _InterpBasicBlock InterpBasicBlock;
Expand Down

0 comments on commit 287621c

Please sign in to comment.