Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[mono][interp] Align simd types to 16 bytes by default #81219

Merged
merged 6 commits into from
Feb 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/mono/mono/mini/interp/interp-internals.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#define MINT_STACK_SLOT_SIZE (sizeof (stackval))
// This alignment provides us with straightforward support for Vector128
#define MINT_STACK_ALIGNMENT (2 * MINT_STACK_SLOT_SIZE)
#define MINT_SIMD_ALIGNMENT (MINT_STACK_ALIGNMENT)

#define INTERP_STACK_SIZE (1024*1024)
#define INTERP_REDZONE_SIZE (8*1024)
Expand Down
32 changes: 19 additions & 13 deletions src/mono/mono/mini/interp/interp.c
Original file line number Diff line number Diff line change
Expand Up @@ -3693,10 +3693,10 @@ static MONO_NEVER_INLINE int
interp_newobj_slow_unopt (InterpFrame *frame, InterpMethod *cmethod, const guint16* ip, MonoError *error)
{
char *locals = (char*)frame->stack;
int call_args_offset = ip [1];
guint16 param_size = ip [3];
guint16 ret_size = ip [4];
int start_call_args_offset = call_args_offset;
int return_offset = ip [1];
int start_param_offset = ip [2];
guint16 param_size = ip [4];
guint16 ret_size = ip [5];
gpointer this_ptr;

// Should only be called in unoptimized code. This opcode moves the params around
Expand All @@ -3707,13 +3707,19 @@ interp_newobj_slow_unopt (InterpFrame *frame, InterpMethod *cmethod, const guint

MonoClass *newobj_class = cmethod->method->klass;

call_args_offset = ALIGN_TO (call_args_offset + ret_size, MINT_STACK_ALIGNMENT);
int call_args_offset = ALIGN_TO (return_offset + ret_size, MINT_STACK_ALIGNMENT);
// We allocate space on the stack for return value and for this pointer, that is passed to ctor
if (param_size)
memmove (locals + call_args_offset + MINT_STACK_SLOT_SIZE, locals + start_call_args_offset, param_size);
if (param_size) {
int param_offset;
if (ip [6]) // Check if first arg is simd type, which requires realigning param area
param_offset = ALIGN_TO (call_args_offset + MINT_STACK_SLOT_SIZE, MINT_SIMD_ALIGNMENT);
else
param_offset = call_args_offset + MINT_STACK_SLOT_SIZE;
memmove (locals + param_offset, locals + start_param_offset, param_size);
}

if (is_vt) {
this_ptr = locals + start_call_args_offset;
this_ptr = locals + return_offset;
memset (this_ptr, 0, ret_size);
} else {
// FIXME push/pop LMF
Expand All @@ -3724,7 +3730,7 @@ interp_newobj_slow_unopt (InterpFrame *frame, InterpMethod *cmethod, const guint

this_ptr = mono_object_new_checked (newobj_class, error);
return_val_if_nok (error, -1);
LOCAL_VAR (start_call_args_offset, gpointer) = this_ptr; // return value
LOCAL_VAR (return_offset, gpointer) = this_ptr; // return value
}
LOCAL_VAR (call_args_offset, gpointer) = this_ptr;
return call_args_offset;
Expand Down Expand Up @@ -4270,7 +4276,7 @@ mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClause
reinit_frame (child_frame, frame, cmethod, locals + return_offset, locals + call_args_offset);
frame = child_frame;
}
g_assert (((gsize)frame->stack % MINT_STACK_ALIGNMENT) == 0);
g_assert_checked (((gsize)frame->stack % MINT_STACK_ALIGNMENT) == 0);

MonoException *call_ex;
if (method_entry (context, frame,
Expand Down Expand Up @@ -5773,16 +5779,16 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
goto call;
}
MINT_IN_CASE(MINT_NEWOBJ_SLOW_UNOPT) {
return_offset = ip [1];
cmethod = (InterpMethod*)frame->imethod->data_items [ip [2]];
cmethod = (InterpMethod*)frame->imethod->data_items [ip [3]];
int offset = interp_newobj_slow_unopt (frame, cmethod, ip, error);
if (offset == -1) {
MonoException *exc = interp_error_convert_to_exception (frame, error, ip);
g_assert (exc);
THROW_EX (exc, ip);
}
return_offset = 0; // unused, ctor has void return
call_args_offset = offset;
ip += 5;
ip += 7;
goto call;
}
MINT_IN_CASE(MINT_INTRINS_SPAN_CTOR) {
Expand Down
2 changes: 1 addition & 1 deletion src/mono/mono/mini/interp/mintops.def
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ OPDEF(MINT_JMP, "jmp", 2, 0, 0, MintOpMethodToken)

OPDEF(MINT_ENDFILTER, "endfilter", 2, 0, 1, MintOpNoArgs)

OPDEF(MINT_NEWOBJ_SLOW_UNOPT, "newobj_slow_unopt", 5, 1, 0, MintOpMethodToken)
OPDEF(MINT_NEWOBJ_SLOW_UNOPT, "newobj_slow_unopt", 7, 1, 1, MintOpMethodToken)
OPDEF(MINT_NEWOBJ_STRING_UNOPT, "newobj_string_unopt", 4, 1, 0, MintOpMethodToken)
OPDEF(MINT_NEWOBJ_SLOW, "newobj_slow", 4, 1, 1, MintOpMethodToken)
OPDEF(MINT_NEWOBJ_ARRAY, "newobj_array", 5, 1, 1, MintOpMethodToken)
Expand Down
138 changes: 87 additions & 51 deletions src/mono/mono/mini/interp/transform.c
Original file line number Diff line number Diff line change
Expand Up @@ -490,15 +490,21 @@ create_interp_local_explicit (TransformData *td, MonoType *type, int size)
td->locals_capacity = 2;
td->locals = (InterpLocal*) g_realloc (td->locals, td->locals_capacity * sizeof (InterpLocal));
}
td->locals [td->locals_size].type = type;
td->locals [td->locals_size].mt = mint_type (type);
td->locals [td->locals_size].flags = 0;
td->locals [td->locals_size].indirects = 0;
td->locals [td->locals_size].offset = -1;
td->locals [td->locals_size].size = size;
td->locals [td->locals_size].live_start = -1;
td->locals [td->locals_size].bb_index = -1;
td->locals [td->locals_size].def = NULL;
int mt = mint_type (type);
InterpLocal *local = &td->locals [td->locals_size];

local->type = type;
local->mt = mt;
local->flags = 0;
if (mt == MINT_TYPE_VT && m_class_is_simd_type (mono_class_from_mono_type_internal (type)))
local->flags |= INTERP_LOCAL_FLAG_SIMD;
local->indirects = 0;
local->offset = -1;
local->size = size;
local->live_start = -1;
local->bb_index = -1;
local->def = NULL;

td->locals_size++;
return td->locals_size - 1;

Expand All @@ -520,11 +526,6 @@ create_interp_stack_local (TransformData *td, StackInfo *sp, int type_size)
int local = create_interp_local_explicit (td, get_type_from_stack (sp->type, sp->klass), type_size);

td->locals [local].flags |= INTERP_LOCAL_FLAG_EXECUTION_STACK;
if (!td->optimized) {
td->locals [local].stack_offset = sp->offset;
// Additional space that is allocated for the frame, when we don't run the var offset allocator
ENSURE_STACK_SIZE(td, sp->offset + sp->size);
}
sp->local = local;
}

Expand All @@ -543,12 +544,20 @@ static void
push_type_explicit (TransformData *td, int type, MonoClass *k, int type_size)
{
ensure_stack (td, 1);
td->sp->type = GINT_TO_UINT8 (type);
td->sp->klass = k;
td->sp->flags = 0;
td->sp->offset = get_tos_offset (td);
td->sp->size = ALIGN_TO (type_size, MINT_STACK_SLOT_SIZE);
create_interp_stack_local (td, td->sp, type_size);
StackInfo *sp = td->sp;
sp->type = GINT_TO_UINT8 (type);
sp->klass = k;
sp->flags = 0;
sp->size = ALIGN_TO (type_size, MINT_STACK_SLOT_SIZE);
create_interp_stack_local (td, sp, type_size);
if (!td->optimized) {
sp->offset = get_tos_offset (td);
if (td->locals [sp->local].flags & INTERP_LOCAL_FLAG_SIMD)
sp->offset = ALIGN_TO (sp->offset, MINT_SIMD_ALIGNMENT);
td->locals [sp->local].stack_offset = sp->offset;
// Additional space that is allocated for the frame, when we don't run the var offset allocator
ENSURE_STACK_SIZE(td, sp->offset + sp->size);
}
td->sp++;
}

Expand All @@ -557,11 +566,12 @@ push_var (TransformData *td, int var_index)
{
InterpLocal *var = &td->locals [var_index];
ensure_stack (td, 1);
td->sp->type = GINT_TO_UINT8 (stack_type [var->mt]);
td->sp->klass = mono_class_from_mono_type_internal (var->type);
td->sp->flags = 0;
td->sp->local = var_index;
td->sp->size = ALIGN_TO (var->size, MINT_STACK_SLOT_SIZE);
StackInfo *sp = td->sp;
sp->type = GINT_TO_UINT8 (stack_type [var->mt]);
sp->klass = mono_class_from_mono_type_internal (var->type);
sp->flags = 0;
sp->local = var_index;
sp->size = ALIGN_TO (var->size, MINT_STACK_SLOT_SIZE);
td->sp++;
}

Expand All @@ -585,17 +595,13 @@ push_var (TransformData *td, int var_index)
(s)->klass = k; \
} while (0)

static void
set_type_and_local (TransformData *td, StackInfo *sp, MonoClass *klass, int type)
{
SET_TYPE (sp, type, klass);
create_interp_stack_local (td, sp, MINT_STACK_SLOT_SIZE);
}

static void
set_simple_type_and_local (TransformData *td, StackInfo *sp, int type)
{
set_type_and_local (td, sp, NULL, type);
SET_SIMPLE_TYPE (sp, type);
create_interp_stack_local (td, sp, MINT_STACK_SLOT_SIZE);
if (!td->optimized)
td->locals [sp->local].stack_offset = sp->offset;
}

static void
Expand Down Expand Up @@ -1465,6 +1471,9 @@ alloc_var_offset (TransformData *td, int local, gint32 *ptos)
offset = *ptos;
size = td->locals [local].size;

if (td->locals [local].flags & INTERP_LOCAL_FLAG_SIMD)
offset = ALIGN_TO (offset, MINT_SIMD_ALIGNMENT);

td->locals [local].offset = offset;

*ptos = ALIGN_TO (offset + size, MINT_STACK_SLOT_SIZE);
Expand Down Expand Up @@ -3524,7 +3533,11 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
td->sp -= num_args;
guint32 params_stack_size = get_stack_size (td->sp, num_args);
// Used only by unoptimized code
int param_offset = get_tos_offset (td);
int param_offset;
if (num_args)
param_offset = td->sp [0].offset;
else
param_offset = get_tos_offset (td);

int *call_args = create_call_args (td, num_args);

Expand Down Expand Up @@ -4114,6 +4127,26 @@ interp_emit_memory_barrier (TransformData *td, int kind)
goto exit; \
} while (0)

/*
 * interp_type_size:
 *
 *   Return the size used for a value of TYPE, whose interpreter type is MT,
 * and store the alignment to use for it in *ALIGN_P.
 *
 * Valuetypes (MINT_TYPE_VT) get the size computed by mono_type_size (); their
 * alignment is MINT_SIMD_ALIGNMENT when the class is a simd type and
 * MINT_STACK_SLOT_SIZE otherwise. Every other type reports one stack slot
 * for both size and alignment.
 */
static int
interp_type_size (MonoType *type, int mt, int *align_p)
{
	// Anything that is not a valuetype is treated as a single stack slot.
	if (mt != MINT_TYPE_VT) {
		*align_p = MINT_STACK_SLOT_SIZE;
		return MINT_STACK_SLOT_SIZE; // not really
	}

	int vt_align;
	int vt_size = mono_type_size (type, &vt_align);

	// mono_type_size should report the alignment; the value it returned is
	// replaced here based on whether the class is a simd type.
	vt_align = m_class_is_simd_type (mono_class_from_mono_type_internal (type))
		? MINT_SIMD_ALIGNMENT
		: MINT_STACK_SLOT_SIZE;
	g_assert (vt_align <= MINT_STACK_ALIGNMENT);

	*align_p = vt_align;
	return vt_size;
}

static void
interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMethodSignature *sig, MonoMethodHeader *header, MonoError *error)
{
Expand Down Expand Up @@ -4141,19 +4174,15 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet
type = mono_method_signature_internal (td->method)->params [i - sig->hasthis];
int mt = mint_type (type);
td->locals [i].type = type;
td->locals [i].offset = offset;
td->locals [i].flags = INTERP_LOCAL_FLAG_GLOBAL;
td->locals [i].indirects = 0;
td->locals [i].mt = mt;
td->locals [i].def = NULL;
if (mt == MINT_TYPE_VT) {
size = mono_type_size (type, &align);
td->locals [i].size = size;
offset += ALIGN_TO (size, MINT_STACK_SLOT_SIZE);
} else {
td->locals [i].size = MINT_STACK_SLOT_SIZE; // not really
offset += MINT_STACK_SLOT_SIZE;
}
size = interp_type_size (type, mt, &align);
td->locals [i].size = size;
offset = ALIGN_TO (offset, align);
td->locals [i].offset = offset;
offset += size;
}
offset = ALIGN_TO (offset, MINT_STACK_ALIGNMENT);

Expand All @@ -4167,21 +4196,19 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet
return;
}
}
offset += align - 1;
offset &= ~(align - 1);
int mt = mint_type (header->locals [i]);
size = interp_type_size (header->locals [i], mt, &align);
offset = ALIGN_TO (offset, align);
imethod->local_offsets [i] = offset;
td->locals [index].type = header->locals [i];
td->locals [index].offset = offset;
td->locals [index].flags = INTERP_LOCAL_FLAG_GLOBAL;
td->locals [index].indirects = 0;
td->locals [index].mt = mint_type (header->locals [i]);
td->locals [index].def = NULL;
if (td->locals [index].mt == MINT_TYPE_VT)
td->locals [index].size = size;
else
td->locals [index].size = MINT_STACK_SLOT_SIZE; // not really
td->locals [index].size = size;
// Every local takes a MINT_STACK_SLOT_SIZE so IL locals have same behavior as execution locals
offset += ALIGN_TO (size, MINT_STACK_SLOT_SIZE);
offset += size;
}
offset = ALIGN_TO (offset, MINT_STACK_ALIGNMENT);

Expand Down Expand Up @@ -6023,12 +6050,21 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
} else if (!td->optimized) {
int tos = get_tos_offset (td);
td->sp -= csignature->param_count;
int param_size = tos - get_tos_offset (td);
int param_offset = get_tos_offset (td);
int param_size = tos - param_offset;

td->cbb->contains_call_instruction = TRUE;
interp_add_ins (td, MINT_NEWOBJ_SLOW_UNOPT);
interp_ins_set_sreg (td->last_ins, MINT_CALL_ARGS_SREG);
init_last_ins_call (td);
td->last_ins->info.call_info->call_offset = param_offset;
td->last_ins->data [0] = get_data_item_index_imethod (td, mono_interp_get_imethod (m));
td->last_ins->data [1] = param_size;
if (csignature->param_count > 0) {
// Instruct opcode to also align params if necessary
if (td->locals [td->sp [0].local].flags & INTERP_LOCAL_FLAG_SIMD)
td->last_ins->data [3] = 1;
}

gboolean is_vt = m_class_is_valuetype (klass);
if (is_vt) {
Expand Down
2 changes: 2 additions & 0 deletions src/mono/mono/mini/interp/transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

#define INTERP_LOCAL_FLAG_UNKNOWN_USE 32
#define INTERP_LOCAL_FLAG_LOCAL_ONLY 64
// We use this flag to avoid adding an align field to InterpLocal, for now
#define INTERP_LOCAL_FLAG_SIMD 128

typedef struct _InterpInst InterpInst;
typedef struct _InterpBasicBlock InterpBasicBlock;
Expand Down