From 87e6689f380e85d778c7e3faf27ee1ab23b564b9 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Mon, 23 Dec 2024 04:20:38 +0100 Subject: [PATCH] Use a u16 instead of an i64 for enum tags Now that we support stack allocated types, the fields of enum types aren't necessarily all one word in size any more. This in turn means that using an i64 might be wasteful due to the excess padding. To reduce the size of enums somewhat, we now use a 16 bits integer for the tag instead of a 64 bits integer. This change also revealed an issue with struct returns/arguments on the AMD64 ABI, as for types such as `{ i16, i64 }` the ABI actually seems to demand `{ i64, i16 }`. Since this requires re-ordering the struct fields and remapping all access sites accordingly, which is a pain, we take the same approach as Rust which in turn matches what the ARM ABI wants: for structs between 8 and 16 bytes, we just treat them as `{ i64, i64 }` types. This is valid size/alignment wise because for a `{ i16, i64 }` the alignment of both members is 8 bytes. This fixes https://github.com/inko-lang/inko/issues/785. Changelog: performance --- compiler/src/llvm/builder.rs | 12 ++-- compiler/src/llvm/context.rs | 79 +++++++------------------ compiler/src/llvm/passes.rs | 14 +++-- compiler/src/mir/mod.rs | 23 ++++++- compiler/src/mir/passes.rs | 67 ++++++++++----------- compiler/src/type_check/define_types.rs | 2 +- compiler/src/type_check/expressions.rs | 4 +- 7 files changed, 95 insertions(+), 106 deletions(-) diff --git a/compiler/src/llvm/builder.rs b/compiler/src/llvm/builder.rs index ca7e060c..0bff0355 100644 --- a/compiler/src/llvm/builder.rs +++ b/compiler/src/llvm/builder.rs @@ -238,19 +238,23 @@ impl<'ctx> Builder<'ctx> { } pub(crate) fn i64_literal(&self, value: i64) -> IntValue<'ctx> { - self.u64_literal(value as u64) + self.int_literal(64, value as u64) } pub(crate) fn u16_literal(&self, value: u16) -> IntValue<'ctx> { - self.context.i16_type().const_int(value as u64, false) + self.int_literal(16, value as u64) } pub(crate) fn u32_literal(&self, value: u32) -> IntValue<'ctx> { - self.context.i32_type().const_int(value as u64, false) + self.int_literal(32, value as u64) } pub(crate) fn u64_literal(&self, value: u64) -> IntValue<'ctx> { - self.context.i64_type().const_int(value, false) + self.int_literal(64, value) + } + + pub(crate) fn int_literal(&self, bits: u32, value: u64) -> IntValue<'ctx> { + self.context.custom_int(bits).const_int(value, false) } pub(crate) fn f64_literal(&self, value: f64) -> FloatValue<'ctx> { diff --git a/compiler/src/llvm/context.rs b/compiler/src/llvm/context.rs index be7cff49..333834c3 100644 --- a/compiler/src/llvm/context.rs +++ b/compiler/src/llvm/context.rs @@ -110,6 +110,12 @@ impl Context { self.inner.struct_type(fields, false) } + pub(crate) fn two_words(&self) -> StructType { + let word = self.i64_type().into(); + + self.inner.struct_type(&[word, word], false) + } + pub(crate) fn class_type<'a>( &'a self, method_type: StructType<'a>, @@ -178,17 +184,8 @@ impl Context { }; match id { - TypeId::Foreign(ForeignType::Int(8, _)) => { - self.i8_type().as_basic_type_enum() - } - TypeId::Foreign(ForeignType::Int(16, _)) => { - self.i16_type().as_basic_type_enum() - } - TypeId::Foreign(ForeignType::Int(32, _)) => { - self.i32_type().as_basic_type_enum() - } - TypeId::Foreign(ForeignType::Int(_, _)) => { - self.i64_type().as_basic_type_enum() + TypeId::Foreign(ForeignType::Int(size, _)) => { + self.custom_int(size).as_basic_type_enum() } TypeId::Foreign(ForeignType::Float(32)) => { self.f32_type().as_basic_type_enum() @@ -231,9 +228,15 @@ impl Context { ArgumentType::Regular(bits.as_basic_type_enum()) } else if bytes <= 16 { - ArgumentType::Regular( - self.abi_struct_type(layouts, typ).as_basic_type_enum(), - ) + // The AMD64 ABI doesn't handle types such as + // `{ i16, i64 }`. While it does handle `{ i64, i16 }`, this + // requires re-ordering the fields and their corresponding + // access sites. + // + // To avoid the complexity of that we take the same approach + // as Rust: if the struct is larger than 8 bytes, we turn it + // into two 64 bits values. + ArgumentType::Regular(self.two_words().as_basic_type_enum()) } else { ArgumentType::StructValue(typ) } @@ -242,10 +245,7 @@ impl Context { if bytes <= 8 { ArgumentType::Regular(self.i64_type().as_basic_type_enum()) } else if bytes <= 16 { - let word = self.i64_type().into(); - let sret = self.struct_type(&[word, word]); - - ArgumentType::Regular(sret.as_basic_type_enum()) + ArgumentType::Regular(self.two_words().as_basic_type_enum()) } else { // clang and Rust don't use "byval" for ARM64 when the // struct is too large, so neither do we. @@ -287,56 +287,21 @@ impl Context { let bytes = layouts.target_data.get_abi_size(&typ) as u32; match state.config.target.arch { - Architecture::Amd64 => { + // For both AMD64 and ARM64 the way structs are returned is the + // same. For more details, refer to argument_type(). + Architecture::Amd64 | Architecture::Arm64 => { if bytes <= 8 { let bits = self.custom_int(size_in_bits(bytes)); ReturnType::Regular(bits.as_basic_type_enum()) } else if bytes <= 16 { - ReturnType::Regular( - self.abi_struct_type(layouts, typ).as_basic_type_enum(), - ) - } else { - ReturnType::Struct(typ) - } - } - Architecture::Arm64 => { - if bytes <= 8 { - let bits = self.custom_int(size_in_bits(bytes)); - - ReturnType::Regular(bits.as_basic_type_enum()) - } else if bytes <= 16 { - let word = self.i64_type().into(); - let sret = self.struct_type(&[word, word]); - - ReturnType::Regular(sret.as_basic_type_enum()) + ReturnType::Regular(self.two_words().as_basic_type_enum()) } else { ReturnType::Struct(typ) } } } } - - fn abi_struct_type<'ctx>( - &'ctx self, - layouts: &Layouts<'ctx>, - typ: StructType<'ctx>, - ) -> StructType<'ctx> { - // We need to ensure each field is treated as a single/whole value, - // otherwise (e.g. if a field is an array of bytes) we may end up - // generating code that's not ABI compliant. - let fields: Vec<_> = typ - .get_field_types_iter() - .map(|t| { - let bits = - size_in_bits(layouts.target_data.get_abi_size(&t) as u32); - - self.custom_int(bits).as_basic_type_enum() - }) - .collect(); - - self.struct_type(&fields) - } } #[cfg(test)] diff --git a/compiler/src/llvm/passes.rs b/compiler/src/llvm/passes.rs index 83523144..39bcb05c 100644 --- a/compiler/src/llvm/passes.rs +++ b/compiler/src/llvm/passes.rs @@ -1827,18 +1827,21 @@ impl<'shared, 'module, 'ctx> LowerMethod<'shared, 'module, 'ctx> { ); } Instruction::Switch(ins) => { - let var = self.variables[&ins.register]; - let val = self.builder.load_int(var); + let reg_var = self.variables[&ins.register]; + let reg_typ = self.variable_types[&ins.register]; + let bits = reg_typ.into_int_type().get_bit_width(); + let reg_val = + self.builder.load(reg_typ, reg_var).into_int_value(); let mut cases = Vec::with_capacity(ins.blocks.len()); for (index, block) in ins.blocks.iter().enumerate() { cases.push(( - self.builder.u64_literal(index as u64), + self.builder.int_literal(bits, index as u64), all_blocks[block.0], )); } - self.builder.exhaustive_switch(val, &cases); + self.builder.exhaustive_switch(reg_val, &cases); } Instruction::Nil(ins) => { let var = self.variables[&ins.register]; @@ -1854,7 +1857,8 @@ impl<'shared, 'module, 'ctx> LowerMethod<'shared, 'module, 'ctx> { } Instruction::Int(ins) => { let var = self.variables[&ins.register]; - let val = self.builder.i64_literal(ins.value); + let val = + self.builder.int_literal(ins.bits as _, ins.value as u64); self.builder.store(var, val); } diff --git a/compiler/src/mir/mod.rs b/compiler/src/mir/mod.rs index 7b57dc77..7aed01ee 100644 --- a/compiler/src/mir/mod.rs +++ b/compiler/src/mir/mod.rs @@ -319,16 +319,36 @@ impl Block { pub(crate) fn int_literal( &mut self, register: RegisterId, + bits: u8, value: i64, location: InstructionLocation, ) { self.instructions.push(Instruction::Int(Box::new(IntLiteral { register, + bits, value, location, }))); } + pub(crate) fn u16_literal( + &mut self, + register: RegisterId, + value: u16, + location: InstructionLocation, + ) { + self.int_literal(register, 16, value as i64, location) + } + + pub(crate) fn i64_literal( + &mut self, + register: RegisterId, + value: i64, + location: InstructionLocation, + ) { + self.int_literal(register, 64, value, location) + } + pub(crate) fn float_literal( &mut self, register: RegisterId, @@ -1035,6 +1055,7 @@ pub(crate) struct Return { #[derive(Clone)] pub(crate) struct IntLiteral { pub(crate) register: RegisterId, + pub(crate) bits: u8, pub(crate) value: i64, pub(crate) location: InstructionLocation, } @@ -1383,7 +1404,7 @@ impl Instruction { format!("r{} = nil", v.register.0) } Instruction::Int(ref v) => { - format!("r{} = int {:?}", v.register.0, v.value) + format!("r{} = i{} {:?}", v.register.0, v.bits, v.value) } Instruction::Float(ref v) => { format!("r{} = float {:?}", v.register.0, v.value) diff --git a/compiler/src/mir/passes.rs b/compiler/src/mir/passes.rs index 01157696..71235b60 100644 --- a/compiler/src/mir/passes.rs +++ b/compiler/src/mir/passes.rs @@ -17,9 +17,9 @@ use types::format::format_type; use types::module_name::ModuleName; use types::{ self, Block as _, ClassId, ConstantId, FieldId, Inline, MethodId, ModuleId, - Symbol, TypeBounds, TypeRef, VerificationError, EQ_METHOD, FIELDS_LIMIT, - OPTION_NONE, OPTION_SOME, RESULT_CLASS, RESULT_ERROR, RESULT_MODULE, - RESULT_OK, + Symbol, TypeBounds, TypeRef, VerificationError, ENUM_TAG_INDEX, EQ_METHOD, + FIELDS_LIMIT, OPTION_NONE, OPTION_SOME, RESULT_CLASS, RESULT_ERROR, + RESULT_MODULE, RESULT_OK, }; const SELF_NAME: &str = "self"; @@ -1275,13 +1275,13 @@ impl<'a> LowerToMir<'a> { lower.prepare(loc); let ins_reg = lower.new_register(ins); - let tag_reg = lower.new_register(TypeRef::int()); - let tag_val = constructor_id.id(lower.db()) as i64; + let tag_val = constructor_id.id(lower.db()); let tag_field = - class.field_by_index(lower.db(), types::ENUM_TAG_INDEX).unwrap(); + class.field_by_index(lower.db(), ENUM_TAG_INDEX).unwrap(); + let tag_reg = lower.new_register(tag_field.value_type(lower.db())); lower.current_block_mut().allocate(ins_reg, class, loc); - lower.current_block_mut().int_literal(tag_reg, tag_val, loc); + lower.current_block_mut().u16_literal(tag_reg, tag_val, loc); lower .current_block_mut() .set_field(ins_reg, class, tag_field, tag_reg, loc); @@ -1854,7 +1854,7 @@ impl<'a> LowerMethod<'a> { let loc = InstructionLocation::new(node.location); let reg = self.new_register(node.resolved_type); - self.current_block_mut().int_literal(reg, node.value, loc); + self.current_block_mut().i64_literal(reg, node.value, loc); reg } @@ -2459,16 +2459,17 @@ impl<'a> LowerMethod<'a> { let loc = InstructionLocation::new(node.location); let reg = self.expression(node.expression); let class = self.register_type(reg).class_id(self.db()).unwrap(); - let tag_reg = self.new_untracked_register(TypeRef::int()); let tag_field = - class.field_by_index(self.db(), types::ENUM_TAG_INDEX).unwrap(); + class.field_by_index(self.db(), ENUM_TAG_INDEX).unwrap(); + let tag_typ = tag_field.value_type(self.db()); + let tag_reg = self.new_untracked_register(tag_typ); let val_field = class.enum_fields(self.db())[0]; let ok_block = self.add_block(); let err_block = self.add_block(); let after_block = self.add_block(); let mut blocks = vec![BlockId(0), BlockId(0)]; let ret_reg = self.new_untracked_register(node.return_type); - let err_tag = self.new_untracked_register(TypeRef::int()); + let err_tag = self.new_untracked_register(tag_typ); self.add_edge(self.current_block, ok_block); self.add_edge(self.current_block, err_block); @@ -2504,11 +2505,7 @@ impl<'a> LowerMethod<'a> { self.current_block = err_block; self.current_block_mut().allocate(ret_reg, class, loc); - self.current_block_mut().int_literal( - err_tag, - none_id as _, - loc, - ); + self.current_block_mut().u16_literal(err_tag, none_id, loc); self.current_block_mut() .set_field(ret_reg, class, tag_field, err_tag, loc); self.current_block_mut().drop_without_dropper(reg, loc); @@ -2544,7 +2541,7 @@ impl<'a> LowerMethod<'a> { self.current_block = err_block; self.current_block_mut().allocate(ret_reg, class, loc); - self.current_block_mut().int_literal(err_tag, err_id as _, loc); + self.current_block_mut().u16_literal(err_tag, err_id, loc); self.current_block_mut() .get_field(err_val, reg, class, val_field, loc); self.current_block_mut() @@ -2580,13 +2577,13 @@ impl<'a> LowerMethod<'a> { let err_id = class.constructor(self.db(), RESULT_ERROR).unwrap().id(self.db()); let tag_field = - class.field_by_index(self.db(), types::ENUM_TAG_INDEX).unwrap(); + class.field_by_index(self.db(), ENUM_TAG_INDEX).unwrap(); + let tag_reg = self.new_register(tag_field.value_type(self.db())); let val_field = class.enum_fields(self.db())[0]; let result_reg = self.new_register(node.return_type); - let tag_reg = self.new_register(TypeRef::int()); self.current_block_mut().allocate(result_reg, class, loc); - self.current_block_mut().int_literal(tag_reg, err_id as _, loc); + self.current_block_mut().u16_literal(tag_reg, err_id, loc); self.current_block_mut() .set_field(result_reg, class, tag_field, tag_reg, loc); self.current_block_mut() @@ -3292,10 +3289,9 @@ impl<'a> LowerMethod<'a> { let test_end_block = match case.constructor { pmatch::Constructor::Int(val) => { - let val_type = TypeRef::int(); - let val_reg = self.new_untracked_register(val_type); + let val_reg = self.new_untracked_register(TypeRef::int()); - self.block_mut(test_block).int_literal(val_reg, val, loc); + self.block_mut(test_block).i64_literal(val_reg, val, loc); self.block_mut(test_block).call_builtin( res_reg, types::Intrinsic::IntEq, @@ -3377,14 +3373,12 @@ impl<'a> LowerMethod<'a> { let test_type = self.register_type(test_reg); let class = test_type.class_id(self.db()).unwrap(); - let tag_reg = self.new_untracked_register(TypeRef::int()); let tag_field = - class.field_by_index(self.db(), types::ENUM_TAG_INDEX).unwrap(); + class.field_by_index(self.db(), ENUM_TAG_INDEX).unwrap(); + let tag_reg = + self.new_untracked_register(tag_field.value_type(self.db())); let member_fields = class.enum_fields(self.db()); - self.block_mut(test_block) - .get_field(tag_reg, test_reg, class, tag_field, loc); - for case in cases { let case_registers = registers.clone(); let block = self.add_block(); @@ -3409,6 +3403,8 @@ impl<'a> LowerMethod<'a> { self.decision(state, case.node, block, case_registers); } + self.block_mut(test_block) + .get_field(tag_reg, test_reg, class, tag_field, loc); self.block_mut(test_block).switch(tag_reg, blocks, loc); test_block } @@ -4681,18 +4677,18 @@ impl<'a> LowerMethod<'a> { let after = self.add_block(); let enum_fields = class.enum_fields(self.db()); let tag_field = - class.field_by_index(self.db(), types::ENUM_TAG_INDEX).unwrap(); - let tag = self.new_register(TypeRef::int()); + class.field_by_index(self.db(), ENUM_TAG_INDEX).unwrap(); + let tag_reg = self.new_register(tag_field.value_type(self.db())); for con in cons { let block = self.add_current_block(); self.add_edge(before, block); - let members = con.arguments(self.db()).to_vec(); - let fields = &enum_fields[0..members.len()]; + let args = con.arguments(self.db()).to_vec(); + let fields = &enum_fields[0..args.len()]; - for (&field, typ) in fields.iter().zip(members.into_iter()) { + for (&field, typ) in fields.iter().zip(args.into_iter()) { func(self, field, typ); } @@ -4701,8 +4697,9 @@ impl<'a> LowerMethod<'a> { blocks.push(block); } - self.block_mut(before).get_field(tag, rec, class, tag_field, location); - self.block_mut(before).switch(tag, blocks, location); + self.block_mut(before) + .get_field(tag_reg, rec, class, tag_field, location); + self.block_mut(before).switch(tag_reg, blocks, location); self.current_block = after; } } diff --git a/compiler/src/type_check/define_types.rs b/compiler/src/type_check/define_types.rs index b95661ca..bd21e2f9 100644 --- a/compiler/src/type_check/define_types.rs +++ b/compiler/src/type_check/define_types.rs @@ -1185,7 +1185,7 @@ impl<'a> DefineConstructors<'a> { let module = self.module; let db = self.db_mut(); let vis = Visibility::TypePrivate; - let tag_typ = TypeRef::int(); + let tag_typ = TypeRef::foreign_unsigned_int(16); let tag_name = ENUM_TAG_FIELD.to_string(); let loc = class_id.location(db); diff --git a/compiler/src/type_check/expressions.rs b/compiler/src/type_check/expressions.rs index a50b8c54..cb7b92a2 100644 --- a/compiler/src/type_check/expressions.rs +++ b/compiler/src/type_check/expressions.rs @@ -2050,9 +2050,7 @@ impl<'a> CheckMethodBody<'a> { } fn int_pattern(&mut self, node: &mut hir::IntLiteral, input_type: TypeRef) { - let typ = TypeRef::int(); - - self.expression_pattern(typ, input_type, node.location); + self.expression_pattern(TypeRef::int(), input_type, node.location); } fn string_pattern(