Skip to content

Commit

Permalink
auto merge of rust-lang#6724 : thestinger/rust/swap_fast, r=thestinger
Browse files Browse the repository at this point in the history
Passing higher alignment values gives the optimization passes more freedom, since they can then copy in larger chunks. This change results in rustc outputting the same post-optimization IR as clang for swaps and most copies, apart from the lack of information about padding.

Code snippet:

```rust
#[inline(never)]
fn swap<T>(x: &mut T, y: &mut T) {
    util::swap(x, y);
}
```

Original IR (for `int`):

```llvm
define internal fastcc void @_ZN9swap_283417_a71830ca3ed2d65d3_00E(i64*, i64*) #1 {
static_allocas:
  %2 = icmp eq i64* %0, %1
  br i1 %2, label %_ZN4util9swap_283717_a71830ca3ed2d65d3_00E.exit, label %3

; <label>:3                                       ; preds = %static_allocas
  %4 = load i64* %0, align 1
  %5 = load i64* %1, align 1
  store i64 %5, i64* %0, align 1
  store i64 %4, i64* %1, align 1
  br label %_ZN4util9swap_283717_a71830ca3ed2d65d3_00E.exit

_ZN4util9swap_283717_a71830ca3ed2d65d3_00E.exit:  ; preds = %3, %static_allocas
  ret void
}
```

After rust-lang#6710:

```llvm
define internal fastcc void @_ZN9swap_283017_a71830ca3ed2d65d3_00E(i64* nocapture, i64* nocapture) #1 {
static_allocas:
  %2 = load i64* %0, align 1
  %3 = load i64* %1, align 1
  store i64 %3, i64* %0, align 1
  store i64 %2, i64* %1, align 1
  ret void
}
```

After this change:

```llvm
define internal fastcc void @_ZN9swap_283017_a71830ca3ed2d65d3_00E(i64* nocapture, i64* nocapture) #1 {
static_allocas:
  %2 = load i64* %0, align 8
  %3 = load i64* %1, align 8
  store i64 %3, i64* %0, align 8
  store i64 %2, i64* %1, align 8
  ret void
}
```

Another example:

```rust
#[inline(never)]
fn set<T>(x: &mut T, y: T) {
    *x = y;
}
```

Before, with `(int, int)` (align 1):

```llvm
define internal fastcc void @_ZN8set_282517_8fa972e3f9e451983_00E({ i64, i64 }* nocapture, { i64, i64 }* nocapture) #1 {
static_allocas:
  %2 = bitcast { i64, i64 }* %1 to i8*
  %3 = bitcast { i64, i64 }* %0 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* %2, i64 16, i32 1, i1 false)
  ret void
}
```

After, with `(int, int)` (align 8):

```llvm
define internal fastcc void @_ZN8set_282617_8fa972e3f9e451983_00E({ i64, i64 }* nocapture, { i64, i64 }* nocapture) #1 {
static_allocas:
  %2 = bitcast { i64, i64 }* %1 to i8*
  %3 = bitcast { i64, i64 }* %0 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* %2, i64 16, i32 8, i1 false)
  ret void
}
```
  • Loading branch information
bors committed May 27, 2013
2 parents 5d04ee8 + e6c04de commit dbc5758
Show file tree
Hide file tree
Showing 9 changed files with 231 additions and 80 deletions.
17 changes: 7 additions & 10 deletions src/librustc/middle/trans/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ use middle::trans::foreign;
use middle::trans::glue;
use middle::trans::inline;
use middle::trans::machine;
use middle::trans::machine::llsize_of;
use middle::trans::machine::{llalign_of_min, llsize_of};
use middle::trans::meth;
use middle::trans::monomorphize;
use middle::trans::reachable;
Expand Down Expand Up @@ -1442,12 +1442,7 @@ pub fn with_cond(bcx: block, val: ValueRef, f: &fn(block) -> block) -> block {
next_cx
}

pub fn call_memcpy(cx: block, dst: ValueRef, src: ValueRef,
n_bytes: ValueRef) {
// FIXME (Related to #1645, I think?): Provide LLVM with better
// alignment information when the alignment is statically known (it must
// be nothing more than a constant int, or LLVM complains -- not even a
// constant element of a tydesc works).
pub fn call_memcpy(cx: block, dst: ValueRef, src: ValueRef, n_bytes: ValueRef, align: u32) {
let _icx = cx.insn_ctxt("call_memcpy");
let ccx = cx.ccx();
let key = match ccx.sess.targ_cfg.arch {
Expand All @@ -1462,7 +1457,7 @@ pub fn call_memcpy(cx: block, dst: ValueRef, src: ValueRef,
let src_ptr = PointerCast(cx, src, T_ptr(T_i8()));
let dst_ptr = PointerCast(cx, dst, T_ptr(T_i8()));
let size = IntCast(cx, n_bytes, ccx.int_type);
let align = C_i32(1i32);
let align = C_i32(align as i32);
let volatile = C_i1(false);
Call(cx, memcpy, [dst_ptr, src_ptr, size, align, volatile]);
}
Expand All @@ -1471,8 +1466,10 @@ pub fn memcpy_ty(bcx: block, dst: ValueRef, src: ValueRef, t: ty::t) {
let _icx = bcx.insn_ctxt("memcpy_ty");
let ccx = bcx.ccx();
if ty::type_is_structural(t) {
let llsz = llsize_of(ccx, type_of::type_of(ccx, t));
call_memcpy(bcx, dst, src, llsz);
let llty = type_of::type_of(ccx, t);
let llsz = llsize_of(ccx, llty);
let llalign = llalign_of_min(ccx, llty);
call_memcpy(bcx, dst, src, llsz, llalign as u32);
} else {
Store(bcx, Load(bcx, src), dst);
}
Expand Down
2 changes: 1 addition & 1 deletion src/librustc/middle/trans/closure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,7 @@ pub fn make_opaque_cbox_take_glue(
[opaque_tydesc, sz],
expr::SaveIn(rval));
let cbox_out = PointerCast(bcx, Load(bcx, rval), llopaquecboxty);
call_memcpy(bcx, cbox_out, cbox_in, sz);
call_memcpy(bcx, cbox_out, cbox_in, sz, 1);
Store(bcx, cbox_out, cboxptr);

// Take the (deeply cloned) type descriptor
Expand Down
96 changes: 67 additions & 29 deletions src/librustc/middle/trans/foreign.rs
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,7 @@ pub fn trans_intrinsic(ccx: @CrateContext,
let llsrcptr = PointerCast(bcx, llsrcptr, T_ptr(T_i8()));

let llsize = llsize_of(ccx, llintype);
call_memcpy(bcx, lldestptr, llsrcptr, llsize);
call_memcpy(bcx, lldestptr, llsrcptr, llsize, 1);
}
}
~"needs_drop" => {
Expand Down Expand Up @@ -846,44 +846,82 @@ pub fn trans_intrinsic(ccx: @CrateContext,
Store(bcx, morestack_addr, fcx.llretptr.get());
}
~"memcpy32" => {
let dst_ptr = get_param(decl, first_real_arg);
let src_ptr = get_param(decl, first_real_arg + 1);
let size = get_param(decl, first_real_arg + 2);
let align = C_i32(1);
let tp_ty = substs.tys[0];
let lltp_ty = type_of::type_of(ccx, tp_ty);
let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
let size = C_i32(machine::llsize_of_real(ccx, lltp_ty) as i32);

let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
let src_ptr = PointerCast(bcx, get_param(decl, first_real_arg + 1), T_ptr(T_i8()));
let count = get_param(decl, first_real_arg + 2);
let volatile = C_i1(false);
let llfn = *bcx.ccx().intrinsics.get(
&~"llvm.memcpy.p0i8.p0i8.i32");
Call(bcx, llfn, [dst_ptr, src_ptr, size, align, volatile]);
let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memcpy.p0i8.p0i8.i32");
Call(bcx, llfn, [dst_ptr, src_ptr, Mul(bcx, size, count), align, volatile]);
}
~"memcpy64" => {
let dst_ptr = get_param(decl, first_real_arg);
let src_ptr = get_param(decl, first_real_arg + 1);
let size = get_param(decl, first_real_arg + 2);
let align = C_i32(1);
let tp_ty = substs.tys[0];
let lltp_ty = type_of::type_of(ccx, tp_ty);
let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
let size = C_i64(machine::llsize_of_real(ccx, lltp_ty) as i64);

let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
let src_ptr = PointerCast(bcx, get_param(decl, first_real_arg + 1), T_ptr(T_i8()));
let count = get_param(decl, first_real_arg + 2);
let volatile = C_i1(false);
let llfn = *bcx.ccx().intrinsics.get(
&~"llvm.memcpy.p0i8.p0i8.i64");
Call(bcx, llfn, [dst_ptr, src_ptr, size, align, volatile]);
let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memcpy.p0i8.p0i8.i64");
Call(bcx, llfn, [dst_ptr, src_ptr, Mul(bcx, size, count), align, volatile]);
}
~"memmove32" => {
let dst_ptr = get_param(decl, first_real_arg);
let src_ptr = get_param(decl, first_real_arg + 1);
let size = get_param(decl, first_real_arg + 2);
let align = C_i32(1);
let tp_ty = substs.tys[0];
let lltp_ty = type_of::type_of(ccx, tp_ty);
let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
let size = C_i32(machine::llsize_of_real(ccx, lltp_ty) as i32);

let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
let src_ptr = PointerCast(bcx, get_param(decl, first_real_arg + 1), T_ptr(T_i8()));
let count = get_param(decl, first_real_arg + 2);
let volatile = C_i1(false);
let llfn = *bcx.ccx().intrinsics.get(
&~"llvm.memmove.p0i8.p0i8.i32");
Call(bcx, llfn, [dst_ptr, src_ptr, size, align, volatile]);
let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memmove.p0i8.p0i8.i32");
Call(bcx, llfn, [dst_ptr, src_ptr, Mul(bcx, size, count), align, volatile]);
}
~"memmove64" => {
let dst_ptr = get_param(decl, first_real_arg);
let src_ptr = get_param(decl, first_real_arg + 1);
let size = get_param(decl, first_real_arg + 2);
let align = C_i32(1);
let tp_ty = substs.tys[0];
let lltp_ty = type_of::type_of(ccx, tp_ty);
let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
let size = C_i64(machine::llsize_of_real(ccx, lltp_ty) as i64);

let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
let src_ptr = PointerCast(bcx, get_param(decl, first_real_arg + 1), T_ptr(T_i8()));
let count = get_param(decl, first_real_arg + 2);
let volatile = C_i1(false);
let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memmove.p0i8.p0i8.i64");
Call(bcx, llfn, [dst_ptr, src_ptr, Mul(bcx, size, count), align, volatile]);
}
~"memset32" => {
let tp_ty = substs.tys[0];
let lltp_ty = type_of::type_of(ccx, tp_ty);
let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
let size = C_i32(machine::llsize_of_real(ccx, lltp_ty) as i32);

let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
let val = get_param(decl, first_real_arg + 1);
let count = get_param(decl, first_real_arg + 2);
let volatile = C_i1(false);
let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memset.p0i8.i32");
Call(bcx, llfn, [dst_ptr, val, Mul(bcx, size, count), align, volatile]);
}
~"memset64" => {
let tp_ty = substs.tys[0];
let lltp_ty = type_of::type_of(ccx, tp_ty);
let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
let size = C_i64(machine::llsize_of_real(ccx, lltp_ty) as i64);

let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
let val = get_param(decl, first_real_arg + 1);
let count = get_param(decl, first_real_arg + 2);
let volatile = C_i1(false);
let llfn = *bcx.ccx().intrinsics.get(
&~"llvm.memmove.p0i8.p0i8.i64");
Call(bcx, llfn, [dst_ptr, src_ptr, size, align, volatile]);
let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memset.p0i8.i64");
Call(bcx, llfn, [dst_ptr, val, Mul(bcx, size, count), align, volatile]);
}
~"sqrtf32" => {
let x = get_param(decl, first_real_arg);
Expand Down
4 changes: 2 additions & 2 deletions src/librustc/middle/trans/tvec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ pub fn duplicate_uniq(bcx: block, vptr: ValueRef, vec_ty: ty::t) -> Result {

let data_ptr = get_dataptr(bcx, get_bodyptr(bcx, vptr));
let new_data_ptr = get_dataptr(bcx, get_bodyptr(bcx, newptr));
base::call_memcpy(bcx, new_data_ptr, data_ptr, fill);
base::call_memcpy(bcx, new_data_ptr, data_ptr, fill, 1);

let bcx = if ty::type_needs_drop(bcx.tcx(), unit_ty) {
iter_vec_raw(bcx, new_data_ptr, vec_ty, fill, glue::take_ty)
Expand Down Expand Up @@ -370,7 +370,7 @@ pub fn write_content(bcx: block,
let bytes = s.len() + 1; // copy null-terminator too
let llbytes = C_uint(bcx.ccx(), bytes);
let llcstr = C_cstr(bcx.ccx(), s);
base::call_memcpy(bcx, lldest, llcstr, llbytes);
base::call_memcpy(bcx, lldest, llcstr, llbytes, 1);
return bcx;
}
}
Expand Down
3 changes: 2 additions & 1 deletion src/librustc/middle/trans/type_use.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@ pub fn type_uses_for(ccx: @CrateContext, fn_id: def_id, n_tps: uint)
~"visit_tydesc" | ~"forget" | ~"frame_address" |
~"morestack_addr" => 0,

~"memcpy32" | ~"memcpy64" | ~"memmove32" | ~"memmove64" => 0,
~"memcpy32" | ~"memcpy64" | ~"memmove32" | ~"memmove64" |
~"memset32" | ~"memset64" => use_repr,

~"sqrtf32" | ~"sqrtf64" | ~"powif32" | ~"powif64" |
~"sinf32" | ~"sinf64" | ~"cosf32" | ~"cosf64" |
Expand Down
48 changes: 36 additions & 12 deletions src/librustc/middle/typeck/check/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3538,65 +3538,89 @@ pub fn check_intrinsic_type(ccx: @mut CrateCtxt, it: @ast::foreign_item) {
(0u, ~[], ty::mk_nil_ptr(ccx.tcx))
}
~"memcpy32" => {
(0,
(1,
~[
ty::mk_ptr(tcx, ty::mt {
ty: ty::mk_u8(),
ty: param(ccx, 0),
mutbl: ast::m_mutbl
}),
ty::mk_ptr(tcx, ty::mt {
ty: ty::mk_u8(),
ty: param(ccx, 0),
mutbl: ast::m_imm
}),
ty::mk_u32()
],
ty::mk_nil())
}
~"memcpy64" => {
(0,
(1,
~[
ty::mk_ptr(tcx, ty::mt {
ty: ty::mk_u8(),
ty: param(ccx, 0),
mutbl: ast::m_mutbl
}),
ty::mk_ptr(tcx, ty::mt {
ty: ty::mk_u8(),
ty: param(ccx, 0),
mutbl: ast::m_imm
}),
ty::mk_u64()
],
ty::mk_nil())
}
~"memmove32" => {
(0,
(1,
~[
ty::mk_ptr(tcx, ty::mt {
ty: ty::mk_u8(),
ty: param(ccx, 0),
mutbl: ast::m_mutbl
}),
ty::mk_ptr(tcx, ty::mt {
ty: ty::mk_u8(),
ty: param(ccx, 0),
mutbl: ast::m_imm
}),
ty::mk_u32()
],
ty::mk_nil())
}
~"memmove64" => {
(0,
(1,
~[
ty::mk_ptr(tcx, ty::mt {
ty: ty::mk_u8(),
ty: param(ccx, 0),
mutbl: ast::m_mutbl
}),
ty::mk_ptr(tcx, ty::mt {
ty: ty::mk_u8(),
ty: param(ccx, 0),
mutbl: ast::m_imm
}),
ty::mk_u64()
],
ty::mk_nil())
}
~"memset32" => {
(1,
~[
ty::mk_ptr(tcx, ty::mt {
ty: param(ccx, 0),
mutbl: ast::m_mutbl
}),
ty::mk_u8(),
ty::mk_u32()
],
ty::mk_nil())
}
~"memset64" => {
(1,
~[
ty::mk_ptr(tcx, ty::mt {
ty: param(ccx, 0),
mutbl: ast::m_mutbl
}),
ty::mk_u8(),
ty::mk_u64()
],
ty::mk_nil())
}
~"sqrtf32" => (0, ~[ ty::mk_f32() ], ty::mk_f32()),
~"sqrtf64" => (0, ~[ ty::mk_f64() ], ty::mk_f64()),
~"powif32" => {
Expand Down
24 changes: 23 additions & 1 deletion src/libstd/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ use sys;
use unstable::intrinsics;

/// Casts the value at `src` to U. The two types must have the same length.
#[cfg(stage0)]
pub unsafe fn transmute_copy<T, U>(src: &T) -> U {
let mut dest: U = intrinsics::init();
let mut dest: U = intrinsics::uninit();
{
let dest_ptr: *mut u8 = transmute(&mut dest);
let src_ptr: *u8 = transmute(src);
Expand All @@ -26,6 +27,26 @@ pub unsafe fn transmute_copy<T, U>(src: &T) -> U {
dest
}

/// Copies the bits of the value at `src` into a new value of type `U`.
/// This variant is selected on 32-bit targets when not building with stage0.
///
/// Exactly `sys::size_of::<U>()` bytes are copied, so the two types must have the same length.
#[cfg(target_word_size = "32", not(stage0))]
#[inline(always)]
pub unsafe fn transmute_copy<T, U>(src: &T) -> U {
// `dest` is written in full by the copy below before it is returned.
let mut dest: U = intrinsics::uninit();
// Both sides are viewed as byte pointers, so the count handed to the copy is a byte count.
let dest_ptr: *mut u8 = transmute(&mut dest);
let src_ptr: *u8 = transmute(src);
intrinsics::memcpy32(dest_ptr, src_ptr, sys::size_of::<U>() as u32);
dest
}

/// Copies the bits of the value at `src` into a new value of type `U`.
/// This variant is selected on 64-bit targets when not building with stage0.
///
/// Exactly `sys::size_of::<U>()` bytes are copied, so the two types must have the same length.
#[cfg(target_word_size = "64", not(stage0))]
#[inline(always)]
pub unsafe fn transmute_copy<T, U>(src: &T) -> U {
// `dest` is written in full by the copy below before it is returned.
let mut dest: U = intrinsics::uninit();
// Both sides are viewed as byte pointers, so the count handed to the copy is a byte count.
let dest_ptr: *mut u8 = transmute(&mut dest);
let src_ptr: *u8 = transmute(src);
intrinsics::memcpy64(dest_ptr, src_ptr, sys::size_of::<U>() as u64);
dest
}

/**
* Move a thing into the void
*
Expand All @@ -43,6 +64,7 @@ pub unsafe fn forget<T>(thing: T) { intrinsics::forget(thing); }
* and/or reinterpret_cast when such calls would otherwise scramble a box's
* reference count
*/
#[inline(always)]
pub unsafe fn bump_box_refcount<T>(t: @T) { forget(t); }

/**
Expand Down
Loading

0 comments on commit dbc5758

Please sign in to comment.