diff --git a/NEWS.md b/NEWS.md index 0d296d6b1b2bb..5d37f11030bc7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -215,6 +215,12 @@ Library improvements Compiler/Runtime improvements ----------------------------- +* `ccall` is now implemented as a macro, removing the need for special code-generator support for Intrinsics. + +* `ccall` gained limited support for an `llvmcall` calling convention, which can replace many uses of `llvmcall` with a simpler, shorter declaration. + +* All Intrinsics are now `Builtin` functions, with proper error checking and fall-back static-compilation support. + Deprecated or removed --------------------- diff --git a/base/Enums.jl b/base/Enums.jl index 0e1f37cd8f56f..14c9a1b9f3971 100644 --- a/base/Enums.jl +++ b/base/Enums.jl @@ -2,15 +2,15 @@ module Enums -import Core.Intrinsics.box +import Core.Intrinsics.bitcast export Enum, @enum function basetype end abstract Enum{T<:Integer} -Base.convert{T<:Integer}(::Type{Integer}, x::Enum{T}) = box(T, x) -Base.convert{T<:Integer,T2<:Integer}(::Type{T}, x::Enum{T2}) = convert(T, box(T2, x)) +Base.convert{T<:Integer}(::Type{Integer}, x::Enum{T}) = bitcast(T, x) +Base.convert{T<:Integer,T2<:Integer}(::Type{T}, x::Enum{T2}) = convert(T, bitcast(T2, x)) Base.write{T<:Integer}(io::IO, x::Enum{T}) = write(io, T(x)) Base.read{T<:Enum}(io::IO, ::Type{T}) = T(read(io, Enums.basetype(T))) @@ -106,7 +106,7 @@ macro enum(T,syms...) Base.@__doc__(bitstype $(sizeof(basetype) * 8) $(esc(typename)) <: Enum{$(basetype)}) function Base.convert(::Type{$(esc(typename))}, x::Integer) $(membershiptest(:x, values)) || enum_argument_error($(Expr(:quote, typename)), x) - box($(esc(typename)), convert($(basetype), x)) + return bitcast($(esc(typename)), convert($(basetype), x)) end Enums.basetype(::Type{$(esc(typename))}) = $(esc(basetype)) Base.typemin(x::Type{$(esc(typename))}) = $(esc(typename))($lo) diff --git a/base/base.jl b/base/base.jl index b349101bff15b..45d8de10d8281 100644 --- a/base/base.jl +++ b/base/base.jl @@ -145,8 +145,8 @@ end finalize(o::ANY) = ccall(:jl_finalize_th, Void, (Ptr{Void}, Any,), Core.getptls(), o) -gc(full::Bool=true) = ccall(:jl_gc_collect, Void, (Cint,), full) -gc_enable(on::Bool) = ccall(:jl_gc_enable, Cint, (Cint,), on)!=0 +gc(full::Bool=true) = ccall(:jl_gc_collect, Void, (Int32,), full) +gc_enable(on::Bool) = ccall(:jl_gc_enable, Int32, (Int32,), on) != 0 immutable Nullable{T} hasvalue::Bool diff --git a/base/bool.jl b/base/bool.jl index 2420eea845420..25c5a7838b208 100644 --- a/base/bool.jl +++ b/base/bool.jl @@ -34,12 +34,12 @@ julia> ![true false true] function !(x::Bool) ## We need a better heuristic to detect this automatically @_pure_meta - return box(Bool,not_int(unbox(Bool,x))) + return not_int(x) end (~)(x::Bool) = !x -(&)(x::Bool, y::Bool) = box(Bool,and_int(unbox(Bool,x),unbox(Bool,y))) -(|)(x::Bool, y::Bool) = box(Bool,or_int(unbox(Bool,x),unbox(Bool,y))) +(&)(x::Bool, y::Bool) = and_int(x, y) +(|)(x::Bool, y::Bool) = or_int(x, y) """ xor(x, y) @@ -58,7 +58,7 @@ julia> [true; true; false] ⊻ [true; false; false] false ``` """ -xor(x::Bool, y::Bool) = (x!=y) +xor(x::Bool, y::Bool) = (x != y) >>(x::Bool, c::Unsigned) = Int(x) >> c <<(x::Bool, c::Unsigned) = Int(x) << c diff --git a/base/boot.jl b/base/boot.jl index adf1adf383097..f39454059a985 100644 --- a/base/boot.jl +++ b/base/boot.jl @@ -116,8 +116,6 @@ # runnable::Bool # end -import Core.Intrinsics.ccall - export # key types Any, DataType, Vararg, ANY, NTuple, diff --git a/base/c.jl b/base/c.jl index 3d7eab2954469..6d5be6e2e6d36 100644 ---
a/base/c.jl +++ b/base/c.jl @@ -2,7 +2,7 @@ # definitions related to C interface -import Core.Intrinsics: cglobal, box +import Core.Intrinsics: cglobal, bitcast cfunction(f::Function, r, a) = ccall(:jl_function_ptr, Ptr{Void}, (Any, Any, Any), f, r, a) @@ -27,18 +27,21 @@ else typealias Culong UInt typealias Cwchar_t Int32 end + """ Clong Equivalent to the native `signed long` c-type. """ Clong + """ Culong Equivalent to the native `unsigned long` c-type. """ Culong + """ Cwchar_t @@ -58,13 +61,13 @@ if !is_windows() end # construction from typed pointers -convert{T<:Union{Int8,UInt8}}(::Type{Cstring}, p::Ptr{T}) = box(Cstring, p) -convert(::Type{Cwstring}, p::Ptr{Cwchar_t}) = box(Cwstring, p) -convert{T<:Union{Int8,UInt8}}(::Type{Ptr{T}}, p::Cstring) = box(Ptr{T}, p) -convert(::Type{Ptr{Cwchar_t}}, p::Cwstring) = box(Ptr{Cwchar_t}, p) +convert{T<:Union{Int8,UInt8}}(::Type{Cstring}, p::Ptr{T}) = bitcast(Cstring, p) +convert(::Type{Cwstring}, p::Ptr{Cwchar_t}) = bitcast(Cwstring, p) +convert{T<:Union{Int8,UInt8}}(::Type{Ptr{T}}, p::Cstring) = bitcast(Ptr{T}, p) +convert(::Type{Ptr{Cwchar_t}}, p::Cwstring) = bitcast(Ptr{Cwchar_t}, p) # construction from untyped pointers -convert{T<:Union{Cstring,Cwstring}}(::Type{T}, p::Ptr{Void}) = box(T, p) +convert{T<:Union{Cstring,Cwstring}}(::Type{T}, p::Ptr{Void}) = bitcast(T, p) pointer(p::Cstring) = convert(Ptr{UInt8}, p) pointer(p::Cwstring) = convert(Ptr{Cwchar_t}, p) diff --git a/base/checked.jl b/base/checked.jl index 3fbe17742603a..48f065878e3d1 100644 --- a/base/checked.jl +++ b/base/checked.jl @@ -8,7 +8,7 @@ export checked_neg, checked_abs, checked_add, checked_sub, checked_mul, checked_div, checked_rem, checked_fld, checked_mod, checked_cld, add_with_overflow, sub_with_overflow, mul_with_overflow -import Core.Intrinsics: box, unbox, +import Core.Intrinsics: checked_sadd_int, checked_ssub_int, checked_smul_int, checked_sdiv_int, checked_srem_int, checked_uadd_int, checked_usub_int, checked_umul_int, checked_udiv_int, diff --git a/base/ctypes.jl b/base/ctypes.jl index b96189f42afa9..037cfd74ad638 100644 --- a/base/ctypes.jl +++ b/base/ctypes.jl @@ -9,78 +9,104 @@ Equivalent to the native `unsigned char` c-type (`UInt8`). """ typealias Cuchar UInt8 + + """ Cshort Equivalent to the native `signed short` c-type (`Int16`). """ typealias Cshort Int16 + + """ Cushort Equivalent to the native `unsigned short` c-type (`UInt16`). """ typealias Cushort UInt16 + + """ Cint Equivalent to the native `signed int` c-type (`Int32`). """ typealias Cint Int32 + + """ Cuint Equivalent to the native `unsigned int` c-type (`UInt32`). """ typealias Cuint UInt32 + + """ Cptrdiff_t Equivalent to the native `ptrdiff_t` c-type (`Int`). """ typealias Cptrdiff_t Int + + """ Csize_t Equivalent to the native `size_t` c-type (`UInt`). """ typealias Csize_t UInt + + """ Cssize_t Equivalent to the native `ssize_t` c-type. """ typealias Cssize_t Int + + """ Cintmax_t Equivalent to the native `intmax_t` c-type (`Int64`). """ typealias Cintmax_t Int64 + + """ Cuintmax_t Equivalent to the native `uintmax_t` c-type (`UInt64`). """ typealias Cuintmax_t UInt64 + + """ Clonglong Equivalent to the native `signed long long` c-type (`Int64`). """ typealias Clonglong Int64 + + """ Culonglong Equivalent to the native `unsigned long long` c-type (`UInt64`). """ typealias Culonglong UInt64 + + """ Cfloat Equivalent to the native `float` c-type (`Float32`). 
""" typealias Cfloat Float32 + + """ Cdouble diff --git a/base/deprecated.jl b/base/deprecated.jl index a6d6b80cbdc2a..487fbfa131150 100644 --- a/base/deprecated.jl +++ b/base/deprecated.jl @@ -1758,4 +1758,9 @@ end) @deprecate(SharedArray{T}(filename::AbstractString, ::Type{T}, dims::NTuple, offset; kwargs...), SharedArray{T,length(dims)}(filename, dims, offset; kwargs...)) +@noinline function is_intrinsic_expr(x::ANY) + Base.depwarn("is_intrinsic_expr is deprecated. There are no intrinsic functions anymore.", :is_intrinsic_expr) + return false +end + # End deprecations scheduled for 0.6 diff --git a/base/essentials.jl b/base/essentials.jl index 3b6ca896e25d1..359b1f07fa55c 100644 --- a/base/essentials.jl +++ b/base/essentials.jl @@ -115,7 +115,7 @@ unsafe_convert{T}(::Type{T}, x::T) = x # unsafe_convert (like convert) defaults unsafe_convert{T<:Ptr}(::Type{T}, x::T) = x # to resolve ambiguity with the next method unsafe_convert{P<:Ptr}(::Type{P}, x::Ptr) = convert(P, x) -reinterpret{T}(::Type{T}, x) = box(T, x) +reinterpret{T}(::Type{T}, x) = bitcast(T, x) reinterpret(::Type{Unsigned}, x::Float16) = reinterpret(UInt16,x) reinterpret(::Type{Signed}, x::Float16) = reinterpret(Int16,x) @@ -148,11 +148,11 @@ setindex!(A::Array{Any}, x::ANY, i::Int) = Core.arrayset(A, x, i) map(f::Function, a::Array{Any,1}) = Any[ f(a[i]) for i=1:length(a) ] function precompile(f::ANY, args::Tuple) - ccall(:jl_compile_hint, Cint, (Any,), Tuple{Core.Typeof(f), args...}) != 0 + ccall(:jl_compile_hint, Int32, (Any,), Tuple{Core.Typeof(f), args...}) != 0 end function precompile(argt::Type) - ccall(:jl_compile_hint, Cint, (Any,), argt) != 0 + ccall(:jl_compile_hint, Int32, (Any,), argt) != 0 end """ diff --git a/base/fastmath.jl b/base/fastmath.jl index bce8637b7fa40..36d975506e48c 100644 --- a/base/fastmath.jl +++ b/base/fastmath.jl @@ -23,7 +23,9 @@ module FastMath export @fastmath -import Core.Intrinsics: box, unbox, powi_llvm, sqrt_llvm_fast +import Core.Intrinsics: powi_llvm, sqrt_llvm_fast, neg_float_fast, + add_float_fast, sub_float_fast, mul_float_fast, div_float_fast, rem_float_fast, + eq_float_fast, ne_float_fast, lt_float_fast, le_float_fast const fast_op = Dict(# basic arithmetic @@ -131,18 +133,13 @@ end FloatTypes = Union{Float32, Float64} -sub_fast{T<:FloatTypes}(x::T) = box(T,Base.neg_float_fast(unbox(T,x))) +sub_fast{T<:FloatTypes}(x::T) = neg_float_fast(x) -add_fast{T<:FloatTypes}(x::T, y::T) = - box(T,Base.add_float_fast(unbox(T,x), unbox(T,y))) -sub_fast{T<:FloatTypes}(x::T, y::T) = - box(T,Base.sub_float_fast(unbox(T,x), unbox(T,y))) -mul_fast{T<:FloatTypes}(x::T, y::T) = - box(T,Base.mul_float_fast(unbox(T,x), unbox(T,y))) -div_fast{T<:FloatTypes}(x::T, y::T) = - box(T,Base.div_float_fast(unbox(T,x), unbox(T,y))) -rem_fast{T<:FloatTypes}(x::T, y::T) = - box(T,Base.rem_float_fast(unbox(T,x), unbox(T,y))) +add_fast{T<:FloatTypes}(x::T, y::T) = add_float_fast(x, y) +sub_fast{T<:FloatTypes}(x::T, y::T) = sub_float_fast(x, y) +mul_fast{T<:FloatTypes}(x::T, y::T) = mul_float_fast(x, y) +div_fast{T<:FloatTypes}(x::T, y::T) = div_float_fast(x, y) +rem_fast{T<:FloatTypes}(x::T, y::T) = rem_float_fast(x, y) add_fast{T<:FloatTypes}(x::T, y::T, zs::T...) = add_fast(add_fast(x, y), zs...) @@ -157,14 +154,10 @@ mul_fast{T<:FloatTypes}(x::T, y::T, zs::T...) 
= end end -eq_fast{T<:FloatTypes}(x::T, y::T) = - Base.eq_float_fast(unbox(T,x),unbox(T,y)) -ne_fast{T<:FloatTypes}(x::T, y::T) = - Base.ne_float_fast(unbox(T,x),unbox(T,y)) -lt_fast{T<:FloatTypes}(x::T, y::T) = - Base.lt_float_fast(unbox(T,x),unbox(T,y)) -le_fast{T<:FloatTypes}(x::T, y::T) = - Base.le_float_fast(unbox(T,x),unbox(T,y)) +eq_fast{T<:FloatTypes}(x::T, y::T) = eq_float_fast(x, y) +ne_fast{T<:FloatTypes}(x::T, y::T) = ne_float_fast(x, y) +lt_fast{T<:FloatTypes}(x::T, y::T) = lt_float_fast(x, y) +le_fast{T<:FloatTypes}(x::T, y::T) = le_float_fast(x, y) isinf_fast(x) = false isfinite_fast(x) = true @@ -251,12 +244,11 @@ end # builtins pow_fast{T<:FloatTypes}(x::T, y::Integer) = pow_fast(x, Int32(y)) -pow_fast{T<:FloatTypes}(x::T, y::Int32) = - box(T, Base.powi_llvm(unbox(T,x), unbox(Int32,y))) +pow_fast{T<:FloatTypes}(x::T, y::Int32) = Base.powi_llvm(x, y) # TODO: Change sqrt_llvm intrinsic to avoid nan checking; add nan # checking to sqrt in math.jl; remove sqrt_llvm_fast intrinsic -sqrt_fast{T<:FloatTypes}(x::T) = box(T, Base.sqrt_llvm_fast(unbox(T,x))) +sqrt_fast{T<:FloatTypes}(x::T) = sqrt_llvm_fast(x) # libm diff --git a/base/float.jl b/base/float.jl index c15b614fc0cc6..bbfe7ee333807 100644 --- a/base/float.jl +++ b/base/float.jl @@ -7,27 +7,27 @@ Positive infinity of type `Float16`. """ -const Inf16 = box(Float16,unbox(UInt16,0x7c00)) +const Inf16 = bitcast(Float16, 0x7c00) """ NaN16 A not-a-number value of type `Float16`. """ -const NaN16 = box(Float16,unbox(UInt16,0x7e00)) +const NaN16 = bitcast(Float16, 0x7e00) """ Inf32 Positive infinity of type `Float32`. """ -const Inf32 = box(Float32,unbox(UInt32,0x7f800000)) +const Inf32 = bitcast(Float32, 0x7f800000) """ NaN32 A not-a-number value of type `Float32`. """ -const NaN32 = box(Float32,unbox(UInt32,0x7fc00000)) -const Inf64 = box(Float64,unbox(UInt64,0x7ff0000000000000)) -const NaN64 = box(Float64,unbox(UInt64,0x7ff8000000000000)) +const NaN32 = bitcast(Float32, 0x7fc00000) +const Inf64 = bitcast(Float64, 0x7ff0000000000000) +const NaN64 = bitcast(Float64, 0x7ff8000000000000) """ Inf @@ -43,23 +43,23 @@ A not-a-number value of type `Float64`. 
const NaN = NaN64 ## conversions to floating-point ## -convert(::Type{Float16}, x::Integer) = convert(Float16, convert(Float32,x)) -for t in (Int8,Int16,Int32,Int64,Int128,UInt8,UInt16,UInt32,UInt64,UInt128) +convert(::Type{Float16}, x::Integer) = convert(Float16, convert(Float32, x)) +for t in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128) @eval promote_rule(::Type{Float16}, ::Type{$t}) = Float16 end promote_rule(::Type{Float16}, ::Type{Bool}) = Float16 -for t1 in (Float32,Float64) - for st in (Int8,Int16,Int32,Int64) +for t1 in (Float32, Float64) + for st in (Int8, Int16, Int32, Int64) @eval begin - convert(::Type{$t1},x::($st)) = box($t1,sitofp($t1,unbox($st,x))) - promote_rule(::Type{$t1}, ::Type{$st} ) = $t1 + convert(::Type{$t1}, x::($st)) = sitofp($t1, x) + promote_rule(::Type{$t1}, ::Type{$st}) = $t1 end end - for ut in (Bool,UInt8,UInt16,UInt32,UInt64) + for ut in (Bool, UInt8, UInt16, UInt32, UInt64) @eval begin - convert(::Type{$t1},x::($ut)) = box($t1,uitofp($t1,unbox($ut,x))) - promote_rule(::Type{$t1}, ::Type{$ut} ) = $t1 + convert(::Type{$t1}, x::($ut)) = uitofp($t1, x) + promote_rule(::Type{$t1}, ::Type{$ut}) = $t1 end end end @@ -229,13 +229,13 @@ for i = 0:255 shifttable[i|0x100+1] = 13 end end -#convert(::Type{Float16}, x::Float32) = box(Float16,fptrunc(Float16,x)) -convert(::Type{Float32}, x::Float64) = box(Float32,fptrunc(Float32,unbox(Float64,x))) -convert(::Type{Float16}, x::Float64) = convert(Float16, convert(Float32,x)) +#convert(::Type{Float16}, x::Float32) = fptrunc(Float16, x) +convert(::Type{Float32}, x::Float64) = fptrunc(Float32, x) +convert(::Type{Float16}, x::Float64) = convert(Float16, convert(Float32, x)) -#convert(::Type{Float32}, x::Float16) = box(Float32,fpext(Float32,x)) -convert(::Type{Float64}, x::Float32) = box(Float64,fpext(Float64,unbox(Float32,x))) -convert(::Type{Float64}, x::Float16) = convert(Float64, convert(Float32,x)) +#convert(::Type{Float32}, x::Float16) = fpext(Float32, x) +convert(::Type{Float64}, x::Float32) = fpext(Float64, x) +convert(::Type{Float64}, x::Float16) = convert(Float64, convert(Float32, x)) convert(::Type{AbstractFloat}, x::Bool) = convert(Float64, x) convert(::Type{AbstractFloat}, x::Int8) = convert(Float64, x) @@ -275,14 +275,14 @@ float{T<:Number}(::Type{T}) = typeof(float(zero(T))) for Ti in (Int8, Int16, Int32, Int64) @eval begin - unsafe_trunc(::Type{$Ti}, x::Float32) = box($Ti,fptosi($Ti,unbox(Float32,x))) - unsafe_trunc(::Type{$Ti}, x::Float64) = box($Ti,fptosi($Ti,unbox(Float64,x))) + unsafe_trunc(::Type{$Ti}, x::Float32) = fptosi($Ti, x) + unsafe_trunc(::Type{$Ti}, x::Float64) = fptosi($Ti, x) end end for Ti in (UInt8, UInt16, UInt32, UInt64) @eval begin - unsafe_trunc(::Type{$Ti}, x::Float32) = box($Ti,fptoui($Ti,unbox(Float32,x))) - unsafe_trunc(::Type{$Ti}, x::Float64) = box($Ti,fptoui($Ti,unbox(Float64,x))) + unsafe_trunc(::Type{$Ti}, x::Float32) = fptoui($Ti, x) + unsafe_trunc(::Type{$Ti}, x::Float64) = fptoui($Ti, x) end end @@ -333,20 +333,20 @@ ceil{ T<:Integer}(::Type{T}, x::Float16) = ceil(T, Float32(x)) round{T<:Integer}(::Type{T}, x::AbstractFloat) = trunc(T,round(x)) round{T<:Integer}(::Type{T}, x::Float16) = round(T, Float32(x)) -trunc(x::Float64) = box(Float64,trunc_llvm(unbox(Float64,x))) -trunc(x::Float32) = box(Float32,trunc_llvm(unbox(Float32,x))) +trunc(x::Float64) = trunc_llvm(x) +trunc(x::Float32) = trunc_llvm(x) trunc(x::Float16) = Float16(trunc(Float32(x))) -floor(x::Float64) = box(Float64,floor_llvm(unbox(Float64,x))) -floor(x::Float32) = 
box(Float32,floor_llvm(unbox(Float32,x))) +floor(x::Float64) = floor_llvm(x) +floor(x::Float32) = floor_llvm(x) floor(x::Float16) = Float16(floor(Float32(x))) -ceil(x::Float64) = box(Float64,ceil_llvm(unbox(Float64,x))) -ceil(x::Float32) = box(Float32,ceil_llvm(unbox(Float32,x))) +ceil(x::Float64) = ceil_llvm(x) +ceil(x::Float32) = ceil_llvm(x) ceil(x::Float16) = Float16( ceil(Float32(x))) -round(x::Float64) = box(Float64,rint_llvm(unbox(Float64,x))) -round(x::Float32) = box(Float32,rint_llvm(unbox(Float32,x))) +round(x::Float64) = rint_llvm(x) +round(x::Float32) = rint_llvm(x) round(x::Float16) = Float16(round(Float32(x))) ## floating point promotions ## @@ -360,24 +360,24 @@ widen(::Type{Float32}) = Float64 _default_type(T::Union{Type{Real},Type{AbstractFloat}}) = Float64 ## floating point arithmetic ## --(x::Float64) = box(Float64,neg_float(unbox(Float64,x))) --(x::Float32) = box(Float32,neg_float(unbox(Float32,x))) --(x::Float16) = reinterpret(Float16, reinterpret(UInt16,x) ⊻ 0x8000) +-(x::Float64) = neg_float(x) +-(x::Float32) = neg_float(x) +-(x::Float16) = reinterpret(Float16, reinterpret(UInt16, x) ⊻ 0x8000) -for op in (:+,:-,:*,:/,:\,:^) +for op in (:+, :-, :*, :/, :\, :^) @eval ($op)(a::Float16, b::Float16) = Float16(($op)(Float32(a), Float32(b))) end -+(x::Float32, y::Float32) = box(Float32,add_float(unbox(Float32,x),unbox(Float32,y))) -+(x::Float64, y::Float64) = box(Float64,add_float(unbox(Float64,x),unbox(Float64,y))) --(x::Float32, y::Float32) = box(Float32,sub_float(unbox(Float32,x),unbox(Float32,y))) --(x::Float64, y::Float64) = box(Float64,sub_float(unbox(Float64,x),unbox(Float64,y))) -*(x::Float32, y::Float32) = box(Float32,mul_float(unbox(Float32,x),unbox(Float32,y))) -*(x::Float64, y::Float64) = box(Float64,mul_float(unbox(Float64,x),unbox(Float64,y))) -/(x::Float32, y::Float32) = box(Float32,div_float(unbox(Float32,x),unbox(Float32,y))) -/(x::Float64, y::Float64) = box(Float64,div_float(unbox(Float64,x),unbox(Float64,y))) - -muladd(x::Float32, y::Float32, z::Float32) = box(Float32,muladd_float(unbox(Float32,x),unbox(Float32,y),unbox(Float32,z))) -muladd(x::Float64, y::Float64, z::Float64) = box(Float64,muladd_float(unbox(Float64,x),unbox(Float64,y),unbox(Float64,z))) ++(x::Float32, y::Float32) = add_float(x, y) ++(x::Float64, y::Float64) = add_float(x, y) +-(x::Float32, y::Float32) = sub_float(x, y) +-(x::Float64, y::Float64) = sub_float(x, y) +*(x::Float32, y::Float32) = mul_float(x, y) +*(x::Float64, y::Float64) = mul_float(x, y) +/(x::Float32, y::Float32) = div_float(x, y) +/(x::Float64, y::Float64) = div_float(x, y) + +muladd(x::Float32, y::Float32, z::Float32) = muladd_float(x, y, z) +muladd(x::Float64, y::Float64, z::Float64) = muladd_float(x, y, z) function muladd(a::Float16, b::Float16, c::Float16) Float16(muladd(Float32(a), Float32(b), Float32(c))) end @@ -392,8 +392,8 @@ for func in (:div,:fld,:cld,:rem,:mod) end end -rem(x::Float32, y::Float32) = box(Float32,rem_float(unbox(Float32,x),unbox(Float32,y))) -rem(x::Float64, y::Float64) = box(Float64,rem_float(unbox(Float64,x),unbox(Float64,y))) +rem(x::Float32, y::Float32) = rem_float(x, y) +rem(x::Float64, y::Float64) = rem_float(x, y) cld{T<:AbstractFloat}(x::T, y::T) = -fld(-x,y) @@ -420,20 +420,20 @@ function ==(x::Float16, y::Float16) end return ix == iy end -==(x::Float32, y::Float32) = eq_float(unbox(Float32,x),unbox(Float32,y)) -==(x::Float64, y::Float64) = eq_float(unbox(Float64,x),unbox(Float64,y)) -!=(x::Float32, y::Float32) = ne_float(unbox(Float32,x),unbox(Float32,y)) -!=(x::Float64, y::Float64) = 
ne_float(unbox(Float64,x),unbox(Float64,y)) -<( x::Float32, y::Float32) = lt_float(unbox(Float32,x),unbox(Float32,y)) -<( x::Float64, y::Float64) = lt_float(unbox(Float64,x),unbox(Float64,y)) -<=(x::Float32, y::Float32) = le_float(unbox(Float32,x),unbox(Float32,y)) -<=(x::Float64, y::Float64) = le_float(unbox(Float64,x),unbox(Float64,y)) - -isequal(x::Float32, y::Float32) = fpiseq(unbox(Float32,x),unbox(Float32,y)) -isequal(x::Float64, y::Float64) = fpiseq(unbox(Float64,x),unbox(Float64,y)) -isless( x::Float32, y::Float32) = fpislt(unbox(Float32,x),unbox(Float32,y)) -isless( x::Float64, y::Float64) = fpislt(unbox(Float64,x),unbox(Float64,y)) -for op in (:<,:<=,:isless) +==(x::Float32, y::Float32) = eq_float(x, y) +==(x::Float64, y::Float64) = eq_float(x, y) +!=(x::Float32, y::Float32) = ne_float(x, y) +!=(x::Float64, y::Float64) = ne_float(x, y) +<( x::Float32, y::Float32) = lt_float(x, y) +<( x::Float64, y::Float64) = lt_float(x, y) +<=(x::Float32, y::Float32) = le_float(x, y) +<=(x::Float64, y::Float64) = le_float(x, y) + +isequal(x::Float32, y::Float32) = fpiseq(x, y) +isequal(x::Float64, y::Float64) = fpiseq(x, y) +isless( x::Float32, y::Float32) = fpislt(x, y) +isless( x::Float64, y::Float64) = fpislt(x, y) +for op in (:<, :<=, :isless) @eval ($op)(a::Float16, b::Float16) = ($op)(Float32(a), Float32(b)) end @@ -505,9 +505,9 @@ end <=(x::Union{Int32,UInt32}, y::Float32) = Float64(x)<=Float64(y) -abs(x::Float16) = reinterpret(Float16, reinterpret(UInt16,x) & 0x7fff) -abs(x::Float32) = box(Float32,abs_float(unbox(Float32,x))) -abs(x::Float64) = box(Float64,abs_float(unbox(Float64,x))) +abs(x::Float16) = reinterpret(Float16, reinterpret(UInt16, x) & 0x7fff) +abs(x::Float32) = abs_float(x) +abs(x::Float64) = abs_float(x) """ isnan(f) -> Bool @@ -549,8 +549,8 @@ hx(a::UInt64, b::Float64, h::UInt) = hash_uint64((3a + reinterpret(UInt64,b)) - const hx_NaN = hx(UInt64(0), NaN, UInt(0 )) hash(x::UInt64, h::UInt) = hx(x, Float64(x), h) -hash(x::Int64, h::UInt) = hx(reinterpret(UInt64,abs(x)), Float64(x), h) -hash(x::Float64, h::UInt) = isnan(x) ? (hx_NaN ⊻ h) : hx(box(UInt64,fptoui(unbox(Float64,abs(x)))), x, h) +hash(x::Int64, h::UInt) = hx(reinterpret(UInt64, abs(x)), Float64(x), h) +hash(x::Float64, h::UInt) = isnan(x) ? 
(hx_NaN ⊻ h) : hx(fptoui(UInt64, abs(x)), x, h) hash(x::Union{Bool,Int8,UInt8,Int16,UInt16,Int32,UInt32}, h::UInt) = hash(Int64(x), h) hash(x::Float32, h::UInt) = hash(Float64(x), h) @@ -681,10 +681,10 @@ for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UIn end @eval begin - issubnormal(x::Float32) = (abs(x) < $(box(Float32,unbox(UInt32,0x00800000)))) & (x!=0) - issubnormal(x::Float64) = (abs(x) < $(box(Float64,unbox(UInt64,0x0010000000000000)))) & (x!=0) + issubnormal(x::Float32) = (abs(x) < $(bitcast(Float32, 0x00800000))) & (x!=0) + issubnormal(x::Float64) = (abs(x) < $(bitcast(Float64, 0x0010000000000000))) & (x!=0) - typemin(::Type{Float16}) = $(box(Float16,unbox(UInt16,0xfc00))) + typemin(::Type{Float16}) = $(bitcast(Float16, 0xfc00)) typemax(::Type{Float16}) = $(Inf16) typemin(::Type{Float32}) = $(-Inf32) typemax(::Type{Float32}) = $(Inf32) @@ -693,33 +693,33 @@ end typemin{T<:Real}(x::T) = typemin(T) typemax{T<:Real}(x::T) = typemax(T) - realmin(::Type{Float16}) = $(box(Float16,unbox(UInt16,0x0400))) - realmin(::Type{Float32}) = $(box(Float32,unbox(UInt32,0x00800000))) - realmin(::Type{Float64}) = $(box(Float64,unbox(UInt64,0x0010000000000000))) - realmax(::Type{Float16}) = $(box(Float16,unbox(UInt16,0x7bff))) - realmax(::Type{Float32}) = $(box(Float32,unbox(UInt32,0x7f7fffff))) - realmax(::Type{Float64}) = $(box(Float64,unbox(UInt64,0x7fefffffffffffff))) + realmin(::Type{Float16}) = $(bitcast(Float16, 0x0400)) + realmin(::Type{Float32}) = $(bitcast(Float32, 0x00800000)) + realmin(::Type{Float64}) = $(bitcast(Float64, 0x0010000000000000)) + realmax(::Type{Float16}) = $(bitcast(Float16, 0x7bff)) + realmax(::Type{Float32}) = $(bitcast(Float32, 0x7f7fffff)) + realmax(::Type{Float64}) = $(bitcast(Float64, 0x7fefffffffffffff)) realmin{T<:AbstractFloat}(x::T) = realmin(T) realmax{T<:AbstractFloat}(x::T) = realmax(T) realmin() = realmin(Float64) realmax() = realmax(Float64) - eps(x::AbstractFloat) = isfinite(x) ? abs(x) >= realmin(x) ? ldexp(eps(typeof(x)),exponent(x)) : nextfloat(zero(x)) : oftype(x,NaN) - eps(::Type{Float16}) = $(box(Float16,unbox(UInt16,0x1400))) - eps(::Type{Float32}) = $(box(Float32,unbox(UInt32,0x34000000))) - eps(::Type{Float64}) = $(box(Float64,unbox(UInt64,0x3cb0000000000000))) + eps(x::AbstractFloat) = isfinite(x) ? abs(x) >= realmin(x) ? 
ldexp(eps(typeof(x)), exponent(x)) : nextfloat(zero(x)) : oftype(x, NaN) + eps(::Type{Float16}) = $(bitcast(Float16, 0x1400)) + eps(::Type{Float32}) = $(bitcast(Float32, 0x34000000)) + eps(::Type{Float64}) = $(bitcast(Float64, 0x3cb0000000000000)) eps() = eps(Float64) end ## byte order swaps for arbitrary-endianness serialization/deserialization ## -bswap(x::Float32) = box(Float32,bswap_int(unbox(Float32,x))) -bswap(x::Float64) = box(Float64,bswap_int(unbox(Float64,x))) +bswap(x::Float32) = bswap_int(x) +bswap(x::Float64) = bswap_int(x) # bit patterns -reinterpret(::Type{Unsigned}, x::Float64) = reinterpret(UInt64,x) -reinterpret(::Type{Unsigned}, x::Float32) = reinterpret(UInt32,x) -reinterpret(::Type{Signed}, x::Float64) = reinterpret(Int64,x) -reinterpret(::Type{Signed}, x::Float32) = reinterpret(Int32,x) +reinterpret(::Type{Unsigned}, x::Float64) = reinterpret(UInt64, x) +reinterpret(::Type{Unsigned}, x::Float32) = reinterpret(UInt32, x) +reinterpret(::Type{Signed}, x::Float64) = reinterpret(Int64, x) +reinterpret(::Type{Signed}, x::Float32) = reinterpret(Int32, x) sign_mask(::Type{Float64}) = 0x8000_0000_0000_0000 exponent_mask(::Type{Float64}) = 0x7ff0_0000_0000_0000 diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl index adb7bb20d1025..0bf968ea86668 100644 --- a/base/floatfuncs.jl +++ b/base/floatfuncs.jl @@ -2,19 +2,19 @@ ## floating-point functions ## -copysign(x::Float64, y::Float64) = box(Float64,copysign_float(unbox(Float64,x),unbox(Float64,y))) -copysign(x::Float32, y::Float32) = box(Float32,copysign_float(unbox(Float32,x),unbox(Float32,y))) +copysign(x::Float64, y::Float64) = copysign_float(x, y) +copysign(x::Float32, y::Float32) = copysign_float(x, y) copysign(x::Float32, y::Real) = copysign(x, Float32(y)) copysign(x::Float64, y::Real) = copysign(x, Float64(y)) -flipsign(x::Float64, y::Float64) = box(Float64,xor_int(unbox(Float64,x),and_int(unbox(Float64,y),0x8000000000000000))) -flipsign(x::Float32, y::Float32) = box(Float32,xor_int(unbox(Float32,x),and_int(unbox(Float32,y),0x80000000))) +flipsign(x::Float64, y::Float64) = bitcast(Float64, xor_int(bitcast(UInt64, x), and_int(bitcast(UInt64, y), 0x8000000000000000))) +flipsign(x::Float32, y::Float32) = bitcast(Float32, xor_int(bitcast(UInt32, x), and_int(bitcast(UInt32, y), 0x80000000))) flipsign(x::Float32, y::Real) = flipsign(x, Float32(y)) flipsign(x::Float64, y::Real) = flipsign(x, Float64(y)) -signbit(x::Float64) = signbit(reinterpret(Int64,x)) -signbit(x::Float32) = signbit(reinterpret(Int32,x)) -signbit(x::Float16) = signbit(reinterpret(Int16,x)) +signbit(x::Float64) = signbit(bitcast(Int64, x)) +signbit(x::Float32) = signbit(bitcast(Int32, x)) +signbit(x::Float16) = signbit(bitcast(Int16, x)) maxintfloat(::Type{Float64}) = 9007199254740992. maxintfloat(::Type{Float32}) = Float32(16777216.) 
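To make the rewrite pattern in the hunks above concrete, here is a minimal standalone sketch (the name `my_flipsign` is purely illustrative and not part of this patch): intrinsics now accept and return typed Julia values, so the old `box`/`unbox` wrappers disappear entirely, and an explicit `bitcast` remains only where the bit pattern genuinely changes type, as in the `Float64`/`UInt64` juggling done by `flipsign`.

    import Core.Intrinsics: bitcast, and_int, xor_int

    # Flip the sign bit of x whenever y is negative, mirroring the new
    # flipsign definition in this patch: no box/unbox, only type-changing bitcasts.
    my_flipsign(x::Float64, y::Float64) =
        bitcast(Float64, xor_int(bitcast(UInt64, x), and_int(bitcast(UInt64, y), 0x8000000000000000)))
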
@@ -22,20 +22,20 @@ maxintfloat(::Type{Float16}) = Float16(2048f0) maxintfloat{T<:AbstractFloat}(x::T) = maxintfloat(T) maxintfloat() = maxintfloat(Float64) -isinteger(x::AbstractFloat) = x-trunc(x) == 0 +isinteger(x::AbstractFloat) = (x - trunc(x) == 0) -num2hex(x::Float16) = hex(reinterpret(UInt16,x), 4) -num2hex(x::Float32) = hex(box(UInt32,unbox(Float32,x)),8) -num2hex(x::Float64) = hex(box(UInt64,unbox(Float64,x)),16) +num2hex(x::Float16) = hex(bitcast(UInt16, x), 4) +num2hex(x::Float32) = hex(bitcast(UInt32, x), 8) +num2hex(x::Float64) = hex(bitcast(UInt64, x), 16) function hex2num(s::AbstractString) if length(s) <= 4 - return box(Float16,unbox(UInt16,parse(UInt16,s,16))) + return bitcast(Float16, parse(UInt16, s, 16)) end if length(s) <= 8 - return box(Float32,unbox(UInt32,parse(UInt32,s,16))) + return bitcast(Float32, parse(UInt32, s, 16)) end - return box(Float64,unbox(UInt64,parse(UInt64,s,16))) + return bitcast(Float64, parse(UInt64, s, 16)) end """ @@ -206,10 +206,8 @@ fma_libm(x::Float32, y::Float32, z::Float32) = ccall(("fmaf", libm_name), Float32, (Float32,Float32,Float32), x, y, z) fma_libm(x::Float64, y::Float64, z::Float64) = ccall(("fma", libm_name), Float64, (Float64,Float64,Float64), x, y, z) -fma_llvm(x::Float32, y::Float32, z::Float32) = - box(Float32,fma_float(unbox(Float32,x),unbox(Float32,y),unbox(Float32,z))) -fma_llvm(x::Float64, y::Float64, z::Float64) = - box(Float64,fma_float(unbox(Float64,x),unbox(Float64,y),unbox(Float64,z))) +fma_llvm(x::Float32, y::Float32, z::Float32) = fma_float(x, y, z) +fma_llvm(x::Float64, y::Float64, z::Float64) = fma_float(x, y, z) # Disable LLVM's fma if it is incorrect, e.g. because LLVM falls back # onto a broken system libm; if so, use openlibm's fma instead # 1.0000305f0 = 1 + 1/2^15 diff --git a/base/inference.jl b/base/inference.jl index f76dfb1e973ae..aaf9fdbc60c70 100644 --- a/base/inference.jl +++ b/base/inference.jl @@ -307,10 +307,8 @@ tupletype_tail(t::ANY, n) = Tuple{t.parameters[n:end]...} #### type-functions for builtins / intrinsics #### -cmp_tfunc = (x::ANY, y::ANY) -> Bool - const _Type_name = Type.body.name -isType(t::ANY) = isa(t,DataType) && (t::DataType).name === _Type_name +isType(t::ANY) = isa(t, DataType) && (t::DataType).name === _Type_name # true if Type is inlineable as constant (is a singleton) isconstType(t::ANY) = isType(t) && (isleaftype(t.parameters[1]) || t.parameters[1] === Union{}) @@ -327,11 +325,15 @@ function add_tfunc(f::Function, minarg::Int, maxarg::Int, tfunc::ANY) push!(t_ffunc_key, f) push!(t_ffunc_val, (minarg, maxarg, tfunc)) end -add_tfunc(throw, 1, 1, x->Bottom) + +add_tfunc(throw, 1, 1, (x::ANY) -> Bottom) + # the inverse of typeof_tfunc function instanceof_tfunc(t::ANY) # TODO improve - if isa(t, Const) + if t === Bottom + return t + elseif isa(t, Const) if isa(t.val, Type) return t.val end @@ -340,7 +342,95 @@ function instanceof_tfunc(t::ANY) end return Any end -add_tfunc(box, 2, 2, (t,v)->instanceof_tfunc(t)) +bitcast_tfunc(t::ANY, x::ANY) = instanceof_tfunc(t) +math_tfunc(x::ANY) = widenconst(x) +math_tfunc(x::ANY, y::ANY) = widenconst(x) +math_tfunc(x::ANY, y::ANY, z::ANY) = widenconst(x) +fptoui_tfunc(t::ANY, x::ANY) = bitcast_tfunc(t, x) +fptosi_tfunc(t::ANY, x::ANY) = bitcast_tfunc(t, x) +function fptoui_tfunc(x::ANY) + T = widenconst(x) + T === Float64 && return UInt64 + T === Float32 && return UInt32 + T === Float16 && return UInt16 + return Any +end +function fptosi_tfunc(x::ANY) + T = widenconst(x) + T === Float64 && return Int64 + T === Float32 && return Int32 + T === 
Float16 && return Int16 + return Any +end + + ## conversion ## +add_tfunc(bitcast, 2, 2, bitcast_tfunc) +add_tfunc(sext_int, 2, 2, bitcast_tfunc) +add_tfunc(zext_int, 2, 2, bitcast_tfunc) +add_tfunc(trunc_int, 2, 2, bitcast_tfunc) +add_tfunc(fptoui, 1, 2, fptoui_tfunc) +add_tfunc(fptosi, 1, 2, fptosi_tfunc) +add_tfunc(uitofp, 2, 2, bitcast_tfunc) +add_tfunc(sitofp, 2, 2, bitcast_tfunc) +add_tfunc(fptrunc, 2, 2, bitcast_tfunc) +add_tfunc(fpext, 2, 2, bitcast_tfunc) + ## checked conversion ## +add_tfunc(checked_trunc_sint, 2, 2, bitcast_tfunc) +add_tfunc(checked_trunc_uint, 2, 2, bitcast_tfunc) +add_tfunc(check_top_bit, 1, 1, math_tfunc) + ## arithmetic ## +add_tfunc(neg_int, 1, 1, math_tfunc) +add_tfunc(add_int, 2, 2, math_tfunc) +add_tfunc(sub_int, 2, 2, math_tfunc) +add_tfunc(mul_int, 2, 2, math_tfunc) +add_tfunc(sdiv_int, 2, 2, math_tfunc) +add_tfunc(udiv_int, 2, 2, math_tfunc) +add_tfunc(srem_int, 2, 2, math_tfunc) +add_tfunc(urem_int, 2, 2, math_tfunc) +add_tfunc(neg_float, 1, 1, math_tfunc) +add_tfunc(add_float, 2, 2, math_tfunc) +add_tfunc(sub_float, 2, 2, math_tfunc) +add_tfunc(mul_float, 2, 2, math_tfunc) +add_tfunc(div_float, 2, 2, math_tfunc) +add_tfunc(rem_float, 2, 2, math_tfunc) +add_tfunc(fma_float, 3, 3, math_tfunc) +add_tfunc(muladd_float, 3, 3, math_tfunc) + ## fast arithmetic ## +add_tfunc(neg_float_fast, 1, 1, math_tfunc) +add_tfunc(add_float_fast, 2, 2, math_tfunc) +add_tfunc(sub_float_fast, 2, 2, math_tfunc) +add_tfunc(mul_float_fast, 2, 2, math_tfunc) +add_tfunc(div_float_fast, 2, 2, math_tfunc) +add_tfunc(rem_float_fast, 2, 2, math_tfunc) + ## bitwise operators ## +add_tfunc(and_int, 2, 2, math_tfunc) +add_tfunc(or_int, 2, 2, math_tfunc) +add_tfunc(xor_int, 2, 2, math_tfunc) +add_tfunc(not_int, 1, 1, math_tfunc) +add_tfunc(shl_int, 2, 2, math_tfunc) +add_tfunc(lshr_int, 2, 2, math_tfunc) +add_tfunc(ashr_int, 2, 2, math_tfunc) +add_tfunc(bswap_int, 1, 1, math_tfunc) +add_tfunc(ctpop_int, 1, 1, math_tfunc) +add_tfunc(ctlz_int, 1, 1, math_tfunc) +add_tfunc(cttz_int, 1, 1, math_tfunc) +add_tfunc(checked_sdiv_int, 2, 2, math_tfunc) +add_tfunc(checked_udiv_int, 2, 2, math_tfunc) +add_tfunc(checked_srem_int, 2, 2, math_tfunc) +add_tfunc(checked_urem_int, 2, 2, math_tfunc) + ## functions ## +add_tfunc(abs_float, 1, 1, math_tfunc) +add_tfunc(copysign_float, 2, 2, math_tfunc) +add_tfunc(flipsign_int, 2, 2, math_tfunc) +add_tfunc(ceil_llvm, 1, 1, math_tfunc) +add_tfunc(floor_llvm, 1, 1, math_tfunc) +add_tfunc(trunc_llvm, 1, 1, math_tfunc) +add_tfunc(rint_llvm, 1, 1, math_tfunc) +add_tfunc(sqrt_llvm, 1, 1, math_tfunc) +add_tfunc(powi_llvm, 2, 2, math_tfunc) +add_tfunc(sqrt_llvm_fast, 1, 1, math_tfunc) + ## same-type comparisons ## +cmp_tfunc(x::ANY, y::ANY) = Bool add_tfunc(eq_int, 2, 2, cmp_tfunc) add_tfunc(ne_int, 2, 2, cmp_tfunc) add_tfunc(slt_int, 2, 2, cmp_tfunc) @@ -358,27 +448,15 @@ add_tfunc(ne_float_fast, 2, 2, cmp_tfunc) add_tfunc(lt_float_fast, 2, 2, cmp_tfunc) add_tfunc(le_float_fast, 2, 2, cmp_tfunc) -chk_tfunc = (x,y) -> Tuple{widenconst(x),Bool} + ## checked arithmetic ## +chk_tfunc(x::ANY, y::ANY) = Tuple{widenconst(x), Bool} add_tfunc(checked_sadd_int, 2, 2, chk_tfunc) add_tfunc(checked_uadd_int, 2, 2, chk_tfunc) add_tfunc(checked_ssub_int, 2, 2, chk_tfunc) add_tfunc(checked_usub_int, 2, 2, chk_tfunc) add_tfunc(checked_smul_int, 2, 2, chk_tfunc) add_tfunc(checked_umul_int, 2, 2, chk_tfunc) - -const _Ref_name = Ref.body.name -add_tfunc(Core.Intrinsics.ccall, 3, IInf, - function(fptr::ANY, rt::ANY, at::ANY, a...) 
- t = instanceof_tfunc(rt) - if isa(t, DataType) && (t::DataType).name === _Ref_name - t = t.parameters[1] - if t === Any - return Union{} # a return type of Box{Any} is invalid - end - return t - end - return t - end) + ## other, misc intrinsics ## add_tfunc(Core.Intrinsics.llvmcall, 3, IInf, (fptr::ANY, rt::ANY, at::ANY, a...) -> instanceof_tfunc(rt)) cglobal_tfunc(fptr::ANY) = Ptr{Void} @@ -1407,20 +1485,22 @@ function abstract_eval_call(e::Expr, vtypes::VarTable, sv::InferenceState) return abstract_call(f, e.args, argtypes, vtypes, sv) end +const _Ref_name = Ref.body.name + function abstract_eval(e::ANY, vtypes::VarTable, sv::InferenceState) - if isa(e,QuoteNode) + if isa(e, QuoteNode) return abstract_eval_constant((e::QuoteNode).value) - elseif isa(e,SSAValue) + elseif isa(e, SSAValue) return abstract_eval_ssavalue(e::SSAValue, sv.src) - elseif isa(e,Slot) + elseif isa(e, Slot) return vtypes[slot_id(e)].typ - elseif isa(e,Symbol) + elseif isa(e, Symbol) return abstract_eval_global(sv.mod, e) elseif isa(e,GlobalRef) return abstract_eval_global(e.mod, e.name) end - if !isa(e,Expr) + if !isa(e, Expr) return abstract_eval_constant(e) end e = e::Expr @@ -1431,17 +1511,47 @@ function abstract_eval(e::ANY, vtypes::VarTable, sv::InferenceState) elseif e.head === :new t = instanceof_tfunc(abstract_eval(e.args[1], vtypes, sv)) for i = 2:length(e.args) - abstract_eval(e.args[i], vtypes, sv) + if abstract_eval(e.args[i], vtypes, sv) === Bottom + rt = Bottom + end end elseif e.head === :& abstract_eval(e.args[1], vtypes, sv) t = Any + elseif e.head === :foreigncall + rt = e.args[2] + if isdefined(sv.linfo, :def) + spsig = sv.linfo.def.sig + if isa(spsig, UnionAll) + env = data_pointer_from_objref(sv.linfo.sparam_vals) + sizeof(Ptr{Void}) + rt = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), e.args[2], spsig, env) + end + end + abstract_eval(e.args[1], vtypes, sv) + for i = 3:length(e.args) + if abstract_eval(e.args[i], vtypes, sv) === Bottom + t = Bottom + end + end + if rt === Bottom + t = Bottom + elseif isa(rt, Type) + t = rt + if isa(t, DataType) && (t::DataType).name === _Ref_name + t = t.parameters[1] + if t === Any + t = Bottom # a return type of Box{Any} is invalid + end + end + else + t = Any + end elseif e.head === :static_parameter n = e.args[1] t = Any if n <= length(sv.sp) val = sv.sp[n] - if isa(val,TypeVar) + if isa(val, TypeVar) # static param bound to typevar # if the tvar does not refer to anything more specific than Any, # the static param might actually be an integer, symbol, etc. 
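The `:foreigncall` branch added above derives the inferred type from the declared `ccall` return type. A condensed sketch of that rule follows; the helper name `foreigncall_rt` is hypothetical, and this version omits the step that re-instantiates the declared type in the method's static-parameter environment.

    # Declared Ref{T} return types are unwrapped to T, a declared Ref{Any}
    # (i.e. a boxed Any return) is rejected by inferring Bottom, and a
    # non-Type declaration falls back to Any.
    function foreigncall_rt(rt::ANY)
        isa(rt, Type) || return Any
        rt === Union{} && return Union{}
        t = rt
        if isa(t, DataType) && (t::DataType).name === Ref.body.name
            t = t.parameters[1]
            t === Any && return Union{}   # a return type of Box{Any} is invalid
        end
        return t
    end
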
@@ -1509,17 +1619,17 @@ type StateUpdate end function abstract_interpret(e::ANY, vtypes::VarTable, sv::InferenceState) - !isa(e,Expr) && return vtypes + !isa(e, Expr) && return vtypes # handle assignment if e.head === :(=) t = abstract_eval(e.args[2], vtypes, sv) t === Bottom && return () lhs = e.args[1] - if isa(lhs,Slot) || isa(lhs,SSAValue) + if isa(lhs, Slot) || isa(lhs, SSAValue) # don't bother for GlobalRef - return StateUpdate(lhs, VarState(t,false), vtypes) + return StateUpdate(lhs, VarState(t, false), vtypes) end - elseif e.head === :call + elseif e.head === :call || e.head === :foreigncall t = abstract_eval(e, vtypes, sv) t === Bottom && return () elseif e.head === :gotoifnot @@ -1527,8 +1637,8 @@ function abstract_interpret(e::ANY, vtypes::VarTable, sv::InferenceState) t === Bottom && return () elseif e.head === :method fname = e.args[1] - if isa(fname,Slot) - return StateUpdate(fname, VarState(Any,false), vtypes) + if isa(fname, Slot) + return StateUpdate(fname, VarState(Any, false), vtypes) end end return vtypes @@ -2633,7 +2743,7 @@ end # replace slots 1:na with argexprs, static params with spvals, and increment # other slots by offset. -function substitute!(e::ANY, na, argexprs, spvals, offset) +function substitute!(e::ANY, na::Int, argexprs::Vector{Any}, spsig::ANY, spvals::Vector{Any}, offset::Int) if isa(e, Slot) id = slot_id(e) if 1 <= id <= na @@ -2650,16 +2760,30 @@ function substitute!(e::ANY, na, argexprs, spvals, offset) end end if isa(e, NewvarNode) - return NewvarNode(substitute!(e.slot, na, argexprs, spvals, offset)) + return NewvarNode(substitute!(e.slot, na, argexprs, spsig, spvals, offset)) end if isa(e, Expr) e = e::Expr head = e.head if head === :static_parameter return spvals[e.args[1]] + elseif head === :foreigncall + for i = 1:length(e.args) + if i == 2 + e.args[2] = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), e.args[2], spsig, spvals) + elseif i == 3 + argtuple = Any[ + ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), argt, spsig, spvals) + for argt + in e.args[3] ] + e.args[3] = svec(argtuple...) 
+ else + e.args[i] = substitute!(e.args[i], na, argexprs, spsig, spvals, offset) + end + end elseif !is_meta_expr_head(head) for i = 1:length(e.args) - e.args[i] = substitute!(e.args[i], na, argexprs, spvals, offset) + e.args[i] = substitute!(e.args[i], na, argexprs, spsig, spvals, offset) end end end @@ -2723,7 +2847,6 @@ function is_pure_builtin(f::ANY) if isa(f,IntrinsicFunction) if !(f === Intrinsics.pointerref || # this one is volatile f === Intrinsics.pointerset || # this one is never effect-free - f === Intrinsics.ccall || # this one is never effect-free f === Intrinsics.llvmcall || # this one is never effect-free f === Intrinsics.checked_trunc_sint || f === Intrinsics.checked_trunc_uint || @@ -3022,13 +3145,6 @@ function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::Inference return (argexprs[2],()) end end - if length(atypes)==3 && f === unbox - at3 = widenconst(atypes[3]) - if isa(at3,DataType) && !at3.mutable && at3.layout != C_NULL && datatype_pointerfree(at3) - # remove redundant unbox - return (argexprs[3],()) - end - end topmod = _topmod(sv) # special-case inliners for known pure functions that compute types if sv.params.inlining @@ -3152,14 +3268,14 @@ function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::Inference end argexprs0 = argexprs - na = method.nargs + na = Int(method.nargs) # check for vararg function isva = false if na > 0 && method.isva - @assert length(argexprs) >= na-1 + @assert length(argexprs) >= na - 1 # construct tuple-forming expression for argument tail vararg = mk_tuplecall(argexprs[na:end], sv) - argexprs = Any[argexprs[1:(na-1)]..., vararg] + argexprs = Any[argexprs[1:(na - 1)]..., vararg] isva = true elseif na != length(argexprs) # we have a method match only because an earlier @@ -3308,7 +3424,7 @@ function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::Inference prelude_stmts = [] stmts_free = true # true = all entries of stmts are effect_free - for i=na:-1:1 # stmts_free needs to be calculated in reverse-argument order + for i = na:-1:1 # stmts_free needs to be calculated in reverse-argument order #args_i = args[i] aei = argexprs[i] aeitype = argtype = widenconst(exprtype(aei, sv.src, sv.mod)) @@ -3361,10 +3477,10 @@ function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::Inference end # ok, substitute argument expressions for argument names in the body - body = substitute!(body, na, argexprs, spvals, length(sv.src.slotnames) - na) - append!(sv.src.slotnames, src.slotnames[na+1:end]) - append!(sv.src.slottypes, src.slottypes[na+1:end]) - append!(sv.src.slotflags, src.slotflags[na+1:end]) + body = substitute!(body, na, argexprs, method.sig, spvals, length(sv.src.slotnames) - na) + append!(sv.src.slotnames, src.slotnames[(na + 1):end]) + append!(sv.src.slottypes, src.slottypes[(na + 1):end]) + append!(sv.src.slotflags, src.slotflags[(na + 1):end]) # make labels / goto statements unique # relocate inlining information @@ -3598,32 +3714,30 @@ const corenumtype = Union{Int32, Int64, Float32, Float64} function inlining_pass(e::Expr, sv::InferenceState) if e.head === :method # avoid running the inlining pass on function definitions - return (e,()) + return (e, ()) end eargs = e.args - if length(eargs)<1 - return (e,()) + if length(eargs) < 1 + return (e, ()) end stmts = [] arg1 = eargs[1] + isccall = false + i0 = 1 # don't inline first (global) arguments of ccall, as this needs to be evaluated # by the interpreter and inlining might put in something it can't handle, # like another ccall (or 
try to move the variables out into the function) - if is_known_call(e, Core.Intrinsics.ccall, sv.src, sv.mod) - # 4 is rewritten to 2 below to handle the callee. - i0 = 4 + if e.head === :foreigncall + # 3 is rewritten to 1 below to handle the callee. + i0 = 3 isccall = true elseif is_known_call(e, Core.Intrinsics.llvmcall, sv.src, sv.mod) i0 = 5 - isccall = false - else - i0 = 1 - isccall = false end has_stmts = false # needed to preserve order-of-execution - for _i=length(eargs):-1:i0 - if isccall && _i == 4 - i = 2 + for _i = length(eargs):-1:i0 + if isccall && _i == 3 + i = 1 isccallee = true else i = _i @@ -3682,17 +3796,17 @@ function inlining_pass(e::Expr, sv::InferenceState) end end end - if e.head !== :call - return (e, stmts) - end if isccall le = length(eargs) - for i=5:2:le-1 - if eargs[i] === eargs[i+1] - eargs[i+1] = 0 + for i = 4:2:(le - 1) + if eargs[i] === eargs[i + 1] + eargs[i + 1] = 0 end end end + if e.head !== :call + return (e, stmts) + end ft = exprtype(arg1, sv.src, sv.mod) if isa(ft, Const) diff --git a/base/int.jl b/base/int.jl index 4792f0c4ca833..daba5620e8c06 100644 --- a/base/int.jl +++ b/base/int.jl @@ -7,12 +7,12 @@ # they are also used elsewhere where Int128/UInt128 support is separated out, # such as in hashing2.jl -const BitSigned64_types = (Int8,Int16,Int32,Int64) -const BitUnsigned64_types = (UInt8,UInt16,UInt32,UInt64) -const BitInteger64_types = (BitSigned64_types...,BitUnsigned64_types...) -const BitSigned_types = (BitSigned64_types...,Int128) -const BitUnsigned_types = (BitUnsigned64_types...,UInt128) -const BitInteger_types = (BitSigned_types...,BitUnsigned_types...) +const BitSigned64_types = (Int8, Int16, Int32, Int64) +const BitUnsigned64_types = (UInt8, UInt16, UInt32, UInt64) +const BitInteger64_types = (BitSigned64_types..., BitUnsigned64_types...) +const BitSigned_types = (BitSigned64_types..., Int128) +const BitUnsigned_types = (BitUnsigned64_types..., UInt128) +const BitInteger_types = (BitSigned_types..., BitUnsigned_types...) 
typealias BitSigned64 Union{BitSigned64_types...} typealias BitUnsigned64 Union{BitUnsigned64_types...} @@ -20,22 +20,22 @@ typealias BitInteger64 Union{BitInteger64_types...} typealias BitSigned Union{BitSigned_types...} typealias BitUnsigned Union{BitUnsigned_types...} typealias BitInteger Union{BitInteger_types...} -typealias BitSigned64T Union{Type{Int8},Type{Int16},Type{Int32},Type{Int64}} -typealias BitUnsigned64T Union{Type{UInt8},Type{UInt16},Type{UInt32},Type{UInt64}} +typealias BitSigned64T Union{Type{Int8}, Type{Int16}, Type{Int32}, Type{Int64}} +typealias BitUnsigned64T Union{Type{UInt8}, Type{UInt16}, Type{UInt32}, Type{UInt64}} ## integer comparisons ## -<{T<:BitSigned}(x::T, y::T) = slt_int(unbox(T,x),unbox(T,y)) +<{T<:BitSigned}(x::T, y::T) = slt_int(x, y) --{T<:BitInteger}(x::T) = box(T, neg_int(unbox(T,x))) --{T<:BitInteger}(x::T, y::T) = box(T, sub_int(unbox(T,x),unbox(T,y))) -+{T<:BitInteger}(x::T, y::T) = box(T, add_int(unbox(T,x),unbox(T,y))) -*{T<:BitInteger}(x::T, y::T) = box(T, mul_int(unbox(T,x),unbox(T,y))) +-{T<:BitInteger}(x::T) = neg_int(x) +-{T<:BitInteger}(x::T, y::T) = sub_int(x, y) ++{T<:BitInteger}(x::T, y::T) = add_int(x, y) +*{T<:BitInteger}(x::T, y::T) = mul_int(x, y) -inv(x::Integer) = float(one(x))/float(x) -/{T<:Integer}(x::T, y::T) = float(x)/float(y) +inv(x::Integer) = float(one(x)) / float(x) +/{T<:Integer}(x::T, y::T) = float(x) / float(y) # skip promotion for system integer types -/(x::BitInteger, y::BitInteger) = float(x)/float(y) +/(x::BitInteger, y::BitInteger) = float(x) / float(y) """ isodd(x::Integer) -> Bool @@ -50,7 +50,7 @@ julia> isodd(10) false ``` """ -isodd(n::Integer) = rem(n,2) != 0 +isodd(n::Integer) = rem(n, 2) != 0 """ iseven(x::Integer) -> Bool @@ -70,19 +70,19 @@ iseven(n::Integer) = !isodd(n) signbit(x::Integer) = x < 0 signbit(x::Unsigned) = false -flipsign{T<:BitSigned}(x::T, y::T) = box(T,flipsign_int(unbox(T,x),unbox(T,y))) +flipsign{T<:BitSigned}(x::T, y::T) = flipsign_int(x, y) -flipsign(x::Signed, y::Signed) = convert(typeof(x), flipsign(promote_noncircular(x,y)...)) -flipsign(x::Signed, y::Float16) = flipsign(x, reinterpret(Int16,y)) -flipsign(x::Signed, y::Float32) = flipsign(x, reinterpret(Int32,y)) -flipsign(x::Signed, y::Float64) = flipsign(x, reinterpret(Int64,y)) -flipsign(x::Signed, y::Real) = flipsign(x, -oftype(x,signbit(y))) +flipsign(x::Signed, y::Signed) = convert(typeof(x), flipsign(promote_noncircular(x, y)...)) +flipsign(x::Signed, y::Float16) = flipsign(x, bitcast(Int16, y)) +flipsign(x::Signed, y::Float32) = flipsign(x, bitcast(Int32, y)) +flipsign(x::Signed, y::Float64) = flipsign(x, bitcast(Int64, y)) +flipsign(x::Signed, y::Real) = flipsign(x, -oftype(x, signbit(y))) copysign(x::Signed, y::Signed) = flipsign(x, x ⊻ y) -copysign(x::Signed, y::Float16) = copysign(x, reinterpret(Int16,y)) -copysign(x::Signed, y::Float32) = copysign(x, reinterpret(Int32,y)) -copysign(x::Signed, y::Float64) = copysign(x, reinterpret(Int64,y)) -copysign(x::Signed, y::Real) = copysign(x, -oftype(x,signbit(y))) +copysign(x::Signed, y::Float16) = copysign(x, bitcast(Int16, y)) +copysign(x::Signed, y::Float32) = copysign(x, bitcast(Int32, y)) +copysign(x::Signed, y::Float64) = copysign(x, bitcast(Int64, y)) +copysign(x::Signed, y::Real) = copysign(x, -oftype(x, signbit(y))) """ abs(x) @@ -102,20 +102,20 @@ abs(x::Signed) = flipsign(x,x) ~(n::Integer) = -n-1 -unsigned(x::Signed) = reinterpret(typeof(convert(Unsigned,zero(x))), x) +unsigned(x::Signed) = reinterpret(typeof(convert(Unsigned, zero(x))), x) unsigned(x::Bool) = 
convert(Unsigned, x) unsigned(x) = convert(Unsigned, x) -signed(x::Unsigned) = reinterpret(typeof(convert(Signed,zero(x))), x) +signed(x::Unsigned) = reinterpret(typeof(convert(Signed, zero(x))), x) signed(x) = convert(Signed, x) -div(x::Signed, y::Unsigned) = flipsign(signed(div(unsigned(abs(x)),y)),x) -div(x::Unsigned, y::Signed) = unsigned(flipsign(signed(div(x,unsigned(abs(y)))),y)) +div(x::Signed, y::Unsigned) = flipsign(signed(div(unsigned(abs(x)), y)), x) +div(x::Unsigned, y::Signed) = unsigned(flipsign(signed(div(x, unsigned(abs(y)))), y)) -rem(x::Signed, y::Unsigned) = flipsign(signed(rem(unsigned(abs(x)),y)),x) -rem(x::Unsigned, y::Signed) = rem(x,unsigned(abs(y))) +rem(x::Signed, y::Unsigned) = flipsign(signed(rem(unsigned(abs(x)), y)), x) +rem(x::Unsigned, y::Signed) = rem(x, unsigned(abs(y))) -fld(x::Signed, y::Unsigned) = div(x,y)-(signbit(x)&(rem(x,y)!=0)) -fld(x::Unsigned, y::Signed) = div(x,y)-(signbit(y)&(rem(x,y)!=0)) +fld(x::Signed, y::Unsigned) = div(x, y) - (signbit(x) & (rem(x, y) != 0)) +fld(x::Unsigned, y::Signed) = div(x, y) - (signbit(y) & (rem(x, y) != 0)) """ @@ -130,50 +130,50 @@ x == fld(x,y)*y + mod(x,y) """ function mod{T<:Integer}(x::T, y::T) y == -1 && return T(0) # avoid potential overflow in fld - x - fld(x,y)*y + return x - fld(x, y) * y end -mod(x::Signed, y::Unsigned) = rem(y+unsigned(rem(x,y)),y) -mod(x::Unsigned, y::Signed) = rem(y+signed(rem(x,y)),y) -mod{T<:Unsigned}(x::T, y::T) = rem(x,y) +mod(x::Signed, y::Unsigned) = rem(y + unsigned(rem(x, y)), y) +mod(x::Unsigned, y::Signed) = rem(y + signed(rem(x, y)), y) +mod{T<:Unsigned}(x::T, y::T) = rem(x, y) -cld(x::Signed, y::Unsigned) = div(x,y)+(!signbit(x)&(rem(x,y)!=0)) -cld(x::Unsigned, y::Signed) = div(x,y)+(!signbit(y)&(rem(x,y)!=0)) +cld(x::Signed, y::Unsigned) = div(x, y) + (!signbit(x) & (rem(x, y) != 0)) +cld(x::Unsigned, y::Signed) = div(x, y) + (!signbit(y) & (rem(x, y) != 0)) # Don't promote integers for div/rem/mod since there is no danger of overflow, # while there is a substantial performance penalty to 64-bit promotion. -div{T<:BitSigned64}(x::T, y::T) = box(T,checked_sdiv_int(unbox(T,x),unbox(T,y))) -rem{T<:BitSigned64}(x::T, y::T) = box(T,checked_srem_int(unbox(T,x),unbox(T,y))) -div{T<:BitUnsigned64}(x::T, y::T) = box(T,checked_udiv_int(unbox(T,x),unbox(T,y))) -rem{T<:BitUnsigned64}(x::T, y::T) = box(T,checked_urem_int(unbox(T,x),unbox(T,y))) +div{T<:BitSigned64}(x::T, y::T) = checked_sdiv_int(x, y) +rem{T<:BitSigned64}(x::T, y::T) = checked_srem_int(x, y) +div{T<:BitUnsigned64}(x::T, y::T) = checked_udiv_int(x, y) +rem{T<:BitUnsigned64}(x::T, y::T) = checked_urem_int(x, y) # fld(x,y) == div(x,y) - ((x>=0) != (y>=0) && rem(x,y) != 0 ? 1 : 0) fld{T<:Unsigned}(x::T, y::T) = div(x,y) function fld{T<:Integer}(x::T, y::T) - d = div(x,y) - d - (signbit(x ⊻ y) & (d*y!=x)) + d = div(x, y) + return d - (signbit(x ⊻ y) & (d * y != x)) end # cld(x,y) = div(x,y) + ((x>0) == (y>0) && rem(x,y) != 0 ? 
1 : 0) function cld{T<:Unsigned}(x::T, y::T) - d = div(x,y) - d + (d*y!=x) + d = div(x, y) + return d + (d * y != x) end function cld{T<:Integer}(x::T, y::T) - d = div(x,y) - d + (((x>0) == (y>0)) & (d*y!=x)) + d = div(x, y) + return d + (((x > 0) == (y > 0)) & (d * y != x)) end ## integer bitwise operations ## -(~){T<:BitInteger}(x::T) = box(T,not_int(unbox(T,x))) -(&){T<:BitInteger}(x::T, y::T) = box(T,and_int(unbox(T,x),unbox(T,y))) -(|){T<:BitInteger}(x::T, y::T) = box(T, or_int(unbox(T,x),unbox(T,y))) -xor{T<:BitInteger}(x::T, y::T) = box(T,xor_int(unbox(T,x),unbox(T,y))) +(~){T<:BitInteger}(x::T) = not_int(x) +(&){T<:BitInteger}(x::T, y::T) = and_int(x, y) +(|){T<:BitInteger}(x::T, y::T) = or_int(x, y) +xor{T<:BitInteger}(x::T, y::T) = xor_int(x, y) -bswap{T<:Union{Int8,UInt8}}(x::T) = x +bswap{T<:Union{Int8, UInt8}}(x::T) = x bswap{T<:Union{Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128}}(x::T) = - box(T,bswap_int(unbox(T,x))) + bswap_int(x) """ count_ones(x::Integer) -> Integer @@ -185,7 +185,7 @@ julia> count_ones(7) 3 ``` """ -count_ones{T<:BitInteger}(x::T) = Int(box(T,ctpop_int(unbox(T,x)))) +count_ones{T<:BitInteger}(x::T) = Int(ctpop_int(x)) """ leading_zeros(x::Integer) -> Integer @@ -197,7 +197,7 @@ julia> leading_zeros(Int32(1)) 31 ``` """ -leading_zeros{T<:BitInteger}(x::T) = Int(box(T,ctlz_int(unbox(T,x)))) +leading_zeros{T<:BitInteger}(x::T) = Int(ctlz_int(x)) """ trailing_zeros(x::Integer) -> Integer @@ -209,7 +209,7 @@ julia> trailing_zeros(2) 1 ``` """ -trailing_zeros{T<:BitInteger}(x::T) = Int(box(T,cttz_int(unbox(T,x)))) +trailing_zeros{T<:BitInteger}(x::T) = Int(cttz_int(x)) """ count_zeros(x::Integer) -> Integer @@ -249,9 +249,9 @@ trailing_ones(x::Integer) = trailing_zeros(~x) ## integer comparisons ## -<{T<:BitUnsigned}(x::T, y::T) = ult_int(unbox(T,x),unbox(T,y)) -<={T<:BitSigned}(x::T, y::T) = sle_int(unbox(T,x),unbox(T,y)) -<={T<:BitUnsigned}(x::T, y::T) = ule_int(unbox(T,x),unbox(T,y)) +<{T<:BitUnsigned}(x::T, y::T) = ult_int(x, y) +<={T<:BitSigned}(x::T, y::T) = sle_int(x, y) +<={T<:BitUnsigned}(x::T, y::T) = ule_int(x, y) ==(x::Signed, y::Unsigned) = (x >= 0) & (unsigned(x) == y) ==(x::Unsigned, y::Signed ) = (y >= 0) & (x == unsigned(y)) @@ -263,14 +263,10 @@ trailing_ones(x::Integer) = trailing_zeros(~x) ## integer shifts ## # unsigned shift counts always shift in the same direction ->>{T<:BitSigned,S<:BitUnsigned}(x::T, y::S) = - box(T,ashr_int(unbox(T,x),unbox(S,y))) ->>{T<:BitUnsigned,S<:BitUnsigned}(x::T, y::S) = - box(T,lshr_int(unbox(T,x),unbox(S,y))) -<<{T<:BitInteger,S<:BitUnsigned}(x::T, y::S) = - box(T, shl_int(unbox(T,x),unbox(S,y))) ->>>{T<:BitInteger,S<:BitUnsigned}(x::T, y::S) = - box(T,lshr_int(unbox(T,x),unbox(S,y))) +>>{T<:BitSigned,S<:BitUnsigned}(x::T, y::S) = ashr_int(x, y) +>>{T<:BitUnsigned,S<:BitUnsigned}(x::T, y::S) = lshr_int(x, y) +<<{T<:BitInteger,S<:BitUnsigned}(x::T, y::S) = shl_int(x, y) +>>>{T<:BitInteger,S<:BitUnsigned}(x::T, y::S) = lshr_int(x, y) # signed shift counts can shift in either direction # note: this early during bootstrap, `>=` is not yet available # note: we only define Int shift counts here; the generic case is handled later @@ -283,50 +279,50 @@ trailing_ones(x::Integer) = trailing_zeros(~x) ## integer conversions ## -for to in BitInteger_types, from in (BitInteger_types...,Bool) +for to in BitInteger_types, from in (BitInteger_types..., Bool) if !(to === from) if to.size < from.size if issubtype(to, Signed) if issubtype(from, Unsigned) @eval convert(::Type{$to}, x::($from)) = - 
box($to,checked_trunc_sint($to,check_top_bit(unbox($from,x)))) + checked_trunc_sint($to, check_top_bit(x)) else @eval convert(::Type{$to}, x::($from)) = - box($to,checked_trunc_sint($to,unbox($from,x))) + checked_trunc_sint($to, x) end else @eval convert(::Type{$to}, x::($from)) = - box($to,checked_trunc_uint($to,unbox($from,x))) + checked_trunc_uint($to, x) end - @eval rem(x::($from), ::Type{$to}) = box($to,trunc_int($to,unbox($from,x))) + @eval rem(x::($from), ::Type{$to}) = trunc_int($to, x) elseif from.size < to.size || from === Bool if issubtype(from, Signed) if issubtype(to, Unsigned) @eval convert(::Type{$to}, x::($from)) = - box($to,sext_int($to,check_top_bit(unbox($from,x)))) + sext_int($to, check_top_bit(x)) else @eval convert(::Type{$to}, x::($from)) = - box($to,sext_int($to,unbox($from,x))) + sext_int($to, x) end - @eval rem(x::($from), ::Type{$to}) = box($to,sext_int($to,unbox($from,x))) + @eval rem(x::($from), ::Type{$to}) = sext_int($to, x) else - @eval convert(::Type{$to}, x::($from)) = box($to,zext_int($to,unbox($from,x))) - @eval rem(x::($from), ::Type{$to}) = convert($to,x) + @eval convert(::Type{$to}, x::($from)) = zext_int($to, x) + @eval rem(x::($from), ::Type{$to}) = convert($to, x) end else - if !(issubtype(from,Signed) === issubtype(to,Signed)) + if !(issubtype(from, Signed) === issubtype(to, Signed)) # raise InexactError if x's top bit is set - @eval convert(::Type{$to}, x::($from)) = box($to,check_top_bit(unbox($from,x))) + @eval convert(::Type{$to}, x::($from)) = bitcast($to, check_top_bit(x)) else - @eval convert(::Type{$to}, x::($from)) = box($to,unbox($from,x)) + @eval convert(::Type{$to}, x::($from)) = bitcast($to, x) end - @eval rem(x::($from), ::Type{$to}) = box($to,unbox($from,x)) + @eval rem(x::($from), ::Type{$to}) = bitcast($to, x) end end end rem{T<:Integer}(x::T, ::Type{T}) = x -rem(x::Integer, ::Type{Bool}) = ((x&1)!=0) +rem(x::Integer, ::Type{Bool}) = ((x & 1) != 0) mod{T<:Integer}(x::Integer, ::Type{T}) = rem(x, T) unsafe_trunc{T<:Integer}(::Type{T}, x::Integer) = rem(x, T) @@ -335,39 +331,39 @@ for (Ts, Tu) in ((Int8, UInt8), (Int16, UInt16), (Int32, UInt32), (Int64, UInt64 @eval convert(::Type{Unsigned}, x::$Ts) = convert($Tu, x) end -convert{T<:Union{Float32, Float64, Bool}}(::Type{Signed}, x::T) = convert(Int,x) -convert{T<:Union{Float32, Float64, Bool}}(::Type{Unsigned}, x::T) = convert(UInt,x) +convert{T<:Union{Float32, Float64, Bool}}(::Type{Signed}, x::T) = convert(Int, x) +convert{T<:Union{Float32, Float64, Bool}}(::Type{Unsigned}, x::T) = convert(UInt, x) convert(::Type{Integer}, x::Integer) = x -convert(::Type{Integer}, x::Real) = convert(Signed,x) +convert(::Type{Integer}, x::Real) = convert(Signed, x) round(x::Integer) = x trunc(x::Integer) = x floor(x::Integer) = x ceil(x::Integer) = x -round{T<:Integer}(::Type{T},x::Integer) = convert(T,x) -trunc{T<:Integer}(::Type{T},x::Integer) = convert(T,x) -floor{T<:Integer}(::Type{T},x::Integer) = convert(T,x) - ceil{T<:Integer}(::Type{T},x::Integer) = convert(T,x) +round{T<:Integer}(::Type{T}, x::Integer) = convert(T, x) +trunc{T<:Integer}(::Type{T}, x::Integer) = convert(T, x) +floor{T<:Integer}(::Type{T}, x::Integer) = convert(T, x) + ceil{T<:Integer}(::Type{T}, x::Integer) = convert(T, x) ## integer construction ## macro int128_str(s) - parse(Int128,s) + return parse(Int128, s) end macro uint128_str(s) - parse(UInt128,s) + return parse(UInt128, s) end macro big_str(s) - n = tryparse(BigInt,s) + n = tryparse(BigInt, s) !isnull(n) && return get(n) - n = tryparse(BigFloat,s) + n = 
tryparse(BigFloat, s) !isnull(n) && return get(n) message = "invalid number format $s for BigInt or BigFloat" - :(throw(ArgumentError($message))) + return :(throw(ArgumentError($message))) end ## integer promotions ## @@ -413,9 +409,9 @@ typemax(::Type{Int64 }) = 9223372036854775807 typemin(::Type{UInt64}) = UInt64(0) typemax(::Type{UInt64}) = 0xffffffffffffffff @eval typemin(::Type{UInt128}) = $(convert(UInt128, 0)) -@eval typemax(::Type{UInt128}) = $(box(UInt128,unbox(Int128,convert(Int128,-1)))) -@eval typemin(::Type{Int128} ) = $(convert(Int128,1)<<127) -@eval typemax(::Type{Int128} ) = $(box(Int128,unbox(UInt128,typemax(UInt128)>>1))) +@eval typemax(::Type{UInt128}) = $(bitcast(UInt128, convert(Int128, -1))) +@eval typemin(::Type{Int128} ) = $(convert(Int128, 1) << 127) +@eval typemax(::Type{Int128} ) = $(bitcast(Int128, typemax(UInt128) >> 1)) widen{T<:Union{Int8, Int16}}(::Type{T}) = Int32 widen(::Type{Int32}) = Int64 @@ -427,12 +423,12 @@ widen(::Type{UInt64}) = UInt128 # a few special cases, # Int64*UInt64 => Int128 # |x|<=2^(k-1), |y|<=2^k-1 => |x*y|<=2^(2k-1)-1 -widemul(x::Signed,y::Unsigned) = widen(x)*signed(widen(y)) -widemul(x::Unsigned,y::Signed) = signed(widen(x))*widen(y) +widemul(x::Signed,y::Unsigned) = widen(x) * signed(widen(y)) +widemul(x::Unsigned,y::Signed) = signed(widen(x)) * widen(y) # multplication by Bool doesn't require widening -widemul(x::Bool,y::Bool) = x*y -widemul(x::Bool,y::Number) = x*y -widemul(x::Number,y::Bool) = x*y +widemul(x::Bool,y::Bool) = x * y +widemul(x::Bool,y::Number) = x * y +widemul(x::Number,y::Bool) = x * y ## wide multiplication, Int128 multiply and divide ## @@ -442,41 +438,41 @@ if Core.sizeof(Int) == 4 local u0::UInt64, v0::UInt64, w0::UInt64 local u1::Int64, v1::Int64, w1::UInt64, w2::Int64, t::UInt64 - u0 = u&0xffffffff; u1 = u>>32 - v0 = v&0xffffffff; v1 = v>>32 - w0 = u0*v0 - t = reinterpret(UInt64,u1)*v0 + (w0>>>32) - w2 = reinterpret(Int64,t) >> 32 - w1 = u0*reinterpret(UInt64,v1) + (t&0xffffffff) - hi = u1*v1 + w2 + (reinterpret(Int64,w1) >> 32) - lo = w0&0xffffffff + (w1 << 32) - Int128(hi)<<64 + Int128(lo) + u0 = u & 0xffffffff; u1 = u >> 32 + v0 = v & 0xffffffff; v1 = v >> 32 + w0 = u0 * v0 + t = reinterpret(UInt64, u1) * v0 + (w0 >>> 32) + w2 = reinterpret(Int64, t) >> 32 + w1 = u0 * reinterpret(UInt64, v1) + (t & 0xffffffff) + hi = u1 * v1 + w2 + (reinterpret(Int64, w1) >> 32) + lo = w0 & 0xffffffff + (w1 << 32) + return Int128(hi) << 64 + Int128(lo) end function widemul(u::UInt64, v::UInt64) local u0::UInt64, v0::UInt64, w0::UInt64 local u1::UInt64, v1::UInt64, w1::UInt64, w2::UInt64, t::UInt64 - u0 = u&0xffffffff; u1 = u>>>32 - v0 = v&0xffffffff; v1 = v>>>32 - w0 = u0*v0 - t = u1*v0 + (w0>>>32) - w2 = t>>>32 - w1 = u0*v1 + (t&0xffffffff) - hi = u1*v1 + w2 + (w1 >>> 32) - lo = w0&0xffffffff + (w1 << 32) - UInt128(hi)<<64 + UInt128(lo) + u0 = u & 0xffffffff; u1 = u >>> 32 + v0 = v & 0xffffffff; v1 = v >>> 32 + w0 = u0 * v0 + t = u1 * v0 + (w0 >>> 32) + w2 = t >>> 32 + w1 = u0 * v1 + (t & 0xffffffff) + hi = u1 * v1 + w2 + (w1 >>> 32) + lo = w0 & 0xffffffff + (w1 << 32) + return UInt128(hi) << 64 + UInt128(lo) end function *(u::Int128, v::Int128) - u0 = u % UInt64; u1 = Int64(u>>64) - v0 = v % UInt64; v1 = Int64(v>>64) + u0 = u % UInt64; u1 = Int64(u >> 64) + v0 = v % UInt64; v1 = Int64(v >> 64) lolo = widemul(u0, v0) - lohi = widemul(reinterpret(Int64,u0), v1) - hilo = widemul(u1, reinterpret(Int64,v0)) - t = reinterpret(UInt128,hilo) + (lolo>>>64) - w1 = reinterpret(UInt128,lohi) + (t&0xffffffffffffffff) - 
Int128(lolo&0xffffffffffffffff) + reinterpret(Int128,w1)<<64 + lohi = widemul(reinterpret(Int64, u0), v1) + hilo = widemul(u1, reinterpret(Int64, v0)) + t = reinterpret(UInt128, hilo) + (lolo >>> 64) + w1 = reinterpret(UInt128, lohi) + (t & 0xffffffffffffffff) + return Int128(lolo & 0xffffffffffffffff) + reinterpret(Int128, w1) << 64 end function *(u::UInt128, v::UInt128) @@ -485,35 +481,35 @@ if Core.sizeof(Int) == 4 lolo = widemul(u0, v0) lohi = widemul(u0, v1) hilo = widemul(u1, v0) - t = hilo + (lolo>>>64) - w1 = lohi + (t&0xffffffffffffffff) - (lolo&0xffffffffffffffff) + UInt128(w1)<<64 + t = hilo + (lolo >>> 64) + w1 = lohi + (t & 0xffffffffffffffff) + return (lolo & 0xffffffffffffffff) + UInt128(w1) << 64 end function div(x::Int128, y::Int128) (x == typemin(Int128)) & (y == -1) && throw(DivideError()) - Int128(div(BigInt(x),BigInt(y))) + return Int128(div(BigInt(x), BigInt(y))) end function div(x::UInt128, y::UInt128) - UInt128(div(BigInt(x),BigInt(y))) + return UInt128(div(BigInt(x), BigInt(y))) end function rem(x::Int128, y::Int128) - Int128(rem(BigInt(x),BigInt(y))) + return Int128(rem(BigInt(x), BigInt(y))) end function rem(x::UInt128, y::UInt128) - UInt128(rem(BigInt(x),BigInt(y))) + return UInt128(rem(BigInt(x), BigInt(y))) end function mod(x::Int128, y::Int128) - Int128(mod(BigInt(x),BigInt(y))) + return Int128(mod(BigInt(x), BigInt(y))) end else - *{T<:Union{Int128,UInt128}}(x::T, y::T) = box(T,mul_int(unbox(T,x),unbox(T,y))) + *{T<:Union{Int128,UInt128}}(x::T, y::T) = mul_int(x, y) - div(x::Int128, y::Int128) = box(Int128,checked_sdiv_int(unbox(Int128,x),unbox(Int128,y))) - div(x::UInt128, y::UInt128) = box(UInt128,checked_udiv_int(unbox(UInt128,x),unbox(UInt128,y))) + div(x::Int128, y::Int128) = checked_sdiv_int(x, y) + div(x::UInt128, y::UInt128) = checked_udiv_int(x, y) - rem(x::Int128, y::Int128) = box(Int128,checked_srem_int(unbox(Int128,x),unbox(Int128,y))) - rem(x::UInt128, y::UInt128) = box(UInt128,checked_urem_int(unbox(UInt128,x),unbox(UInt128,y))) + rem(x::Int128, y::Int128) = checked_srem_int(x, y) + rem(x::UInt128, y::UInt128) = checked_urem_int(x, y) end diff --git a/base/math.jl b/base/math.jl index f74d46436b0ba..d6b2e63483647 100644 --- a/base/math.jl +++ b/base/math.jl @@ -32,7 +32,7 @@ using Base: sign_mask, exponent_mask, exponent_one, exponent_bias, exponent_half, exponent_max, exponent_raw_max, fpinttype, significand_mask, significand_bits, exponent_bits -using Core.Intrinsics: sqrt_llvm, box, unbox, powi_llvm +using Core.Intrinsics: sqrt_llvm, powi_llvm # non-type specific math functions @@ -420,8 +420,8 @@ for f in (:sin, :cos, :tan, :asin, :acos, :acosh, :atanh, :log, :log2, :log10, end end -sqrt(x::Float64) = box(Float64,sqrt_llvm(unbox(Float64,x))) -sqrt(x::Float32) = box(Float32,sqrt_llvm(unbox(Float32,x))) +sqrt(x::Float64) = sqrt_llvm(x) +sqrt(x::Float32) = sqrt_llvm(x) """ sqrt(x) @@ -644,10 +644,10 @@ end ^(x::Float64, y::Float64) = nan_dom_err(ccall((:pow,libm), Float64, (Float64,Float64), x, y), x+y) ^(x::Float32, y::Float32) = nan_dom_err(ccall((:powf,libm), Float32, (Float32,Float32), x, y), x+y) -^(x::Float64, y::Integer) = - box(Float64, powi_llvm(unbox(Float64,x), unbox(Int32,Int32(y)))) -^(x::Float32, y::Integer) = - box(Float32, powi_llvm(unbox(Float32,x), unbox(Int32,Int32(y)))) +^(x::Float64, y::Integer) = x^Int32(y) +^(x::Float64, y::Int32) = powi_llvm(x, y) +^(x::Float32, y::Integer) = x^Int32(y) +^(x::Float32, y::Int32) = powi_llvm(x, y) ^(x::Float16, y::Integer) = Float16(Float32(x)^y) function angle_restrict_symm(theta) diff 
--git a/base/pointer.jl b/base/pointer.jl index 688e69ebe8ba5..a201bb0816621 100644 --- a/base/pointer.jl +++ b/base/pointer.jl @@ -15,19 +15,21 @@ Ptr The C null pointer constant, sometimes used when calling external code. """ -const C_NULL = box(Ptr{Void}, 0) +const C_NULL = bitcast(Ptr{Void}, 0) + +# TODO: deprecate these conversions. C doesn't even allow them. # pointer to integer -convert{T<:Union{Int,UInt}}(::Type{T}, x::Ptr) = box(T, unbox(Ptr{Void},x)) -convert{T<:Integer}(::Type{T}, x::Ptr) = convert(T,convert(UInt, x)) +convert{T<:Union{Int,UInt}}(::Type{T}, x::Ptr) = bitcast(T, x) +convert{T<:Integer}(::Type{T}, x::Ptr) = convert(T, convert(UInt, x)) # integer to pointer -convert{T}(::Type{Ptr{T}}, x::UInt) = box(Ptr{T},unbox(UInt,UInt(x))) -convert{T}(::Type{Ptr{T}}, x::Int) = box(Ptr{T},unbox(Int,Int(x))) +convert{T}(::Type{Ptr{T}}, x::UInt) = bitcast(Ptr{T}, x) +convert{T}(::Type{Ptr{T}}, x::Int) = bitcast(Ptr{T}, x) # pointer to pointer convert{T}(::Type{Ptr{T}}, p::Ptr{T}) = p -convert{T}(::Type{Ptr{T}}, p::Ptr) = box(Ptr{T}, unbox(Ptr{Void},p)) +convert{T}(::Type{Ptr{T}}, p::Ptr) = bitcast(Ptr{T}, p) # object to pointer (when used with ccall) unsafe_convert(::Type{Ptr{UInt8}}, x::Symbol) = ccall(:jl_symbol_name, Ptr{UInt8}, (Any,), x) @@ -62,7 +64,7 @@ function unsafe_wrap{T,N}(::Union{Type{Array},Type{Array{T}},Type{Array{T,N}}}, end function unsafe_wrap{T}(::Union{Type{Array},Type{Array{T}},Type{Array{T,1}}}, p::Ptr{T}, d::Integer, own::Bool=false) - ccall(:jl_ptr_to_array_1d, Vector{T}, + ccall(:jl_ptr_to_array_1d, Array{T,1}, (Any, Ptr{Void}, Csize_t, Cint), Array{T,1}, p, d, own) end unsafe_wrap{N,I<:Integer}(Atype::Type, p::Ptr, dims::NTuple{N,I}, own::Bool=false) = diff --git a/base/reflection.jl b/base/reflection.jl index 0cf01a6289615..3f1ca951314ce 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -83,7 +83,7 @@ are also included. As a special case, all names defined in `Main` are considered \"exported\", since it is not idiomatic to explicitly export names from `Main`. 
""" -names(m::Module, all::Bool=false, imported::Bool=false) = sort!(ccall(:jl_module_names, Array{Symbol,1}, (Any,Cint,Cint), m, all, imported)) +names(m::Module, all::Bool=false, imported::Bool=false) = sort!(ccall(:jl_module_names, Array{Symbol,1}, (Any, Cint, Cint), m, all, imported)) isexported(m::Module, s::Symbol) = ccall(:jl_module_exports_p, Cint, (Any, Any), m, s) != 0 isdeprecated(m::Module, s::Symbol) = ccall(:jl_is_binding_deprecated, Cint, (Any, Any), m, s) != 0 @@ -597,7 +597,7 @@ function _dump_function_linfo(linfo::Core.MethodInstance, world::UInt, native::B if native str = ccall(:jl_dump_function_asm, Ref{String}, - (Ptr{Void}, Cint, Cstring), llvmf, 0, syntax) + (Ptr{Void}, Cint, Ptr{UInt8}), llvmf, 0, syntax) else str = ccall(:jl_dump_function_ir, Ref{String}, (Ptr{Void}, Bool, Bool), llvmf, strip_ir_metadata, dump_module) diff --git a/base/replutil.jl b/base/replutil.jl index 57b24b99771a5..f839f597164ea 100644 --- a/base/replutil.jl +++ b/base/replutil.jl @@ -101,7 +101,7 @@ function show(io::IO, ::MIME"text/plain", f::Function) mt = ft.name.mt if isa(f, Core.IntrinsicFunction) show(io, f) - id = Core.Intrinsics.box(Int32, f) + id = Core.Intrinsics.bitcast(Int32, f) print(io, " (intrinsic function #$id)") elseif isa(f, Core.Builtin) print(io, mt.name, " (built-in function)") diff --git a/base/serialize.jl b/base/serialize.jl index dd48fc521c9e6..2defd0c591430 100644 --- a/base/serialize.jl +++ b/base/serialize.jl @@ -39,7 +39,7 @@ const TAGS = Any[ :a, :b, :c, :d, :e, :f, :g, :h, :i, :j, :k, :l, :m, :n, :o, :p, :q, :r, :s, :t, :u, :v, :w, :x, :y, :z, :add_int, :sub_int, :mul_int, :add_float, :sub_float, - :mul_float, :unbox, :box, + :mul_float, :bitcast, :box, :eq_int, :slt_int, :sle_int, :ne_int, :arrayset, :arrayref, :Core, :Base, svec(), Tuple{}, diff --git a/base/show.jl b/base/show.jl index 53931126e5b47..45b7f9d13edaf 100644 --- a/base/show.jl +++ b/base/show.jl @@ -517,18 +517,6 @@ is_quoted(ex::Expr) = is_expr(ex, :quote, 1) || is_expr(ex, :inert, 1) unquoted(ex::QuoteNode) = ex.value unquoted(ex::Expr) = ex.args[1] -function is_intrinsic_expr(x::ANY) - isa(x, Core.IntrinsicFunction) && return true - if isa(x, GlobalRef) - x = x::GlobalRef - return (isdefined(x.mod, x.name) && - isa(getfield(x.mod, x.name), Core.IntrinsicFunction)) - elseif isa(x, Expr) - return (x::Expr).typ === Core.IntrinsicFunction - end - return false -end - ## AST printing helpers ## typeemphasize(io::IO) = get(io, :TYPEEMPHASIZE, false) === true @@ -775,9 +763,8 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int) end func_args = args[2:end] - if (in(ex.args[1], (GlobalRef(Base, :box), :throw)) || - ismodulecall(ex) || - (ex.typ === Any && is_intrinsic_expr(ex.args[1]))) + if (in(ex.args[1], (GlobalRef(Base, :bitcast), :throw)) || + ismodulecall(ex)) show_type = false end if show_type @@ -859,10 +846,6 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int) print(io, " if ") show_unquoted(io, args[1], indent) - elseif head === :ccall - show_unquoted(io, :ccall, indent) - show_enclosed_list(io, '(', args, ",", ')', indent) - # comparison (i.e. "x < y < z") elseif head === :comparison && nargs >= 3 && (nargs&1==1) comp_prec = minimum(operator_precedence, args[2:2:end]) diff --git a/base/sort.jl b/base/sort.jl index df34f0b092357..19293811a2269 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -684,7 +684,7 @@ module Float using ..Sort using ...Order -import Core.Intrinsics: unbox, slt_int +import Core.Intrinsics: slt_int import ..Sort: sort! 
import ...Order: lt, DirectOrdering @@ -699,8 +699,8 @@ right(::DirectOrdering) = Right() left(o::Perm) = Perm(left(o.order), o.data) right(o::Perm) = Perm(right(o.order), o.data) -lt{T<:Floats}(::Left, x::T, y::T) = slt_int(unbox(T,y),unbox(T,x)) -lt{T<:Floats}(::Right, x::T, y::T) = slt_int(unbox(T,x),unbox(T,y)) +lt{T<:Floats}(::Left, x::T, y::T) = slt_int(y, x) +lt{T<:Floats}(::Right, x::T, y::T) = slt_int(x, y) isnan(o::DirectOrdering, x::Floats) = (x!=x) isnan(o::Perm, i::Int) = isnan(o.order,o.data[i]) diff --git a/base/sysinfo.jl b/base/sysinfo.jl index 1a50f184a3128..d96af3366c5c7 100644 --- a/base/sysinfo.jl +++ b/base/sysinfo.jl @@ -135,7 +135,7 @@ function cpu_info() Base.uv_error("uv_cpu_info",ccall(:uv_cpu_info, Int32, (Ptr{Ptr{UV_cpu_info_t}}, Ptr{Int32}), UVcpus, count)) cpus = Array{CPUinfo}(count[1]) for i = 1:length(cpus) - cpus[i] = CPUinfo(unsafe_load(UVcpus[1],i)) + cpus[i] = CPUinfo(unsafe_load(UVcpus[1], i)) end ccall(:uv_free_cpu_info, Void, (Ptr{UV_cpu_info_t}, Int32), UVcpus[1], count[1]) return cpus diff --git a/doc/src/manual/calling-c-and-fortran-code.md b/doc/src/manual/calling-c-and-fortran-code.md index 70f9ad231155d..8e8d9fc94e0bb 100644 --- a/doc/src/manual/calling-c-and-fortran-code.md +++ b/doc/src/manual/calling-c-and-fortran-code.md @@ -35,21 +35,28 @@ must be passed by reference. Finally, you can use [`ccall`](@ref) to actually generate a call to the library function. Arguments to [`ccall`](@ref) are as follows: -1. (:function, "library") pair (must be a constant, but see below). +1. A `(:function, "library")` pair, which must be written as a literal constant, + + OR + + a function pointer (for example, from `dlsym`). + 2. Return type (see below for mapping the declared C type to Julia) - * This argument will be evaluated at compile-time. + * This argument will be evaluated at compile-time, when the containing method is defined. + 3. A tuple of input types. The input types must be written as a literal tuple, not a tuple-valued variable or expression. - * This argument will be evaluated at compile-time. + * This argument will be evaluated at compile-time, when the containing method is defined. + 4. The following arguments, if any, are the actual argument values passed to the function. As a complete but simple example, the following calls the `clock` function from the standard C library: ```julia -julia> t = ccall( (:clock, "libc"), Int32, ()) +julia> t = ccall((:clock, "libc"), Int32, ()) 2292761 julia> t @@ -118,12 +125,12 @@ Here is a slightly more complex example that discovers the local machine's hostn ```julia function gethostname() - hostname = Array{UInt8}(128) - ccall((:gethostname, "libc"), Int32, - (Ptr{UInt8}, Csize_t), - hostname, sizeof(hostname)) - hostname[end] = 0; # ensure null-termination - return unsafe_string(pointer(hostname)) + hostname = Vector{UInt8}(128) + ccall((:gethostname, "libc"), Int32, + (Ptr{UInt8}, Csize_t), + hostname, sizeof(hostname)) + hostname[end] = 0; # ensure null-termination + return unsafe_string(pointer(hostname)) end ``` @@ -241,16 +248,18 @@ it to be freed prematurely. 
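As a companion to the `clock` and `gethostname` calls above, here is a minimal sketch (not part of the manual text) showing a string argument being passed as `Cstring`; as with `clock`, the `"libc"` library name is platform-dependent and assumes the symbol can be resolved at run time:

```julia
# Sketch only: a Julia String is converted to Cstring automatically, and the
# conversion throws if the string contains embedded NUL characters.
s = "hello from Julia"
len = ccall((:strlen, "libc"), Csize_t, (Cstring,), s)
Int(len)  # 16
```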
First, a review of some relevant Julia type terminology: -| Syntax / Keyword | Example | Description | -|:----------------------------- |:----------------------------------------- |:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `type` | `String` | "Leaf Type" :: A group of related data that includes a type-tag, is managed by the Julia GC, and is defined by object-identity. The type parameters of a leaf type must be fully defined (no `TypeVars` are allowed) in order for the instance to be constructed. | -| `abstract` | `Any`, `AbstractArray{T,N}`, `Complex{T}` | "Super Type" :: A super-type (not a leaf-type) that cannot be instantiated, but can be used to describe a group of types. | -| `{T}` | `Vector{Int}` | "Type Parameter" :: A specialization of a type (typically used for dispatch or storage optimization). "TypeVar" :: The `T` in the type parameter declaration is referred to as a TypeVar (short for type variable). | -| `bitstype` | `Int`, `Float64` | "Bits Type" :: A type with no fields, but a size. It is stored and defined by-value. | -| `immutable` | `Pair{Int,Int}``Complex128` (`isbits`) | "Immutable" :: A type with all fields defined to be constant. It is defined by-value. And may be stored with a type-tag."Is-Bits" :: A `bitstype`, or an `immutable` type where all fields are other `isbits` types. It is defined by-value, and is stored without a type-tag. | -| `type ...; end` | `nothing` | "Singleton" :: a Leaf Type or Immutable with no fields. | -| `(...)` or ```tuple(...)` ``` | `(1,2,3)` | "Tuple" :: an immutable data-structure similar to an anonymous immutable type, or a constant array. Represented as either an array or a struct. | -| `typealias` | Not applicable here | Type aliases, and other similar mechanisms of doing type indirection, are resolved to their base type (this includes assigning a type to another name, or getting the type out of a function call). | +| Syntax / Keyword | Example | Description | +|:----------------------------- |:------------------------------------------- |:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `type` | `String` | "Leaf Type" :: A group of related data that includes a type-tag, is managed by the Julia GC, and is defined by object-identity. The type parameters of a leaf type must be fully defined (no `TypeVars` are allowed) in order for the instance to be constructed. | +| `abstract` | `Any`, `AbstractArray{T, N}`, `Complex{T}` | "Super Type" :: A super-type (not a leaf-type) that cannot be instantiated, but can be used to describe a group of types. | +| `{T}` | `Vector{Int}` | "Type Parameter" :: A specialization of a type (typically used for dispatch or storage optimization). | +| | | "TypeVar" :: The `T` in the type parameter declaration is referred to as a TypeVar (short for type variable). | +| `bitstype` | `Int`, `Float64` | "Bits Type" :: A type with no fields, but a size. It is stored and defined by-value. | +| `immutable` | `Pair{Int, Int}` | "Immutable" :: A type with all fields defined to be constant. It is defined by-value. And may be stored with a type-tag. 
| +| | `Complex128` (`isbits`) | "Is-Bits" :: A `bitstype`, or an `immutable` type where all fields are other `isbits` types. It is defined by-value, and is stored without a type-tag. | +| `type ...; end` | `nothing` | "Singleton" :: a Leaf Type or Immutable with no fields. | +| `(...)` or `tuple(...)` | `(1, 2, 3)` | "Tuple" :: an immutable data-structure similar to an anonymous immutable type, or a constant array. Represented as either an array or a struct. | +| `typealias` | Not applicable here | Type aliases, and other similar mechanisms of doing type indirection, are resolved to their base type (this includes assigning a type to another name, or getting the type out of a function call). | ### Bits Types: @@ -260,29 +269,37 @@ same: * `Float32` Exactly corresponds to the `float` type in C (or `REAL*4` in Fortran). + * `Float64` Exactly corresponds to the `double` type in C (or `REAL*8` in Fortran). + * `Complex64` Exactly corresponds to the `complex float` type in C (or `COMPLEX*8` in Fortran). + * `Complex128` Exactly corresponds to the `complex double` type in C (or `COMPLEX*16` in Fortran). + * `Signed` - Exactly corresponds to the `signed` type annotation in C (or any `INTEGER` type in Fortran). Any - Julia type that is not a subtype of `Signed` is assumed to be unsigned. + Exactly corresponds to the `signed` type annotation in C (or any `INTEGER` type in Fortran). + Any Julia type that is not a subtype of `Signed` is assumed to be unsigned. + + * `Ref{T}` - Behaves like a `Ptr{T}` that owns its memory. + Behaves like a `Ptr{T}` that can manage its memory via the Julia GC. + + * `Array{T,N}` When an array is passed to C as a `Ptr{T}` argument, it is not reinterpret-cast: Julia requires that the element type of the array matches `T`, and the address of the first element is passed. Therefore, if an `Array` contains data in the wrong format, it will have to be explicitly converted - using a call such as `trunc(Int32,a)`. + using a call such as `trunc(Int32, a)`. To pass an array `A` as a pointer of a different type *without* converting the data beforehand (for example, to pass a `Float64` array to a function that operates on uninterpreted bytes), you @@ -298,36 +315,38 @@ as follows. Every C type also has a corresponding Julia type with the same name, This can help for writing portable code (and remembering that an `int` in C is not the same as an `Int` in Julia). 
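Before the correspondence tables, a small illustrative sketch (not manual text) of the `Ref{T}` bullet above: C's `double frexp(double value, int *exp)` writes its second result through a pointer, so the Julia side passes a `Ref{Cint}` that stays rooted for the duration of the call. It assumes `frexp` is resolvable in the running process; the exact library name to pass, if any, is platform-dependent.

```julia
# Sketch only: the Ref{Cint} is allocated and rooted by Julia, and C writes into it.
e = Ref{Cint}(0)
m = ccall(:frexp, Cdouble, (Cdouble, Ref{Cint}), 12.0, e)
(m, e[])  # (0.75, 4), since 12.0 == 0.75 * 2^4
```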
+ **System Independent:** -| C name | Fortran name | Standard Julia Alias | Julia Base Type | -|:------------------------------------------------------- |:---------------------- |:-------------------- |:-------------------------------------------------------------------------------------------------------------- | -| `unsigned char``bool` (C++) | `CHARACTER` | `Cuchar` | `UInt8` | -| `short` | `INTEGER*2``LOGICAL*2` | `Cshort` | `Int16` | -| `unsigned short` |   | `Cushort` | `UInt16` | -| `int``BOOL` (C, typical) | `INTEGER*4``LOGICAL*4` | `Cint` | `Int32` | -| `unsigned int` |   | `Cuint` | `UInt32` | -| `long long` | `INTEGER*8``LOGICAL*8` | `Clonglong` | `Int64` | -| `unsigned long long` |   | `Culonglong` | `UInt64` | -| `intmax_t` |   | `Cintmax_t` | `Int64` | -| `uintmax_t` |   | `Cuintmax_t` | `UInt64` | -| `float` | `REAL*4i` | `Cfloat` | `Float32` | -| `double` | `REAL*8` | `Cdouble` | `Float64` | -| `complex float` | `COMPLEX*8` | `Complex64` | `Complex{Float32}` | -| `complex double` | `COMPLEX*16` | `Complex128` | `Complex{Float64}` | -| `ptrdiff_t` |   | `Cptrdiff_t` | `Int` | -| `ssize_t` |   | `Cssize_t` | `Int` | -| `size_t` |   | `Csize_t` | `UInt` | -| `void` |   |   | `Void` | -| `void` and `[[noreturn]]` or `_Noreturn` |   |   | `Union{}` | -| `void*` |   |   | `Ptr{Void}` | -| `T*` (where T represents an appropriately defined type) |   |   | `Ref{T}` | -| `char*` (or `char[]`, e.g. a string) | `CHARACTER*N` |   | `Cstring` if NUL-terminated, or `Ptr{UInt8}` if not | -| `char**` (or `*char[]`) |   |   | `Ptr{Ptr{UInt8}}` | -| `jl_value_t*` (any Julia Type) |   |   | `Any` | -| `jl_value_t**` (a reference to a Julia Type) |   |   | `Ref{Any}` | -| `va_arg` |   |   | Not supported | -| `...` (variadic function specification) |   |   | `T...` (where `T` is one of the above types, variadic functions of different argument types are not supported) | +| C name | Fortran name | Standard Julia Alias | Julia Base Type | +|:------------------------------------------------------- |:------------------------ |:-------------------- |:-------------------------------------------------------------------------------------------------------------- | +| `unsigned char` | `CHARACTER` | `Cuchar` | `UInt8` | +| `bool` (only in C++) | | `Cuchar` | `UInt8` | +| `short` | `INTEGER*2`, `LOGICAL*2` | `Cshort` | `Int16` | +| `unsigned short` |   | `Cushort` | `UInt16` | +| `int`, `BOOL` (C, typical) | `INTEGER*4`, `LOGICAL*4` | `Cint` | `Int32` | +| `unsigned int` |   | `Cuint` | `UInt32` | +| `long long` | `INTEGER*8`, `LOGICAL*8` | `Clonglong` | `Int64` | +| `unsigned long long` |   | `Culonglong` | `UInt64` | +| `intmax_t` |   | `Cintmax_t` | `Int64` | +| `uintmax_t` |   | `Cuintmax_t` | `UInt64` | +| `float` | `REAL*4i` | `Cfloat` | `Float32` | +| `double` | `REAL*8` | `Cdouble` | `Float64` | +| `complex float` | `COMPLEX*8` | `Complex64` | `Complex{Float32}` | +| `complex double` | `COMPLEX*16` | `Complex128` | `Complex{Float64}` | +| `ptrdiff_t` |   | `Cptrdiff_t` | `Int` | +| `ssize_t` |   | `Cssize_t` | `Int` | +| `size_t` |   | `Csize_t` | `UInt` | +| `void` |   |   | `Void` | +| `void` and `[[noreturn]]` or `_Noreturn` |   |   | `Union{}` | +| `void*` |   |   | `Ptr{Void}` | +| `T*` (where T represents an appropriately defined type) |   |   | `Ref{T}` | +| `char*` (or `char[]`, e.g. 
a string) | `CHARACTER*N` |   | `Cstring` if NUL-terminated, or `Ptr{UInt8}` if not | +| `char**` (or `*char[]`) |   |   | `Ptr{Ptr{UInt8}}` | +| `jl_value_t*` (any Julia Type) |   |   | `Any` | +| `jl_value_t**` (a reference to a Julia Type) |   |   | `Ref{Any}` | +| `va_arg` |   |   | Not supported | +| `...` (variadic function specification) |   |   | `T...` (where `T` is one of the above types, variadic functions of different argument types are not supported) | The `Cstring` type is essentially a synonym for `Ptr{UInt8}`, except the conversion to `Cstring` throws an error if the Julia string contains any embedded NUL characters (which would cause the @@ -340,12 +359,12 @@ checks and is only meant to improve readability of the call. **System-dependent:** -| C name | Standard Julia Alias | Julia Base Type | -|:--------------- |:-------------------- |:------------------------------------------ | -| `char` | `Cchar` | `Int8` (x86, x86_64)`UInt8` (powerpc, arm) | -| `long` | `Clong` | `Int` (UNIX)`Int32` (Windows) | -| `unsigned long` | `Culong` | `UInt` (UNIX)`UInt32` (Windows) | -| `wchar_t` | `Cwchar_t` | `Int32` (UNIX)`UInt16` (Windows) | +| C name | Standard Julia Alias | Julia Base Type | +|:--------------- |:-------------------- |:-------------------------------------------- | +| `char` | `Cchar` | `Int8` (x86, x86_64), `UInt8` (powerpc, arm) | +| `long` | `Clong` | `Int` (UNIX), `Int32` (Windows) | +| `unsigned long` | `Culong` | `UInt` (UNIX), `UInt32` (Windows) | +| `wchar_t` | `Cwchar_t` | `Int32` (UNIX), `UInt16` (Windows) | !!! note When calling a Fortran function, all inputs must be passed by reference, so all type correspondences @@ -364,7 +383,7 @@ checks and is only meant to improve readability of the call. !!! warning A return type of `Union{}` means the function will not return i.e. C++11 `[[noreturn]]` or C11 `_Noreturn` (e.g. `jl_throw` or `longjmp`). Do not use this for functions that return no value - (`void`) but do return. + (`void`) but do return, use `Void` instead. !!! note For `wchar_t*` arguments, the Julia type should be `Cwstring` (if the C routine expects a NUL-terminated @@ -409,8 +428,7 @@ You can get a near approximation of a `union` if you know, a priori, the field t the greatest size (potentially including padding). When translating your fields to Julia, declare the Julia field to be only of that type. -Arrays of parameters must be expanded manually, currently (either inline, or in an immutable helper -type). For example: +Arrays of parameters can be expressed with `NTuple`: ``` in C: @@ -420,20 +438,48 @@ struct B { b_a_2 = B.A[2]; in Julia: -immutable B_A - A_1::Cint - A_2::Cint - A_3::Cint -end type B - A::B_A + A::NTuple{3, CInt} end -b_a_2 = B.A.(2) +b_a_2 = B.A[2] ``` -Arrays of unknown size are not supported. +Arrays of unknown size (C99-compliant variable length structs specified by `[]` or `[0]`) are not directly supported. +Often the best way to deal with these is to deal with the byte offsets directly. +For example, if a c-library declared a proper string type and returned a pointer to it: -In the future, some of these restrictions may be reduced or eliminated. 
+``` +struct String { + int strlen; + char data[]; +}; +``` + +In Julia, we can access the parts independently to make a copy of that string: + +``` +str = from_c::Ptr{Void} +len = unsafe_load(Ptr{Cint}(str)) +unsafe_string(str + Core.sizeof(Cint), len) +``` + +### Type Parameters + +The type arguments to `ccall` are evaluated statically, when the method containing the ccall is defined. +They therefore must take the form of a literal tuple, not a variable, and cannot reference local variables. + +This may sound like a strange restriction, +but remember that since C is not a dynamic language like Julia, +its functions can only accept argument types with a statically-known, fixed signature. + +However, while the type layout must be known statically to compute the `ccall` ABI, +the static parameters of the function are considered to be part of this static environment. +The static parameters of the function may be used as type parameters in the `ccall` signature, +as long as they don't affect the layout of the type. +For example, `f{T}(x::T) = ccall(:valid, Ptr{T}, (Ptr{T},), x)` +is valid, since `Ptr` is always a word-size bitstype. +But, `g{T}(x::T) = ccall(:notvalid, T, (T,), x)` +is not valid, since the type layout of `T` is not known statically. ### SIMD Values @@ -461,10 +507,10 @@ __m256 dist( __m256 a, __m256 b ) { The following Julia code calls `dist` using `ccall`: ```julia -typealias m256 NTuple{8,VecElement{Float32}} +typealias m256 NTuple{8, VecElement{Float32}} -a = m256(ntuple(i->VecElement(sin(Float32(i))),8)) -b = m256(ntuple(i->VecElement(cos(Float32(i))),8)) +a = m256(ntuple(i -> VecElement(sin(Float32(i))), 8)) +b = m256(ntuple(i -> VecElement(cos(Float32(i))), 8)) function call_dist(a::m256, b::m256) ccall((:dist, "libdist"), m256, (m256, m256), a, b) @@ -650,12 +696,12 @@ end # gsl_permutation * gsl_permutation_alloc (size_t n); function permutation_alloc(n::Integer) output_ptr = ccall( - (:gsl_permutation_alloc, :libgsl), #name of C function and library - Ptr{gsl_permutation}, #output type - (Csize_t,), #tuple of input types - n #name of Julia variable to pass in + (:gsl_permutation_alloc, :libgsl), # name of C function and library + Ptr{gsl_permutation}, # output type + (Csize_t,), # tuple of input types + n # name of Julia variable to pass in ) - if output_ptr==C_NULL # Could not allocate memory + if output_ptr == C_NULL # Could not allocate memory throw(OutOfMemoryError()) end return output_ptr @@ -683,10 +729,10 @@ Here is a second example wrapping the corresponding destructor: # void gsl_permutation_free (gsl_permutation * p); function permutation_free(p::Ref{gsl_permutation}) ccall( - (:gsl_permutation_free, :libgsl), #name of C function and library - Void, #output type - (Ref{gsl_permutation},), #tuple of input types - p #name of Julia variable to pass in + (:gsl_permutation_free, :libgsl), # name of C function and library + Void, # output type + (Ref{gsl_permutation},), # tuple of input types + p # name of Julia variable to pass in ) end ``` @@ -711,15 +757,19 @@ Here is a third example passing Julia arrays: # int gsl_sf_bessel_Jn_array (int nmin, int nmax, double x, # double result_array[]) function sf_bessel_Jn_array(nmin::Integer, nmax::Integer, x::Real) - if nmax cglobal((:errno,:libc), Int32) +julia> cglobal((:errno, :libc), Int32) Ptr{Int32} @0x00007f418d0816b8 ``` @@ -891,7 +955,7 @@ The callback you pass to C should only execute a [`ccall`](@ref) to `:uv_async_s `cb.handle` as the argument, taking care to avoid any allocations or other interactions with 
the Julia runtime. -Note that events may be coalesced, so multiple calls to uv_async_send may result in a single wakeup +Note that events may be coalesced, so multiple calls to `uv_async_send` may result in a single wakeup notification to the condition. ## More About Callbacks diff --git a/src/anticodegen.c b/src/anticodegen.c index 552ae552e5911..03e1178ea45d8 100644 --- a/src/anticodegen.c +++ b/src/anticodegen.c @@ -49,3 +49,8 @@ void jl_generate_fptr(jl_method_instance_t *li) li->fptr = (jl_fptr_t)&jl_interpret_call; li->jlcall_api = 4; } + +JL_DLLEXPORT uint32_t jl_get_LLVM_VERSION(void) +{ + return 0; +} diff --git a/src/ast.c b/src/ast.c index c1cc0084151d4..f895029980991 100644 --- a/src/ast.c +++ b/src/ast.c @@ -45,7 +45,7 @@ jl_sym_t *new_sym; jl_sym_t *using_sym; jl_sym_t *const_sym; jl_sym_t *thunk_sym; jl_sym_t *anonymous_sym; jl_sym_t *underscore_sym; jl_sym_t *abstracttype_sym; jl_sym_t *bitstype_sym; -jl_sym_t *compositetype_sym; +jl_sym_t *compositetype_sym; jl_sym_t *foreigncall_sym; jl_sym_t *global_sym; jl_sym_t *list_sym; jl_sym_t *dot_sym; jl_sym_t *newvar_sym; jl_sym_t *boundscheck_sym; jl_sym_t *inbounds_sym; @@ -358,6 +358,7 @@ void jl_init_frontend(void) empty_sym = jl_symbol(""); call_sym = jl_symbol("call"); invoke_sym = jl_symbol("invoke"); + foreigncall_sym = jl_symbol("foreigncall"); quote_sym = jl_symbol("quote"); inert_sym = jl_symbol("inert"); top_sym = jl_symbol("top"); diff --git a/src/ccall.cpp b/src/ccall.cpp index f94926e7aa6fb..636e481a1d3ba 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -396,46 +396,28 @@ static bool is_native_simd_type(jl_datatype_t *dt) { typedef ABI_LLVMLayout DefaultAbiState; #endif - +// basic type widening and cast conversions static Value *llvm_type_rewrite( - Value *v, Type *from_type, Type *target_type, - bool tojulia, /* only matters if byref is set (declares the direction of the byref attribute) */ - bool byref, /* only applies to arguments, set false for return values -- effectively the same as jl_cgval_t.ispointer() */ + Value *v, Type *target_type, bool issigned, /* determines whether an integer value should be zero or sign extended */ jl_codectx_t *ctx) { - if (v->getType() == T_void) - return UndefValue::get(target_type); // convert undef (unreachable) -> undef (target_type) - - if (byref) { - if (tojulia) { - Type *ptarget_type = PointerType::get(target_type, 0); - if (v->getType() != ptarget_type) - v = builder.CreatePointerCast(v, ptarget_type); - return builder.CreateAlignedLoad(v, 1); // unknown alignment from C - } - else { - // julia_to_native should already have done the alloca and store - if (v->getType() != target_type) - v = builder.CreatePointerCast(v, target_type); - return v; - } - } - assert(v->getType() == from_type); - - if (target_type == from_type) { + Type *from_type = v->getType(); + if (target_type == from_type) return v; - } + + if (from_type == T_void || isa(v)) + return UndefValue::get(target_type); // convert undef (unreachable) -> undef (target_type) assert(from_type->isPointerTy() == target_type->isPointerTy()); // expect that all ABIs consider all pointers to be equivalent - if (target_type->isPointerTy()) { - return builder.CreatePointerCast(v, target_type); - } + if (target_type->isPointerTy()) + return emit_bitcast(v, target_type); // simple integer and float widening & conversion cases - if (from_type->getPrimitiveSizeInBits() > 0 && target_type->getPrimitiveSizeInBits() == from_type->getPrimitiveSizeInBits()) { + if (from_type->getPrimitiveSizeInBits() > 0 && + 
target_type->getPrimitiveSizeInBits() == from_type->getPrimitiveSizeInBits()) return emit_bitcast(v, target_type); - } + if (target_type->isFloatingPointTy() && from_type->isFloatingPointTy()) { if (target_type->getPrimitiveSizeInBits() > from_type->getPrimitiveSizeInBits()) return builder.CreateFPExt(v, target_type); @@ -444,6 +426,7 @@ static Value *llvm_type_rewrite( else return v; } + if (target_type->isIntegerTy() && from_type->isIntegerTy()) { if (issigned) return builder.CreateSExtOrTrunc(v, target_type); @@ -465,11 +448,11 @@ static Value *llvm_type_rewrite( #endif if (DL.getTypeAllocSize(target_type) >= DL.getTypeAllocSize(from_type)) { to = emit_static_alloca(target_type, ctx); - from = builder.CreatePointerCast(to, from_type->getPointerTo()); + from = emit_bitcast(to, from_type->getPointerTo()); } else { from = emit_static_alloca(from_type, ctx); - to = builder.CreatePointerCast(from, target_type->getPointerTo()); + to = emit_bitcast(from, target_type->getPointerTo()); } builder.CreateStore(v, from); return builder.CreateLoad(to); @@ -477,43 +460,24 @@ static Value *llvm_type_rewrite( // --- argument passing and scratch space utilities --- -// Emit code to convert argument to form expected by C ABI -// to = desired LLVM type -// jlto = Julia type of formal argument -// jvinfo = value of actual argument -static Value *julia_to_native(Type *to, bool toboxed, jl_value_t *jlto, const jl_cgval_t &jvinfo, - bool addressOf, bool byRef, - bool tojulia, int argn, jl_codectx_t *ctx, - bool *needStackRestore) +static Value *runtime_apply_type(jl_value_t *ty, jl_unionall_t *unionall, jl_codectx_t *ctx) { - // We're passing Any - if (toboxed) { - assert(!addressOf && !byRef); // don't expect any ABI to pass pointers by pointer - return boxed(jvinfo, ctx); - } - assert(jl_is_leaf_type(jlto)); - - // TODO: Tuple arguments are currently undefined behavior, for defining the calling convention that they match to. - // XXX: However, they are used in the llvmcall test, so I guess it'll have to stay. 
- //if (jl_is_tuple(jlto) || jl_is_tuple_type(jlto)) { - // emit_error("ccall: unimplemented: unboxed tuple argument type", ctx); - // return UndefValue::get(to); - //} + // box if concrete type was not statically known + Value *args[3]; + args[0] = literal_pointer_val(ty); + args[1] = literal_pointer_val((jl_value_t*)ctx->linfo->def->sig); + args[2] = builder.CreateInBoundsGEP( + LLVM37_param(T_pjlvalue) + emit_bitcast(ctx->spvals_ptr, T_ppjlvalue), + ConstantInt::get(T_size, sizeof(jl_svec_t) / sizeof(jl_value_t*))); + return builder.CreateCall(prepare_call(jlapplytype_func), makeArrayRef(args)); +} - jl_value_t *ety = jlto; - if (addressOf) { - if (!jl_is_cpointer_type(jlto)) { - emit_error("ccall: & on argument was not matched by Ptr{T} argument type", ctx); - return UndefValue::get(T_void); - } - ety = jl_tparam0(jlto); - if (jlto == (jl_value_t*)jl_voidpointer_type) - ety = jvinfo.typ; // skip the type-check - assert(to->isPointerTy()); - } - if (jvinfo.typ != ety && ety != (jl_value_t*)jl_any_type) { - if (!addressOf && ety == (jl_value_t*)jl_voidpointer_type) { - // allow a bit more flexibility for what can be passed to (void*) due to Ref{T} conversion behavior below +static void typeassert_input(const jl_cgval_t &jvinfo, jl_value_t *jlto, jl_unionall_t *jlto_env, int argn, bool addressOf, jl_codectx_t *ctx) +{ + if (jlto != (jl_value_t*)jl_any_type && !jl_subtype(jvinfo.typ, jlto)) { + if (!addressOf && jlto == (jl_value_t*)jl_voidpointer_type) { + // allow a bit more flexibility for what can be passed to (void*) due to Ref{T} conversion behavior in input if (!jl_is_cpointer_type(jvinfo.typ)) { // emit a typecheck, if not statically known to be correct std::stringstream msg; @@ -527,14 +491,55 @@ static Value *julia_to_native(Type *to, bool toboxed, jl_value_t *jlto, const jl std::stringstream msg; msg << "ccall argument "; msg << argn; - emit_typecheck(jvinfo, ety, msg.str(), ctx); + if (!jlto_env || !jl_has_typevar_from_unionall(jlto, jlto_env)) { + emit_typecheck(jvinfo, jlto, msg.str(), ctx); + } + else { + jl_cgval_t jlto_runtime = mark_julia_type(runtime_apply_type(jlto, jlto_env, ctx), true, jl_any_type, ctx); + Value *vx = boxed(jvinfo, ctx); + Value *istype = builder. 
+ CreateICmpNE( +#if JL_LLVM_VERSION >= 30700 + builder.CreateCall(prepare_call(jlisa_func), { vx, boxed(jlto_runtime, ctx) }), +#else + builder.CreateCall2(prepare_call(jlisa_func), vx, boxed(jlto_runtime, ctx)), +#endif + ConstantInt::get(T_int32, 0)); + BasicBlock *failBB = BasicBlock::Create(jl_LLVMContext, "fail", ctx->f); + BasicBlock *passBB = BasicBlock::Create(jl_LLVMContext, "pass", ctx->f); + builder.CreateCondBr(istype, passBB, failBB); + + builder.SetInsertPoint(failBB); + emit_type_error(mark_julia_type(vx, true, jl_any_type, ctx), boxed(jlto_runtime, ctx), msg.str(), ctx); + builder.CreateUnreachable(); + + builder.SetInsertPoint(passBB); + } } } +} + +static Value *julia_to_address(Type *to, jl_value_t *jlto, jl_unionall_t *jlto_env, const jl_cgval_t &jvinfo, + int argn, jl_codectx_t *ctx, bool *needStackRestore) +{ + assert(jl_is_datatype(jlto) && julia_struct_has_layout((jl_datatype_t*)jlto, jlto_env)); + + if (!jl_is_cpointer_type(jlto) || !to->isPointerTy()) { + emit_error("ccall: & on argument was not matched by Ptr{T} argument type", ctx); + return UndefValue::get(to); + } - if (!addressOf && !byRef) - return emit_unbox(to, jvinfo, ety); + jl_value_t *ety; + if (jlto == (jl_value_t*)jl_voidpointer_type) { + ety = jvinfo.typ; // skip the type-check + } + else { + ety = jl_tparam0(jlto); + typeassert_input(jvinfo, ety, jlto_env, argn, true, ctx); + } + assert(to->isPointerTy()); - if (addressOf && jvinfo.isboxed) { + if (jvinfo.isboxed) { if (!jl_is_abstracttype(ety)) { if (jl_is_mutable_datatype(ety)) { // no copy, just reference the data field @@ -544,7 +549,7 @@ static Value *julia_to_native(Type *to, bool toboxed, jl_value_t *jlto, const jl // yes copy Value *nbytes; AllocaInst *ai; - if (jl_is_leaf_type(ety)) { + if (jl_is_leaf_type(ety) || jl_is_bitstype(ety)) { int nb = jl_datatype_size(ety); nbytes = ConstantInt::get(T_int32, nb); ai = emit_static_alloca(T_int8, nb, ctx); @@ -555,8 +560,7 @@ static Value *julia_to_native(Type *to, bool toboxed, jl_value_t *jlto, const jl *needStackRestore = true; } ai->setAlignment(16); - prepare_call( - builder.CreateMemCpy(ai, data_pointer(jvinfo, ctx, T_pint8), nbytes, sizeof(void*))->getCalledValue()); // minimum gc-alignment in julia is pointer size + builder.CreateMemCpy(ai, data_pointer(jvinfo, ctx, T_pint8), nbytes, sizeof(void*)); // minimum gc-alignment in julia is pointer size return emit_bitcast(ai, to); } } @@ -575,7 +579,7 @@ static Value *julia_to_native(Type *to, bool toboxed, jl_value_t *jlto, const jl Value *nbytes = emit_datatype_size(jvt); AllocaInst *ai = builder.CreateAlloca(T_int8, nbytes); ai->setAlignment(16); - prepare_call(builder.CreateMemCpy(ai, data_pointer(jvinfo, ctx, T_pint8), nbytes, sizeof(void*))->getCalledValue()); // minimum gc-alignment in julia is pointer size + builder.CreateMemCpy(ai, data_pointer(jvinfo, ctx, T_pint8), nbytes, sizeof(void*)); // minimum gc-alignment in julia is pointer size Value *p2 = emit_bitcast(ai, to); builder.CreateBr(afterBB); builder.SetInsertPoint(afterBB); @@ -585,18 +589,57 @@ static Value *julia_to_native(Type *to, bool toboxed, jl_value_t *jlto, const jl return p; } + Type *slottype = julia_struct_to_llvm(jvinfo.typ, NULL, NULL); + // pass the address of an alloca'd thing, not a box + // since those are immutable. 
+ Value *slot = emit_static_alloca(slottype, ctx); + if (!jvinfo.ispointer()) { + builder.CreateStore(emit_unbox(slottype, jvinfo, ety), slot); + } + else { + builder.CreateMemCpy(slot, + data_pointer(jvinfo, ctx, slot->getType()), + (uint64_t)jl_datatype_size(ety), + (uint64_t)((jl_datatype_t*)ety)->layout->alignment); + mark_gc_use(jvinfo); + } + if (slot->getType() != to) + slot = emit_bitcast(slot, to); + return slot; +} + + +// Emit code to convert argument to form expected by C ABI +// to = desired LLVM type +// jlto = Julia type of formal argument +// jvinfo = value of actual argument +static Value *julia_to_native(Type *to, bool toboxed, jl_value_t *jlto, jl_unionall_t *jlto_env, + const jl_cgval_t &jvinfo, + bool byRef, int argn, jl_codectx_t *ctx, + bool *needStackRestore) +{ + // We're passing Any + if (toboxed) { + assert(!byRef); // don't expect any ABI to pass pointers by pointer + return boxed(jvinfo, ctx); + } + assert(jl_is_datatype(jlto) && julia_struct_has_layout((jl_datatype_t*)jlto, jlto_env)); + + typeassert_input(jvinfo, jlto, jlto_env, argn, false, ctx); + if (!byRef) + return emit_unbox(to, jvinfo, jlto); + // pass the address of an alloca'd thing, not a box // since those are immutable. - if (addressOf) - to = to->getContainedType(0); Value *slot = emit_static_alloca(to, ctx); if (!jvinfo.ispointer()) { - builder.CreateStore(emit_unbox(to, jvinfo, ety), slot); + builder.CreateStore(emit_unbox(to, jvinfo, jlto), slot); } else { - prepare_call(builder.CreateMemCpy(slot, data_pointer(jvinfo, ctx, slot->getType()), - (uint64_t)jl_datatype_size(ety), - (uint64_t)((jl_datatype_t*)ety)->layout->alignment)->getCalledValue()); + builder.CreateMemCpy(slot, + data_pointer(jvinfo, ctx, slot->getType()), + (uint64_t)jl_datatype_size(jlto), + (uint64_t)((jl_datatype_t*)jlto)->layout->alignment); mark_gc_use(jvinfo); } return slot; @@ -607,15 +650,18 @@ typedef struct { void (*fptr)(void); // if the argument is a constant pointer const char *f_name; // if the symbol name is known const char *f_lib; // if a library name is specified + jl_value_t *gcroot; } native_sym_arg_t; // --- parse :sym or (:sym, :lib) argument into address info --- -static native_sym_arg_t interpret_symbol_arg(jl_value_t *arg, jl_codectx_t *ctx, const char *fname) +static void interpret_symbol_arg(native_sym_arg_t &out, jl_value_t *arg, jl_codectx_t *ctx, const char *fname, bool llvmcall) { - jl_value_t *ptr = NULL; - Value *jl_ptr=NULL; + Value *&jl_ptr = out.jl_ptr; + void (*&fptr)(void) = out.fptr; + const char *&f_name = out.f_name; + const char *&f_lib = out.f_lib; - ptr = static_eval(arg, ctx, true); + jl_value_t *ptr = static_eval(arg, ctx, true); if (ptr == NULL) { jl_value_t *ptr_ty = expr_type(arg, ctx); jl_cgval_t arg1 = emit_expr(arg, ctx); @@ -629,38 +675,38 @@ static native_sym_arg_t interpret_symbol_arg(jl_value_t *arg, jl_codectx_t *ctx, arg1 = remark_julia_type(arg1, (jl_value_t*)jl_voidpointer_type); jl_ptr = emit_unbox(T_size, arg1, (jl_value_t*)jl_voidpointer_type); } - - void (*fptr)(void) = NULL; - const char *f_name=NULL, *f_lib=NULL; - jl_value_t *t0 = NULL, *t1 = NULL; - JL_GC_PUSH3(&ptr, &t0, &t1); - if (ptr != NULL) { - if (jl_is_tuple(ptr) && jl_nfields(ptr)==1) { - ptr = jl_fieldref(ptr,0); + else { + out.gcroot = ptr; + if (jl_is_tuple(ptr) && jl_nfields(ptr) == 1) { + ptr = jl_fieldref(ptr, 0); } + if (jl_is_symbol(ptr)) f_name = jl_symbol_name((jl_sym_t*)ptr); else if (jl_is_string(ptr)) f_name = jl_string_data(ptr); + if (f_name != NULL) { // just symbol, default to 
JuliaDLHandle // will look in process symbol table #ifdef _OS_WINDOWS_ - f_lib = jl_dlfind_win32(f_name); + if (!llvmcall) + f_lib = jl_dlfind_win32(f_name); #endif } else if (jl_is_cpointer_type(jl_typeof(ptr))) { fptr = *(void(**)(void))jl_data_ptr(ptr); } - else if (jl_is_tuple(ptr) && jl_nfields(ptr)>1) { - jl_value_t *t0 = jl_fieldref(ptr,0); - jl_value_t *t1 = jl_fieldref(ptr,1); + else if (jl_is_tuple(ptr) && jl_nfields(ptr) > 1) { + jl_value_t *t0 = jl_fieldref(ptr, 0); if (jl_is_symbol(t0)) f_name = jl_symbol_name((jl_sym_t*)t0); else if (jl_is_string(t0)) f_name = jl_string_data(t0); else JL_TYPECHKS(fname, symbol, t0); + + jl_value_t *t1 = jl_fieldref(ptr, 1); if (jl_is_symbol(t1)) f_lib = jl_symbol_name((jl_sym_t*)t1); else if (jl_is_string(t1)) @@ -672,13 +718,6 @@ static native_sym_arg_t interpret_symbol_arg(jl_value_t *arg, jl_codectx_t *ctx, JL_TYPECHKS(fname, pointer, ptr); } } - JL_GC_POP(); - native_sym_arg_t r; - r.jl_ptr = jl_ptr; - r.fptr = fptr; - r.f_name = f_name; - r.f_lib = f_lib; - return r; } @@ -706,18 +745,24 @@ static jl_value_t* try_eval(jl_value_t *ex, jl_codectx_t *ctx, const char *failu // --- code generator for cglobal --- +static jl_cgval_t emit_runtime_call(JL_I::intrinsic f, const jl_cgval_t *argv, size_t nargs, jl_codectx_t *ctx); + static jl_cgval_t emit_cglobal(jl_value_t **args, size_t nargs, jl_codectx_t *ctx) { JL_NARGS(cglobal, 1, 2); - jl_value_t *rt=NULL; + jl_value_t *rt = NULL; Value *res; - JL_GC_PUSH1(&rt); + native_sym_arg_t sym = {}; + JL_GC_PUSH2(&rt, &sym.gcroot); if (nargs == 2) { - rt = try_eval(args[2], ctx, "error interpreting cglobal pointer type"); + rt = static_eval(args[2], ctx, true, true); if (rt == NULL) { JL_GC_POP(); - return jl_cgval_t(); + jl_cgval_t argv[2]; + argv[0] = emit_expr(args[0], ctx); + argv[1] = emit_expr(args[1], ctx); + return emit_runtime_call(JL_I::cglobal, argv, nargs, ctx); } JL_TYPECHK(cglobal, type, rt); @@ -727,9 +772,10 @@ static jl_cgval_t emit_cglobal(jl_value_t **args, size_t nargs, jl_codectx_t *ct rt = (jl_value_t*)jl_voidpointer_type; } Type *lrt = julia_type_to_llvm(rt); - if (lrt == NULL) lrt = T_pint8; + if (lrt == NULL) + lrt = T_pint8; - native_sym_arg_t sym = interpret_symbol_arg(args[1], ctx, "cglobal"); + interpret_symbol_arg(sym, args[1], ctx, "cglobal", false); if (sym.jl_ptr != NULL) { res = builder.CreateIntToPtr(sym.jl_ptr, lrt); @@ -984,10 +1030,10 @@ static jl_cgval_t emit_llvmcall(jl_value_t **args, size_t nargs, jl_codectx_t *c jl_cgval_t &arg = argv[i]; arg = emit_expr(argi, ctx); - Value *v = julia_to_native(t, toboxed, tti, arg, false, false, false, i, ctx, NULL); - // make sure args are rooted + Value *v = julia_to_native(t, toboxed, tti, NULL, arg, false, i, ctx, NULL); bool issigned = jl_signed_type && jl_subtype(tti, (jl_value_t*)jl_signed_type); - argvals[i] = llvm_type_rewrite(v, t, t, false, false, issigned, ctx); + // make sure args are rooted + argvals[i] = llvm_type_rewrite(v, t, issigned, ctx); } Function *f; @@ -1133,49 +1179,90 @@ static jl_cgval_t emit_llvmcall(jl_value_t **args, size_t nargs, jl_codectx_t *c // --- code generator for ccall itself --- -static jl_cgval_t mark_or_box_ccall_result(Value *result, bool isboxed, jl_value_t *rt_expr, jl_value_t *rt, bool static_rt, jl_codectx_t *ctx) +static jl_cgval_t mark_or_box_ccall_result(Value *result, bool isboxed, jl_value_t *rt, jl_unionall_t *unionall, bool static_rt, jl_codectx_t *ctx) { if (!static_rt) { - // box if concrete type was not statically known - assert(rt == (jl_value_t*)jl_voidpointer_type); 
- Value *runtime_bt = boxed(emit_expr(rt_expr, ctx), ctx); - int nb = sizeof(void*); - // TODO: can this be tighter than tbaa_value? - return mark_julia_type( - init_bits_value(emit_allocobj(ctx, nb, runtime_bt), result, tbaa_value), - true, (jl_value_t*)jl_pointer_type, ctx); + assert(!isboxed && ctx->spvals_ptr && unionall && jl_is_datatype(rt)); + Value *runtime_dt = runtime_apply_type(rt, unionall, ctx); + // TODO: is this leaf check actually necessary, or is it structurally guaranteed? + emit_leafcheck(runtime_dt, "ccall: return type must be a leaf DataType", ctx); +#if JL_LLVM_VERSION >= 30600 + const DataLayout &DL = jl_ExecutionEngine->getDataLayout(); +#else + const DataLayout &DL = *jl_ExecutionEngine->getDataLayout(); +#endif + unsigned nb = DL.getTypeStoreSize(result->getType()); + MDNode *tbaa = jl_is_mutable(rt) ? tbaa_mutab : tbaa_immut; + Value *strct = emit_allocobj(ctx, nb, runtime_dt); + init_bits_value(strct, result, tbaa); + return mark_julia_type(strct, true, rt, ctx); } return mark_julia_type(result, isboxed, rt, ctx); } -static std::string generate_func_sig( - Type **lrt, // input parameter of the llvm return type (from julia_struct_to_llvm) - Type **prt, // out parameter of the llvm return type for the function signature - int &sret, // out parameter for indicating whether return value has been moved to the first argument position - std::vector &fargt, // vector of llvm output types (julia_struct_to_llvm) for arguments (vararg is the last item, if applicable) - std::vector &fargt_isboxed, // vector of whether the llvm output types is boxed for each argument (vararg is the last item, if applicable) - std::vector &fargt_sig, // vector of ABI coercion types for call signature - Type *&fargt_vasig, // ABI coercion type for vararg list - std::vector &byRefList, // vector of "byref" parameters (vararg is the last item, if applicable) - AttributeSet &attributes, // vector of function call site attributes (vararg is the last item, if applicable) - jl_value_t *rt, // julia return type - jl_svec_t *tt, // tuple of julia argument types - size_t nargs) // number of actual arguments (can be different from the size of tt when varargs) +class function_sig_t { +public: + std::vector fargt; // vector of llvm output types (julia_struct_to_llvm) for arguments (vararg is the last item, if applicable) + std::vector fargt_sig; // vector of ABI coercion types for call signature + std::vector fargt_isboxed; // vector of whether the llvm output type is a Julia-box for each argument (vararg is the last item, if applicable) + Type *fargt_vasig = NULL; // ABI coercion type for vararg list + std::vector byRefList; // vector of "byref" parameters (vararg is the last item, if applicable) + AttributeSet attributes; // vector of function call site attributes (vararg is the last item, if applicable) + Type *lrt; // input parameter of the llvm return type (from julia_struct_to_llvm) + bool retboxed; // input parameter indicating whether lrt is jl_value_t* + Type *prt; // out parameter of the llvm return type for the function signature + int sret; // out parameter for indicating whether return value has been moved to the first argument position + std::string err_msg; + CallingConv::ID cc; // calling convention ABI + bool llvmcall; + FunctionType *functype; + jl_svec_t *at; // svec of julia argument types + jl_value_t *rt; // julia return type + jl_unionall_t *unionall_env; // UnionAll environment for `at` and `rt` + size_t nargs; // number of actual arguments (can be different from the size of at 
when varargs) + size_t isVa; + + function_sig_t(Type *lrt, jl_value_t *rt, bool retboxed, jl_svec_t *at, jl_unionall_t *unionall_env, size_t nargs, size_t isVa, CallingConv::ID cc, bool llvmcall) + : fargt_vasig(NULL), lrt(lrt), retboxed(retboxed), + prt(NULL), sret(0), cc(cc), llvmcall(llvmcall), + functype(NULL), at(at), rt(rt), unionall_env(unionall_env), + nargs(nargs), isVa(isVa) + { + err_msg = generate_func_sig(); + if (err_msg.empty()) + functype = FunctionType::get(sret ? T_void : prt, fargt_sig, isVa); + } + + jl_cgval_t emit_a_ccall( + const native_sym_arg_t &symarg, + size_t nargt, + std::vector &addressOf, + jl_cgval_t *argv, + SmallVector &gc_uses, + bool static_rt, + jl_codectx_t *ctx); + +private: +std::string generate_func_sig() { - size_t nargt = jl_svec_len(tt); + size_t nargt = jl_svec_len(at); assert(rt && !jl_is_abstract_ref_type(rt)); std::vector paramattrs; - std::unique_ptr abi(new DefaultAbiState()); + std::unique_ptr abi; + if (llvmcall) + abi.reset(new ABI_LLVMLayout()); + else + abi.reset(new DefaultAbiState()); sret = 0; - if (type_is_ghost(*lrt)) { - *prt = *lrt = T_void; + if (type_is_ghost(lrt)) { + prt = lrt = T_void; abi->use_sret(jl_void_type); } else { - if (!jl_is_datatype(rt) || ((jl_datatype_t*)rt)->layout == NULL || jl_is_cpointer_type(rt) || jl_is_array_type(rt)) { - *prt = *lrt; // passed as pointer + if (!jl_is_datatype(rt) || ((jl_datatype_t*)rt)->layout == NULL || jl_is_cpointer_type(rt) || jl_is_array_type(rt) || retboxed) { + prt = lrt; // passed as pointer abi->use_sret(jl_voidpointer_type); } else if (abi->use_sret((jl_datatype_t*)rt)) { @@ -1185,22 +1272,22 @@ static std::string generate_func_sig( #endif retattrs.addAttribute(Attribute::NoAlias); paramattrs.push_back(AttributeSet::get(jl_LLVMContext, 1, retattrs)); - fargt_sig.push_back(PointerType::get(*lrt, 0)); + fargt_sig.push_back(PointerType::get(lrt, 0)); sret = 1; - *prt = *lrt; + prt = lrt; } else { - *prt = abi->preferred_llvm_type((jl_datatype_t*)rt, true); - if (*prt == NULL) - *prt = *lrt; + prt = abi->preferred_llvm_type((jl_datatype_t*)rt, true); + if (prt == NULL) + prt = lrt; } } size_t i; bool current_isVa = false; - for (i = 0; i < nargt;) { + for (i = 0; i < nargt; ) { AttrBuilder ab; - jl_value_t *tti = jl_svecref(tt,i); + jl_value_t *tti = jl_svecref(at, i); if (jl_is_vararg_type(tti)) { current_isVa = true; tti = jl_unwrap_vararg(tti); @@ -1215,8 +1302,6 @@ static std::string generate_func_sig( isboxed = false; } else { - if (jl_is_cpointer_type(tti) && jl_is_typevar(jl_tparam0(tti))) - jl_error("ccall: argument type Ptr should have an element type, not Ptr{T}"); if (jl_is_bitstype(tti)) { // see pull req #978. need to annotate signext/zeroext for // small integer arguments. 
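The hunk above also removes the old blanket error for `Ptr{T}` argument types containing a `TypeVar`: with `unionall_env` now threaded into `julia_struct_to_llvm`, a method's static parameters may appear in the `ccall` signature as long as they do not affect the C layout (see the manual's "Type Parameters" section in this patch). A hedged sketch of the user-visible effect (the wrapper name `symbol_chars` is made up; `jl_symbol_name` is used only as a convenient pointer-returning C function):

```julia
# Sketch only: T comes from the method's static parameters; Ptr{T} always has
# word-size layout, so the ccall signature is still fixed when the method is defined.
symbol_chars{T}(s::Symbol, ::Type{T}) = ccall(:jl_symbol_name, Ptr{T}, (Any,), s)

p = symbol_chars(:ccall, UInt8)  # Ptr{UInt8} into the symbol's name
unsafe_string(p)                 # "ccall"
```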
@@ -1229,11 +1314,11 @@ static std::string generate_func_sig( } } - t = julia_struct_to_llvm(tti, &isboxed); + t = julia_struct_to_llvm(tti, unionall_env, &isboxed); if (t == NULL || t == T_void) { std::stringstream msg; msg << "ccall: the type of argument "; - msg << i+1; + msg << (i + 1); msg << " doesn't correspond to a C type"; return msg.str(); } @@ -1287,202 +1372,195 @@ static std::string generate_func_sig( } return ""; } +}; +static std::pair convert_cconv(jl_sym_t *lhd) +{ + // check for calling convention specifier + if (lhd == jl_symbol("stdcall")) { + return std::make_pair(CallingConv::X86_StdCall, false); + } + else if (lhd == jl_symbol("cdecl")) { + return std::make_pair(CallingConv::C, false); + } + else if (lhd == jl_symbol("fastcall")) { + return std::make_pair(CallingConv::X86_FastCall, false); + } + else if (lhd == jl_symbol("thiscall")) { + return std::make_pair(CallingConv::X86_ThisCall, false); + } + else if (lhd == jl_symbol("llvmcall")) { + return std::make_pair(CallingConv::C, true); + } + jl_errorf("ccall: invalid calling convention %s", jl_symbol_name(lhd)); +} -// ccall(pointer, rettype, (argtypes...), args...) -static jl_cgval_t emit_ccall(jl_value_t **args, size_t nargs, jl_codectx_t *ctx) +static const std::string verify_ccall_sig(size_t nargs, jl_value_t *&rt, jl_value_t *at, + jl_unionall_t *unionall_env, jl_svec_t *sparam_vals, const char *funcName, + size_t &nargt, bool &isVa, Type *&lrt, bool &retboxed, bool &static_rt) { - jl_ptls_t ptls = jl_get_ptls_states(); - JL_NARGSV(ccall, 3); - jl_value_t *rt=NULL, *at=NULL; - JL_GC_PUSH2(&rt, &at); - - native_sym_arg_t symarg = interpret_symbol_arg(args[1], ctx, "ccall"); - Value *jl_ptr=NULL; - void (*fptr)(void) = NULL; - const char *f_name = NULL, *f_lib = NULL; - jl_ptr = symarg.jl_ptr; - fptr = symarg.fptr; - f_name = symarg.f_name; - f_lib = symarg.f_lib; - bool isVa = false; + assert(rt && !jl_is_abstract_ref_type(rt)); + JL_TYPECHK(ccall, type, rt); + JL_TYPECHK(ccall, simplevector, at); - if (f_name == NULL && fptr == NULL && jl_ptr == NULL) { - emit_error("ccall: null function pointer", ctx); - JL_GC_POP(); - return jl_cgval_t(); + if (jl_is_array_type(rt)) { + // `Array` used as return type just returns a julia object reference + rt = (jl_value_t*)jl_any_type; } - jl_value_t *rtt_ = expr_type(args[2], ctx); - bool static_rt = true; // is return type fully statically known? - if (jl_is_type_type(rtt_) && jl_is_leaf_type(jl_tparam0(rtt_))) { - rt = jl_tparam0(rtt_); + lrt = julia_struct_to_llvm(rt, unionall_env, &retboxed); + if (lrt == NULL) + return "ccall: return type doesn't correspond to a C type"; + + // is return type fully statically known? 
+ if (unionall_env == NULL) { + static_rt = true; } else { - rt = try_eval(args[2], ctx, NULL); - if (rt == NULL) { - static_rt = false; - if (jl_is_type_type(rtt_)) { - if (jl_subtype(jl_tparam0(rtt_), (jl_value_t*)jl_pointer_type)) { - // substitute Ptr{Void} for statically-unknown pointer type - rt = (jl_value_t*)jl_voidpointer_type; - } - else if (jl_subtype(jl_tparam0(rtt_), (jl_value_t*)jl_array_type)) { - // `Array` used as return type just returns a julia object reference - rt = (jl_value_t*)jl_any_type; - static_rt = true; - } - else if (jl_is_typevar(jl_tparam0(rtt_)) && jl_is_abstract_ref_type(((jl_tvar_t*)jl_tparam0(rtt_))->ub)) { - // `Ref{T}` used as return type just returns T (from a jl_value_t*) - rt = (jl_value_t*)jl_any_type; - static_rt = true; - } - } - if (rt == NULL) { - if (jl_is_expr(args[2])) { - jl_expr_t *rtexpr = (jl_expr_t*)args[2]; - if (rtexpr->head == call_sym && jl_expr_nargs(rtexpr) == 4 && - static_eval(jl_exprarg(rtexpr, 0), ctx, true, false) == jl_builtin_apply_type && - static_eval(jl_exprarg(rtexpr, 1), ctx, true, false) == (jl_value_t*)jl_array_type) { - // `Array` used as return type just returns a julia object reference - rt = (jl_value_t*)jl_any_type; - static_rt = true; - } - else if (rtexpr->head == call_sym && jl_expr_nargs(rtexpr) == 3 && - static_eval(jl_exprarg(rtexpr, 0), ctx, true, false) == jl_builtin_apply_type && - static_eval(jl_exprarg(rtexpr, 1), ctx, true, false) == (jl_value_t*)jl_pointer_type) { - // substitute Ptr{Void} for statically-unknown pointer type - rt = (jl_value_t*)jl_voidpointer_type; - } - else if (rtexpr->head == call_sym && jl_expr_nargs(rtexpr) == 3 && - static_eval(jl_exprarg(rtexpr, 0), ctx, true, false) == jl_builtin_apply_type && - static_eval(jl_exprarg(rtexpr, 1), ctx, true, false) == (jl_value_t*)jl_ref_type) { - // `Ref{T}` used as return type just returns T (from a jl_value_t*) - rt = (jl_value_t*)jl_any_type; - static_rt = true; - } - } - } - if (rt == NULL) { - if (ptls->exception_in_transit && - jl_typeis(ptls->exception_in_transit, - jl_undefvarerror_type) && - jl_is_symbol(args[2])) { - std::string msg = "ccall return type undefined: " + - std::string(jl_symbol_name((jl_sym_t*)args[2])); - emit_error(msg.c_str(), ctx); - JL_GC_POP(); - return jl_cgval_t(); - } - emit_error("error interpreting ccall return type", ctx); - JL_GC_POP(); - return jl_cgval_t(); - } + static_rt = retboxed || !jl_has_typevar_from_unionall(rt, unionall_env); + if (!static_rt && sparam_vals != NULL) { + rt = jl_instantiate_type_in_env(rt, unionall_env, jl_svec_data(sparam_vals)); + // `rt` is gc-rooted by the caller + static_rt = true; } } - if (jl_is_svec(rt)) { - std::string msg = "in " + ctx->funcName + - ": ccall: missing return type"; - jl_error(msg.c_str()); + if (!retboxed && static_rt) { + if (!jl_is_leaf_type(rt)) { + if (jl_is_cpointer_type(rt)) + return "ccall: return type Ptr should have an element type (not Ptr{_<:T})"; + else if (rt != jl_bottom_type) + return "ccall: return type must be a leaf DataType"; + } } - if (jl_is_cpointer_type(rt) && jl_is_typevar(jl_tparam0(rt))) - jl_error("ccall: return type Ptr should have an element type, not Ptr{_<:T}"); - if (jl_is_abstract_ref_type(rt)) { - if (jl_tparam0(rt) == (jl_value_t*)jl_any_type) - jl_error("ccall: return type Ref{Any} is invalid. 
use Ptr{Any} instead."); - rt = (jl_value_t*)jl_any_type; // convert return type to jl_value_t* - } + nargt = jl_svec_len(at); + isVa = (nargt > 0 && jl_is_vararg_type(jl_svecref(at, nargt - 1))); + if ((!isVa && nargt != (nargs - 2) / 2) || + ( isVa && nargt - 1 > (nargs - 2) / 2)) + return "ccall: wrong number of arguments to C function"; - if (jl_is_array_type(rt)) { - // `Array` used as return type just returns a julia object reference - rt = (jl_value_t*)jl_any_type; - } + return ""; +} - JL_TYPECHK(ccall, type, rt); - bool retboxed; - Type *lrt = julia_struct_to_llvm(rt, &retboxed); - if (lrt == NULL) { - emit_error("ccall: return type doesn't correspond to a C type", ctx); - JL_GC_POP(); - return jl_cgval_t(); +// Expr(:foreigncall, pointer, rettype, (argtypes...), args...) +static jl_cgval_t emit_ccall(jl_value_t **args, size_t nargs, jl_codectx_t *ctx) +{ + JL_NARGSV(ccall, 3); + args -= 1; + jl_value_t *rt = args[2]; + jl_value_t *at = args[3]; + native_sym_arg_t symarg = {}; + JL_GC_PUSH3(&rt, &at, &symarg.gcroot); + + CallingConv::ID cc = CallingConv::C; + bool llvmcall = false; + if (nargs % 2 == 0) { + jl_value_t *last = args[nargs]; + JL_TYPECHK(ccall, expr, last); + std::tie(cc, llvmcall) = convert_cconv(((jl_expr_t*)last)->head); + nargs -= 1; } - at = try_eval(args[3], ctx, "error interpreting ccall argument tuple"); - if (at == NULL) { + interpret_symbol_arg(symarg, args[1], ctx, "ccall", llvmcall); + Value *&jl_ptr = symarg.jl_ptr; + void (*&fptr)(void) = symarg.fptr; + const char *&f_name = symarg.f_name; + const char *&f_lib = symarg.f_lib; + + if (f_name == NULL && fptr == NULL && jl_ptr == NULL) { + emit_error("ccall: null function pointer", ctx); JL_GC_POP(); return jl_cgval_t(); } - JL_TYPECHK(ccall, simplevector, at); - //JL_TYPECHK(ccall, type, at); - jl_svec_t *tt = (jl_svec_t*)at; + jl_unionall_t *unionall = (ctx->linfo->def && jl_is_unionall(ctx->linfo->def->sig)) + ? (jl_unionall_t*)ctx->linfo->def->sig + : NULL; - // check for calling convention specifier - CallingConv::ID cc = CallingConv::C; - jl_value_t *last = args[nargs]; - if (jl_is_expr(last)) { - jl_sym_t *lhd = ((jl_expr_t*)last)->head; - if (lhd == jl_symbol("stdcall")) { - cc = CallingConv::X86_StdCall; - nargs--; - } - else if (lhd == jl_symbol("cdecl")) { - cc = CallingConv::C; - nargs--; - } - else if (lhd == jl_symbol("fastcall")) { - cc = CallingConv::X86_FastCall; - nargs--; - } - else if (lhd == jl_symbol("thiscall")) { - cc = CallingConv::X86_ThisCall; - nargs--; + if (jl_is_abstract_ref_type(rt)) { + // emit verification that the tparam for Ref isn't Any or a TypeVar + jl_value_t *ref = jl_tparam0(rt); + bool always_error = false; + if (ref == (jl_value_t*)jl_any_type) { + always_error = true; + } + else if (jl_is_typevar(ref)) { + always_error = true; + if (unionall) { + int i; + jl_unionall_t *ua = unionall; + for (i = 0; jl_is_unionall(ua); i++) { + if (ua->var == (jl_tvar_t*)ref) { + jl_cgval_t runtime_sp = emit_sparam(i, ctx); + if (runtime_sp.constant) { + if (runtime_sp.constant != (jl_value_t*)jl_any_type) + always_error = false; + } + else { + Value *notany = builder.CreateICmpNE( + boxed(runtime_sp, ctx, false), + literal_pointer_val((jl_value_t*)jl_any_type)); + error_unless(notany, "ccall: return type Ref{Any} is invalid. 
use Ptr{Any} instead.", ctx); + always_error = false; + } + break; + } + } + } } - } - - // some sanity checking and check whether there's a vararg - size_t i; - size_t nargt = jl_svec_len(tt); - for(i=0; i < nargt; i++) { - jl_value_t *tti = jl_svecref(tt,i); - if (jl_is_cpointer_type(tti) && jl_is_typevar(jl_tparam0(tti))) { + if (always_error) { + emit_error("ccall: return type Ref{Any} is invalid. use Ptr{Any} instead.", ctx); JL_GC_POP(); - emit_error("ccall: argument type Ptr should have an element type, Ptr{T}",ctx); return jl_cgval_t(); } - if (jl_is_vararg_type(tti)) - isVa = true; + rt = (jl_value_t*)jl_any_type; // convert return type to jl_value_t* } - if ((!isVa && nargt != (nargs - 2)/2) || - ( isVa && nargt-1 > (nargs - 2)/2)) - jl_error("ccall: wrong number of arguments to C function"); + // some sanity checking and check whether there's a vararg + bool isVa; + size_t nargt; + Type *lrt; + bool retboxed; + bool static_rt; + std::string err = verify_ccall_sig( + /* inputs: */ + nargs, rt, at, unionall, + ctx->spvals_ptr == NULL ? ctx->linfo->sparam_vals : NULL, + ctx->funcName.c_str(), + /* outputs: */ + nargt, isVa, lrt, retboxed, static_rt); + if (!err.empty()) { + emit_error(err, ctx); + JL_GC_POP(); + return jl_cgval_t(); + } + if (rt != args[2] && rt != (jl_value_t*)jl_any_type) + jl_add_method_root(ctx, rt); // some special functions if (fptr == (void(*)(void))&jl_array_ptr || ((f_lib==NULL || (intptr_t)f_lib==2) && f_name && !strcmp(f_name,"jl_array_ptr"))) { assert(lrt->isPointerTy()); - assert(!isVa); + assert(!isVa && !llvmcall); assert(nargt==1); jl_value_t *argi = args[4]; assert(!(jl_is_expr(argi) && ((jl_expr_t*)argi)->head == amp_sym)); jl_cgval_t ary = emit_expr(argi, ctx); JL_GC_POP(); return mark_or_box_ccall_result(emit_bitcast(emit_arrayptr(ary, ctx), lrt), - retboxed, args[2], rt, static_rt, ctx); + retboxed, rt, unionall, static_rt, ctx); } if (fptr == (void(*)(void))&jl_value_ptr || ((f_lib==NULL || (intptr_t)f_lib==2) && f_name && !strcmp(f_name,"jl_value_ptr"))) { assert(lrt->isPointerTy()); - assert(!isVa); + assert(!isVa && !llvmcall); assert(nargt==1); jl_value_t *argi = args[4]; bool addressOf = false; - jl_value_t *tti = jl_svecref(tt,0); + jl_value_t *tti = jl_svecref(at, 0); if (jl_is_expr(argi) && ((jl_expr_t*)argi)->head == amp_sym) { addressOf = true; argi = jl_exprarg(argi,0); @@ -1498,7 +1576,7 @@ static jl_cgval_t emit_ccall(jl_value_t **args, size_t nargs, jl_codectx_t *ctx) isboxed = true; } else { - largty = julia_struct_to_llvm(tti, &isboxed); + largty = julia_struct_to_llvm(tti, unionall, &isboxed); } if (isboxed) { ary = boxed(emit_expr(argi, ctx), ctx); @@ -1509,13 +1587,13 @@ static jl_cgval_t emit_ccall(jl_value_t **args, size_t nargs, jl_codectx_t *ctx) } JL_GC_POP(); return mark_or_box_ccall_result(emit_bitcast(ary, lrt), - retboxed, args[2], rt, static_rt, ctx); + retboxed, rt, unionall, static_rt, ctx); } if (JL_CPU_WAKE_NOOP && (fptr == &jl_cpu_wake || ((!f_lib || (intptr_t)f_lib == 2) && f_name && !strcmp(f_name, "jl_cpu_wake")))) { assert(lrt == T_void); - assert(!isVa); + assert(!isVa && !llvmcall); assert(nargt == 0); JL_GC_POP(); return ghostValue(jl_void_type); @@ -1524,7 +1602,7 @@ static jl_cgval_t emit_ccall(jl_value_t **args, size_t nargs, jl_codectx_t *ctx) ((!f_lib || (intptr_t)f_lib == 2) && f_name && strcmp(f_name, "jl_gc_safepoint") == 0)) { assert(lrt == T_void); - assert(!isVa); + assert(!isVa && !llvmcall); assert(nargt == 0); JL_GC_POP(); builder.CreateCall(prepare_call(gcroot_flush_func)); @@ -1545,18 
+1623,18 @@ static jl_cgval_t emit_ccall(jl_value_t **args, size_t nargs, jl_codectx_t *ctx) ((!f_lib || (intptr_t)f_lib == 2) && f_name && strcmp(f_name, "jl_get_ptls_states") == 0)) { assert(lrt == T_pint8); - assert(!isVa); + assert(!isVa && !llvmcall); assert(nargt == 0); JL_GC_POP(); return mark_or_box_ccall_result( emit_bitcast(ctx->ptlsStates, lrt), - retboxed, args[2], rt, static_rt, ctx); + retboxed, rt, unionall, static_rt, ctx); } if (fptr == &jl_sigatomic_begin || ((!f_lib || (intptr_t)f_lib == 2) && f_name && strcmp(f_name, "jl_sigatomic_begin") == 0)) { assert(lrt == T_void); - assert(!isVa); + assert(!isVa && !llvmcall); assert(nargt == 0); JL_GC_POP(); builder.CreateCall(prepare_call(gcroot_flush_func)); @@ -1572,7 +1650,7 @@ static jl_cgval_t emit_ccall(jl_value_t **args, size_t nargs, jl_codectx_t *ctx) ((!f_lib || (intptr_t)f_lib == 2) && f_name && strcmp(f_name, "jl_sigatomic_end") == 0)) { assert(lrt == T_void); - assert(!isVa); + assert(!isVa && !llvmcall); assert(nargt == 0); JL_GC_POP(); builder.CreateCall(prepare_call(gcroot_flush_func)); @@ -1603,19 +1681,21 @@ static jl_cgval_t emit_ccall(jl_value_t **args, size_t nargs, jl_codectx_t *ctx) ((f_lib==NULL || (intptr_t)f_lib==2) && f_name && !strcmp(f_name, "jl_is_leaf_type"))) { assert(nargt == 1); + assert(!isVa && !llvmcall); jl_value_t *arg = args[4]; jl_value_t *ty = expr_type(arg, ctx); if (jl_is_type_type(ty) && !jl_is_typevar(jl_tparam0(ty))) { int isleaf = jl_is_leaf_type(jl_tparam0(ty)); JL_GC_POP(); return mark_or_box_ccall_result(ConstantInt::get(T_int32, isleaf), - false, args[2], rt, static_rt, ctx); + false, rt, unionall, static_rt, ctx); } } if (fptr == (void(*)(void))&jl_function_ptr || ((f_lib==NULL || (intptr_t)f_lib==2) && f_name && !strcmp(f_name, "jl_function_ptr"))) { assert(nargt == 3); + assert(!isVa && !llvmcall); jl_value_t *f = static_eval(args[4], ctx, false, false); jl_value_t *frt = expr_type(args[6], ctx); if (f && (jl_is_type_type((jl_value_t*)frt) && !jl_has_free_typevars(jl_tparam0(frt)))) { @@ -1648,69 +1728,19 @@ static jl_cgval_t emit_ccall(jl_value_t **args, size_t nargs, jl_codectx_t *ctx) emit_expr(args[8], ctx); JL_GC_POP(); return mark_or_box_ccall_result(emit_bitcast(llvmf, lrt), - retboxed, args[2], rt, static_rt, ctx); + retboxed, rt, unionall, static_rt, ctx); } } } } - // save place before arguments, for possible insertion of temp arg - // area saving code. - Value *stacksave=NULL; - BasicBlock::InstListType &instList = builder.GetInsertBlock()->getInstList(); - Instruction *savespot; - if (instList.empty()) { - savespot = NULL; - } - else { - // hey C++, there's this thing called pointers... 
- Instruction &_savespot = builder.GetInsertBlock()->back(); - savespot = &_savespot; - } - - std::vector fargt(0); - std::vector fargt_sig(0); - std::vector fargt_isboxed(0); - Type *fargt_vasig = NULL; - std::vector byRefList(0); - AttributeSet attrs; - Type *prt = NULL; - int sret = 0; - std::string err_msg = generate_func_sig(&lrt, &prt, sret, fargt, fargt_isboxed, fargt_sig, fargt_vasig, - byRefList, attrs, rt, tt, (nargs - 3)/2); - if (!err_msg.empty()) { - JL_GC_POP(); - emit_error(err_msg,ctx); - return jl_cgval_t(); - } - // emit arguments - Value **argvals = (Value**) alloca(((nargs - 3) / 2 + sret) * sizeof(Value*)); - Value *result = NULL; - bool needStackRestore = false; - - // First, if the ABI requires us to provide the space for the return - // argument, allocate the box and store that as the first argument type - bool sretboxed = false; - if (sret) { - jl_cgval_t sret_val = emit_new_struct(rt,1,NULL,ctx); // TODO: is it valid to be creating an incomplete type this way? - assert(sret_val.typ != NULL && "Type was not concrete"); - if (!sret_val.ispointer()) { - Value *mem = emit_static_alloca(lrt, ctx); - builder.CreateStore(sret_val.V, mem); - result = mem; - } - else { - // XXX: result needs a GC root here if result->getType() == T_pjlvalue - result = sret_val.V; - } - argvals[0] = emit_bitcast(result, fargt_sig.at(0)); - sretboxed = sret_val.isboxed; - } + jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * (nargs - 3) / 2); + SmallVector gc_uses; + std::vector addressOf(0); - // number of parameters to the c function - jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * (nargs - 3)/2); - for(i = 4; i < nargs + 1; i += 2) { + size_t i; + for (i = 4; i < nargs + 1; i += 2) { // Current C function parameter size_t ai = (i - 4) / 2; @@ -1718,12 +1748,61 @@ static jl_cgval_t emit_ccall(jl_value_t **args, size_t nargs, jl_codectx_t *ctx) jl_value_t *argi = args[i]; // pass the address of the argument rather than the argument itself - bool addressOf = false; if (jl_is_expr(argi) && ((jl_expr_t*)argi)->head == amp_sym) { - addressOf = true; - argi = jl_exprarg(argi,0); + addressOf.push_back(true); + argi = jl_exprarg(argi, 0); + } + else { + addressOf.push_back(false); } + jl_cgval_t &arg = argv[ai]; + arg = emit_expr((jl_value_t*)argi, ctx); + push_gc_use(gc_uses, arg); + + // Julia (expression) value of current parameter gcroot + jl_value_t *argi_root = args[i + 1]; + if (jl_is_long(argi_root)) + continue; + jl_cgval_t arg_root = emit_expr(argi_root, ctx); + push_gc_use(gc_uses, arg_root); + } + + function_sig_t sig(lrt, rt, retboxed, (jl_svec_t*)at, unionall, (nargs - 3) / 2, isVa, cc, llvmcall); + jl_cgval_t retval = sig.emit_a_ccall( + symarg, + nargt, + addressOf, + argv, + gc_uses, + static_rt, + ctx); + JL_GC_POP(); + return retval; +} + +jl_cgval_t function_sig_t::emit_a_ccall( + const native_sym_arg_t &symarg, + size_t nargt, + std::vector &addressOf, + jl_cgval_t *argv, + SmallVector &gc_uses, + bool static_rt, + jl_codectx_t *ctx) +{ + if (!err_msg.empty()) { + emit_error(err_msg, ctx); + return jl_cgval_t(); + } + + // save place before arguments, for possible insertion of temp arg area saving code. + BasicBlock::InstListType &instList = builder.GetInsertBlock()->getInstList(); + Instruction *savespot = instList.empty() ? 
NULL : &instList.back(); + + bool needStackRestore = false; + Value **argvals = (Value**) alloca((nargs + sret) * sizeof(Value*)); + for (size_t ai = 0; ai < nargs; ai++) { + // Current C function parameter Type *largty; // LLVM type of the current parameter bool toboxed; jl_value_t *jargty; // Julia type of the current parameter @@ -1731,83 +1810,160 @@ static jl_cgval_t emit_ccall(jl_value_t **args, size_t nargs, jl_codectx_t *ctx) if (isVa && ai >= nargt - 1) { largty = fargt.at(nargt - 1); toboxed = fargt_isboxed.at(nargt - 1); - jargty = jl_unwrap_vararg(jl_svecref(tt, nargt - 1)); + jargty = jl_unwrap_vararg(jl_svecref(at, nargt - 1)); byRef = byRefList.at(nargt - 1); } else { largty = fargt.at(ai); toboxed = fargt_isboxed.at(ai); - jargty = jl_svecref(tt, ai); + jargty = jl_svecref(at, ai); byRef = byRefList.at(ai); } - + Type *pargty = ai + sret < fargt_sig.size() ? fargt_sig.at(ai + sret) : fargt_vasig; jl_cgval_t &arg = argv[ai]; - arg = emit_expr((jl_value_t*)argi, ctx); - if (jl_is_abstract_ref_type(jargty)) { - if (addressOf) { - JL_GC_POP(); + + // if we know the function sparams, try to fill those in now + // so that the julia_to_native type checks are more likely to be doable (e.g. leaf types) at compile-time + jl_value_t *jargty_in_env = jargty; + if (ctx->spvals_ptr == NULL && !toboxed && unionall_env && jl_has_typevar_from_unionall(jargty, unionall_env)) { + jargty_in_env = jl_instantiate_type_in_env(jargty_in_env, unionall_env, jl_svec_data(ctx->linfo->sparam_vals)); + if (jargty_in_env != jargty) + jl_add_method_root(ctx, jargty_in_env); + } + + Value *v; + if (!addressOf.at(ai)) { + if (jl_is_abstract_ref_type(jargty)) { + if (!jl_is_cpointer_type(arg.typ)) { + emit_cpointercheck(arg, "ccall: argument to Ref{T} is not a pointer", ctx); + arg.typ = (jl_value_t*)jl_voidpointer_type; + arg.isboxed = false; + } + jargty_in_env = (jl_value_t*)jl_voidpointer_type; + } + + v = julia_to_native(largty, toboxed, jargty_in_env, unionall_env, arg, byRef, + ai + 1, ctx, &needStackRestore); + bool issigned = jl_signed_type && jl_subtype(jargty, (jl_value_t*)jl_signed_type); + if (byRef) { + // julia_to_native should already have done the alloca and store + assert(v->getType() == pargty); + } + else { + v = llvm_type_rewrite(v, pargty, issigned, ctx); + } + } + else { + if (jl_is_abstract_ref_type(jargty)) { emit_error("ccall: & on a Ref{T} argument is invalid", ctx); + JL_GC_POP(); return jl_cgval_t(); } - if (!jl_is_cpointer_type(arg.typ)) { - emit_cpointercheck(arg, "ccall: argument to Ref{T} is not a pointer", ctx); - arg.typ = (jl_value_t*)jl_voidpointer_type; - arg.isboxed = false; - } - jargty = (jl_value_t*)jl_voidpointer_type; + v = julia_to_address(largty, jargty_in_env, unionall_env, arg, + ai + 1, ctx, &needStackRestore); + assert((!toboxed && !byRef) || isa(v)); } - Value *v = julia_to_native(largty, toboxed, jargty, arg, addressOf, byRef, - false, ai + 1, ctx, &needStackRestore); - bool issigned = jl_signed_type && jl_subtype(jargty, (jl_value_t*)jl_signed_type); - argvals[ai + sret] = llvm_type_rewrite(v, largty, - ai + sret < fargt_sig.size() ? 
fargt_sig.at(ai + sret) : fargt_vasig, - false, byRef, issigned, ctx); + if (isa(v)) { + JL_GC_POP(); + return jl_cgval_t(); + } + assert(v->getType() == pargty); + argvals[ai + sret] = v; } + Value *result = NULL; + // First, if the ABI requires us to provide the space for the return + // argument, allocate the box and store that as the first argument type + bool sretboxed = false; + if (sret) { + jl_cgval_t sret_val = emit_new_struct(rt, 1, NULL, ctx); // TODO: is it valid to be creating an incomplete type this way? + assert(sret_val.typ != NULL && "Type was not concrete"); + if (!sret_val.ispointer()) { + Value *mem = emit_static_alloca(lrt, ctx); + builder.CreateStore(sret_val.V, mem); + result = mem; + } + else { + // XXX: result needs a GC root here if result->getType() == T_pjlvalue + result = sret_val.V; + } + argvals[0] = emit_bitcast(result, fargt_sig.at(0)); + sretboxed = sret_val.isboxed; + } + + Instruction *stacksave = NULL; + if (needStackRestore) { + stacksave = CallInst::Create(Intrinsic::getDeclaration(jl_Module, + Intrinsic::stacksave)); + if (savespot) { +#if JL_LLVM_VERSION >= 30800 + instList.insertAfter(savespot->getIterator(), stacksave); +#else + instList.insertAfter(savespot, stacksave); +#endif + } + else { + instList.push_front(stacksave); + } + } // make LLVM function object for the target // keep this close to the function call, so that the compiler can // optimize the global pointer load in the common case Value *llvmf; - FunctionType *functype = FunctionType::get(sret ? T_void : prt, fargt_sig, isVa); - - if (jl_ptr != NULL) { - null_pointer_check(jl_ptr,ctx); - Type *funcptype = PointerType::get(functype,0); - llvmf = builder.CreateIntToPtr(jl_ptr, funcptype); + if (llvmcall) { + if (symarg.jl_ptr != NULL) { + jl_error("llvmcall doesn't support dynamic pointers"); + } + else if (symarg.fptr != NULL) { + jl_error("llvmcall doesn't support static pointers"); + } + else if (symarg.f_lib != NULL) { + jl_error("llvmcall doesn't support dynamic libraries"); + } + else { + assert(symarg.f_name != NULL); + llvmf = jl_Module->getOrInsertFunction(symarg.f_name, functype); + if (!isa(llvmf) || cast(llvmf)->getIntrinsicID() == Intrinsic::not_intrinsic) + jl_error("llvmcall only supports intrinsic calls"); + } + } + else if (symarg.jl_ptr != NULL) { + null_pointer_check(symarg.jl_ptr, ctx); + Type *funcptype = PointerType::get(functype, 0); + llvmf = builder.CreateIntToPtr(symarg.jl_ptr, funcptype); } - else if (fptr != NULL) { - Type *funcptype = PointerType::get(functype,0); - llvmf = literal_static_pointer_val((void*)(uintptr_t)fptr, funcptype); + else if (symarg.fptr != NULL) { + Type *funcptype = PointerType::get(functype, 0); + llvmf = literal_static_pointer_val((void*)(uintptr_t)symarg.fptr, funcptype); if (imaging_mode) - jl_printf(JL_STDERR,"WARNING: literal address used in ccall for %s; code cannot be statically compiled\n", f_name); + jl_printf(JL_STDERR,"WARNING: literal address used in ccall for %s; code cannot be statically compiled\n", symarg.f_name); } else { - assert(f_name != NULL); + assert(symarg.f_name != NULL); - PointerType *funcptype = PointerType::get(functype,0); + PointerType *funcptype = PointerType::get(functype, 0); if (imaging_mode) { // vararg requires musttail, // but musttail is incompatible with noreturn. 
             if (functype->isVarArg())
-                llvmf = runtime_sym_lookup(funcptype, f_lib, f_name, ctx->f);
+                llvmf = runtime_sym_lookup(funcptype, symarg.f_lib, symarg.f_name, ctx->f);
             else
-                llvmf = emit_plt(functype, attrs, cc, f_lib, f_name);
+                llvmf = emit_plt(functype, attributes, cc, symarg.f_lib, symarg.f_name);
         }
         else {
-            void *symaddr = jl_dlsym_e(jl_get_library(f_lib), f_name);
+            void *symaddr = jl_dlsym_e(jl_get_library(symarg.f_lib), symarg.f_name);
             if (symaddr == NULL) {
-                JL_GC_POP();
                 std::stringstream msg;
                 msg << "ccall: could not find function ";
-                msg << f_name;
-                if (f_lib != NULL) {
+                msg << symarg.f_name;
+                if (symarg.f_lib != NULL) {
 #ifdef _OS_WINDOWS_
-                    assert((intptr_t)f_lib != 1 && (intptr_t)f_lib != 2);
+                    assert((intptr_t)symarg.f_lib != 1 && (intptr_t)symarg.f_lib != 2);
 #endif
                     msg << " in library ";
-                    msg << f_lib;
+                    msg << symarg.f_lib;
                 }
                 emit_error(msg.str(), ctx);
                 return jl_cgval_t();
@@ -1818,43 +1974,13 @@ static jl_cgval_t emit_ccall(jl_value_t **args, size_t nargs, jl_codectx_t *ctx)
         }
     }
-    if (needStackRestore) {
-        stacksave = CallInst::Create(Intrinsic::getDeclaration(jl_Module,
-                Intrinsic::stacksave));
-        if (savespot) {
-#if JL_LLVM_VERSION >= 30800
-            instList.insertAfter(savespot->getIterator(), (Instruction*)stacksave);
-#else
-            instList.insertAfter((Instruction*)savespot, (Instruction*)stacksave);
-#endif
-        }
-        else
-            instList.push_front((Instruction*)stacksave);
-    }
-
-    //llvmf->dump();
-    //for (int i = 0; i < (nargs - 3) / 2 + sret; ++i)
-    //    argvals[i]->dump();
-
     // Mark GC use before **and** after the ccall to make sure the arguments
     // are alive during the ccall even if the function called is `noreturn`.
-    SmallVector<Value*, 16> gc_uses;
-    for(i = 4; i < nargs + 1; i += 2) {
-        // Current C function parameter
-        size_t ai = (i - 4) / 2;
-        push_gc_use(gc_uses, argv[ai]);
-
-        // Julia (expression) value of current parameter gcroot
-        jl_value_t *argi = args[i + 1];
-        if (jl_is_long(argi)) continue;
-        jl_cgval_t arg = emit_expr(argi, ctx);
-        push_gc_use(gc_uses, arg);
-    }
     mark_gc_uses(gc_uses);
     // the actual call
     Value *ret = builder.CreateCall(prepare_call(llvmf),
-            ArrayRef<Value*>(&argvals[0], (nargs - 3) / 2 + sret));
-    ((CallInst*)ret)->setAttributes(attrs);
+            ArrayRef<Value*>(&argvals[0], nargs + sret));
+    ((CallInst*)ret)->setAttributes(attributes);
     if (cc != CallingConv::C)
         ((CallInst*)ret)->setCallingConv(cc);
@@ -1869,54 +1995,65 @@ static jl_cgval_t emit_ccall(jl_value_t **args, size_t nargs, jl_codectx_t *ctx)
     }
     mark_gc_uses(gc_uses);
-    JL_GC_POP();
     if (rt == jl_bottom_type) {
         // Do this after we marked all the GC uses.
         CreateTrap(builder);
+        return jl_cgval_t();
     }
+
     // Finally we need to box the result into julia type
     // However, if we have already created a box for the return
     // type because the ABI required us to pass a pointer (sret),
     // then we do not need to do this.
- if (!sret) { - Type *jlrt = julia_type_to_llvm(rt, &retboxed); // compute the real "julian" return type and update retboxed + bool jlretboxed; + if (retboxed) { + assert(!sret); + jlretboxed = true; + } + else if (sret) { + jlretboxed = sretboxed; + if (!jlretboxed) + result = builder.CreateLoad(result); // something alloca'd above + } + else { + Type *jlrt = julia_type_to_llvm(rt, &jlretboxed); // compute the real "julian" return type and compute whether it is boxed if (type_is_ghost(jlrt)) { return ghostValue(rt); } - else if (lrt->isStructTy() && retboxed) { - assert(jl_is_structtype(rt)); - jl_cgval_t newst = emit_new_struct(rt, 1, NULL, ctx); // emit a new, empty struct - assert(newst.typ != NULL && "Type was not concrete"); - assert(newst.isboxed); - size_t rtsz = jl_datatype_size(rt); - assert(rtsz > 0); - int boxalign = jl_gc_alignment(rtsz); + else if (jl_is_datatype(rt) && jl_is_datatype_singleton((jl_datatype_t*)rt)) { + return mark_julia_const(((jl_datatype_t*)rt)->instance); + } + else if (jlretboxed && !retboxed) { + assert(jl_is_datatype(rt)); + if (static_rt) { + Value *runtime_bt = literal_pointer_val(rt); + size_t rtsz = jl_datatype_size(rt); + assert(rtsz > 0); + Value *strct = emit_allocobj(ctx, rtsz, runtime_bt); + int boxalign = jl_gc_alignment(rtsz); #ifndef NDEBUG #if JL_LLVM_VERSION >= 30600 - const DataLayout &DL = jl_ExecutionEngine->getDataLayout(); + const DataLayout &DL = jl_ExecutionEngine->getDataLayout(); #else - const DataLayout &DL = *jl_ExecutionEngine->getDataLayout(); + const DataLayout &DL = *jl_ExecutionEngine->getDataLayout(); #endif - // ARM and AArch64 can use a LLVM type larger than the julia - // type. However, the LLVM type size should be no larger than - // the GC allocation size. (multiple of `sizeof(void*)`) - assert(DL.getTypeStoreSize(lrt) <= LLT_ALIGN(jl_datatype_size(rt), - boxalign)); + // ARM and AArch64 can use a LLVM type larger than the julia + // type. However, the LLVM type size should be no larger than + // the GC allocation size. (multiple of `sizeof(void*)`) + assert(DL.getTypeStoreSize(lrt) <= LLT_ALIGN(rtsz, boxalign)); #endif - // copy the data from the return value to the new struct - tbaa_decorate(newst.tbaa, builder.CreateAlignedStore(result, emit_bitcast(newst.V, prt->getPointerTo()), boxalign)); - return newst; + // copy the data from the return value to the new struct + MDNode *tbaa = jl_is_mutable(rt) ? 
tbaa_mutab : tbaa_immut; + init_bits_value(strct, result, tbaa, boxalign); + return mark_julia_type(strct, true, rt, ctx); + } + jlretboxed = false; // trigger mark_or_box_ccall_result to build the runtime box } - else if (jlrt != prt) { - assert(lrt == jlrt); // jl_struct_to_llvm and julia_type_to_llvm should only differ for concrete types, per the case above - result = llvm_type_rewrite(result, prt, jlrt, true, false, false, ctx); + else if (lrt != prt) { + assert(jlrt == lrt || !lrt->isStructTy()); // julia_type_to_llvm and julia_struct_to_llvm should be returning the same StructType + result = llvm_type_rewrite(result, lrt, false, ctx); } } - else { - retboxed = sretboxed; - if (!retboxed) - result = builder.CreateLoad(result); // something alloca'd above - } - return mark_or_box_ccall_result(result, retboxed, args[2], rt, static_rt, ctx); + return mark_or_box_ccall_result(result, jlretboxed, rt, unionall_env, static_rt, ctx); } diff --git a/src/ccalltest.c b/src/ccalltest.c index e943d347ced5c..b8781567fcf40 100644 --- a/src/ccalltest.c +++ b/src/ccalltest.c @@ -90,6 +90,7 @@ JL_DLLEXPORT complex_t *cptest(complex_t *a) { } JL_DLLEXPORT complex_t *cptest_static(complex_t *a) { + if (verbose) fprintf(stderr,"%" PRIjint " + %" PRIjint " i\n", a->real, a->imag); complex_t *b = (complex_t*)malloc(sizeof(complex_t)); b->real = a->real; b->imag = a->imag; diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 7e90176e87022..af7f0efc56091 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -355,101 +355,139 @@ static Value *julia_binding_gv(jl_binding_t *b) // --- mapping between julia and llvm types --- -static Type *julia_struct_to_llvm(jl_value_t *jt, bool *isboxed); +static Type *julia_struct_to_llvm(jl_value_t *jt, jl_unionall_t *ua_env, bool *isboxed); extern "C" { JL_DLLEXPORT Type *julia_type_to_llvm(jl_value_t *jt, bool *isboxed) { // this function converts a Julia Type into the equivalent LLVM type if (isboxed) *isboxed = false; - if (jt == (jl_value_t*)jl_bool_type) return T_int8; - if (jt == (jl_value_t*)jl_bottom_type) return T_void; - if (!jl_is_leaf_type(jt)) { - if (isboxed) *isboxed = true; - return T_pjlvalue; - } - if (jl_is_cpointer_type(jt)) { - Type *lt = julia_type_to_llvm(jl_tparam0(jt)); - if (lt == NULL) - return NULL; + if (jt == (jl_value_t*)jl_bottom_type) + return T_void; + if (jl_is_leaf_type(jt)) { + if ((jl_is_bitstype(jt) || jl_isbits(jt))) { + if (jl_datatype_nbits(jt) == 0) + return T_void; + Type *t = julia_struct_to_llvm(jt, NULL, isboxed); + assert(t != NULL); + return t; + } + } + if (isboxed) *isboxed = true; + return T_pjlvalue; +} +} + +// converts a julia bitstype into the equivalent LLVM bitstype +static Type *bitstype_to_llvm(jl_value_t *bt) +{ + assert(jl_is_bitstype(bt)); + if (bt == (jl_value_t*)jl_bool_type) + return T_int8; + if (bt == (jl_value_t*)jl_long_type) + return T_size; + if (jl_is_cpointer_type(bt)) { + Type *lt = julia_type_to_llvm(jl_tparam0(bt)); if (lt == T_void) return T_pint8; return PointerType::get(lt, 0); } - if (jl_is_bitstype(jt)) { - if (jt == (jl_value_t*)jl_long_type) - return T_size; - int nb = jl_datatype_size(jt); - if (jl_is_floattype(jt)) { + int nb = jl_datatype_size(bt); + if (jl_is_floattype(bt)) { #ifndef DISABLE_FLOAT16 - if (nb == 2) - return T_float16; - else + if (nb == 2) + return T_float16; + else #endif - if (nb == 4) - return T_float32; - else if (nb == 8) - return T_float64; - else if (nb == 16) - return T_float128; - } - return Type::getIntNTy(jl_LLVMContext, jl_datatype_nbits(jt)); + if (nb == 4) + 
return T_float32; + else if (nb == 8) + return T_float64; + else if (nb == 16) + return T_float128; } - if (jl_isbits(jt)) { - if (jl_datatype_nbits(jt) == 0) { - return T_void; + return Type::getIntNTy(jl_LLVMContext, nb * 8); +} + +// compute whether all leaf subtypes of this type have the same layout +// (which is conservatively approximated here by asking whether the types of any of the +// fields depend on any of the parameters of the containing type) +static bool julia_struct_has_layout(jl_datatype_t *dt, jl_unionall_t *ua) +{ + if (dt->layout || dt->struct_decl || jl_is_bitstype(dt) || jl_isbits(dt)) + return true; + if (ua) { + size_t i, ntypes = jl_datatype_nfields(dt); + for (i = 0; i < ntypes; i++) { + jl_value_t *ty = jl_svecref(dt->types, i); + if (jl_has_typevar_from_unionall(ty, ua)) + return false; } - return julia_struct_to_llvm(jt, isboxed); } - if (isboxed) *isboxed = true; - return T_pjlvalue; -} + return true; } -static Type *julia_struct_to_llvm(jl_value_t *jt, bool *isboxed) +static Type *julia_struct_to_llvm(jl_value_t *jt, jl_unionall_t *ua, bool *isboxed) { // this function converts a Julia Type into the equivalent LLVM struct // use this where C-compatible (unboxed) structs are desired // use julia_type_to_llvm directly when you want to preserve Julia's type semantics - bool isTuple = jl_is_tuple_type(jt); if (isboxed) *isboxed = false; + if (jt == (jl_value_t*)jl_bottom_type) + return T_void; + if (jl_is_bitstype(jt)) + return bitstype_to_llvm(jt); + bool isTuple = jl_is_tuple_type(jt); if ((isTuple || jl_is_structtype(jt)) && !jl_is_array_type(jt)) { - if (!jl_is_leaf_type(jt)) - return NULL; jl_datatype_t *jst = (jl_datatype_t*)jt; if (jst->struct_decl == NULL) { - size_t ntypes = jl_datatype_nfields(jst); - if (ntypes == 0 || jl_datatype_nbits(jst) == 0) + size_t i, ntypes = jl_svec_len(jst->types); + if (ntypes == 0 || (jst->layout && jl_datatype_nbits(jst) == 0)) return T_void; + if (!julia_struct_has_layout(jst, ua)) + return NULL; StructType *structdecl; if (!isTuple) { structdecl = StructType::create(jl_LLVMContext, jl_symbol_name(jst->name->name)); jst->struct_decl = structdecl; } std::vector latypes(0); - size_t i; bool isarray = true; bool isvector = true; - jl_value_t* jlasttype = NULL; + jl_value_t *jlasttype = NULL; Type *lasttype = NULL; - for(i = 0; i < ntypes; i++) { + bool allghost = true; + for (i = 0; i < ntypes; i++) { jl_value_t *ty = jl_svecref(jst->types, i); - if (jlasttype!=NULL && ty!=jlasttype) + if (jlasttype != NULL && ty != jlasttype) isvector = false; jlasttype = ty; + bool isptr; + if (jst->layout) + isptr = jl_field_isptr(jst, i); + else // compute what jl_compute_field_offsets would say + isptr = jl_isbits(ty) && jl_is_leaf_type(ty) && ((jl_datatype_t*)ty)->layout; Type *lty; - if (jl_field_isptr(jst, i)) + if (isptr) lty = T_pjlvalue; + else if (ty == (jl_value_t*)jl_bool_type) + lty = T_int8; else - lty = ty==(jl_value_t*)jl_bool_type ? 
T_int8 : julia_type_to_llvm(ty); + lty = julia_type_to_llvm(ty); if (lasttype != NULL && lasttype != lty) isarray = false; lasttype = lty; if (type_is_ghost(lty)) lty = NoopType; + else + allghost = false; latypes.push_back(lty); } - if (!isTuple) { + if (allghost) { + assert(jst->layout == NULL); // otherwise should have been caught above + jst->struct_decl = T_void; + } + else if (!isTuple) { if (jl_is_vecelement_type(jt)) // VecElement type is unwrapped in LLVM jst->struct_decl = latypes[0]; @@ -458,31 +496,34 @@ static Type *julia_struct_to_llvm(jl_value_t *jt, bool *isboxed) } else { if (isarray && lasttype != T_int1 && !type_is_ghost(lasttype)) { - if (isvector && jl_special_vector_alignment(ntypes, jlasttype)!=0) + if (isvector && jl_special_vector_alignment(ntypes, jlasttype) != 0) jst->struct_decl = VectorType::get(lasttype, ntypes); else jst->struct_decl = ArrayType::get(lasttype, ntypes); } else { - jst->struct_decl = StructType::get(jl_LLVMContext,ArrayRef(&latypes[0],ntypes)); + jst->struct_decl = StructType::get(jl_LLVMContext, ArrayRef(&latypes[0], ntypes)); } } #ifndef NDEBUG // If LLVM and Julia disagree about alignment, much trouble ensues, so check it! - const DataLayout &DL = + if (jst->layout) { + const DataLayout &DL = #if JL_LLVM_VERSION >= 30600 - jl_ExecutionEngine->getDataLayout(); + jl_ExecutionEngine->getDataLayout(); #else - *jl_ExecutionEngine->getDataLayout(); + *jl_ExecutionEngine->getDataLayout(); #endif - unsigned llvm_alignment = DL.getABITypeAlignment((Type*)jst->struct_decl); - unsigned julia_alignment = jst->layout->alignment; - assert(llvm_alignment == julia_alignment); + unsigned llvm_alignment = DL.getABITypeAlignment((Type*)jst->struct_decl); + unsigned julia_alignment = jst->layout->alignment; + assert(llvm_alignment == julia_alignment); + } #endif } return (Type*)jst->struct_decl; } - return julia_type_to_llvm(jt, isboxed); + if (isboxed) *isboxed = true; + return T_pjlvalue; } static bool is_datatype_all_pointers(jl_datatype_t *dt) @@ -737,7 +778,7 @@ static void null_pointer_check(Value *v, jl_codectx_t *ctx) literal_pointer_val(jl_undefref_exception), ctx); } -static void emit_type_error(const jl_cgval_t &x, jl_value_t *type, const std::string &msg, +static void emit_type_error(const jl_cgval_t &x, Value *type, const std::string &msg, jl_codectx_t *ctx) { Value *fname_val = stringConstPtr(ctx->funcName); @@ -745,11 +786,11 @@ static void emit_type_error(const jl_cgval_t &x, jl_value_t *type, const std::st #if JL_LLVM_VERSION >= 30700 builder.CreateCall(prepare_call(jltypeerror_func), { fname_val, msg_val, - literal_pointer_val(type), boxed(x, ctx, false)}); // x is rooted by jl_type_error_rt + type, boxed(x, ctx, false)}); // x is rooted by jl_type_error_rt #else builder.CreateCall4(prepare_call(jltypeerror_func), fname_val, msg_val, - literal_pointer_val(type), boxed(x, ctx, false)); // x is rooted by jl_type_error_rt + type, boxed(x, ctx, false)); // x is rooted by jl_type_error_rt #endif } @@ -762,7 +803,7 @@ static void emit_typecheck(const jl_cgval_t &x, jl_value_t *type, const std::str // return; // } if (jl_type_intersection(x.typ, type) == (jl_value_t*)jl_bottom_type) { - emit_type_error(x, type, msg, ctx); + emit_type_error(x, literal_pointer_val(type), msg, ctx); builder.CreateUnreachable(); BasicBlock *failBB = BasicBlock::Create(jl_LLVMContext, "fail", ctx->f); builder.SetInsertPoint(failBB); @@ -787,13 +828,24 @@ static void emit_typecheck(const jl_cgval_t &x, jl_value_t *type, const std::str builder.CreateCondBr(istype, passBB, 
failBB); builder.SetInsertPoint(failBB); - emit_type_error(x, type, msg, ctx); + emit_type_error(x, literal_pointer_val(type), msg, ctx); builder.CreateUnreachable(); ctx->f->getBasicBlockList().push_back(passBB); builder.SetInsertPoint(passBB); } +static void emit_leafcheck(Value *typ, const std::string &msg, jl_codectx_t *ctx) +{ + assert(typ->getType() == T_pjlvalue); + emit_typecheck(mark_julia_type(typ, true, jl_any_type, ctx, false), (jl_value_t*)jl_datatype_type, msg, ctx); + Value *isleaf; + isleaf = builder.CreateConstInBoundsGEP1_32(LLVM37_param(T_int8) emit_bitcast(typ, T_pint8), offsetof(jl_datatype_t, isleaftype)); + isleaf = builder.CreateLoad(isleaf, tbaa_const); + isleaf = builder.CreateTrunc(isleaf, T_int1); + error_unless(isleaf, msg, ctx); +} + #define CHECK_BOUNDS 1 static Value *emit_bounds_check(const jl_cgval_t &ainfo, jl_value_t *ty, Value *i, Value *len, jl_codectx_t *ctx) { @@ -869,11 +921,6 @@ static unsigned julia_alignment(Value* /*ptr*/, jl_value_t *jltype, unsigned ali return alignment; } -static LoadInst *build_load(Value *ptr, jl_value_t *jltype) -{ - return builder.CreateAlignedLoad(ptr, julia_alignment(ptr, jltype, 0)); -} - static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt, Value* dest = NULL, bool volatile_store = false); static jl_cgval_t typed_load(Value *ptr, Value *idx_0based, jl_value_t *jltype, @@ -881,7 +928,6 @@ static jl_cgval_t typed_load(Value *ptr, Value *idx_0based, jl_value_t *jltype, { bool isboxed; Type *elty = julia_type_to_llvm(jltype, &isboxed); - assert(elty != NULL); if (type_is_ghost(elty)) return ghostValue(jltype); Value *data; @@ -920,7 +966,6 @@ static void typed_store(Value *ptr, Value *idx_0based, const jl_cgval_t &rhs, { bool isboxed; Type *elty = julia_type_to_llvm(jltype, &isboxed); - assert(elty != NULL); if (type_is_ghost(elty)) return; Value *r; @@ -1103,7 +1148,6 @@ static jl_cgval_t emit_getfield_knownidx(const jl_cgval_t &strct, unsigned idx, { jl_value_t *jfty = jl_field_type(jt, idx); Type *elty = julia_type_to_llvm(jfty); - assert(elty != NULL); if (jfty == jl_bottom_type) { raise_exception(literal_pointer_val(jl_undefref_exception), ctx); return jl_cgval_t(); // unreachable @@ -1390,11 +1434,11 @@ static Value *emit_array_nd_index(const jl_cgval_t &ainfo, jl_value_t *ex, ssize static Value *emit_allocobj(jl_codectx_t *ctx, size_t static_size, Value *jt); static Value *emit_allocobj(jl_codectx_t *ctx, size_t static_size, const jl_cgval_t &v); -static Value *init_bits_value(Value *newv, Value *v, MDNode *tbaa) +static Value *init_bits_value(Value *newv, Value *v, MDNode *tbaa, unsigned alignment = sizeof(void*)) // min alignment in julia's gc is pointer-aligned { // newv should already be tagged tbaa_decorate(tbaa, builder.CreateAlignedStore(v, emit_bitcast(newv, - PointerType::get(v->getType(),0)), sizeof(void*))); // min alignment in julia's gc is pointer-aligned + PointerType::get(v->getType(), 0)), alignment)); return newv; } static Value *as_value(Type *t, const jl_cgval_t&); @@ -1488,14 +1532,12 @@ static Value *call_with_unsigned(Function *ufunc, Value *v) return Call; } -static void jl_add_method_root(jl_method_instance_t *li, jl_value_t *val); +static void jl_add_method_root(jl_codectx_t *ctx, jl_value_t *val); static Value *as_value(Type *t, const jl_cgval_t &v) { assert(!v.isboxed); - if (v.ispointer()) - return tbaa_decorate(v.tbaa, build_load(builder.CreatePointerCast(v.V, t->getPointerTo()), v.typ)); - return v.V; + return emit_unbox(t, v, v.typ); } // this is used to wrap values 
for generic contexts, where a @@ -1517,15 +1559,15 @@ static Value *boxed(const jl_cgval_t &vinfo, jl_codectx_t *ctx, bool gcrooted) assert(!type_is_ghost(t)); // should have been handled by isghost above! if (jt == (jl_value_t*)jl_bool_type) - return julia_bool(builder.CreateTrunc(as_value(t,vinfo), T_int1)); + return julia_bool(builder.CreateTrunc(as_value(t, vinfo), T_int1)); if (t == T_int1) - return julia_bool(as_value(t,vinfo)); + return julia_bool(as_value(t, vinfo)); if (ctx->linfo && ctx->linfo->def && !vinfo.ispointer()) { // don't bother codegen pre-boxing for toplevel if (Constant *c = dyn_cast(v)) { jl_value_t *s = static_constant_instance(c, jt); if (s) { - jl_add_method_root(ctx->linfo, s); + jl_add_method_root(ctx, s); return literal_pointer_val(s); } } @@ -1537,26 +1579,26 @@ static Value *boxed(const jl_cgval_t &vinfo, jl_codectx_t *ctx, bool gcrooted) if (jb == jl_int8_type) box = call_with_signed(box_int8_func, as_value(t, vinfo)); else if (jb == jl_int16_type) - box = call_with_signed(box_int16_func, as_value(t,vinfo)); + box = call_with_signed(box_int16_func, as_value(t, vinfo)); else if (jb == jl_int32_type) - box = call_with_signed(box_int32_func, as_value(t,vinfo)); + box = call_with_signed(box_int32_func, as_value(t, vinfo)); else if (jb == jl_int64_type) - box = call_with_signed(box_int64_func, as_value(t,vinfo)); + box = call_with_signed(box_int64_func, as_value(t, vinfo)); else if (jb == jl_float32_type) - box = builder.CreateCall(prepare_call(box_float32_func), as_value(t,vinfo)); + box = builder.CreateCall(prepare_call(box_float32_func), as_value(t, vinfo)); //if (jb == jl_float64_type) - // box = builder.CreateCall(box_float64_func, as_value(t,vinfo); + // box = builder.CreateCall(box_float64_func, as_value(t, vinfo); // for Float64, fall through to generic case below, to inline alloc & init of Float64 box. cheap, I know. 
else if (jb == jl_uint8_type) - box = call_with_unsigned(box_uint8_func, as_value(t,vinfo)); + box = call_with_unsigned(box_uint8_func, as_value(t, vinfo)); else if (jb == jl_uint16_type) - box = call_with_unsigned(box_uint16_func, as_value(t,vinfo)); + box = call_with_unsigned(box_uint16_func, as_value(t, vinfo)); else if (jb == jl_uint32_type) - box = call_with_unsigned(box_uint32_func, as_value(t,vinfo)); + box = call_with_unsigned(box_uint32_func, as_value(t, vinfo)); else if (jb == jl_uint64_type) - box = call_with_unsigned(box_uint64_func, as_value(t,vinfo)); + box = call_with_unsigned(box_uint64_func, as_value(t, vinfo)); else if (jb == jl_char_type) - box = call_with_unsigned(box_char_func, as_value(t,vinfo)); + box = call_with_unsigned(box_char_func, as_value(t, vinfo)); else if (jb == jl_ssavalue_type) { unsigned zero = 0; v = as_value(t, vinfo); @@ -1590,7 +1632,7 @@ static Value *boxed(const jl_cgval_t &vinfo, jl_codectx_t *ctx, bool gcrooted) static void emit_cpointercheck(const jl_cgval_t &x, const std::string &msg, jl_codectx_t *ctx) { Value *t = emit_typeof_boxed(x,ctx); - emit_typecheck(mark_julia_type(t, true, jl_any_type, ctx), (jl_value_t*)jl_datatype_type, msg, ctx); + emit_typecheck(mark_julia_type(t, true, jl_any_type, ctx, false), (jl_value_t*)jl_datatype_type, msg, ctx); Value *istype = builder.CreateICmpEQ(emit_datatype_name(t), @@ -1600,7 +1642,7 @@ static void emit_cpointercheck(const jl_cgval_t &x, const std::string &msg, jl_c builder.CreateCondBr(istype, passBB, failBB); builder.SetInsertPoint(failBB); - emit_type_error(x, (jl_value_t*)jl_pointer_type, msg, ctx); + emit_type_error(x, literal_pointer_val((jl_value_t*)jl_pointer_type), msg, ctx); builder.CreateUnreachable(); ctx->f->getBasicBlockList().push_back(passBB); @@ -1827,8 +1869,9 @@ static jl_cgval_t emit_new_struct(jl_value_t *ty, size_t nargs, jl_value_t **arg return ghostValue(sty); if (nargs >= 2) return emit_expr(args[1], ctx); // do side effects - Type *lt = julia_type_to_llvm(ty); - assert(lt != T_pjlvalue); + bool isboxed; + Type *lt = julia_type_to_llvm(ty, &isboxed); + assert(!isboxed); return mark_julia_type(UndefValue::get(lt), false, ty, ctx); } else { diff --git a/src/codegen.cpp b/src/codegen.cpp index 9d545866992a5..b8aa96d2cfc14 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -212,7 +212,6 @@ static DataLayout *jl_data_layout; #endif // types -static Type *T_jlvalue; static Type *T_pjlvalue; static Type *T_ppjlvalue; static Type *jl_parray_llvmt; @@ -299,8 +298,15 @@ int32_t jl_jlcall_api(const void *function) // give the function an index in the constant lookup table if (function == NULL) return 0; - const Function *F = (const Function*)function; - return (F->getFunctionType() == jl_func_sig ? 
1 : 3); + const Function *F = cast((const Value*)function); + StringRef Name = F->getName(); + if (Name.startswith("japi3_")) // jlcall abi 3 from JIT + return 3; + assert(Name.startswith("japi1_") || // jlcall abi 1 from JIT + Name.startswith("jsys1_") || // jlcall abi 1 from sysimg + Name.startswith("jlcall_") || // jlcall abi 1 from JIT wrapping a specsig method + Name.startswith("jlsysw_")); // jlcall abi 1 from sysimg wrapping a specsig method + return 1; } @@ -348,8 +354,6 @@ static Function *jlboundserrorv_func; static Function *jlcheckassign_func; static Function *jldeclareconst_func; static Function *jlgetbindingorerror_func; -static Function *jlpref_func; -static Function *jlpset_func; static Function *jltopeval_func; static Function *jlcopyast_func; static Function *jltuple_func; @@ -366,6 +370,7 @@ static Function *jlegal_func; static Function *jlalloc_pool_func; static Function *jlalloc_big_func; static Function *jlisa_func; +static Function *jlapplytype_func; static Function *setjmp_func; static Function *memcmp_func; static Function *box_int8_func; @@ -532,7 +537,8 @@ typedef struct { // information about the context of a piece of code: its enclosing // function and module, and visible local variables and labels. -struct jl_codectx_t { +class jl_codectx_t { +public: Function *f; // local var info. globals are not in here. std::vector slots; @@ -544,6 +550,7 @@ struct jl_codectx_t { jl_code_info_t *source; jl_array_t *code; size_t world; + jl_array_t *roots; const char *name; StringRef file; ssize_t *line; @@ -565,6 +572,10 @@ struct jl_codectx_t { bool is_inbounds{false}; const jl_cgparams_t *params; + + ~jl_codectx_t() { + assert(this->roots == NULL); + } }; static jl_cgval_t emit_expr(jl_value_t *expr, jl_codectx_t *ctx); @@ -575,7 +586,8 @@ static Value *make_jlcall(ArrayRef args, jl_codectx_t *ctx); static Value *global_binding_pointer(jl_module_t *m, jl_sym_t *s, jl_binding_t **pbnd, bool assign, jl_codectx_t *ctx); static jl_cgval_t emit_checked_var(Value *bp, jl_sym_t *name, jl_codectx_t *ctx, bool isvol, MDNode *tbaa); -static Value *emit_condition(jl_value_t *cond, const std::string &msg, jl_codectx_t *ctx); +static jl_cgval_t emit_sparam(size_t i, jl_codectx_t *ctx); +static Value *emit_condition(const jl_cgval_t &condV, const std::string &msg, jl_codectx_t *ctx); static void allocate_gc_frame(BasicBlock *b0, jl_codectx_t *ctx); static GlobalVariable *prepare_global(GlobalVariable *G, Module *M = jl_builderModule); static Value *prepare_call(Value *Callee); @@ -627,7 +639,6 @@ static inline jl_cgval_t ghostValue(jl_datatype_t *typ) static inline jl_cgval_t mark_julia_slot(Value *v, jl_value_t *typ, MDNode *tbaa) { // eagerly put this back onto the stack - assert(v->getType() != T_pjlvalue); assert(tbaa); jl_cgval_t tagval(v, NULL, false, typ); tagval.tbaa = tbaa; @@ -642,7 +653,6 @@ static inline jl_cgval_t mark_julia_type(Value *v, bool isboxed, jl_value_t *typ return ghostValue(typ); } if (v && T->isAggregateType() && !isboxed) { - assert(v->getType() != T_pjlvalue); // eagerly put this back onto the stack // llvm mem2reg pass will remove this if unneeded Value *loc = emit_static_alloca(T); @@ -775,8 +785,9 @@ static Value *alloc_local(int s, jl_codectx_t *ctx) jl_varinfo_t &vi = ctx->slots[s]; jl_value_t *jt = vi.value.typ; assert(store_unboxed_p(s,ctx)); - Type *vtype = julia_type_to_llvm(jt); - assert(vtype != T_pjlvalue); + bool isboxed; + Type *vtype = julia_type_to_llvm(jt, &isboxed); + assert(!isboxed); if (type_is_ghost(vtype)) { vi.value = 
ghostValue(jt); return NULL; @@ -807,7 +818,7 @@ static void maybe_alloc_arrayvar(int s, jl_codectx_t *ctx) // CreateAlloca is OK here because maybe_alloc_arrayvar is only called in the prologue setup av.dataptr = builder.CreateAlloca(PointerType::get(elt,0)); av.len = builder.CreateAlloca(T_size); - for(int i=0; i < ndims-1; i++) + for (int i = 0; i < ndims - 1; i++) av.sizes.push_back(builder.CreateAlloca(T_size)); av.ty = jt; (*ctx->arrayvars)[s] = av; @@ -1094,33 +1105,7 @@ static jl_method_instance_t *jl_get_unspecialized(jl_method_instance_t *method) { // one unspecialized version of a function can be shared among all cached specializations jl_method_t *def = method->def; - if (def->needs_sparam_vals_ducttape == 2) { - if (def->isstaged) { - def->needs_sparam_vals_ducttape = 1; - } - else { - // determine if this needs an unspec version compiled for each - // sparam, or whether they can be shared - // TODO: remove this once runtime intrinsics are hooked up - int needs_sparam_vals_ducttape = 0; - if (method->sparam_vals != jl_emptysvec) { - jl_array_t *code = (jl_array_t*)def->source->code; - JL_GC_PUSH1(&code); - if (!jl_typeis(code, jl_array_any_type)) - code = jl_uncompress_ast(def, code); - size_t i, l = jl_array_len(code); - for (i = 0; i < l; i++) { - if (jl_has_intrinsics(method, jl_array_ptr_ref(code, i), def->module)) { - needs_sparam_vals_ducttape = 1; - break; - } - } - JL_GC_POP(); - } - def->needs_sparam_vals_ducttape = needs_sparam_vals_ducttape; - } - } - if (def->needs_sparam_vals_ducttape) { + if (def->isstaged) { return method; } if (def->unspecialized == NULL) { @@ -1986,16 +1971,6 @@ static void simple_escape_analysis(jl_value_t *expr, bool esc, jl_codectx_t *ctx if (jl_value_t *fv = static_eval(f, ctx, false)) { if (jl_typeis(fv, jl_intrinsic_type)) { esc = false; - JL_I::intrinsic fi = (JL_I::intrinsic)jl_unbox_int32(fv); - if (fi == JL_I::ccall) { - esc = true; - simple_escape_analysis(jl_exprarg(e,1), esc, ctx); - // 2nd and 3d arguments are static - for(i=4; i < (size_t)alen; i+=2) { - simple_escape_analysis(jl_exprarg(e,i), esc, ctx); - } - return; - } } else { if ((fv==jl_builtin_getfield && alen==3 && @@ -2008,10 +1983,19 @@ static void simple_escape_analysis(jl_value_t *expr, bool esc, jl_codectx_t *ctx } } - for(i=1; i < (size_t)alen; i++) { + for (i = 1; i < (size_t)alen; i++) { simple_escape_analysis(jl_exprarg(e,i), esc, ctx); } } + else if (e->head == foreigncall_sym) { + esc = true; + simple_escape_analysis(jl_exprarg(e, 0), esc, ctx); + // 2nd and 3d arguments are static + size_t alen = jl_array_dim0(e->args); + for (i = 3; i < alen; i += 2) { + simple_escape_analysis(jl_exprarg(e, i), esc, ctx); + } + } else if (e->head == method_sym) { simple_escape_analysis(jl_exprarg(e,0), esc, ctx); if (jl_expr_nargs(e) > 1) { @@ -2086,30 +2070,26 @@ static Value *make_jlcall(ArrayRef args, jl_codectx_t *ctx) return largs; } -static void jl_add_method_root(jl_method_instance_t *li, jl_value_t *val) +static void jl_add_method_root(jl_codectx_t *ctx, jl_value_t *val) { - if (jl_is_leaf_type(val) || jl_is_bool(val) || jl_is_symbol(val)) + if (jl_is_leaf_type(val) || jl_is_bool(val) || jl_is_symbol(val) || + val == (jl_value_t*)jl_any_type || val == (jl_value_t*)jl_bottom_type) return; - jl_method_t *m = li->def; JL_GC_PUSH1(&val); - JL_LOCK(&m->writelock); - if (m->roots == NULL) { - m->roots = jl_alloc_vec_any(1); - jl_gc_wb(m, m->roots); - jl_array_ptr_set(m->roots, 0, val); + if (ctx->roots == NULL) { + ctx->roots = jl_alloc_vec_any(1); + 
jl_array_ptr_set(ctx->roots, 0, val); } else { - size_t rlen = jl_array_dim0(m->roots); - for(size_t i=0; i < rlen; i++) { - if (jl_array_ptr_ref(m->roots,i) == val) { - JL_UNLOCK(&li->def->writelock); + size_t rlen = jl_array_dim0(ctx->roots); + for (size_t i = 0; i < rlen; i++) { + if (jl_array_ptr_ref(ctx->roots,i) == val) { JL_GC_POP(); return; } } - jl_array_ptr_1d_push(m->roots, val); + jl_array_ptr_1d_push(ctx->roots, val); } - JL_UNLOCK(&m->writelock); JL_GC_POP(); } @@ -2179,9 +2159,10 @@ static Value *emit_bits_compare(const jl_cgval_t &arg1, const jl_cgval_t &arg2, Type *at = julia_type_to_llvm(arg1.typ); if (at->isIntegerTy() || at->isPointerTy() || at->isFloatingPointTy()) { - Value *varg1 = emit_unbox(at, arg1, arg1.typ); - Value *varg2 = emit_unbox(at, arg2, arg2.typ); - return builder.CreateICmpEQ(JL_INT(varg1),JL_INT(varg2)); + Type *at_int = INTT(at); + Value *varg1 = emit_unbox(at_int, arg1, arg1.typ); + Value *varg2 = emit_unbox(at_int, arg2, arg2.typ); + return builder.CreateICmpEQ(varg1, varg2); } if (at->isVectorTy()) { @@ -2190,11 +2171,11 @@ static Value *emit_bits_compare(const jl_cgval_t &arg1, const jl_cgval_t &arg2, Value *varg1 = emit_unbox(at, arg1, arg1.typ); Value *varg2 = emit_unbox(at, arg2, arg2.typ); size_t l = jl_svec_len(types); - for(unsigned i=0; i < l; i++) { - jl_value_t *fldty = jl_svecref(types,i); + for (unsigned i = 0; i < l; i++) { + jl_value_t *fldty = jl_svecref(types, i); Value *subAns, *fld1, *fld2; - fld1 = builder.CreateExtractElement(varg1, ConstantInt::get(T_int32,i)), - fld2 = builder.CreateExtractElement(varg2, ConstantInt::get(T_int32,i)), + fld1 = builder.CreateExtractElement(varg1, ConstantInt::get(T_int32, i)), + fld2 = builder.CreateExtractElement(varg2, ConstantInt::get(T_int32, i)), subAns = emit_bits_compare(mark_julia_type(fld1, false, fldty, ctx), mark_julia_type(fld2, false, fldty, ctx), ctx); answer = builder.CreateAnd(answer, subAns); } @@ -2241,7 +2222,7 @@ static Value *emit_bits_compare(const jl_cgval_t &arg1, const jl_cgval_t &arg2, } } assert(0 && "what is this llvm type?"); - return 0; + abort(); } // emit code for is (===). @@ -2817,7 +2798,7 @@ static bool emit_builtin_call(jl_cgval_t *ret, jl_value_t *f, jl_value_t **args, if (ty!=NULL && jl_is_leaf_type(ty)) { if (jl_has_free_typevars(ty)) { // add root for types not cached. 
issue #7065 - jl_add_method_root(ctx->linfo, ty); + jl_add_method_root(ctx, ty); } *ret = mark_julia_const(ty); JL_GC_POP(); @@ -3173,7 +3154,7 @@ static jl_cgval_t emit_sparam(size_t i, jl_codectx_t *ctx) Value *bp = builder.CreateConstInBoundsGEP1_32(LLVM37_param(T_pjlvalue) emit_bitcast(ctx->spvals_ptr, T_ppjlvalue), i + sizeof(jl_svec_t) / sizeof(jl_value_t*)); - return mark_julia_type(tbaa_decorate(tbaa_const, builder.CreateLoad(bp)), true, jl_any_type, ctx); + return mark_julia_type(tbaa_decorate(tbaa_const, builder.CreateLoad(bp)), true, jl_any_type, ctx, false); } static jl_cgval_t emit_global(jl_sym_t *sym, jl_codectx_t *ctx) @@ -3247,8 +3228,9 @@ static void emit_assignment(jl_value_t *l, jl_value_t *r, jl_codectx_t *ctx) assert(!ctx->ssavalue_assigned.at(idx)); jl_cgval_t slot = emit_expr(r, ctx); // slot could be a jl_value_t (unboxed) or jl_value_t* (ispointer) if (!slot.isboxed && !slot.isimmutable) { // emit a copy of values stored in mutable slots - Type *vtype = julia_type_to_llvm(slot.typ); - assert(vtype != T_pjlvalue); + bool isboxed; + Type *vtype = julia_type_to_llvm(slot.typ, &isboxed); + assert(!isboxed); Value *dest = emit_static_alloca(vtype); emit_unbox(vtype, slot, slot.typ, dest); slot = mark_julia_slot(dest, slot.typ, tbaa_stack); @@ -3470,7 +3452,7 @@ static jl_cgval_t emit_expr(jl_value_t *expr, jl_codectx_t *ctx) } } if (needroot && ctx->linfo->def) { // toplevel exprs and some integers are already rooted - jl_add_method_root(ctx->linfo, expr); + jl_add_method_root(ctx, expr); } return mark_julia_const(expr); } @@ -3488,7 +3470,7 @@ static jl_cgval_t emit_expr(jl_value_t *expr, jl_codectx_t *ctx) if (ctx->linfo->def) { // don't bother codegen constant-folding for toplevel jl_value_t *c = static_eval(expr, ctx, true, true); if (c) { - jl_add_method_root(ctx->linfo, c); + jl_add_method_root(ctx, c); return mark_julia_const(c); } } @@ -3504,6 +3486,9 @@ static jl_cgval_t emit_expr(jl_value_t *expr, jl_codectx_t *ctx) } return res; } + else if (head == foreigncall_sym) { + return emit_ccall(args, jl_array_dim0(ex->args), ctx); + } else if (head == assign_sym) { emit_assignment(args[0], args[1], ctx); return ghostValue(jl_void_type); @@ -3668,25 +3653,15 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t { // Generate a c-callable wrapper bool toboxed; - Type *crt = julia_struct_to_llvm(jlrettype, &toboxed); + Type *crt = julia_struct_to_llvm(jlrettype, NULL, &toboxed); if (crt == NULL) jl_error("cfunction: return type doesn't correspond to a C type"); - std::vector fargt(0); - std::vector fargt_isboxed(0); - std::vector fargt_sig(0); - Type *fargt_vasig; - std::vector byRefList(0); - AttributeSet attrs; - Type *prt = NULL; - int sret = 0; size_t nargs = jl_nparams(argt); - std::string err_msg = generate_func_sig(&crt, &prt, sret, fargt, fargt_isboxed, - fargt_sig, fargt_vasig, byRefList, - attrs, jlrettype, argt->parameters, nargs); - if (!err_msg.empty()) - jl_error(err_msg.c_str()); - if (fargt.size() + sret != fargt_sig.size()) + function_sig_t sig(crt, jlrettype, toboxed, argt->parameters, NULL, nargs, false, CallingConv::C, false); + if (!sig.err_msg.empty()) + jl_error(sig.err_msg.c_str()); + if (sig.fargt.size() + sig.sret != sig.fargt_sig.size()) jl_error("va_arg syntax not allowed for cfunction argument list"); const char *name = "cfunction"; @@ -3724,11 +3699,11 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t Module *M = new Module(name, jl_LLVMContext); jl_setup_module(M); - 
Function *cw = Function::Create(FunctionType::get(sret ? T_void : prt, fargt_sig, false), + Function *cw = Function::Create(sig.functype, GlobalVariable::ExternalLinkage, funcName.str(), M); jl_init_function(cw); - cw->setAttributes(attrs); + cw->setAttributes(sig.attributes); #if JL_LLVM_VERSION >= 30700 cw->addFnAttr("no-frame-pointer-elim", "true"); #endif @@ -3769,7 +3744,7 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t size_t FParamIndex = 0; std::vector args; Function::arg_iterator AI = cw->arg_begin(); - Value *sretPtr = sret ? &*AI++ : NULL; + Value *sretPtr = sig.sret ? &*AI++ : NULL; if (lam == NULL) { theFptr = jlapplygeneric_func; specsig = false; @@ -3790,7 +3765,7 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t jlfunc_sret = theFptr->hasStructRetAttr(); if (jlfunc_sret) { // fuse the two sret together, or emit an alloca to hold it - if (sret) + if (sig.sret) result = emit_bitcast(sretPtr, theFptr->getFunctionType()->getParamType(0)); else result = builder.CreateAlloca(theFptr->getFunctionType()->getParamType(0)->getContainedType(0)); @@ -3851,7 +3826,7 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t } else { bool argboxed; - (void)julia_struct_to_llvm(jargty, &argboxed); + (void)julia_struct_to_llvm(jargty, NULL, &argboxed); if (argboxed) { // a jl_value_t*, even when represented as a struct inputarg = mark_julia_type(val, true, jargty, &ctx); @@ -3859,8 +3834,14 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t else { // something of type T // undo whatever we might have done to this poor argument - bool issigned = jl_signed_type && jl_subtype(jargty, (jl_value_t*)jl_signed_type); - val = llvm_type_rewrite(val, val->getType(), fargt[i], true, byRefList[i], issigned, &ctx); + if (sig.byRefList.at(i)) { + assert(val->getType() == sig.fargt[i]->getPointerTo()); + val = builder.CreateAlignedLoad(val, 1); // unknown alignment from C + } + else { + bool issigned = jl_signed_type && jl_subtype(jargty, (jl_value_t*)jl_signed_type); + val = llvm_type_rewrite(val, sig.fargt[i], issigned, &ctx); + } bool isboxed; (void)julia_type_to_llvm(jargty, &isboxed); if (isboxed) { @@ -3961,30 +3942,31 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t // Prepare the return value Value *r; if (toboxed) { - assert(!sret); + assert(!sig.sret); // return a jl_value_t* r = boxed(retval, &ctx, false); // no gcroot since this is on the return path } - else if (sret && jlfunc_sret) { + else if (sig.sret && jlfunc_sret) { // nothing to do } - else if (!type_is_ghost(crt)) { - if (sret) - prt = fargt_sig[0]->getContainedType(0); // sret is a PointerType + else if (!type_is_ghost(sig.lrt)) { + Type *prt = sig.prt; + if (sig.sret) + prt = sig.fargt_sig[0]->getContainedType(0); // sret is a PointerType bool issigned = jl_signed_type && jl_subtype(declrt, (jl_value_t*)jl_signed_type); - Value *v = julia_to_native(crt, toboxed, declrt, retval, - false, false, false, 0, &ctx, NULL); - r = llvm_type_rewrite(v, crt, prt, false, false, issigned, &ctx); - if (sret) + Value *v = julia_to_native(sig.lrt, toboxed, declrt, NULL, retval, + false, 0, &ctx, NULL); + r = llvm_type_rewrite(v, prt, issigned, &ctx); + if (sig.sret) builder.CreateStore(r, sretPtr); } else { - assert(type_is_ghost(prt)); - sret = true; + assert(type_is_ghost(sig.lrt)); + sig.sret = true; } builder.CreateStore(last_age, ctx.world_age_field); - if (sret) + if (sig.sret) 
builder.CreateRetVoid(); else builder.CreateRet(r); @@ -3993,6 +3975,7 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t builder.ClearInsertionPoint(); jl_finalize_module(M, true); + assert(!ctx.roots); return cw_proto; } @@ -4086,18 +4069,10 @@ static Function *jl_cfunction_object(jl_function_t *ff, jl_value_t *declrt, jl_t } // generate a julia-callable function that calls f (AKA lam) -static Function *gen_jlcall_wrapper(jl_method_instance_t *lam, Function *f, bool sret, Module *M) +static Function *gen_jlcall_wrapper(jl_method_instance_t *lam, Function *f, const std::string &funcName, bool sret, Module *M) { - std::stringstream funcName; - const std::string &fname = f->getName().str(); - funcName << "jlcall_"; - if (fname.compare(0, 6, "julia_") == 0) - funcName << fname.substr(6); - else - funcName << fname; - Function *w = Function::Create(jl_func_sig, GlobalVariable::ExternalLinkage, - funcName.str(), M); + funcName, M); jl_init_function(w); #if JL_LLVM_VERSION >= 30700 w->addFnAttr("no-frame-pointer-elim", "true"); @@ -4132,7 +4107,7 @@ static Function *gen_jlcall_wrapper(jl_method_instance_t *lam, Function *f, bool args[idx] = result; idx++; } - for(size_t i=0; i < nargs; i++) { + for (size_t i = 0; i < nargs; i++) { jl_value_t *ty = jl_nth_slot_type(lam->specTypes, i); bool isboxed; Type *lty = julia_type_to_llvm(ty, &isboxed); @@ -4147,9 +4122,9 @@ static Function *gen_jlcall_wrapper(jl_method_instance_t *lam, Function *f, bool theArg = builder.CreateLoad(argPtr); } if (lty != NULL && !isboxed) { - theArg = builder.CreatePointerCast(theArg, PointerType::get(lty,0)); + theArg = builder.CreatePointerCast(theArg, PointerType::get(lty, 0)); if (!lty->isAggregateType()) // keep "aggregate" type values in place as pointers - theArg = build_load(theArg, ty); + theArg = builder.CreateAlignedLoad(theArg, julia_alignment(theArg, ty, 0)); } assert(dyn_cast(theArg) == NULL); args[idx] = theArg; @@ -4165,6 +4140,7 @@ static Function *gen_jlcall_wrapper(jl_method_instance_t *lam, Function *f, bool jl_cgval_t retval = sret ? mark_julia_slot(result, jlretty, tbaa_stack) : mark_julia_type(call, retboxed, jlretty, &ctx, /*needsroot*/false); builder.CreateRet(boxed(retval, &ctx, false)); // no gcroot needed since this on the return path + assert(!ctx.roots); return w; } @@ -4180,20 +4156,20 @@ static std::unique_ptr emit_function( assert(declarations && "Capturing declarations is always required"); // step 1. unpack AST and allocate codegen context for this function - jl_array_t *code = (jl_array_t*)src->code; - JL_GC_PUSH1(&code); - if (!jl_typeis(code,jl_array_any_type)) - code = jl_uncompress_ast(lam->def, code); + jl_codectx_t ctx = {}; + JL_GC_PUSH2(&ctx.code, &ctx.roots); + ctx.code = (jl_array_t*)src->code; + if (!jl_typeis(ctx.code, jl_array_any_type)) + ctx.code = jl_uncompress_ast(lam->def, ctx.code); + //jl_static_show(JL_STDOUT, (jl_value_t*)ast); //jl_printf(JL_STDOUT, "\n"); std::map arrayvars; std::map labels; - jl_codectx_t ctx = {}; ctx.arrayvars = &arrayvars; ctx.module = lam->def ? lam->def->module : ptls->current_module; ctx.linfo = lam; ctx.source = src; - ctx.code = code; ctx.world = world; ctx.name = jl_symbol_name(lam->def ? 
lam->def->name : anonymous_sym); ctx.funcName = ctx.name; @@ -4256,7 +4232,7 @@ static std::unique_ptr emit_function( } } - jl_array_t *stmts = code; + jl_array_t *stmts = ctx.code; size_t stmtslen = jl_array_dim0(stmts); // finish recording escape info @@ -4268,7 +4244,6 @@ static std::unique_ptr emit_function( // step 4. determine function signature jl_value_t *jlrettype = lam->rettype; - Function *f = NULL; bool specsig = false; bool needsparams = lam->def ? jl_svec_len(lam->def->sparam_syms) != jl_svec_len(lam->sparam_vals) : false; @@ -4294,20 +4269,28 @@ static std::unique_ptr emit_function( if (!specsig) ctx.nReqArgs--; // function not part of argArray in jlcall + Module *M = new Module(ctx.name, jl_LLVMContext); + jl_setup_module(M, params); + std::stringstream funcName; // try to avoid conflicts in the global symbol table - funcName << "julia_" << ctx.name + if (specsig) + funcName << "jlcall_"; + else if (needsparams) + funcName << "japi3_"; + else + funcName << "japi1_"; + const char* unadorned_name = ctx.name; #if (defined(_OS_LINUX_) && JL_LLVM_VERSION < 30400) - + (ctx.name[0] == '@') ? 1 : 0 + if (unadorned_name[0] == '@') + unadorned_name++; #endif - ; + funcName << unadorned_name << "_" << globalUnique++; + Function *f = NULL; Function *fwrap = NULL; - funcName << "_" << globalUnique++; - ctx.sret = false; - Module *M = new Module(ctx.name, jl_LLVMContext); - jl_setup_module(M, params); + if (specsig) { // assumes !va and !needsparams std::vector fsig(0); Type *rt; @@ -4324,7 +4307,7 @@ static std::unique_ptr emit_function( fsig.push_back(rt->getPointerTo()); rt = T_void; } - for(size_t i=0; i < jl_nparams(lam->specTypes); i++) { + for (size_t i = 0; i < jl_nparams(lam->specTypes); i++) { Type *ty = julia_type_to_llvm(jl_tparam(lam->specTypes,i)); if (type_is_ghost(ty)) continue; @@ -4332,9 +4315,11 @@ static std::unique_ptr emit_function( ty = PointerType::get(ty,0); fsig.push_back(ty); } + std::stringstream specName; + specName << "julia_" << unadorned_name << "_" << globalUnique; f = Function::Create(FunctionType::get(rt, fsig, false), GlobalVariable::ExternalLinkage, - funcName.str(), M); + specName.str(), M); jl_init_function(f); if (ctx.sret) { f->addAttribute(1, Attribute::StructRet); @@ -4343,7 +4328,7 @@ static std::unique_ptr emit_function( #if JL_LLVM_VERSION >= 30700 f->addFnAttr("no-frame-pointer-elim", "true"); #endif - fwrap = gen_jlcall_wrapper(lam, f, ctx.sret, M); + fwrap = gen_jlcall_wrapper(lam, f, funcName.str(), ctx.sret, M); declarations->functionObject = function_proto(fwrap); declarations->specFunctionObject = function_proto(f); } @@ -5227,8 +5212,34 @@ static std::unique_ptr emit_function( dbuilder.finalize(); } - JL_GC_POP(); + // copy ctx.roots into m->roots + // if we created any new roots during codegen + if (ctx.roots) { + jl_method_t *m = lam->def; + JL_LOCK(&m->writelock); + if (m->roots == NULL) { + m->roots = ctx.roots; + jl_gc_wb(m, m->roots); + } + else { + size_t i, ilen = jl_array_dim0(ctx.roots); + size_t j, jlen = jl_array_dim0(m->roots); + for (i = 0; i < ilen; i++) { + jl_value_t *ival = jl_array_ptr_ref(ctx.roots, i); + for (j = 0; j < jlen; j++) { + jl_value_t *jval = jl_array_ptr_ref(m->roots, j); + if (ival == jval) + break; + } + if (j == jlen) // not found - add to array + jl_array_ptr_1d_push(m->roots, ival); + } + } + ctx.roots = NULL; + JL_UNLOCK(&m->writelock); + } + JL_GC_POP(); return std::unique_ptr(M); } @@ -5298,7 +5309,18 @@ extern "C" void jl_fptr_to_llvm(jl_fptr_t fptr, jl_method_instance_t *lam, int s else { // 
this assigns a function pointer (from loading the system image), to the function object std::stringstream funcName; - funcName << "jlsys_" << jl_symbol_name(lam->def->name) << "_" << globalUnique++; + if (specsig) + funcName << "jlsys_"; // the specsig implementation + else if (lam->functionObjectsDecls.specFunctionObject) + funcName << "jlsysw_"; // it's a specsig wrapper + else if (lam->jlcall_api == 1) + funcName << "jsys1_"; // it's a jlcall without a specsig + const char* unadorned_name = jl_symbol_name(lam->def->name); +#if (defined(_OS_LINUX_) && JL_LLVM_VERSION < 30400) + if (unadorned_name[0] == '@') + unadorned_name++; +#endif + funcName << unadorned_name << "_" << globalUnique++; if (specsig) { // assumes !va SmallVector fsig; jl_value_t *jlrettype = lam->rettype; @@ -5317,7 +5339,7 @@ extern "C" void jl_fptr_to_llvm(jl_fptr_t fptr, jl_method_instance_t *lam, int s fsig.push_back(rt->getPointerTo()); rt = T_void; } - for (size_t i=0; i < jl_nparams(lam->specTypes); i++) { + for (size_t i = 0; i < jl_nparams(lam->specTypes); i++) { Type *ty = julia_type_to_llvm(jl_tparam(lam->specTypes,i)); if (type_is_ghost(ty)) continue; @@ -5415,13 +5437,7 @@ static void init_julia_llvm_env(Module *m) // This type is used to create undef Values for use in struct declarations to skip indices NoopType = ArrayType::get(T_int1, 0); - // add needed base definitions to our LLVM environment - StructType *valueSt = StructType::create(jl_LLVMContext, "jl_value_t"); - Type *valueStructElts[1] = { PointerType::getUnqual(valueSt) }; - ArrayRef vselts(valueStructElts); - valueSt->setBody(vselts); - T_jlvalue = valueSt; - + // add needed base debugging definitions to our LLVM environment DIBuilder dbuilder(*m); #if JL_LLVM_VERSION >= 30700 DIFile *julia_h = dbuilder.createFile("julia.h",""); @@ -5486,7 +5502,7 @@ static void init_julia_llvm_env(Module *m) dbuilder.getOrCreateArray(ArrayRef())); #endif - T_pjlvalue = PointerType::get(T_jlvalue, 0); + T_pjlvalue = T_ppint8; T_ppjlvalue = PointerType::get(T_pjlvalue, 0); two_pvalue_llvmt.push_back(T_pjlvalue); two_pvalue_llvmt.push_back(T_pjlvalue); @@ -5725,15 +5741,6 @@ static void init_julia_llvm_env(Module *m) "jl_get_binding_or_error", m); add_named_global(jlgetbindingorerror_func, &jl_get_binding_or_error); - jlpref_func = Function::Create(FunctionType::get(T_pjlvalue, three_pvalue_llvmt, false), - Function::ExternalLinkage, - "jl_pointerref", m); - - jlpset_func = Function::Create(FunctionType::get(T_pjlvalue, four_pvalue_llvmt, false), - Function::ExternalLinkage, - "jl_pointerset", m); - - builtin_func_map[jl_f_is] = jlcall_func_to_llvm("jl_f_is", &jl_f_is, m); builtin_func_map[jl_f_typeof] = jlcall_func_to_llvm("jl_f_typeof", &jl_f_typeof, m); builtin_func_map[jl_f_sizeof] = jlcall_func_to_llvm("jl_f_sizeof", &jl_f_sizeof, m); @@ -5904,6 +5911,16 @@ static void init_julia_llvm_env(Module *m) "jl_isa", m); add_named_global(jlisa_func, &jl_isa); + std::vector applytype_args(0); + applytype_args.push_back(T_pjlvalue); + applytype_args.push_back(T_pjlvalue); + applytype_args.push_back(T_ppjlvalue); + jlapplytype_func = + Function::Create(FunctionType::get(T_pjlvalue, applytype_args, false), + Function::ExternalLinkage, + "jl_instantiate_type_in_env", m); + add_named_global(jlapplytype_func, &jl_instantiate_type_in_env); + std::vector alloc_pool_args(0); alloc_pool_args.push_back(T_pint8); alloc_pool_args.push_back(T_int32); diff --git a/src/common_symbols1.inc b/src/common_symbols1.inc index 12c0004c2fbce..bc8380648a201 100644 --- 
a/src/common_symbols1.inc +++ b/src/common_symbols1.inc @@ -1,5 +1,5 @@ jl_symbol("getfield"), -jl_symbol("box"), +jl_symbol("bitcast"), jl_symbol("apply_type"), jl_symbol("meta"), jl_symbol("Ptr"), diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp index 82c62b216931f..099b6e4c5c28b 100644 --- a/src/debuginfo.cpp +++ b/src/debuginfo.cpp @@ -706,18 +706,26 @@ char *jl_demangle(const char *name) const char *start = name + 6; const char *end = name + strlen(name); char *ret; - if (strncmp(name, "julia_", 6)) goto done; - if (*start == '\0') goto done; + if (strncmp(name, "japi1_", 6) && + strncmp(name, "japi3_", 6) && + strncmp(name, "julia_", 6) && + strncmp(name, "jsys1_", 6) && + strncmp(name, "jlsys_", 6)) + goto done; + if (*start == '\0') + goto done; while (*(--end) != '_') { char c = *end; - if (c < '0' || c > '9') goto done; + if (c < '0' || c > '9') + goto done; } - if (end <= start) goto done; - ret = (char*)malloc(end-start+1); - memcpy(ret,start,end-start); - ret[end-start] = '\0'; + if (end <= start) + goto done; + ret = (char*)malloc(end - start + 1); + memcpy(ret, start, end - start); + ret[end - start] = '\0'; return ret; - done: +done: return strdup(name); } @@ -822,7 +830,10 @@ static int lookup_pointer(DIContext *context, jl_frame_t **frames, else jl_copy_str(&frame->file_name, file_name.c_str()); - if (!frame->func_name || !func_name.compare(0, 7, "jlcall_") || !func_name.compare(0, 7, "jlcapi_")) { + if (!frame->func_name || + func_name.compare(0, 7, "jlsysw_") == 0 || + func_name.compare(0, 7, "jlcall_") == 0 || + func_name.compare(0, 7, "jlcapi_") == 0) { frame->fromC = 1; } } diff --git a/src/dump.c b/src/dump.c index db0e38f042e3c..6beb615d0bc38 100644 --- a/src/dump.c +++ b/src/dump.c @@ -432,16 +432,16 @@ static void jl_update_all_fptrs(void) for (i = 0; i < delayed_fptrs_n; i++) { jl_method_instance_t *li = delayed_fptrs[i].li; assert(li->def); - int32_t func = delayed_fptrs[i].func - 1; - if (func >= 0) { - jl_fptr_to_llvm((jl_fptr_t)fvars[func], li, 0); - linfos[func] = li; - } int32_t cfunc = delayed_fptrs[i].cfunc - 1; if (cfunc >= 0) { jl_fptr_to_llvm((jl_fptr_t)fvars[cfunc], li, 1); linfos[cfunc] = li; } + int32_t func = delayed_fptrs[i].func - 1; + if (func >= 0) { + jl_fptr_to_llvm((jl_fptr_t)fvars[func], li, 0); + linfos[func] = li; + } } jl_register_fptrs(sysimage_base, fvars, linfos, sysimg_fvars_max); delayed_fptrs_n = 0; @@ -937,7 +937,6 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v) jl_serialize_value(s, (jl_value_t*)m->unspecialized); jl_serialize_value(s, (jl_value_t*)m->generator); jl_serialize_value(s, (jl_value_t*)m->invokes.unknown); - write_int8(s->s, m->needs_sparam_vals_ducttape); } else if (jl_is_method_instance(v)) { writetag(s->s, jl_method_instance_type); @@ -1685,7 +1684,6 @@ static jl_value_t *jl_deserialize_value_method(jl_serializer_state *s, jl_value_ jl_gc_wb(m, m->generator); m->invokes.unknown = jl_deserialize_value(s, (jl_value_t**)&m->invokes); jl_gc_wb(m, m->invokes.unknown); - m->needs_sparam_vals_ducttape = read_int8(s->s); m->traced = 0; JL_MUTEX_INIT(&m->writelock); return (jl_value_t*)m; diff --git a/src/gf.c b/src/gf.c index c919417826285..73c5e8eda1858 100644 --- a/src/gf.c +++ b/src/gf.c @@ -1592,7 +1592,7 @@ jl_llvm_functions_t jl_compile_for_dispatch(jl_method_instance_t **pli, size_t w if (jl_options.compile_enabled == JL_OPTIONS_COMPILE_OFF) { jl_printf(JL_STDERR, "code missing for "); jl_static_show(JL_STDERR, (jl_value_t*)li); - jl_printf(JL_STDERR, " sysimg may not have been built 
with --compile=all\n"); + jl_printf(JL_STDERR, " : sysimg may not have been built with --compile=all\n"); } } jl_llvm_functions_t decls = li->functionObjectsDecls; @@ -2002,7 +2002,7 @@ static void _precompile_enq_module(jl_module_t *m, jl_array_t *unspec) // removes all method caches size_t i; void **table = m->bindings.table; - for(i=1; i < m->bindings.size; i+=2) { + for (i = 1; i < m->bindings.size; i += 2) { if (table[i] != HT_NOTFOUND) { jl_binding_t *b = (jl_binding_t*)table[i]; if (b->owner == m && b->value && b->constp) { diff --git a/src/interpreter.c b/src/interpreter.c index cb9806134bb35..955efc8bfa1d3 100644 --- a/src/interpreter.c +++ b/src/interpreter.c @@ -233,9 +233,7 @@ static jl_value_t *eval(jl_value_t *e, interpreter_state *s) ssize_t n = jl_unbox_long(args[0]); assert(n > 0); if (s->sparam_vals && n <= jl_svec_len(s->sparam_vals)) { - jl_value_t *sp = jl_svecref(s->sparam_vals, n - 1); - if (!jl_is_typevar(sp)) - return sp; + return jl_svecref(s->sparam_vals, n - 1); } // static parameter val unknown needs to be an error for ccall jl_error("could not determine static parameter value"); diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index 84b19ded8e5e1..27cc70985c070 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -8,6 +8,7 @@ namespace JL_I { using namespace JL_I; static Function *runtime_func[num_intrinsics]; +static bool float_func[num_intrinsics]; static void jl_init_intrinsic_functions_codegen(Module *m) { std::vector args1(0); \ @@ -33,14 +34,47 @@ static void jl_init_intrinsic_functions_codegen(Module *m) } while (0); #define ADD_HIDDEN ADD_I #define ALIAS(alias, base) runtime_func[alias] = runtime_func[base]; - ADD_HIDDEN(reinterpret, 2); INTRINSICS #undef ADD_I #undef ADD_HIDDEN #undef ALIAS + + float_func[neg_float] = true; + float_func[neg_float_fast] = true; + float_func[add_float] = true; + float_func[sub_float] = true; + float_func[mul_float] = true; + float_func[div_float] = true; + float_func[rem_float] = true; + float_func[add_float_fast] = true; + float_func[sub_float_fast] = true; + float_func[mul_float_fast] = true; + float_func[div_float_fast] = true; + float_func[rem_float_fast] = true; + float_func[fma_float] = true; + float_func[muladd_float] = true; + float_func[eq_float] = true; + float_func[ne_float] = true; + float_func[lt_float] = true; + float_func[le_float] = true; + float_func[eq_float_fast] = true; + float_func[ne_float_fast] = true; + float_func[lt_float_fast] = true; + float_func[le_float_fast] = true; + float_func[fpiseq] = true; + float_func[fpislt] = true; + float_func[abs_float] = true; + //float_func[copysign_float] = false; // this is actually an integer operation + float_func[ceil_llvm] = true; + float_func[floor_llvm] = true; + float_func[trunc_llvm] = true; + float_func[rint_llvm] = true; + float_func[sqrt_llvm] = true; + float_func[sqrt_llvm_fast] = true; + float_func[powi_llvm] = true; } -extern "C" JL_DLLEXPORT uint32_t jl_get_LLVM_VERSION(void); +extern "C" JL_DLLEXPORT uint32_t jl_get_LLVM_VERSION(void) { return 10000 * LLVM_VERSION_MAJOR + 100 * LLVM_VERSION_MINOR @@ -50,105 +84,96 @@ JL_DLLEXPORT uint32_t jl_get_LLVM_VERSION(void) ; } -extern "C" JL_DLLEXPORT int8_t jl_is_memdebug() { -#ifdef MEMDEBUG - return true; -#else - return false; -#endif -} - /* - low-level intrinsics design: TODO: fix description below - functions like add_int expect unboxed values of matching bit-length. 
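For orientation, a minimal Julia-level sketch of the semantics the rewritten design notes describe: `bitcast` only retags the underlying bits of a bits-type value, much like scalar `reinterpret` (the call below assumes the post-rename `Core.Intrinsics.bitcast` surface API).

x = Core.Intrinsics.bitcast(UInt64, 1.0)   # same 64 bits, new type tag
x == 0x3ff0000000000000                    # true: the IEEE-754 encoding of 1.0
reinterpret(Float64, x) === 1.0            # true: round-trips losslessly
# bitcast requires the source and destination sizes to match; mismatches
# raise the "argument size does not match size of target type" error emitted below.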
+ low-level intrinsics design: + intrinsics only operate on bitstype values + any composite type is expected to be handled via its constructor, + so it is not permitted here + functions like add_int expect unboxed values of matching types every operation that can return an unboxed value does so. this maximizes opportunities for composing functions without unnecessary boxing. - this means that box and unbox functions might do nothing except change - the type tag of a value. + the bitcast function does nothing except change the type tag + of a value. At the user-level, it is perhaps better known as reinterpret. boxing is delayed until absolutely necessary, and handled at the point where the box is needed. + all intrinsics have a non-compiled implementation, this file contains + the optimizations for handling them unboxed */ -static Type *FTnbits(size_t nb) +// convert an llvm type to same-size float type +static Type *FLOATT(Type *t) { + if (t->isFloatingPointTy()) + return t; + unsigned nb = (t->isPointerTy() ? sizeof(void*) * 8 : t->getPrimitiveSizeInBits()); + if (nb == 64) + return T_float64; + if (nb == 32) + return T_float32; #ifndef DISABLE_FLOAT16 if (nb == 16) return T_float16; - else #endif - if (nb == 32) - return T_float32; - else if (nb == 64) - return T_float64; - else if (nb == 128) + if (nb == 128) return T_float128; - else - jl_error("Unsupported Float Size"); -} -// convert int type to same-size float type -static Type *FT(Type *t) -{ - if (t->isFloatingPointTy()) - return t; - return FTnbits(t->getPrimitiveSizeInBits()); -} - -// reinterpret-cast to float -static Value *FP(Value *v) -{ - if (v->getType()->isFloatingPointTy()) - return v; - return emit_bitcast(v, FT(v->getType())); + return NULL; } -// convert float type to same-size int type -static Type *JL_INTT(Type *t) +// convert an llvm type to same-size int type +static Type *INTT(Type *t) { if (t->isIntegerTy()) return t; if (t->isPointerTy()) return T_size; - if (t == T_float32) return T_int32; - if (t == T_float64) return T_int64; - assert(t == T_void); - return T_void; + if (t == T_float64) + return T_int64; + if (t == T_float32) + return T_int32; + if (t == T_float16) + return T_int16; + unsigned nb = t->getPrimitiveSizeInBits(); + assert(t != T_void && nb > 0); + return IntegerType::get(jl_LLVMContext, nb); } + // convert float type to same-size int type (as a Julia type) static jl_value_t *JL_JLUINTT(Type *t) { assert(!t->isIntegerTy()); - if (t == T_float32) return (jl_value_t*)jl_uint32_type; - if (t == T_float64) return (jl_value_t*)jl_uint64_type; - if (t == T_float16) return (jl_value_t*)jl_uint16_type; + if (t == T_float64) + return (jl_value_t*)jl_uint64_type; + if (t == T_float32) + return (jl_value_t*)jl_uint32_type; + if (t == T_float16) + return (jl_value_t*)jl_uint16_type; + //if (t == T_float128) + // return (jl_value_t*)jl_uint128_type; assert(t == T_void); return jl_bottom_type; } + static jl_value_t *JL_JLSINTT(Type *t) { assert(!t->isIntegerTy()); - if (t == T_float32) return (jl_value_t*)jl_int32_type; - if (t == T_float64) return (jl_value_t*)jl_int64_type; - if (t == T_float16) return (jl_value_t*)jl_int16_type; + if (t == T_float64) + return (jl_value_t*)jl_int64_type; + if (t == T_float32) + return (jl_value_t*)jl_int32_type; + if (t == T_float16) + return (jl_value_t*)jl_int16_type; + //if (t == T_float128) + // return (jl_value_t*)jl_uint128_type; assert(t == T_void); return jl_bottom_type; } -// reinterpret-cast to int -static Value *JL_INT(Value *v) -{ - Type *t = v->getType(); - if 
(t->isIntegerTy()) - return v; - if (t->isPointerTy()) - return builder.CreatePtrToInt(v, JL_INTT(t)); - return emit_bitcast(v, JL_INTT(t)); -} - static Value *uint_cnvt(Type *to, Value *x) { Type *t = x->getType(); - if (t == to) return x; + if (t == to) + return x; if (to->getPrimitiveSizeInBits() < x->getType()->getPrimitiveSizeInBits()) return builder.CreateTrunc(x, to); return builder.CreateZExt(x, to); @@ -237,7 +262,7 @@ static Constant *julia_const_to_llvm(void *ptr, jl_value_t *bt) fields[i] = val; } - Type *t = julia_struct_to_llvm(bt, NULL); + Type *t = julia_struct_to_llvm(bt, NULL, NULL); if (type_is_ghost(t)) return UndefValue::get(NoopType); if (t->isVectorTy()) @@ -268,9 +293,12 @@ static jl_cgval_t ghostValue(jl_value_t *ty); // emit code to unpack a raw value from a box into registers or a stack slot static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt, Value *dest, bool volatile_store) { - assert(to != T_pjlvalue); + assert(to != T_void); // TODO: fully validate that x.typ == jt? if (x.isghost) { + // this can happen when a branch yielding a different type ends + // up being dead code, and type inference knows that the other + // branch's type is the only one that matters. if (type_is_ghost(to)) { return NULL; } @@ -282,23 +310,30 @@ static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt, Value *d if (!x.ispointer() || c) { // already unboxed, but sometimes need conversion Value *unboxed = c ? c : x.V; Type *ty = unboxed->getType(); - // bools are stored internally as int8 (for now) - if (ty == T_int1 && to == T_int8) - unboxed = builder.CreateZExt(unboxed, T_int8); - else if (ty->isPointerTy() && !to->isPointerTy()) - unboxed = builder.CreatePtrToInt(unboxed, to); - else if (!ty->isPointerTy() && to->isPointerTy()) + assert(ty != T_void); + bool frompointer = ty->isPointerTy(); + bool topointer = to->isPointerTy(); + if (frompointer && topointer) { + unboxed = emit_bitcast(unboxed, to); + } + else if (frompointer) { + Type *INTT_to = INTT(to); + unboxed = builder.CreatePtrToInt(unboxed, INTT_to); + if (INTT_to != to) + unboxed = builder.CreateBitCast(unboxed, to); + } + else if (topointer) { + Type *INTT_to = INTT(to); + if (to != INTT_to) + unboxed = builder.CreateBitCast(unboxed, INTT_to); unboxed = builder.CreateIntToPtr(unboxed, to); - else if (ty->isPointerTy() && to->isPointerTy()) - // pointer types are going away anyways, and this can come up in ccall argument conversion - unboxed = builder.CreatePointerCast(unboxed, to); - else if (ty != to) { - // this can happen when a branch yielding a different type ends - // up being dead code, and type inference knows that the other - // branch's type is the only one that matters. - // assert(ty == T_void); - //emit_error("emit_unbox: a type mismatch error in occurred during codegen", ctx); - unboxed = UndefValue::get(to); // type mismatch error + } + else if (ty == T_int1 && to == T_int8) { + // bools may be stored internally as int8 + unboxed = builder.CreateZExt(unboxed, T_int8); + } + else { + unboxed = builder.CreateBitCast(unboxed, to); } if (!dest) return unboxed; @@ -359,112 +394,56 @@ static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt, Value *d } } -// unbox, trying to determine correct bitstype automatically -// returns some sort of raw, unboxed numeric type (e.g. 
in registers) -static Value *auto_unbox(const jl_cgval_t &v, jl_codectx_t *ctx) -{ - jl_value_t *bt = v.typ; - if (!jl_is_bitstype(bt)) { - // This can be reached with a direct invalid call to an Intrinsic, such as: - // Intrinsics.neg_int("") - emit_error("auto_unbox: unable to determine argument type", ctx); - return UndefValue::get(T_void); - } - bool isboxed; - Type *to = julia_type_to_llvm(v.typ, &isboxed); - if (to == NULL || isboxed) { - // might be some sort of incomplete (but valid) Ptr{T} type, for example - unsigned int nb = jl_datatype_nbits(bt); - to = IntegerType::get(jl_LLVMContext, nb); - } - if (type_is_ghost(to)) { - return NULL; - } - assert(!to->isAggregateType()); // expecting some sort of jl_bitstype - return emit_unbox(to, v, bt); -} -static Value *auto_unbox(jl_value_t *x, jl_codectx_t *ctx) -{ - jl_cgval_t v = emit_expr(x, ctx); - return auto_unbox(v, ctx); -} - -static jl_value_t *staticeval_bitstype(jl_value_t *targ, const char *fname, jl_codectx_t *ctx) +static jl_value_t *staticeval_bitstype(const jl_cgval_t &targ) { // evaluate an argument at compile time to determine what type it is - jl_cgval_t bt_value = emit_expr(targ, ctx); - jl_value_t *bt = NULL; - if (jl_is_type_type(bt_value.typ)) - bt = jl_tparam0(bt_value.typ); - if (!bt || !jl_is_bitstype(bt)) { - emit_error("expected bits type as first argument", ctx); - return NULL; + if (jl_is_type_type(targ.typ)) { + jl_value_t *bt = jl_tparam0(targ.typ); + if (jl_is_bitstype(bt)) + return bt; } - return bt; + return NULL; } -static Type *staticeval_bitstype(jl_value_t *bt) +static jl_cgval_t emit_runtime_call(JL_I::intrinsic f, const jl_cgval_t *argv, size_t nargs, jl_codectx_t *ctx) { - assert(jl_is_bitstype(bt)); - bool isboxed; - Type *to = julia_type_to_llvm(bt, &isboxed); - if (to == NULL || isboxed) { - unsigned int nb = jl_datatype_nbits(bt); - to = IntegerType::get(jl_LLVMContext, nb); + Value *func = prepare_call(runtime_func[f]); + Value **argvalues = (Value**)alloca(sizeof(Value*) * nargs); + for (size_t i = 0; i < nargs; ++i) { + argvalues[i] = boxed(argv[i], ctx); } - assert(!to->isAggregateType()); // expecting a bits type - return to; + Value *r = builder.CreateCall(func, makeArrayRef(argvalues, nargs)); + return mark_julia_type(r, true, (jl_value_t*)jl_any_type, ctx); } -// figure out how many bits a bitstype has at compile time -static int get_bitstype_nbits(jl_value_t *bt) +// put a bits type tag on some value (despite the name, this doesn't necessarily actually change anything about the value however) +static jl_cgval_t generic_bitcast(const jl_cgval_t *argv, jl_codectx_t *ctx) { - assert(jl_is_bitstype(bt)); - return jl_datatype_nbits(bt); -} + // Give the arguments names // + const jl_cgval_t &bt_value = argv[0]; + const jl_cgval_t &v = argv[1]; + jl_value_t *bt = staticeval_bitstype(bt_value); -// put a bits type tag on some value (despite the name, this doesn't necessarily actually "box" the value however) -static jl_cgval_t generic_box(jl_value_t *targ, jl_value_t *x, jl_codectx_t *ctx) -{ - // Examine the first argument // - jl_cgval_t bt_value = emit_expr(targ, ctx); - jl_cgval_t v = emit_expr(x, ctx); - jl_value_t *bt = NULL; - if (jl_is_type_type(bt_value.typ)) - bt = jl_tparam0(bt_value.typ); - - if (!bt || !jl_is_bitstype(bt)) { - // it's easier to throw a good error from C than llvm - Value *arg1 = boxed(bt_value, ctx); - Value *arg2 = boxed(v, ctx); - Value *func = prepare_call(runtime_func[reinterpret]); -#if JL_LLVM_VERSION >= 30700 - Value *r = builder.CreateCall(func, {arg1, 
arg2}); -#else - Value *r = builder.CreateCall2(func, arg1, arg2); -#endif - jl_value_t *et = expr_type(targ, ctx); - return mark_julia_type(r, true, jl_is_type_type(et) ? jl_tparam0(et) : (jl_value_t*)jl_any_type, ctx); - } + // it's easier to throw a good error from C than llvm + if (!bt) + return emit_runtime_call(bitcast, argv, 2, ctx); - Type *llvmt = staticeval_bitstype(bt); + Type *llvmt = bitstype_to_llvm(bt); int nb = jl_datatype_size(bt); // Examine the second argument // bool isboxed; Type *vxt = julia_type_to_llvm(v.typ, &isboxed); - if (!jl_is_datatype(v.typ) - || !jl_is_bitstype(v.typ) - || jl_datatype_size(v.typ) != nb) { + if (!jl_is_bitstype(v.typ) || jl_datatype_size(v.typ) != nb) { Value *typ = emit_typeof_boxed(v, ctx); if (!jl_is_bitstype(v.typ)) { if (isboxed) { Value *isbits = emit_datatype_isbitstype(typ); - error_unless(isbits, "reinterpret: expected bitstype value for second argument", ctx); + error_unless(isbits, "bitcast: expected bitstype value for second argument", ctx); } else { - emit_error("reinterpret: expected bitstype value for second argument", ctx); + emit_error("bitcast: expected bitstype value for second argument", ctx); return jl_cgval_t(); } } @@ -472,10 +451,10 @@ static jl_cgval_t generic_box(jl_value_t *targ, jl_value_t *x, jl_codectx_t *ctx if (isboxed) { Value *size = emit_datatype_size(typ); error_unless(builder.CreateICmpEQ(size, ConstantInt::get(T_int32, nb)), - "reinterpret: argument size does not match size of target type", ctx); + "bitcast: argument size does not match size of target type", ctx); } else { - emit_error("reinterpret: argument size does not match size of target type", ctx); + emit_error("bitcast: argument size does not match size of target type", ctx); return jl_cgval_t(); } } @@ -520,191 +499,143 @@ static jl_cgval_t generic_box(jl_value_t *targ, jl_value_t *x, jl_codectx_t *ctx true, bt, ctx); } -// put a bits type tag on some value -static jl_cgval_t generic_unbox(jl_value_t *targ, jl_value_t *x, jl_codectx_t *ctx) +static jl_cgval_t generic_cast( + intrinsic f, Value *(*generic)(Type*, Value*, jl_codectx_t*), + const jl_cgval_t *argv, jl_codectx_t *ctx, bool toint, bool fromint) { - // Examine the first argument // - jl_cgval_t bt_value = emit_expr(targ, ctx); - jl_value_t *bt = NULL; - if (jl_is_type_type(bt_value.typ)) - bt = jl_tparam0(bt_value.typ); + const jl_cgval_t &targ = argv[0]; + const jl_cgval_t &v = argv[1]; + jl_value_t *jlto = staticeval_bitstype(targ); + if (!jlto || !jl_is_bitstype(v.typ)) + return emit_runtime_call(f, argv, 2, ctx); + Type *to = bitstype_to_llvm(jlto); + Type *vt = bitstype_to_llvm(v.typ); + if (toint) + to = INTT(to); + else + to = FLOATT(to); + if (fromint) + vt = INTT(vt); + else + vt = FLOATT(vt); + if (!to || !vt) + return emit_runtime_call(f, argv, 2, ctx); + Value *from = emit_unbox(vt, v, v.typ); + Value *ans = generic(to, from, ctx); + return mark_julia_type(ans, false, jlto, ctx); +} - // Examine the second argument // - jl_cgval_t v = emit_expr(x, ctx); - - if (bt == NULL || !jl_is_leaf_type(bt)) { - // dynamically-determined type; evaluate. 
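A minimal sketch of the integer-width conversions that the new `generic_cast` helpers lower; intrinsic names are those used in this hunk, and calling them directly from Julia assumes the runtime fallbacks that this change introduces.

Core.Intrinsics.trunc_int(UInt8, 0x0102)    # 0x02: high bits silently dropped
Core.Intrinsics.zext_int(UInt16, 0xff)      # 0x00ff: zero-extended
Core.Intrinsics.sext_int(Int16, Int8(-1))   # -1: sign bit replicated
# checked_trunc_uint / checked_trunc_sint additionally re-extend the result
# and throw InexactError when the round-trip does not reproduce the input.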
- int nb, alignment; - Type *llvmt; - if (bt && jl_is_bitstype(bt)) { - // always fixed size - nb = jl_datatype_size(bt); - llvmt = staticeval_bitstype(bt); - alignment = ((jl_datatype_t*)bt)->layout->alignment; - } - else { - bt = v.typ; - if (!jl_is_leaf_type(bt) && !jl_is_bitstype(bt)) { - // TODO: currently doesn't handle the case where the type of neither argument is understood at compile time - // since codegen has no idea what size it might have - jl_error("codegen: failed during evaluation of a call to unbox"); - return jl_cgval_t(); - } - nb = jl_datatype_size(bt); - llvmt = staticeval_bitstype(bt); - alignment = ((jl_datatype_t*)bt)->layout->alignment; - } - Value *runtime_bt = boxed(bt_value, ctx); - // XXX: emit type validity check on runtime_bt (bitstype of size nb) +static Value *generic_trunc(Type *to, Value *x, jl_codectx_t *ctx) +{ + return builder.CreateTrunc(x, to); +} - Value *newobj = emit_allocobj(ctx, nb, runtime_bt); - if (!v.ispointer()) { - tbaa_decorate(tbaa_value, builder.CreateAlignedStore(emit_unbox(llvmt, v, v.typ), builder.CreatePointerCast(newobj, llvmt->getPointerTo()), alignment)); - } - else { - prepare_call(builder.CreateMemCpy(newobj, data_pointer(v, ctx, T_pint8), nb, alignment)->getCalledValue()); - mark_gc_use(v); - } - return mark_julia_type(newobj, true, bt ? bt : (jl_value_t*)jl_any_type, ctx); - } +static Value *generic_trunc_uchecked(Type *to, Value *x, jl_codectx_t *ctx) +{ + Value *ans = builder.CreateTrunc(x, to); + Value *back = builder.CreateZExt(ans, x->getType()); + raise_exception_unless(builder.CreateICmpEQ(back, x), + literal_pointer_val(jl_inexact_exception), ctx); + return ans; +} - if (!jl_is_bitstype(bt)) { - // TODO: to accept arbitrary types, replace this function with a call to llvm_type_rewrite - emit_error("unbox: expected bits type as first argument", ctx); - return jl_cgval_t(); - } +static Value *generic_trunc_schecked(Type *to, Value *x, jl_codectx_t *ctx) +{ + Value *ans = builder.CreateTrunc(x, to); + Value *back = builder.CreateSExt(ans, x->getType()); + raise_exception_unless(builder.CreateICmpEQ(back, x), + literal_pointer_val(jl_inexact_exception), ctx); + return ans; +} - Type *llvmt = staticeval_bitstype(bt); - if (v.typ == bt) - return v; +static Value *generic_sext(Type *to, Value *x, jl_codectx_t *ctx) +{ + return builder.CreateSExt(x, to); +} - Value *vx; - if (v.ispointer()) { - vx = tbaa_decorate(v.tbaa, builder.CreateLoad(data_pointer(v, ctx, llvmt->getPointerTo()))); - } - else { - vx = v.V; - if (!jl_is_bitstype(v.typ)) { - emit_error("unbox: expected bits type value for second argument", ctx); - return jl_cgval_t(); - } - } +static Value *generic_zext(Type *to, Value *x, jl_codectx_t *ctx) +{ + return builder.CreateZExt(x, to); +} - Type *vxt = vx->getType(); - if (llvmt == T_int1) { - vx = builder.CreateTrunc(vx, llvmt); - } - else if (vxt == T_int1 && llvmt == T_int8) { - vx = builder.CreateZExt(vx, llvmt); - } - else if (vxt != llvmt) { - // getPrimitiveSizeInBits() == 0 for pointers - // PtrToInt and IntToPtr ignore size differences - if (vxt->getPrimitiveSizeInBits() != llvmt->getPrimitiveSizeInBits() && - !(vxt->isPointerTy() && llvmt->getPrimitiveSizeInBits() == sizeof(void*)*8) && - !(llvmt->isPointerTy() && vxt->getPrimitiveSizeInBits() == sizeof(void*)*8)) { - emit_error("unbox: argument is of incorrect size", ctx); - return jl_cgval_t(); - } - if (vxt->isPointerTy() && !llvmt->isPointerTy()) - vx = builder.CreatePtrToInt(vx, llvmt); - else if (!vxt->isPointerTy() && llvmt->isPointerTy()) - vx 
= builder.CreateIntToPtr(vx, llvmt); - else - vx = emit_bitcast(vx, llvmt); - } +static Value *generic_uitofp(Type *to, Value *x, jl_codectx_t *ctx) +{ + return builder.CreateUIToFP(x, to); +} - return mark_julia_type(vx, false, bt, ctx); +static Value *generic_sitofp(Type *to, Value *x, jl_codectx_t *ctx) +{ + return builder.CreateSIToFP(x, to); } -// NOTE: signd (signed) only relevant if check == true -static jl_cgval_t generic_trunc(jl_value_t *targ, jl_value_t *x, jl_codectx_t *ctx, bool check, bool signd) +static Value *generic_fptoui(Type *to, Value *x, jl_codectx_t *ctx) { - jl_value_t *jlto = staticeval_bitstype(targ, "trunc_int", ctx); - if (!jlto) return jl_cgval_t(); // jlto threw an error - Type *to = staticeval_bitstype(jlto); - Value *ix = JL_INT(auto_unbox(x, ctx)); - if (ix->getType() == T_void) return jl_cgval_t(); // auto_unbox threw an error - Value *ans = builder.CreateTrunc(ix, to); - if (check) { - Value *back = signd ? builder.CreateSExt(ans, ix->getType()) : - builder.CreateZExt(ans, ix->getType()); - raise_exception_unless(builder.CreateICmpEQ(back, ix), - literal_pointer_val(jl_inexact_exception), ctx); - } - return mark_julia_type(ans, false, jlto, ctx); + return builder.CreateFPToUI(x, to); } -static jl_cgval_t generic_sext(jl_value_t *targ, jl_value_t *x, jl_codectx_t *ctx) +static Value *generic_fptosi(Type *to, Value *x, jl_codectx_t *ctx) { - jl_value_t *jlto = staticeval_bitstype(targ, "sext_int", ctx); - if (!jlto) return jl_cgval_t(); // jlto threw an error - Type *to = staticeval_bitstype(jlto); - Value *ix = JL_INT(auto_unbox(x, ctx)); - if (ix->getType() == T_void) return jl_cgval_t(); // auto_unbox threw an error - Value *ans = builder.CreateSExt(ix, to); - return mark_julia_type(ans, false, jlto, ctx); + return builder.CreateFPToSI(x, to); } -static jl_cgval_t generic_zext(jl_value_t *targ, jl_value_t *x, jl_codectx_t *ctx) +static Value *generic_fptrunc(Type *to, Value *x, jl_codectx_t *ctx) { - jl_value_t *jlto = staticeval_bitstype(targ, "zext_int", ctx); - if (!jlto) return jl_cgval_t(); // jlto threw an error - Type *to = staticeval_bitstype(jlto); - Value *ix = JL_INT(auto_unbox(x, ctx)); - if (ix->getType() == T_void) return jl_cgval_t(); // auto_unbox threw an error - Value *ans = builder.CreateZExt(ix, to); - return mark_julia_type(ans, false, jlto, ctx); + return builder.CreateFPTrunc(x, to); } -static jl_cgval_t emit_runtime_pointerref(jl_value_t *e, jl_value_t *i, jl_value_t *align, jl_codectx_t *ctx) +static Value *generic_fpext(Type *to, Value *x, jl_codectx_t *ctx) { - jl_cgval_t parg = emit_expr(e, ctx); - Value *iarg = boxed(emit_expr(i, ctx), ctx); - Value *alignarg = boxed(emit_expr(align, ctx), ctx); -#if JL_LLVM_VERSION >= 30700 - Value *ret = builder.CreateCall(prepare_call(jlpref_func), { boxed(parg, ctx), iarg, alignarg }); -#else - Value *ret = builder.CreateCall3(prepare_call(jlpref_func), boxed(parg, ctx), iarg, alignarg); +#ifdef JL_NEED_FLOATTEMP_VAR + // Target platform might carry extra precision. + // Force rounding to single precision first. The reason is that it's + // fine to keep working in extended precision as long as it's + // understood that everything is implicitly rounded to 23 bits, + // but if we start looking at more bits we need to actually do the + // rounding first instead of carrying around incorrect low bits. 
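A minimal sketch of the floating-point width conversions handled by `generic_fptrunc`/`generic_fpext`; the values are chosen so both directions are exact, and direct intrinsic calls again assume the runtime fallbacks from this change.

Core.Intrinsics.fpext(Float64, 1.5f0)    # 1.5: widening is always exact
Core.Intrinsics.fptrunc(Float32, 1.5)    # 1.5f0: exact here, rounds in general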
+ Value *jlfloattemp_var = emit_static_alloca(x->getType()); + builder.CreateStore(x, jlfloattemp_var); + x = builder.CreateLoad(jlfloattemp_var, true); #endif - jl_value_t *ety; - if (jl_is_cpointer_type(parg.typ)) { - ety = jl_tparam0(parg.typ); - } - else { - ety = (jl_value_t*)jl_any_type; - } - return mark_julia_type(ret, true, ety, ctx); + return builder.CreateFPExt(x, to); } -static jl_cgval_t emit_pointerref(jl_value_t *e, jl_value_t *i, jl_value_t *align, jl_codectx_t *ctx) +static jl_cgval_t emit_runtime_pointerref(jl_cgval_t *argv, jl_codectx_t *ctx) { - jl_value_t *aty = expr_type(e, ctx); + return emit_runtime_call(pointerref, argv, 3, ctx); +} + +static jl_cgval_t emit_pointerref(jl_cgval_t *argv, jl_codectx_t *ctx) +{ + const jl_cgval_t &e = argv[0]; + const jl_cgval_t &i = argv[1]; + const jl_cgval_t &align = argv[2]; + + if (align.constant == NULL || !jl_is_long(align.constant)) + return emit_runtime_pointerref(argv, ctx); + unsigned align_nb = jl_unbox_long(align.constant); + + if (i.typ != (jl_value_t*)jl_long_type) + return emit_runtime_pointerref(argv, ctx); + jl_value_t *aty = e.typ; if (!jl_is_cpointer_type(aty)) - return emit_runtime_pointerref(e, i, align, ctx); - //jl_error("pointerref: expected pointer type as first argument"); + return emit_runtime_pointerref(argv, ctx); jl_value_t *ety = jl_tparam0(aty); if (jl_is_typevar(ety)) - return emit_runtime_pointerref(e, i, align, ctx); - //jl_error("pointerref: invalid pointer"); - if (expr_type(i, ctx) != (jl_value_t*)jl_long_type) - return emit_runtime_pointerref(e, i, align, ctx); - //jl_error("pointerref: invalid index type"); - jl_cgval_t align_val = emit_expr(align, ctx); - if (align_val.constant == NULL || !jl_is_long(align_val.constant)) - return emit_runtime_pointerref(e, i, align, ctx); - //jl_error("pointerref: invalid or non-statically evaluatable alignment") - Value *thePtr = auto_unbox(e,ctx); - Value *idx = emit_unbox(T_size, emit_expr(i, ctx), (jl_value_t*)jl_long_type); + return emit_runtime_pointerref(argv, ctx); + if (!jl_is_datatype(ety)) + ety = (jl_value_t*)jl_any_type; + + Value *idx = emit_unbox(T_size, i, (jl_value_t*)jl_long_type); Value *im1 = builder.CreateSub(idx, ConstantInt::get(T_size, 1)); + if (!jl_isbits(ety)) { - if (ety == (jl_value_t*)jl_any_type) + if (ety == (jl_value_t*)jl_any_type) { + Value *thePtr = emit_unbox(T_ppjlvalue, e, e.typ); return mark_julia_type( - builder.CreateAlignedLoad(builder.CreateGEP( - emit_bitcast(thePtr, T_ppjlvalue), - im1), jl_unbox_long(align_val.constant)), + builder.CreateAlignedLoad(builder.CreateGEP(thePtr, im1), align_nb), true, ety, ctx); + } if (!jl_is_structtype(ety) || jl_is_array_type(ety) || !jl_is_leaf_type(ety)) { emit_error("pointerref: invalid pointer type", ctx); return jl_cgval_t(); @@ -715,78 +646,74 @@ static jl_cgval_t emit_pointerref(jl_value_t *e, jl_value_t *i, jl_value_t *alig literal_pointer_val((jl_value_t*)ety)); im1 = builder.CreateMul(im1, ConstantInt::get(T_size, LLT_ALIGN(size, ((jl_datatype_t*)ety)->layout->alignment))); + Value *thePtr = emit_unbox(T_pint8, e, e.typ); thePtr = builder.CreateGEP(emit_bitcast(thePtr, T_pint8), im1); - prepare_call(builder.CreateMemCpy(emit_bitcast(strct, T_pint8), - thePtr, size, 1)->getCalledValue()); + builder.CreateMemCpy(emit_bitcast(strct, T_pint8), thePtr, size, 1); return mark_julia_type(strct, true, ety, ctx); } - return typed_load(thePtr, im1, ety, ctx, tbaa_data, jl_unbox_long(align_val.constant)); + + bool isboxed; + Type *ptrty = julia_type_to_llvm(e.typ, &isboxed); + 
assert(!isboxed); + Value *thePtr = emit_unbox(ptrty, e, e.typ); + return typed_load(thePtr, im1, ety, ctx, tbaa_data, align_nb); } -static jl_cgval_t emit_runtime_pointerset(jl_value_t *e, jl_value_t *x, jl_value_t *i, jl_value_t *align, jl_codectx_t *ctx) +static jl_cgval_t emit_runtime_pointerset(jl_cgval_t *argv, jl_codectx_t *ctx) { - jl_cgval_t parg = emit_expr(e, ctx); - Value *xarg = boxed(emit_expr(x, ctx), ctx); - Value *iarg = boxed(emit_expr(i, ctx), ctx); - Value *alignarg = boxed(emit_expr(align, ctx), ctx); -#if JL_LLVM_VERSION >= 30700 - builder.CreateCall(prepare_call(jlpset_func), { boxed(parg, ctx), xarg, iarg, alignarg }); -#else - builder.CreateCall4(prepare_call(jlpset_func), boxed(parg, ctx), xarg, iarg, alignarg); -#endif - return parg; + return emit_runtime_call(pointerset, argv, 4, ctx); } // e[i] = x -static jl_cgval_t emit_pointerset(jl_value_t *e, jl_value_t *x, jl_value_t *i, jl_value_t *align, jl_codectx_t *ctx) +static jl_cgval_t emit_pointerset(jl_cgval_t *argv, jl_codectx_t *ctx) { - jl_value_t *aty = expr_type(e, ctx); + const jl_cgval_t &e = argv[0]; + const jl_cgval_t &x = argv[1]; + const jl_cgval_t &i = argv[2]; + const jl_cgval_t &align = argv[3]; + + if (align.constant == NULL || !jl_is_long(align.constant)) + return emit_runtime_pointerset(argv, ctx); + unsigned align_nb = jl_unbox_long(align.constant); + + if (i.typ != (jl_value_t*)jl_long_type) + return emit_runtime_pointerset(argv, ctx); + jl_value_t *aty = e.typ; if (!jl_is_cpointer_type(aty)) - return emit_runtime_pointerset(e, x, i, align, ctx); - //jl_error("pointerset: expected pointer type as first argument"); + return emit_runtime_pointerset(argv, ctx); jl_value_t *ety = jl_tparam0(aty); if (jl_is_typevar(ety)) - return emit_runtime_pointerset(e, x, i, align, ctx); - //jl_error("pointerset: invalid pointer"); - jl_value_t *xty = expr_type(x, ctx); - jl_cgval_t val; - bool emitted = false; - if (!jl_subtype(xty, ety)) { - emitted = true; - val = emit_expr(x, ctx); - emit_typecheck(val, ety, "pointerset: type mismatch in assign", ctx); - } - if (expr_type(i, ctx) != (jl_value_t*)jl_long_type) - return emit_runtime_pointerset(e, x, i, align, ctx); - //jl_error("pointerset: invalid index type"); - jl_cgval_t align_val = emit_expr(align, ctx); - if (align_val.constant == NULL || !jl_is_long(align_val.constant)) - return emit_runtime_pointerset(e, x, i, align, ctx); - //jl_error("pointerset: invalid or non-statically evaluatable alignment") - Value *idx = emit_unbox(T_size, emit_expr(i, ctx),(jl_value_t*)jl_long_type); + return emit_runtime_pointerset(argv, ctx); + if (align.constant == NULL || !jl_is_long(align.constant)) + return emit_runtime_pointerset(argv, ctx); + if (!jl_is_datatype(ety)) + ety = (jl_value_t*)jl_any_type; + jl_value_t *xty = x.typ; + if (!jl_subtype(xty, ety)) + emit_typecheck(x, ety, "pointerset: type mismatch in assign", ctx); + + Value *idx = emit_unbox(T_size, i, (jl_value_t*)jl_long_type); Value *im1 = builder.CreateSub(idx, ConstantInt::get(T_size, 1)); - Value *thePtr = auto_unbox(e,ctx); + + Value *thePtr; if (!jl_isbits(ety) && ety != (jl_value_t*)jl_any_type) { if (!jl_is_structtype(ety) || jl_is_array_type(ety) || !jl_is_leaf_type(ety)) { emit_error("pointerset: invalid pointer type", ctx); return jl_cgval_t(); } - if (!emitted) - val = emit_expr(x, ctx); - assert(val.isboxed); - assert(jl_is_datatype(ety)); + thePtr = emit_unbox(T_pint8, e, e.typ); uint64_t size = jl_datatype_size(ety); im1 = builder.CreateMul(im1, ConstantInt::get(T_size, LLT_ALIGN(size, 
((jl_datatype_t*)ety)->layout->alignment))); - prepare_call(builder.CreateMemCpy(builder.CreateGEP(emit_bitcast(thePtr, T_pint8), im1), - data_pointer(val, ctx, T_pint8), size, jl_unbox_long(align_val.constant))->getCalledValue()); + builder.CreateMemCpy(builder.CreateGEP(thePtr, im1), + data_pointer(x, ctx, T_pint8), size, align_nb); } else { - if (!emitted) { - val = emit_expr(x, ctx); - } - assert(jl_is_datatype(ety)); - typed_store(thePtr, im1, val, ety, ctx, tbaa_data, NULL, jl_unbox_long(align_val.constant)); + bool isboxed; + Type *ptrty = julia_type_to_llvm(e.typ, &isboxed); + assert(!isboxed); + thePtr = emit_unbox(ptrty, e, e.typ); + typed_store(thePtr, im1, x, ety, ctx, tbaa_data, NULL, align_nb); } return mark_julia_type(thePtr, false, aty, ctx); } @@ -843,8 +770,9 @@ struct math_builder { } }; -static Value *emit_untyped_intrinsic(intrinsic f, Value *x, Value *y, Value *z, size_t nargs, - jl_codectx_t *ctx, jl_datatype_t **newtyp, jl_value_t* xtyp); +static Value *emit_untyped_intrinsic(intrinsic f, Value **argvalues, size_t nargs, + jl_codectx_t *ctx, jl_datatype_t **newtyp, jl_value_t *xtyp); + static jl_cgval_t emit_intrinsic(intrinsic f, jl_value_t **args, size_t nargs, jl_codectx_t *ctx) { @@ -860,166 +788,99 @@ static jl_cgval_t emit_intrinsic(intrinsic f, jl_value_t **args, size_t nargs, jl_errorf("intrinsic #%d %s: wrong number of arguments", f, JL_I::jl_intrinsic_name((int)f)); } + if (f == llvmcall) + return emit_llvmcall(args, nargs, ctx); + if (f == cglobal_auto || f == cglobal) + return emit_cglobal(args, nargs, ctx); + + jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs); + for (size_t i = 0; i < nargs; ++i) { + argv[i] = emit_expr(args[i + 1], ctx); + } + + // this forces everything to use runtime-intrinsics (e.g. for testing) + // return emit_runtime_call(f, argv, nargs, ctx); + switch (f) { - case ccall: return emit_ccall(args, nargs, ctx); - case cglobal_auto: - case cglobal: return emit_cglobal(args, nargs, ctx); - case llvmcall: return emit_llvmcall(args, nargs, ctx); case arraylen: - return mark_julia_type(emit_arraylen(emit_expr(args[1], ctx), args[1], ctx), false, - jl_long_type, ctx); -#if 0 // this section enables runtime-intrinsics (e.g. 
for testing), and disables their llvm counterparts - default: - Value *r; - Value *func = prepare_call(runtime_func[f]); - if (nargs == 1) { - Value *x = boxed(emit_expr(args[1], ctx), ctx); -#if JL_LLVM_VERSION >= 30700 - r = builder.CreateCall(func, {x}); -#else - r = builder.CreateCall(func, x); -#endif - } - else if (nargs == 2) { - Value *x = boxed(emit_expr(args[1], ctx), ctx); - Value *y = boxed(emit_expr(args[2], ctx), ctx); -#if JL_LLVM_VERSION >= 30700 - r = builder.CreateCall(func, {x, y}); -#else - r = builder.CreateCall2(func, x, y); -#endif - } - else if (nargs == 3) { - Value *x = boxed(emit_expr(args[1], ctx), ctx); - Value *y = boxed(emit_expr(args[2], ctx), ctx); - Value *z = boxed(emit_expr(args[3], ctx), ctx); -#if JL_LLVM_VERSION >= 30700 - r = builder.CreateCall(func, {x, y, z}); -#else - r = builder.CreateCall3(func, x, y, z); -#endif - } - else { - assert(0); - } - return mark_julia_type(r, true, (jl_value_t*)jl_any_type, ctx); -#else + return mark_julia_type(emit_arraylen(argv[0], args[1], ctx), false, jl_long_type, ctx); case pointerref: - return emit_pointerref(args[1], args[2], args[3], ctx); + return emit_pointerref(argv, ctx); case pointerset: - return emit_pointerset(args[1], args[2], args[3], args[4], ctx); - case box: - return generic_box(args[1], args[2], ctx); - case unbox: - return generic_unbox(args[1], args[2], ctx); // TODO: replace with generic_box + return emit_pointerset(argv, ctx); + case bitcast: + return generic_bitcast(argv, ctx); case trunc_int: - return generic_trunc(args[1], args[2], ctx, false, false); - case checked_trunc_sint: - return generic_trunc(args[1], args[2], ctx, true, true); + return generic_cast(f, generic_trunc, argv, ctx, true, true); case checked_trunc_uint: - return generic_trunc(args[1], args[2], ctx, true, false); + return generic_cast(f, generic_trunc_uchecked, argv, ctx, true, true); + case checked_trunc_sint: + return generic_cast(f, generic_trunc_schecked, argv, ctx, true, true); case sext_int: - return generic_sext(args[1], args[2], ctx); + return generic_cast(f, generic_sext, argv, ctx, true, true); case zext_int: - return generic_zext(args[1], args[2], ctx); - - case uitofp: { - jl_value_t *bt = staticeval_bitstype(args[1], "uitofp", ctx); - if (!bt) return jl_cgval_t(); - int nb = get_bitstype_nbits(bt); - Value *xi = JL_INT(auto_unbox(args[2],ctx)); - if (xi->getType() == T_void) return jl_cgval_t(); // auto_unbox threw an error - return mark_julia_type(builder.CreateUIToFP(xi, FTnbits(nb)), false, bt, ctx); - } - - case sitofp: { - jl_value_t *bt = staticeval_bitstype(args[1], "sitofp", ctx); - if (!bt) return jl_cgval_t(); - int nb = get_bitstype_nbits(bt); - Value *xi = JL_INT(auto_unbox(args[2],ctx)); - if (xi->getType() == T_void) return jl_cgval_t(); // auto_unbox threw an error - return mark_julia_type(builder.CreateSIToFP(xi, FTnbits(nb)), false, bt, ctx); - } + return generic_cast(f, generic_zext, argv, ctx, true, true); + case uitofp: + return generic_cast(f, generic_uitofp, argv, ctx, false, true); + case sitofp: + return generic_cast(f, generic_sitofp, argv, ctx, false, true); + case fptoui: + return generic_cast(f, generic_fptoui, argv, ctx, true, false); + case fptosi: + return generic_cast(f, generic_fptosi, argv, ctx, true, false); + case fptrunc: + return generic_cast(f, generic_fptrunc, argv, ctx, false, false); + case fpext: + return generic_cast(f, generic_fpext, argv, ctx, false, false); case fptoui_auto: { - Value *x = FP(auto_unbox(args[1], ctx)); - if (x->getType() == T_void) return 
jl_cgval_t(); // auto_unbox threw an error - return mark_julia_type( - builder.CreateFPToUI(FP(x), JL_INTT(x->getType())), - false, - JL_JLUINTT(x->getType()), ctx); - } - case fptoui: { - jl_value_t *bt = staticeval_bitstype(args[1], "sitofp", ctx); - if (!bt) return jl_cgval_t(); - int nb = get_bitstype_nbits(bt); - Value *xf = FP(auto_unbox(args[2],ctx)); - if (xf->getType() == T_void) return jl_cgval_t(); // auto_unbox threw an error - return mark_julia_type(builder.CreateFPToUI(xf, Type::getIntNTy(jl_LLVMContext, nb)), false, bt, ctx); + // deprecated + const jl_cgval_t &x = argv[0]; + if (!jl_is_bitstype(x.typ)) + return emit_runtime_call(f, argv, nargs, ctx); + Type *xt = bitstype_to_llvm(x.typ); + Type *to = INTT(xt); + xt = FLOATT(xt); + if (!xt) + return emit_runtime_call(f, argv, nargs, ctx); + Value *from = emit_unbox(xt, x, x.typ); + Value *ans = builder.CreateFPToUI(from, to); + return mark_julia_type(ans, false, JL_JLUINTT(xt), ctx); } case fptosi_auto: { - Value *x = FP(auto_unbox(args[1], ctx)); - return mark_julia_type( - builder.CreateFPToSI(FP(x), JL_INTT(x->getType())), - false, - JL_JLSINTT(x->getType()), ctx); - } - case fptosi: { - jl_value_t *bt = staticeval_bitstype(args[1], "sitofp", ctx); - if (!bt) return jl_cgval_t(); - int nb = get_bitstype_nbits(bt); - Value *xf = FP(auto_unbox(args[2],ctx)); - if (xf->getType() == T_void) return jl_cgval_t(); // auto_unbox threw an error - return mark_julia_type(builder.CreateFPToSI(xf, Type::getIntNTy(jl_LLVMContext, nb)), false, bt, ctx); - } - - case fptrunc: { - jl_value_t *bt = staticeval_bitstype(args[1], "sitofp", ctx); - if (!bt) return jl_cgval_t(); - int nb = get_bitstype_nbits(bt); - Value *xf = FP(auto_unbox(args[2],ctx)); - if (xf->getType() == T_void) return jl_cgval_t(); // auto_unbox threw an error - return mark_julia_type(builder.CreateFPTrunc(xf, FTnbits(nb)), false, bt, ctx); - } - - case fpext: { - jl_value_t *bt = staticeval_bitstype(args[1], "sitofp", ctx); - if (!bt) return jl_cgval_t(); - int nb = get_bitstype_nbits(bt); - Value *x = auto_unbox(args[2],ctx); - if (x->getType() == T_void) return jl_cgval_t(); // auto_unbox threw an error -#ifdef JL_NEED_FLOATTEMP_VAR - // Target platform might carry extra precision. - // Force rounding to single precision first. The reason is that it's - // fine to keep working in extended precision as long as it's - // understood that everything is implicitly rounded to 23 bits, - // but if we start looking at more bits we need to actually do the - // rounding first instead of carrying around incorrect low bits. 
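A minimal sketch of the float-to-integer intrinsics whose open-coded cases are folded into `generic_cast` above; results assume in-range inputs, since out-of-range values are unspecified at this level.

Core.Intrinsics.fptosi(Int64, -2.7)     # -2: truncates toward zero
Core.Intrinsics.fptoui(UInt64, 2.7)     # 0x0000000000000002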
- Value *jlfloattemp_var = emit_static_alloca(FT(x->getType())); - builder.CreateStore(FP(x), jlfloattemp_var); - x = builder.CreateLoad(jlfloattemp_var, true); -#endif - return mark_julia_type(builder.CreateFPExt(x, FTnbits(nb)), false, bt, ctx); + // deprecated + const jl_cgval_t &x = argv[0]; + if (!jl_is_bitstype(x.typ)) + return emit_runtime_call(f, argv, nargs, ctx); + Type *xt = bitstype_to_llvm(x.typ); + Type *to = INTT(xt); + xt = FLOATT(xt); + if (!xt) + return emit_runtime_call(f, argv, nargs, ctx); + Value *from = emit_unbox(xt, x, x.typ); + Value *ans = builder.CreateFPToSI(from, to); + return mark_julia_type(ans, false, JL_JLSINTT(xt), ctx); } case select_value: { - Value *isfalse = emit_condition(args[1], "select_value", ctx); // emit the first argument - jl_value_t *t1 = expr_type(args[2], ctx); - jl_value_t *t2 = expr_type(args[3], ctx); - bool isboxed; - Type *llt1 = julia_type_to_llvm(t1, &isboxed); - Value *ifelse_result; + Value *isfalse = emit_condition(argv[0], "select_value", ctx); // emit the first argument // emit X and Y arguments - jl_cgval_t x = emit_expr(args[2], ctx); - jl_cgval_t y = emit_expr(args[3], ctx); + const jl_cgval_t &x = argv[1]; + const jl_cgval_t &y = argv[2]; + jl_value_t *t1 = x.typ; + jl_value_t *t2 = y.typ; // check the return value was valid - if (x.typ == jl_bottom_type && y.typ == jl_bottom_type) + if (t1 == jl_bottom_type && t2 == jl_bottom_type) return jl_cgval_t(); // undefined - if (x.typ == jl_bottom_type) + if (t1 == jl_bottom_type) return y; - if (y.typ == jl_bottom_type) + if (t2 == jl_bottom_type) return x; + + Value *ifelse_result; + bool isboxed; + Type *llt1 = julia_type_to_llvm(t1, &isboxed); if (t1 != t2) isboxed = true; if (!isboxed) { @@ -1040,129 +901,165 @@ static jl_cgval_t emit_intrinsic(intrinsic f, jl_value_t **args, size_t nargs, return mark_julia_type(ifelse_result, isboxed, jt, ctx); } + case not_int: { + const jl_cgval_t &x = argv[0]; + if (!jl_is_bitstype(x.typ)) + return emit_runtime_call(f, argv, nargs, ctx); + Type *xt = INTT(bitstype_to_llvm(x.typ)); + Value *from = emit_unbox(xt, x, x.typ); + Value *ans; + if (x.typ == (jl_value_t*)jl_bool_type) + ans = builder.CreateXor(from, ConstantInt::get(T_int8, 1, true)); + else + ans = builder.CreateXor(from, ConstantInt::get(xt, -1, true)); + return mark_julia_type(ans, false, x.typ, ctx); + } + + case powi_llvm: { + const jl_cgval_t &x = argv[0]; + const jl_cgval_t &y = argv[1]; + if (!jl_is_bitstype(x.typ) || !jl_is_bitstype(y.typ) || jl_datatype_size(y.typ) != 4) + return emit_runtime_call(f, argv, nargs, ctx); + Type *xt = FLOATT(bitstype_to_llvm(x.typ)); + Type *yt = T_int32; + if (!xt) + return emit_runtime_call(f, argv, nargs, ctx); + + Value *xv = emit_unbox(xt, x, x.typ); + Value *yv = emit_unbox(yt, y, y.typ); +#if JL_LLVM_VERSION >= 30600 + Value *powi = Intrinsic::getDeclaration(jl_Module, Intrinsic::powi, makeArrayRef(xt)); +#if JL_LLVM_VERSION >= 30700 + Value *ans = builder.CreateCall(powi, {xv, yv}); +#else + Value *ans = builder.CreateCall2(powi, xv, yv); +#endif +#else + // issue #6506 + Value *ans = builder.CreateCall2(prepare_call(xt == T_float64 ? 
jlpow_func : jlpowf_func), + xv, builder.CreateSIToFP(yv, xt)); +#endif + return mark_julia_type(ans, false, x.typ, ctx); + } + default: { - if (nargs < 1) jl_error("invalid intrinsic call"); - jl_cgval_t xinfo = emit_expr(args[1], ctx); - Value *x = auto_unbox(xinfo, ctx); - if (!x || type_is_ghost(x->getType())) { - emit_error("invalid intrinsic argument at 1", ctx); - return jl_cgval_t(); + assert(nargs >= 1 && "invalid nargs for intrinsic call"); + const jl_cgval_t &xinfo = argv[0]; + + // verify argument types + if (!jl_is_bitstype(xinfo.typ)) + return emit_runtime_call(f, argv, nargs, ctx); + Type *xtyp = bitstype_to_llvm(xinfo.typ); + if (float_func[f]) + xtyp = FLOATT(xtyp); + else + xtyp = INTT(xtyp); + if (!xtyp) + return emit_runtime_call(f, argv, nargs, ctx); + + Type **argt = (Type**)alloca(sizeof(Type*) * nargs); + argt[0] = xtyp; + + if (f == shl_int || f == lshr_int || f == ashr_int) { + if (!jl_is_bitstype(argv[1].typ)) + return emit_runtime_call(f, argv, nargs, ctx); + argt[1] = INTT(bitstype_to_llvm(argv[1].typ)); } - Value *y = NULL; - if (nargs>1) { - y = auto_unbox(args[2], ctx); - if (!y || type_is_ghost(y->getType())) { - emit_error("invalid intrinsic argument at 2", ctx); - return jl_cgval_t(); + else { + for (size_t i = 1; i < nargs; ++i) { + if (xinfo.typ != argv[i].typ) + return emit_runtime_call(f, argv, nargs, ctx); + argt[i] = xtyp; } } - Value *z = NULL; - if (nargs>2) { - z = auto_unbox(args[3], ctx); - if (!z || type_is_ghost(z->getType())) { - emit_error("invalid intrinsic argument at 3", ctx); - return jl_cgval_t(); - } + + // unbox the arguments + Value **argvalues = (Value**)alloca(sizeof(Value*) * nargs); + for (size_t i = 0; i < nargs; ++i) { + argvalues[i] = emit_unbox(argt[i], argv[i], argv[i].typ); } - jl_value_t *newtyp = NULL; - // TODO: compare the type validity of x,y,z before emitting the intrinsic - Value *r; - if (f == not_int && xinfo.typ == (jl_value_t*)jl_bool_type) - r = builder.CreateXor(x, ConstantInt::get(T_int8, 1, true)); - else - r = emit_untyped_intrinsic(f, x, y, z, nargs, ctx, (jl_datatype_t**)&newtyp, xinfo.typ); - if (!newtyp && r->getType() != x->getType()) - // cast back to the exact original type (e.g. float vs. int) before remarking as a julia type - r = emit_bitcast(r, x->getType()); + // call the intrinsic + jl_value_t *newtyp = NULL; + Value *r = emit_untyped_intrinsic(f, argvalues, nargs, ctx, (jl_datatype_t**)&newtyp, xinfo.typ); if (r->getType() == T_int1) r = builder.CreateZExt(r, T_int8); return mark_julia_type(r, false, newtyp ? newtyp : xinfo.typ, ctx); } -#endif } - assert(0); + assert(0 && "unreachable"); } -static Value *emit_untyped_intrinsic(intrinsic f, Value *x, Value *y, Value *z, size_t nargs, - jl_codectx_t *ctx, jl_datatype_t **newtyp, jl_value_t* xtyp) +static Value *emit_untyped_intrinsic(intrinsic f, Value **argvalues, size_t nargs, + jl_codectx_t *ctx, jl_datatype_t **newtyp, jl_value_t *xtyp) { + Value *x = nargs > 0 ? argvalues[0] : NULL; + Value *y = nargs > 1 ? argvalues[1] : NULL; + Value *z = nargs > 2 ? 
argvalues[2] : NULL; Type *t = x->getType(); - Value *fy; - Value *den; - Value *typemin; + switch (f) { case neg_int: #if JL_LLVM_VERSION >= 30700 - return builder.CreateNeg(JL_INT(x)); + return builder.CreateNeg(x); #else - return builder.CreateSub(ConstantInt::get(t, 0), JL_INT(x)); + return builder.CreateSub(ConstantInt::get(t, 0), x); #endif - case add_int: return builder.CreateAdd(JL_INT(x), JL_INT(y)); - case sub_int: return builder.CreateSub(JL_INT(x), JL_INT(y)); - case mul_int: return builder.CreateMul(JL_INT(x), JL_INT(y)); - case sdiv_int: return builder.CreateSDiv(JL_INT(x), JL_INT(y)); - case udiv_int: return builder.CreateUDiv(JL_INT(x), JL_INT(y)); - case srem_int: return builder.CreateSRem(JL_INT(x), JL_INT(y)); - case urem_int: return builder.CreateURem(JL_INT(x), JL_INT(y)); + case add_int: return builder.CreateAdd(x, y); + case sub_int: return builder.CreateSub(x, y); + case mul_int: return builder.CreateMul(x, y); + case sdiv_int: return builder.CreateSDiv(x, y); + case udiv_int: return builder.CreateUDiv(x, y); + case srem_int: return builder.CreateSRem(x, y); + case urem_int: return builder.CreateURem(x, y); // Implements IEEE negate. Unfortunately there is no compliant way // to implement this in LLVM 3.4, though there are two different idioms // that do the correct thing on LLVM <= 3.3 and >= 3.5 respectively. // See issue #7868 #if JL_LLVM_VERSION >= 30500 - case neg_float: return math_builder(ctx)().CreateFSub(ConstantFP::get(FT(t), -0.0), FP(x)); - case neg_float_fast: return math_builder(ctx, true)().CreateFNeg(FP(x)); + case neg_float: return math_builder(ctx)().CreateFSub(ConstantFP::get(t, -0.0), x); + case neg_float_fast: return math_builder(ctx, true)().CreateFNeg(x); #else case neg_float: - return math_builder(ctx)().CreateFMul(ConstantFP::get(FT(t), -1.0), FP(x)); + return math_builder(ctx)().CreateFMul(ConstantFP::get(t, -1.0), x); case neg_float_fast: - return math_builder(ctx, true)().CreateFMul(ConstantFP::get(FT(t), -1.0), FP(x)); + return math_builder(ctx, true)().CreateFMul(ConstantFP::get(t, -1.0), x); #endif - case add_float: return math_builder(ctx)().CreateFAdd(FP(x), FP(y)); - case sub_float: return math_builder(ctx)().CreateFSub(FP(x), FP(y)); - case mul_float: return math_builder(ctx)().CreateFMul(FP(x), FP(y)); - case div_float: return math_builder(ctx)().CreateFDiv(FP(x), FP(y)); - case rem_float: return math_builder(ctx)().CreateFRem(FP(x), FP(y)); - case add_float_fast: return math_builder(ctx, true)().CreateFAdd(FP(x), FP(y)); - case sub_float_fast: return math_builder(ctx, true)().CreateFSub(FP(x), FP(y)); - case mul_float_fast: return math_builder(ctx, true)().CreateFMul(FP(x), FP(y)); - case div_float_fast: return math_builder(ctx, true)().CreateFDiv(FP(x), FP(y)); - case rem_float_fast: return math_builder(ctx, true)().CreateFRem(FP(x), FP(y)); + case add_float: return math_builder(ctx)().CreateFAdd(x, y); + case sub_float: return math_builder(ctx)().CreateFSub(x, y); + case mul_float: return math_builder(ctx)().CreateFMul(x, y); + case div_float: return math_builder(ctx)().CreateFDiv(x, y); + case rem_float: return math_builder(ctx)().CreateFRem(x, y); + case add_float_fast: return math_builder(ctx, true)().CreateFAdd(x, y); + case sub_float_fast: return math_builder(ctx, true)().CreateFSub(x, y); + case mul_float_fast: return math_builder(ctx, true)().CreateFMul(x, y); + case div_float_fast: return math_builder(ctx, true)().CreateFDiv(x, y); + case rem_float_fast: return math_builder(ctx, true)().CreateFRem(x, y); case fma_float: 
{ assert(y->getType() == x->getType()); assert(z->getType() == y->getType()); - Value *fmaintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::fma, - ArrayRef(x->getType())); + Value *fmaintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::fma, makeArrayRef(t)); #if JL_LLVM_VERSION >= 30700 - return builder.CreateCall(fmaintr,{ FP(x), FP(y), FP(z) }); + return builder.CreateCall(fmaintr, {x, y, z}); #else - return builder.CreateCall3(fmaintr, FP(x), FP(y), FP(z)); + return builder.CreateCall3(fmaintr, x, y, z); #endif } - case muladd_float: + case muladd_float: { #if JL_LLVM_VERSION >= 30400 - { assert(y->getType() == x->getType()); assert(z->getType() == y->getType()); + Value *muladdintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::fmuladd, makeArrayRef(t)); #if JL_LLVM_VERSION >= 30700 - return builder.CreateCall + return builder.CreateCall(muladdintr, {x, y, z}); #else - return builder.CreateCall3 + return builder.CreateCall3(muladdintr, x, y, z); #endif - (Intrinsic::getDeclaration(jl_Module, Intrinsic::fmuladd, - ArrayRef(x->getType())), -#if JL_LLVM_VERSION >= 30700 - {FP(x), FP(y), FP(z)} #else - FP(x), FP(y), FP(z) + return math_builder(ctx, true)().CreateFAdd(builder.CreateFMul(x, y), z); #endif - ); } -#else - return math_builder(ctx, true)(). - CreateFAdd(builder.CreateFMul(FP(x), FP(y)), FP(z)); -#endif case checked_sadd_int: case checked_uadd_int: @@ -1170,26 +1067,24 @@ static Value *emit_untyped_intrinsic(intrinsic f, Value *x, Value *y, Value *z, case checked_usub_int: case checked_smul_int: case checked_umul_int: { - Value *ix = JL_INT(x); Value *iy = JL_INT(y); - assert(ix->getType() == iy->getType()); - Value *intr = - Intrinsic::getDeclaration(jl_Module, - f==checked_sadd_int ? - Intrinsic::sadd_with_overflow : - (f==checked_uadd_int ? - Intrinsic::uadd_with_overflow : - (f==checked_ssub_int ? - Intrinsic::ssub_with_overflow : - (f==checked_usub_int ? - Intrinsic::usub_with_overflow : - (f==checked_smul_int ? - Intrinsic::smul_with_overflow : - Intrinsic::umul_with_overflow)))), - ArrayRef(ix->getType())); + assert(x->getType() == y->getType()); + Intrinsic::ID intr_id = + (f == checked_sadd_int ? + Intrinsic::sadd_with_overflow : + (f == checked_uadd_int ? + Intrinsic::uadd_with_overflow : + (f == checked_ssub_int ? + Intrinsic::ssub_with_overflow : + (f == checked_usub_int ? + Intrinsic::usub_with_overflow : + (f == checked_smul_int ? 
+ Intrinsic::smul_with_overflow : + Intrinsic::umul_with_overflow))))); + Value *intr = Intrinsic::getDeclaration(jl_Module, intr_id, makeArrayRef(t)); #if JL_LLVM_VERSION >= 30700 - Value *res = builder.CreateCall(intr,{ix, iy}); + Value *res = builder.CreateCall(intr, {x, y}); #else - Value *res = builder.CreateCall2(intr, ix, iy); + Value *res = builder.CreateCall2(intr, x, y); #endif Value *val = builder.CreateExtractValue(res, ArrayRef<unsigned>(0)); Value *obit = builder.CreateExtractValue(res, ArrayRef<unsigned>(1)); @@ -1198,7 +1093,7 @@ static Value *emit_untyped_intrinsic(intrinsic f, Value *x, Value *y, Value *z, jl_value_t *params[2]; params[0] = xtyp; params[1] = (jl_value_t*)jl_bool_type; - jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params,2); + jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2); *newtyp = tuptyp; Value *tupval; @@ -1208,285 +1103,205 @@ static Value *emit_untyped_intrinsic(intrinsic f, Value *x, Value *y, Value *z, return tupval; } - case checked_sdiv_int: - den = JL_INT(y); - t = den->getType(); - x = JL_INT(x); - - typemin = builder.CreateShl(ConstantInt::get(t,1), - x->getType()->getPrimitiveSizeInBits()-1); - raise_exception_unless(builder. - CreateAnd(builder. - CreateICmpNE(den, ConstantInt::get(t,0)), - builder. - CreateOr(builder. - CreateICmpNE(den, - ConstantInt::get(t,-1,true)), - builder.CreateICmpNE(x, typemin))), - literal_pointer_val(jl_diverror_exception), ctx); + case checked_sdiv_int: { + Value *typemin = builder.CreateShl(ConstantInt::get(t, 1), t->getPrimitiveSizeInBits() - 1); + raise_exception_unless( + builder.CreateAnd( + builder.CreateICmpNE(y, ConstantInt::get(t, 0)), + builder.CreateOr( + builder.CreateICmpNE(y, ConstantInt::get(t, -1, true)), + builder.CreateICmpNE(x, typemin))), + literal_pointer_val(jl_diverror_exception), ctx); - return builder.CreateSDiv(x, den); + return builder.CreateSDiv(x, y); + } case checked_udiv_int: - den = JL_INT(y); - t = den->getType(); - raise_exception_unless(builder.CreateICmpNE(den, ConstantInt::get(t,0)), + raise_exception_unless(builder.CreateICmpNE(y, ConstantInt::get(t, 0)), literal_pointer_val(jl_diverror_exception), ctx); - return builder.CreateUDiv(JL_INT(x), den); + return builder.CreateUDiv(x, y); case checked_srem_int: - return emit_checked_srem_int(JL_INT(x), JL_INT(y), ctx); + return emit_checked_srem_int(x, y, ctx); case checked_urem_int: - den = JL_INT(y); - t = den->getType(); - raise_exception_unless(builder.CreateICmpNE(den, ConstantInt::get(t,0)), + raise_exception_unless(builder.CreateICmpNE(y, ConstantInt::get(t, 0)), literal_pointer_val(jl_diverror_exception), ctx); - return builder.CreateURem(JL_INT(x), den); + return builder.CreateURem(x, y); case check_top_bit: // raise InexactError if argument's top bit is set - x = JL_INT(x); - raise_exception_if(builder. - CreateTrunc(builder.
- CreateLShr(x, ConstantInt::get(t, t->getPrimitiveSizeInBits()-1)), - T_int1), - literal_pointer_val(jl_inexact_exception), ctx); + raise_exception_if( + builder.CreateTrunc( + builder.CreateLShr(x, ConstantInt::get(t, t->getPrimitiveSizeInBits() - 1)), + T_int1), + literal_pointer_val(jl_inexact_exception), ctx); return x; - case eq_int: *newtyp = jl_bool_type; return builder.CreateICmpEQ(JL_INT(x), JL_INT(y)); - case ne_int: *newtyp = jl_bool_type; return builder.CreateICmpNE(JL_INT(x), JL_INT(y)); - case slt_int: *newtyp = jl_bool_type; return builder.CreateICmpSLT(JL_INT(x), JL_INT(y)); - case ult_int: *newtyp = jl_bool_type; return builder.CreateICmpULT(JL_INT(x), JL_INT(y)); - case sle_int: *newtyp = jl_bool_type; return builder.CreateICmpSLE(JL_INT(x), JL_INT(y)); - case ule_int: *newtyp = jl_bool_type; return builder.CreateICmpULE(JL_INT(x), JL_INT(y)); + case eq_int: *newtyp = jl_bool_type; return builder.CreateICmpEQ(x, y); + case ne_int: *newtyp = jl_bool_type; return builder.CreateICmpNE(x, y); + case slt_int: *newtyp = jl_bool_type; return builder.CreateICmpSLT(x, y); + case ult_int: *newtyp = jl_bool_type; return builder.CreateICmpULT(x, y); + case sle_int: *newtyp = jl_bool_type; return builder.CreateICmpSLE(x, y); + case ule_int: *newtyp = jl_bool_type; return builder.CreateICmpULE(x, y); - case eq_float: *newtyp = jl_bool_type; return math_builder(ctx)().CreateFCmpOEQ(FP(x), FP(y)); - case ne_float: *newtyp = jl_bool_type; return math_builder(ctx)().CreateFCmpUNE(FP(x), FP(y)); - case lt_float: *newtyp = jl_bool_type; return math_builder(ctx)().CreateFCmpOLT(FP(x), FP(y)); - case le_float: *newtyp = jl_bool_type; return math_builder(ctx)().CreateFCmpOLE(FP(x), FP(y)); + case eq_float: *newtyp = jl_bool_type; return math_builder(ctx)().CreateFCmpOEQ(x, y); + case ne_float: *newtyp = jl_bool_type; return math_builder(ctx)().CreateFCmpUNE(x, y); + case lt_float: *newtyp = jl_bool_type; return math_builder(ctx)().CreateFCmpOLT(x, y); + case le_float: *newtyp = jl_bool_type; return math_builder(ctx)().CreateFCmpOLE(x, y); - case eq_float_fast: *newtyp = jl_bool_type; return math_builder(ctx, true)().CreateFCmpOEQ(FP(x), FP(y)); - case ne_float_fast: *newtyp = jl_bool_type; return math_builder(ctx, true)().CreateFCmpUNE(FP(x), FP(y)); - case lt_float_fast: *newtyp = jl_bool_type; return math_builder(ctx, true)().CreateFCmpOLT(FP(x), FP(y)); - case le_float_fast: *newtyp = jl_bool_type; return math_builder(ctx, true)().CreateFCmpOLE(FP(x), FP(y)); + case eq_float_fast: *newtyp = jl_bool_type; return math_builder(ctx, true)().CreateFCmpOEQ(x, y); + case ne_float_fast: *newtyp = jl_bool_type; return math_builder(ctx, true)().CreateFCmpUNE(x, y); + case lt_float_fast: *newtyp = jl_bool_type; return math_builder(ctx, true)().CreateFCmpOLT(x, y); + case le_float_fast: *newtyp = jl_bool_type; return math_builder(ctx, true)().CreateFCmpOLE(x, y); case fpiseq: { *newtyp = jl_bool_type; - Value *xi = JL_INT(x); - Value *yi = JL_INT(y); - x = FP(x); - fy = FP(y); + Type *it = INTT(t); + Value *xi = builder.CreateBitCast(x, it); + Value *yi = builder.CreateBitCast(y, it); return builder.CreateOr(builder.CreateAnd(builder.CreateFCmpUNO(x, x), - builder.CreateFCmpUNO(fy, fy)), + builder.CreateFCmpUNO(y, y)), builder.CreateICmpEQ(xi, yi)); } case fpislt: { *newtyp = jl_bool_type; - Value *xi = JL_INT(x); - Value *yi = JL_INT(y); - x = FP(x); - fy = FP(y); + Type *it = INTT(t); + Value *xi = builder.CreateBitCast(x, it); + Value *yi = builder.CreateBitCast(y, it); return builder.CreateOr( 
builder.CreateAnd( builder.CreateFCmpORD(x, x), - builder.CreateFCmpUNO(fy, fy) - ), + builder.CreateFCmpUNO(y, y)), builder.CreateAnd( - builder.CreateFCmpORD(x, fy), + builder.CreateFCmpORD(x, y), builder.CreateOr( builder.CreateAnd( - builder.CreateICmpSGE(xi, ConstantInt::get(xi->getType(), 0)), - builder.CreateICmpSLT(xi, yi) - ), + builder.CreateICmpSGE(xi, ConstantInt::get(it, 0)), + builder.CreateICmpSLT(xi, yi)), builder.CreateAnd( - builder.CreateICmpSLT(xi, ConstantInt::get(xi->getType(), 0)), - builder.CreateICmpUGT(xi, yi) - ) - ) - ) - ); + builder.CreateICmpSLT(xi, ConstantInt::get(it, 0)), + builder.CreateICmpUGT(xi, yi))))); } - case and_int: return builder.CreateAnd(JL_INT(x), JL_INT(y)); - case or_int: return builder.CreateOr(JL_INT(x), JL_INT(y)); - case xor_int: return builder.CreateXor(JL_INT(x), JL_INT(y)); - case not_int: return builder.CreateXor(JL_INT(x), ConstantInt::get(t, -1, true)); + case and_int: return builder.CreateAnd(x, y); + case or_int: return builder.CreateOr(x, y); + case xor_int: return builder.CreateXor(x, y); + case shl_int: - x = JL_INT(x); y = JL_INT(y); - return builder. - CreateSelect(builder. - CreateICmpUGE(y, ConstantInt::get(y->getType(), - x->getType()->getPrimitiveSizeInBits())), - ConstantInt::get(x->getType(),0), - builder.CreateShl(x, uint_cnvt(t,y))); + return builder.CreateSelect( + builder.CreateICmpUGE(y, ConstantInt::get(y->getType(), + t->getPrimitiveSizeInBits())), + ConstantInt::get(t, 0), + builder.CreateShl(x, uint_cnvt(t, y))); case lshr_int: - x = JL_INT(x); y = JL_INT(y); - return builder. - CreateSelect(builder. - CreateICmpUGE(y, ConstantInt::get(y->getType(), - x->getType()->getPrimitiveSizeInBits())), - ConstantInt::get(x->getType(),0), - builder.CreateLShr(x, uint_cnvt(t,y))); + return builder.CreateSelect( + builder.CreateICmpUGE(y, ConstantInt::get(y->getType(), + t->getPrimitiveSizeInBits())), + ConstantInt::get(t, 0), + builder.CreateLShr(x, uint_cnvt(t, y))); case ashr_int: - x = JL_INT(x); y = JL_INT(y); - return builder. - CreateSelect(builder. 
- CreateICmpUGE(y, ConstantInt::get(y->getType(), - x->getType()->getPrimitiveSizeInBits())), - builder.CreateAShr(x, ConstantInt::get(x->getType(), - x->getType()->getPrimitiveSizeInBits()-1)), - builder.CreateAShr(x, uint_cnvt(t,y))); - case bswap_int: - x = JL_INT(x); - return builder.CreateCall( - Intrinsic::getDeclaration(jl_Module, Intrinsic::bswap, - ArrayRef<Type*>(x->getType())), x); - case ctpop_int: - x = JL_INT(x); - return builder.CreateCall( - Intrinsic::getDeclaration(jl_Module, Intrinsic::ctpop, - ArrayRef<Type*>(x->getType())), x); + return builder.CreateSelect( + builder.CreateICmpUGE(y, ConstantInt::get(y->getType(), + t->getPrimitiveSizeInBits())), + builder.CreateAShr(x, ConstantInt::get(t, t->getPrimitiveSizeInBits() - 1)), + builder.CreateAShr(x, uint_cnvt(t, y))); + + case bswap_int: { + Value *bswapintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::bswap, makeArrayRef(t)); + return builder.CreateCall(bswapintr, x); + } + case ctpop_int: { + Value *ctpopintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::ctpop, makeArrayRef(t)); + return builder.CreateCall(ctpopintr, x); + } case ctlz_int: { - x = JL_INT(x); - Type *types[1] = {x->getType()}; - Value *ctlz = Intrinsic::getDeclaration(jl_Module, Intrinsic::ctlz, - ArrayRef<Type*>(types)); + Value *ctlz = Intrinsic::getDeclaration(jl_Module, Intrinsic::ctlz, makeArrayRef(t)); + y = ConstantInt::get(T_int1, 0); #if JL_LLVM_VERSION >= 30700 - return builder.CreateCall(ctlz, {x, ConstantInt::get(T_int1,0)}); + return builder.CreateCall(ctlz, {x, y}); #else - return builder.CreateCall2(ctlz, x, ConstantInt::get(T_int1,0)); + return builder.CreateCall2(ctlz, x, y); #endif } case cttz_int: { - x = JL_INT(x); - Type *types[1] = {x->getType()}; - Value *cttz = Intrinsic::getDeclaration(jl_Module, Intrinsic::cttz, ArrayRef<Type*>(types)); + Value *cttz = Intrinsic::getDeclaration(jl_Module, Intrinsic::cttz, makeArrayRef(t)); + y = ConstantInt::get(T_int1, 0); #if JL_LLVM_VERSION >= 30700 - return builder.CreateCall(cttz, {x, ConstantInt::get(T_int1, 0)}); + return builder.CreateCall(cttz, {x, y}); #else - return builder.CreateCall2(cttz, x, ConstantInt::get(T_int1, 0)); + return builder.CreateCall2(cttz, x, y); #endif } - case abs_float: - { - x = FP(x); + case abs_float: { #if JL_LLVM_VERSION >= 30400 - return builder.CreateCall( - Intrinsic::getDeclaration(jl_Module, Intrinsic::fabs, - ArrayRef<Type*>(x->getType())), - x); + Value *absintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::fabs, makeArrayRef(t)); + return builder.CreateCall(absintr, x); #else - Type *intt = JL_INTT(x->getType()); - Value *bits = emit_bitcast(FP(x), intt); + Type *intt = INTT(t); + Value *bits = builder.CreateBitCast(x, intt); Value *absbits = builder.CreateAnd(bits, - ConstantInt::get(intt, APInt::getSignedMaxValue(((IntegerType*)intt)->getBitWidth()))); - return emit_bitcast(absbits, x->getType()); + ConstantInt::get(intt, APInt::getSignedMaxValue(cast<IntegerType>(intt)->getBitWidth()))); + return builder.CreateBitCast(absbits, t); #endif } - case copysign_float: - { - x = FP(x); - fy = FP(y); - Type *intt = JL_INTT(x->getType()); - Value *bits = emit_bitcast(x, intt); - Value *sbits = emit_bitcast(fy, intt); - unsigned nb = ((IntegerType*)intt)->getBitWidth(); + case copysign_float: { + Value *bits = builder.CreateBitCast(x, t); + Value *sbits = builder.CreateBitCast(y, t); + unsigned nb = cast<IntegerType>(t)->getBitWidth(); APInt notsignbit = APInt::getSignedMaxValue(nb); - APInt signbit0(nb, 0); signbit0.setBit(nb-1); - Value *rbits = - builder.CreateOr(builder.CreateAnd(bits,
ConstantInt::get(intt, - notsignbit)), - builder.CreateAnd(sbits, - ConstantInt::get(intt, - signbit0))); - return emit_bitcast(rbits, x->getType()); + APInt signbit0(nb, 0); signbit0.setBit(nb - 1); + return builder.CreateOr( + builder.CreateAnd(bits, ConstantInt::get(t, notsignbit)), + builder.CreateAnd(sbits, ConstantInt::get(t, signbit0))); } - case flipsign_int: - { - x = JL_INT(x); - fy = JL_INT(y); - Type *intt = x->getType(); + case flipsign_int: { ConstantInt *cx = dyn_cast<ConstantInt>(x); - ConstantInt *cy = dyn_cast<ConstantInt>(fy); + ConstantInt *cy = dyn_cast<ConstantInt>(y); if (cx && cy) { APInt ix = cx->getValue(); APInt iy = cy->getValue(); - return ConstantInt::get(intt, iy.isNonNegative() ? ix : -ix); + return ConstantInt::get(t, iy.isNonNegative() ? ix : -ix); } if (cy) { APInt iy = cy->getValue(); - return iy.isNonNegative() ? x : builder.CreateSub(ConstantInt::get(intt,0), x); + return iy.isNonNegative() ? x : builder.CreateSub(ConstantInt::get(t, 0), x); } - Value *tmp = builder.CreateAShr(fy, ConstantInt::get(intt,((IntegerType*)intt)->getBitWidth()-1)); - return builder.CreateXor(builder.CreateAdd(x,tmp),tmp); + Value *tmp = builder.CreateAShr(y, ConstantInt::get(t, cast<IntegerType>(t)->getBitWidth() - 1)); + return builder.CreateXor(builder.CreateAdd(x, tmp), tmp); } case ceil_llvm: { - x = FP(x); - return builder.CreateCall(Intrinsic::getDeclaration(jl_Module, Intrinsic::ceil, - ArrayRef<Type*>(x->getType())), - x); + Value *ceilintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::ceil, makeArrayRef(t)); + return builder.CreateCall(ceilintr, x); } case floor_llvm: { - x = FP(x); - return builder.CreateCall(Intrinsic::getDeclaration(jl_Module, Intrinsic::floor, - ArrayRef<Type*>(x->getType())), - x); + Value *floorintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::floor, makeArrayRef(t)); + return builder.CreateCall(floorintr, x); } case trunc_llvm: { - x = FP(x); - return builder.CreateCall(Intrinsic::getDeclaration(jl_Module, Intrinsic::trunc, - ArrayRef<Type*>(x->getType())), - x); + Value *truncintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::trunc, makeArrayRef(t)); + return builder.CreateCall(truncintr, x); } case rint_llvm: { - x = FP(x); - return builder.CreateCall(Intrinsic::getDeclaration(jl_Module, Intrinsic::rint, - ArrayRef<Type*>(x->getType())), - x); + Value *rintintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::rint, makeArrayRef(t)); + return builder.CreateCall(rintintr, x); } - case sqrt_llvm: { - x = FP(x); - raise_exception_unless(builder.CreateFCmpUGE(x, ConstantFP::get(x->getType(),0.0)), + case sqrt_llvm: + raise_exception_unless(builder.CreateFCmpUGE(x, ConstantFP::get(t, 0.0)), literal_pointer_val(jl_domain_exception), ctx); - return builder.CreateCall(Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, - ArrayRef<Type*>(x->getType())), - x); - } - case powi_llvm: { - x = FP(x); - y = JL_INT(y); - Type *tx = x->getType(); // TODO: LLVM expects this to be i32 -#if JL_LLVM_VERSION >= 30600 - Type *ts[1] = { tx }; - Value *powi = Intrinsic::getDeclaration(jl_Module, Intrinsic::powi, - ArrayRef<Type*>(ts)); -#if JL_LLVM_VERSION >= 30700 - return builder.CreateCall(powi, {x, y}); -#else - return builder.CreateCall2(powi, x, y); -#endif -#else - // issue #6506 - return builder.CreateCall2(prepare_call(tx == T_float64 ?
jlpow_func : jlpowf_func), - x, builder.CreateSIToFP(y, tx)); -#endif - } + // fall-through case sqrt_llvm_fast: { - x = FP(x); - return builder.CreateCall(Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, - ArrayRef(x->getType())), - x); + Value *sqrtintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sqrt, makeArrayRef(t)); + return builder.CreateCall(sqrtintr, x); } default: - assert(false); + assert(0 && "invalid intrinsic"); + abort(); } - assert(false); - return NULL; + assert(0 && "unreachable"); } #define BOX_F(ct,jl_ct) \ diff --git a/src/intrinsics.h b/src/intrinsics.h index d4d8bc3ff61ce..479b048b940a5 100644 --- a/src/intrinsics.h +++ b/src/intrinsics.h @@ -2,8 +2,7 @@ #define INTRINSICS \ /* wrap and unwrap */ \ - ALIAS(box, reinterpret) \ - ALIAS(unbox, reinterpret) \ + ADD_I(bitcast, 2) \ /* arithmetic */ \ ADD_I(neg_int, 1) \ ADD_I(add_int, 2) \ @@ -98,7 +97,6 @@ ADD_I(pointerref, 3) \ ADD_I(pointerset, 4) \ /* c interface */ \ - ALIAS(ccall, ccall) \ ADD_I(cglobal, 2) \ ALIAS(llvmcall, llvmcall) \ /* object access */ \ @@ -116,8 +114,7 @@ enum intrinsic { #undef ADD_I #undef ADD_HIDDEN #undef ALIAS - num_intrinsics, - reinterpret = box + num_intrinsics }; #ifdef __cplusplus @@ -140,9 +137,6 @@ JL_DLLEXPORT const char *jl_intrinsic_name(int f) static void (*runtime_fp[num_intrinsics])(void); static unsigned intrinsic_nargs[num_intrinsics]; -typedef jl_value_t *(*intrinsic_call_1_arg)(jl_value_t*); -typedef jl_value_t *(*intrinsic_call_2_arg)(jl_value_t*, jl_value_t*); -typedef jl_value_t *(*intrinsic_call_3_arg)(jl_value_t*, jl_value_t*, jl_value_t*); #define jl_is_intrinsic(v) jl_typeis(v,jl_intrinsic_type) #ifdef __cplusplus @@ -163,13 +157,24 @@ JL_CALLABLE(jl_f_intrinsic_call) if (!fargs) jl_error("this intrinsic must be compiled to be called"); JL_NARGS(intrinsic_call, fargs, fargs); + + union { + void (*fptr)(void); + jl_value_t *(*call1)(jl_value_t*); + jl_value_t *(*call2)(jl_value_t*, jl_value_t*); + jl_value_t *(*call3)(jl_value_t*, jl_value_t*, jl_value_t*); + jl_value_t *(*call4)(jl_value_t*, jl_value_t*, jl_value_t*, jl_value_t*); + } fptr; + fptr.fptr = runtime_fp[f]; switch (fargs) { case 1: - return ((intrinsic_call_1_arg)runtime_fp[f])(args[0]); + return fptr.call1(args[0]); case 2: - return ((intrinsic_call_2_arg)runtime_fp[f])(args[0], args[1]); + return fptr.call2(args[0], args[1]); case 3: - return ((intrinsic_call_3_arg)runtime_fp[f])(args[0], args[1], args[2]); + return fptr.call3(args[0], args[1], args[2]); + case 4: + return fptr.call4(args[0], args[1], args[2], args[3]); default: assert(0 && "unexpected number of arguments to an intrinsic function"); } @@ -199,7 +204,6 @@ void jl_init_intrinsic_properties(void) #define ADD_I(name, nargs) add_intrinsic_properties(name, nargs, (void(*)(void))&jl_##name); #define ADD_HIDDEN ADD_I #define ALIAS(alias, base) add_intrinsic_properties(alias, intrinsic_nargs[base], runtime_fp[base]); - ADD_HIDDEN(reinterpret, 2); INTRINSICS #undef ADD_I #undef ADD_HIDDEN diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 0c4ba6a9bafcc..0d478b9c86713 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -228,18 +228,16 @@ void addOptimizationPasses(PassManager *PM) #endif PM->add(createJumpThreadingPass()); // Thread jumps PM->add(createDeadStoreEliminationPass()); // Delete dead stores -#if !defined(INSTCOMBINE_BUG) +#if JL_LLVM_VERSION >= 30500 if (jl_options.opt_level >= 3) { PM->add(createSLPVectorizerPass()); // Vectorize straight-line code } #endif PM->add(createAggressiveDCEPass()); // Delete dead 
instructions -#if !defined(INSTCOMBINE_BUG) +#if JL_LLVM_VERSION >= 30500 if (jl_options.opt_level >= 3) PM->add(createInstructionCombiningPass()); // Clean up after SLP loop vectorizer -#endif -#if JL_LLVM_VERSION >= 30500 PM->add(createLoopVectorizePass()); // Vectorize loops PM->add(createInstructionCombiningPass()); // Clean up after loop vectorizer #endif diff --git a/src/jlapi.c b/src/jlapi.c index 06d7920d59835..bc5dc09f4cdc7 100644 --- a/src/jlapi.c +++ b/src/jlapi.c @@ -267,6 +267,14 @@ JL_DLLEXPORT int jl_is_debugbuild(void) #endif } +JL_DLLEXPORT int8_t jl_is_memdebug() { +#ifdef MEMDEBUG + return 1; +#else + return 0; +#endif +} + JL_DLLEXPORT jl_value_t *jl_get_julia_home(void) { return jl_cstr_to_string(jl_options.julia_home); diff --git a/src/jltypes.c b/src/jltypes.c index eef3ff32cdfdd..4701e49ffc7aa 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -178,7 +178,7 @@ JL_DLLEXPORT int jl_has_free_typevars(jl_value_t *v) } // test whether a type has vars bound by the given environment -JL_DLLEXPORT int jl_has_bound_typevars(jl_value_t *v, jl_typeenv_t *env) +static int jl_has_bound_typevars(jl_value_t *v, jl_typeenv_t *env) { if (jl_typeis(v, jl_tvar_type)) return typeenv_has(env, (jl_tvar_t*)v); @@ -218,6 +218,21 @@ JL_DLLEXPORT int jl_has_typevar(jl_value_t *t, jl_tvar_t *v) return jl_has_bound_typevars(t, &env); } +static int _jl_has_typevar_from_ua(jl_value_t *t, jl_unionall_t *ua, jl_typeenv_t *prev) +{ + jl_typeenv_t env = { ua->var, NULL, prev }; + if (jl_is_unionall(ua->body)) + return _jl_has_typevar_from_ua(t, (jl_unionall_t*)ua->body, &env); + else + return jl_has_bound_typevars(t, &env); +} + +JL_DLLEXPORT int jl_has_typevar_from_unionall(jl_value_t *t, jl_unionall_t *ua) +{ + return _jl_has_typevar_from_ua(t, ua, NULL); +} + + JL_DLLEXPORT int (jl_is_leaf_type)(jl_value_t *v) { if (jl_is_datatype(v)) { @@ -1347,6 +1362,27 @@ jl_value_t *jl_instantiate_type_with(jl_value_t *t, jl_value_t **env, size_t n) return instantiate_with(t, env, n, NULL, NULL); } +static jl_value_t *_jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals, jl_typeenv_t *prev) +{ + jl_typeenv_t en = { env->var, vals[0], prev }; + if (jl_is_unionall(env->body)) + return _jl_instantiate_type_in_env(ty, (jl_unionall_t*)env->body, vals + 1, &en); + else + return inst_type_w_(ty, &en, NULL, 1); +} + +JL_DLLEXPORT jl_value_t *jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals) +{ + jl_value_t *typ; + JL_TRY { + typ = _jl_instantiate_type_in_env(ty, env, vals, NULL); + } + JL_CATCH { + typ = jl_bottom_type; + } + return typ; +} + jl_datatype_t *jl_wrap_Type(jl_value_t *t) { return (jl_datatype_t*)jl_instantiate_unionall(jl_type_type, t); diff --git a/src/julia-parser.scm b/src/julia-parser.scm index 392490f28e709..d4fb2c9aa5aca 100644 --- a/src/julia-parser.scm +++ b/src/julia-parser.scm @@ -113,7 +113,7 @@ (define initial-reserved-words '(begin while if for try return break continue function macro quote let local global const - abstract typealias type bitstype immutable ccall do + abstract typealias type bitstype immutable do module baremodule using import export importall)) (define initial-reserved-word? (Set initial-reserved-words)) @@ -1360,17 +1360,6 @@ (if (length= imports 1) (car imports) (cons 'toplevel imports)))) - ((ccall) - (if (not (eqv? 
(peek-token s) #\()) - (error "invalid \"ccall\" syntax") - (begin - (take-token s) - (let ((al (parse-arglist s #\)))) - (if (and (length> al 1) - (memq (cadr al) '(cdecl stdcall fastcall thiscall))) - ;; place (callingconv) at end of arglist - `(ccall ,(car al) ,@(cddr al) (,(cadr al))) - `(ccall ,.al)))))) ((do) (error "invalid \"do\" syntax")) (else (error "unhandled reserved word"))))))) diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index f6020ae9ce7ef..64aa7d46cf2be 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -533,10 +533,10 @@ ,@(if (null? vararg) '() (list `(... ,(arg-name (car vararg)))))) ;; otherwise add to rest keywords - `(ccall 'jl_array_ptr_1d_push Void (tuple Any Any) - ,rkw (tuple ,elt + `(foreigncall 'jl_array_ptr_1d_push (core Void) (call (core svec) Any Any) + ,rkw 0 (tuple ,elt (call (core arrayref) ,kw - (call (top +) ,ii 1))))) + (call (top +) ,ii 1))) 0)) (map list vars vals flags)))) ;; set keywords that weren't present to their default values ,@(apply append @@ -897,7 +897,7 @@ (if (or (null? F) (null? A)) `(block ,.(reverse! stmts) - (call (core ccall) ,name ,RT (call (core svec) ,@(dots->vararg atypes)) + (foreigncall ,name ,RT (call (core svec) ,@(dots->vararg atypes)) ,.(reverse! C) ,@A)) (let* ((a (car A)) @@ -1206,7 +1206,7 @@ (= ,err true))) (= ,finally-exception (the_exception)) ,finalb - (if ,err (ccall 'jl_rethrow_other Void (tuple Any) ,finally-exception)) + (if ,err (foreigncall 'jl_rethrow_other (core Void) (call (core svec) Any) ,finally-exception 0)) ,(if hasret (if ret `(if ,ret (return ,retval) ,val) @@ -1411,20 +1411,20 @@ (if (null? stmts) (loop (cdr kw) (list* (caddr arg) `(quote ,(cadr arg)) initial-kw) stmts #t) (loop (cdr kw) initial-kw - (cons `(ccall 'jl_array_ptr_1d_push2 Void (tuple Any Any Any) - ,container - (|::| (quote ,(cadr arg)) (core Symbol)) - ,(caddr arg)) + (cons `(foreigncall 'jl_array_ptr_1d_push2 (core Void) (call (core svec) Any Any Any) + ,container 0 + (|::| (quote ,(cadr arg)) (core Symbol)) 0 + ,(caddr arg) 0) stmts) #t))) (else (loop (cdr kw) initial-kw (cons (let* ((k (make-ssavalue)) (v (make-ssavalue)) - (push-expr `(ccall 'jl_array_ptr_1d_push2 Void (tuple Any Any Any) - ,container - (|::| ,k (core Symbol)) - ,v))) + (push-expr `(foreigncall 'jl_array_ptr_1d_push2 (core Void) (call (core svec) Any Any Any) + ,container 0 + (|::| ,k (core Symbol)) 0 + ,v 0))) (if (vararg? arg) `(for (= (tuple ,k ,v) ,(cadr arg)) ,push-expr) @@ -1957,6 +1957,24 @@ (let ((f (cadr e))) (cond ((dotop? f) (expand-fuse-broadcast '() `(|.| ,(undotop f) (tuple ,@(cddr e))))) + ((and (eq? f 'ccall) (length> e 4)) + (let* ((cconv (cadddr e)) + (have-cconv (memq cconv '(cdecl stdcall fastcall thiscall llvmcall))) + (after-cconv (if have-cconv (cddddr e) (cdddr e))) + (name (caddr e)) + (RT (car after-cconv)) + (argtypes (cadr after-cconv)) + (args (cddr after-cconv))) + (begin + (if (not (and (pair? argtypes) + (eq? (car argtypes) 'tuple))) + (if (and (pair? RT) + (eq? (car RT) 'tuple)) + (error "ccall argument types must be a tuple; try \"(T,)\" and check if you specified a correct return type") + (error "ccall argument types must be a tuple; try \"(T,)\""))) + (expand-forms + (lower-ccall name RT (cdr argtypes) + (if have-cconv (append args (list (list cconv))) args)))))) ;; place (callingconv) at end of arglist ((and (pair? (caddr e)) (eq? (car (caddr e)) 'parameters)) ;; (call f (parameters . kwargs) ...) 
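(A rough illustration of the surface forms accepted by the `ccall` expansion added above; this sketch is not part of the patch, and the library and function names are only examples. The optional calling-convention symbol sits between the target and the return type, and the argument types must be written as a literal tuple such as `(T,)`.)

    # hypothetical usage of the lowered ccall form; names below are illustrative
    pid = ccall(:getpid, Cint, ())                                   # plain C call, no arguments
    len = ccall((:strlen, "libc"), Csize_t, (Cstring,), "julia")     # (symbol, library) target
    ccall((:SetLastError, "kernel32"), stdcall, Void, (UInt32,), 0)  # explicit calling convention
    # ccall(:strlen, Csize_t, Cstring, "julia")  # rejected: argument types must be a tuple, try "(T,)"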
@@ -2171,6 +2189,7 @@ 'ccall (lambda (e) + (syntax-deprecation #f "Expr(:ccall)" "Expr(:call, :ccall)") (if (length> e 3) (let ((name (cadr e)) (RT (caddr e)) @@ -3243,7 +3262,7 @@ f(x) = yt(x) ;; from the current function. (define (compile e break-labels value tail) (if (or (not (pair? e)) (memq (car e) '(null ssavalue quote inert top core copyast the_exception $ - globalref outerref cdecl stdcall fastcall thiscall))) + globalref outerref cdecl stdcall fastcall thiscall llvmcall))) (let ((e (if (and arg-map (symbol? e)) (get arg-map e e) e))) @@ -3255,16 +3274,15 @@ f(x) = yt(x) ((and (pair? e) (eq? (car e) 'globalref)) (emit e) #f) ;; keep globals for undefined-var checking (else #f))) (case (car e) - ((call new) - (let* ((ccall? (and (eq? (car e) 'call) (equal? (cadr e) '(core ccall)))) - (args (if ccall? + ((call new foreigncall) + (let* ((args (if (eq? (car e) 'foreigncall) ;; NOTE: 2nd and 3rd arguments of ccall must be left in place ;; the 1st should be compiled if an atom. - (append (list (cadr e)) - (cond (atom? (caddr e) (compile-args (list (caddr e)) break-labels)) - (else (caddr e))) - (list-head (cdddr e) 2) - (compile-args (list-tail e 5) break-labels)) + (append (list) + (cond (atom? (cadr e) (compile-args (list (cadr e)) break-labels)) + (else (cadr e))) + (list-head (cddr e) 2) + (compile-args (list-tail e 4) break-labels)) (compile-args (cdr e) break-labels))) (callex (cons (car e) args))) (cond (tail (emit-return callex)) diff --git a/src/julia.h b/src/julia.h index e77e821f3c6b9..d2f4832c41859 100644 --- a/src/julia.h +++ b/src/julia.h @@ -256,10 +256,6 @@ typedef struct _jl_method_t { int32_t called; // bit flags: whether each of the first 8 arguments is called uint8_t isva; uint8_t isstaged; - // if there are intrinsic calls, sparams are probably required to compile successfully, - // and so unspecialized will be created for each linfo instead of using linfo->def->template - // 0 = no, 1 = yes, 2 = not yet known - uint8_t needs_sparam_vals_ducttape; // hidden fields: uint8_t traced; @@ -974,6 +970,7 @@ JL_DLLEXPORT uintptr_t jl_object_id(jl_value_t *v); JL_DLLEXPORT int jl_is_leaf_type(jl_value_t *v); JL_DLLEXPORT int jl_has_free_typevars(jl_value_t *v); JL_DLLEXPORT int jl_has_typevar(jl_value_t *t, jl_tvar_t *v); +JL_DLLEXPORT int jl_has_typevar_from_unionall(jl_value_t *t, jl_unionall_t *ua); JL_DLLEXPORT int jl_subtype_env_size(jl_value_t *t); JL_DLLEXPORT int jl_subtype_env(jl_value_t *x, jl_value_t *y, jl_value_t **env, int envsz); JL_DLLEXPORT int jl_isa(jl_value_t *a, jl_value_t *t); diff --git a/src/julia_internal.h b/src/julia_internal.h index 4d4784b620f70..60b931afedddc 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -337,6 +337,7 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method void jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_t fptr); jl_value_t *jl_type_intersection_matching(jl_value_t *a, jl_value_t *b, jl_svec_t **penv); jl_value_t *jl_instantiate_type_with(jl_value_t *t, jl_value_t **env, size_t n); +JL_DLLEXPORT jl_value_t *jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals); jl_value_t *jl_substitute_var(jl_value_t *t, jl_tvar_t *var, jl_value_t *val); jl_datatype_t *jl_new_uninitialized_datatype(void); jl_datatype_t *jl_new_abstracttype(jl_value_t *name, jl_datatype_t *super, @@ -370,7 +371,6 @@ jl_method_instance_t *jl_method_lookup(jl_methtable_t *mt, jl_value_t **args, si jl_value_t *jl_gf_invoke(jl_tupletype_t *types, jl_value_t **args, size_t 
nargs); JL_DLLEXPORT jl_datatype_t *jl_first_argument_datatype(jl_value_t *argtypes); -int jl_has_intrinsics(jl_method_instance_t *li, jl_value_t *v, jl_module_t *m); jl_value_t *jl_nth_slot_type(jl_value_t *sig, size_t i); void jl_compute_field_offsets(jl_datatype_t *st); @@ -594,7 +594,7 @@ extern JL_DLLEXPORT jl_value_t *jl_segv_exception; // -- Runtime intrinsics -- // JL_DLLEXPORT const char *jl_intrinsic_name(int f); -JL_DLLEXPORT jl_value_t *jl_reinterpret(jl_value_t *ty, jl_value_t *v); +JL_DLLEXPORT jl_value_t *jl_bitcast(jl_value_t *ty, jl_value_t *v); JL_DLLEXPORT jl_value_t *jl_pointerref(jl_value_t *p, jl_value_t *i, jl_value_t *align); JL_DLLEXPORT jl_value_t *jl_pointerset(jl_value_t *p, jl_value_t *x, jl_value_t *align, jl_value_t *i); JL_DLLEXPORT jl_value_t *jl_cglobal(jl_value_t *v, jl_value_t *ty); @@ -838,7 +838,7 @@ extern jl_sym_t *lambda_sym; extern jl_sym_t *assign_sym; extern jl_sym_t *method_sym; extern jl_sym_t *slot_sym; extern jl_sym_t *enter_sym; extern jl_sym_t *leave_sym; extern jl_sym_t *exc_sym; extern jl_sym_t *new_sym; -extern jl_sym_t *compiler_temp_sym; +extern jl_sym_t *compiler_temp_sym; extern jl_sym_t *foreigncall_sym; extern jl_sym_t *const_sym; extern jl_sym_t *thunk_sym; extern jl_sym_t *anonymous_sym; extern jl_sym_t *underscore_sym; extern jl_sym_t *abstracttype_sym; extern jl_sym_t *bitstype_sym; diff --git a/src/llvm-gcroot.cpp b/src/llvm-gcroot.cpp index 6baddb9b16ad3..5a36eb594cf0f 100644 --- a/src/llvm-gcroot.cpp +++ b/src/llvm-gcroot.cpp @@ -1009,7 +1009,7 @@ void JuliaGCAllocator::allocate_frame() DIBuilder dbuilder(M, false); #endif unsigned argSpaceSize = 0; - for(BasicBlock::iterator I = gcframe->getParent()->begin(), E(gcframe); I != E; ) { + for (BasicBlock::iterator I = gcframe->getParent()->begin(), E(gcframe); I != E; ) { Instruction* inst = &*I; ++I; if (CallInst* callInst = dyn_cast<CallInst>(inst)) { @@ -1059,6 +1059,7 @@ void JuliaGCAllocator::allocate_frame() } else if (AllocaInst *allocaInst = dyn_cast<AllocaInst>(inst)) { if (allocaInst->getAllocatedType() == V_null->getType()) { + // TODO: this is overly aggressive at zeroing allocas that may not actually need to be zeroed StoreInst *store = new StoreInst(V_null, allocaInst); store->insertAfter(allocaInst); } diff --git a/src/macroexpand.scm b/src/macroexpand.scm index 36fec359a1b18..430ac31d12c07 100644 --- a/src/macroexpand.scm +++ b/src/macroexpand.scm @@ -203,7 +203,7 @@ m inarg)) (define (resolve-expansion-vars- e env m inarg) - (cond ((or (eq? e 'true) (eq? e 'false) (eq? e 'end)) + (cond ((or (eq? e 'true) (eq? e 'false) (eq? e 'end) (eq? e 'ccall)) e) ((symbol?
e) (let ((a (assq e env))) diff --git a/src/method.c b/src/method.c index de966813e0b96..cd84612be11a5 100644 --- a/src/method.c +++ b/src/method.c @@ -16,7 +16,7 @@ extern "C" { #endif extern jl_value_t *jl_builtin_getfield; -jl_value_t *jl_resolve_globals(jl_value_t *expr, jl_module_t *module) +jl_value_t *jl_resolve_globals(jl_value_t *expr, jl_module_t *module, jl_svec_t *sparam_vals) { if (jl_is_symbol(expr)) { if (module == NULL) @@ -29,6 +29,7 @@ jl_value_t *jl_resolve_globals(jl_value_t *expr, jl_module_t *module) e->head == global_sym || e->head == quote_sym || e->head == inert_sym || e->head == line_sym || e->head == meta_sym || e->head == inbounds_sym || e->head == boundscheck_sym || e->head == simdloop_sym) { + // ignore these } else { if (e->head == call_sym && jl_expr_nargs(e) == 3 && @@ -66,13 +67,40 @@ jl_value_t *jl_resolve_globals(jl_value_t *expr, jl_module_t *module) } } } - size_t i = 0; + size_t i = 0, nargs = jl_array_len(e->args); + if (e->head == foreigncall_sym) { + JL_NARGSV(ccall method definition, 3); // (fptr, rt, at) + jl_value_t *rt = jl_exprarg(e, 1); + jl_value_t *at = jl_exprarg(e, 2); + if (!jl_is_type(rt)) { + rt = jl_interpret_toplevel_expr_in(module, rt, NULL, sparam_vals); + jl_exprargset(e, 1, rt); + } + if (!jl_is_svec(at)) { + at = jl_interpret_toplevel_expr_in(module, at, NULL, sparam_vals); + jl_exprargset(e, 2, at); + } + if (jl_is_svec(rt)) + jl_error("ccall: missing return type"); + JL_TYPECHK(ccall method definition, type, rt); + JL_TYPECHK(ccall method definition, simplevector, at); + size_t nargt = jl_svec_len(at); + int isVa = (nargt > 0 && jl_is_vararg_type(jl_svecref(at, nargt - 1))); + if (nargs % 2 == 0) // ignore calling-convention arg, if present + nargs -= 1; + if ((!isVa && nargt != (nargs - 2) / 2) || + ( isVa && nargt - 1 > (nargs - 2) / 2)) { + jl_printf(JL_STDERR, "WARNING: ccall: wrong number of arguments to C function in %s\n", + jl_symbol_name(module->name)); // TODO: make this an error + } + } if (e->head == method_sym || e->head == abstracttype_sym || e->head == compositetype_sym || - e->head == bitstype_sym || e->head == module_sym) + e->head == bitstype_sym || e->head == module_sym) { i++; - for (; i < jl_array_len(e->args); i++) { + } + for (; i < nargs; i++) { // TODO: this should be making a copy, not mutating the source - jl_exprargset(e, i, jl_resolve_globals(jl_exprarg(e, i), module)); + jl_exprargset(e, i, jl_resolve_globals(jl_exprarg(e, i), module, sparam_vals)); } } } @@ -293,7 +321,7 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo) jl_array_t *stmts = (jl_array_t*)func->code; for (i = 0, l = jl_array_len(stmts); i < l; i++) { - jl_array_ptr_set(stmts, i, jl_resolve_globals(jl_array_ptr_ref(stmts, i), linfo->def->module)); + jl_array_ptr_set(stmts, i, jl_resolve_globals(jl_array_ptr_ref(stmts, i), linfo->def->module, env)); } ptls->in_pure_callback = last_in; jl_lineno = last_lineno; @@ -348,11 +376,15 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src) } m->called = called; + jl_array_t *copy = NULL; + jl_svec_t *sparam_vars = m->tvars; + if (!jl_is_svec(sparam_vars)) + sparam_vars = jl_svec1(sparam_vars); + JL_GC_PUSH2(©, &sparam_vars); assert(jl_typeis(src->code, jl_array_any_type)); jl_array_t *stmts = (jl_array_t*)src->code; size_t i, n = jl_array_len(stmts); - jl_array_t *copy = jl_alloc_vec_any(n); - JL_GC_PUSH1(©); + copy = jl_alloc_vec_any(n); int set_lineno = 0; for (i = 0; i < n; i++) { jl_value_t *st = jl_array_ptr_ref(stmts, i); @@ -365,7 +397,7 
@@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src) } } else { - st = jl_resolve_globals(st, m->module); + st = jl_resolve_globals(st, m->module, sparam_vars); } jl_array_ptr_set(copy, i, st); } @@ -399,7 +431,6 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(void) m->isstaged = 0; m->isva = 0; m->nargs = 0; - m->needs_sparam_vals_ducttape = 2; m->traced = 0; m->min_world = 1; m->max_world = ~(size_t)0; diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index 628220785b17f..d69b1be21e6db 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -14,16 +14,16 @@ const unsigned int host_char_bit = 8; -// run time version of box/unbox intrinsic -JL_DLLEXPORT jl_value_t *jl_reinterpret(jl_value_t *ty, jl_value_t *v) +// run time version of bitcast intrinsic +JL_DLLEXPORT jl_value_t *jl_bitcast(jl_value_t *ty, jl_value_t *v) { - JL_TYPECHK(reinterpret, datatype, ty); + JL_TYPECHK(bitcast, datatype, ty); if (!jl_is_leaf_type(ty) || !jl_is_bitstype(ty)) - jl_error("reinterpret: target type not a leaf bitstype"); + jl_error("bitcast: target type not a leaf bitstype"); if (!jl_is_bitstype(jl_typeof(v))) - jl_error("reinterpret: value not a bitstype"); + jl_error("bitcast: value not a bitstype"); if (jl_datatype_size(jl_typeof(v)) != jl_datatype_size(ty)) - jl_error("reinterpret: argument size does not match size of target type"); + jl_error("bitcast: argument size does not match size of target type"); if (ty == jl_typeof(v)) return v; if (ty == (jl_value_t*)jl_bool_type) @@ -90,7 +90,7 @@ JL_DLLEXPORT jl_value_t *jl_cglobal(jl_value_t *v, jl_value_t *ty) v = jl_fieldref(v, 0); if (jl_is_pointer(v)) - return jl_reinterpret(rt, v); + return jl_bitcast(rt, v); char *f_lib = NULL; if (jl_is_tuple(v) && jl_nfields(v) > 1) { @@ -989,5 +989,6 @@ JL_DLLEXPORT jl_value_t *jl_select_value(jl_value_t *isfalse, jl_value_t *a, jl_ JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a) { + JL_TYPECHK(arraylen, array, a); return jl_box_long(jl_array_len((jl_array_t*)a)); } diff --git a/src/toplevel.c b/src/toplevel.c index 4201041b1ffbc..7a4d90fae8c24 100644 --- a/src/toplevel.c +++ b/src/toplevel.c @@ -271,91 +271,19 @@ JL_DLLEXPORT jl_module_t *jl_base_relative_to(jl_module_t *m) return jl_top_module; } -// try to statically evaluate, NULL if not possible -// remove this once jl_has_intrinsics is deleted -extern jl_value_t *jl_builtin_getfield; -static jl_value_t *jl_static_eval(jl_value_t *ex, jl_module_t *mod, - jl_method_instance_t *linfo, int sparams) +int jl_has_intrinsics(jl_value_t *v) { - if (jl_is_symbol(ex)) { - jl_sym_t *sym = (jl_sym_t*)ex; - if (jl_is_const(mod, sym)) - return jl_get_global(mod, sym); - return NULL; - } - if (jl_is_slot(ex)) - return NULL; - if (jl_is_ssavalue(ex)) - return NULL; - if (jl_is_quotenode(ex)) - return jl_fieldref(ex, 0); - if (jl_is_method_instance(ex)) - return NULL; - jl_module_t *m = NULL; - jl_sym_t *s = NULL; - if (jl_is_globalref(ex)) { - jl_binding_t *b = jl_get_binding(jl_globalref_mod(ex), jl_globalref_name(ex)); - if (b && b->constp) { - return b->value; - } - return NULL; - } - if (jl_is_expr(ex)) { - jl_expr_t *e = (jl_expr_t*)ex; - if (e->head == call_sym) { - jl_value_t *f = jl_static_eval(jl_exprarg(e, 0), mod, linfo, sparams); - if (f) { - if (jl_array_dim0(e->args) == 3 && f==jl_builtin_getfield) { - m = (jl_module_t*)jl_static_eval(jl_exprarg(e, 1), mod, linfo, sparams); - s = (jl_sym_t*)jl_static_eval(jl_exprarg(e, 2), mod, linfo, sparams); - if (m && jl_is_module(m) && s && jl_is_symbol(s)) { - 
jl_binding_t *b = jl_get_binding(m, s); - if (b && b->constp) { - return b->value; - } - } - } - } - } - else if (e->head == static_parameter_sym) { - size_t idx = jl_unbox_long(jl_exprarg(e, 0)); - if (linfo && idx <= jl_svec_len(linfo->sparam_vals)) { - jl_value_t *e = jl_svecref(linfo->sparam_vals, idx - 1); - if (jl_is_typevar(e)) - return NULL; - return e; - } - } - return NULL; - } - return ex; -} - - -int jl_has_intrinsics(jl_method_instance_t *li, jl_value_t *v, jl_module_t *m) -{ - if (!jl_is_expr(v)) return 0; - jl_expr_t *e = (jl_expr_t*)v; - if (jl_array_len(e->args) == 0) + if (!jl_is_expr(v)) return 0; + jl_expr_t *e = (jl_expr_t*)v; if (e->head == toplevel_sym || e->head == copyast_sym) return 0; - jl_value_t *e0 = jl_exprarg(e, 0); - if (e->head == call_sym) { - jl_value_t *sv = jl_static_eval(e0, m, li, li != NULL); - if (sv && jl_typeis(sv, jl_intrinsic_type)) - return 1; - } - if (0 && e->head == assign_sym && jl_is_ssavalue(e0)) { // code branch needed for *very-linear-mode*, but not desirable otherwise - jl_value_t *e1 = jl_exprarg(e, 1); - jl_value_t *sv = jl_static_eval(e1, m, li, li != NULL); - if (sv && jl_typeis(sv, jl_intrinsic_type)) - return 1; - } + if (e->head == foreigncall_sym) + return 1; int i; - for (i=0; i < jl_array_len(e->args); i++) { - jl_value_t *a = jl_exprarg(e,i); - if (jl_is_expr(a) && jl_has_intrinsics(li, a, m)) + for (i = 0; i < jl_array_len(e->args); i++) { + jl_value_t *a = jl_exprarg(e, i); + if (jl_is_expr(a) && jl_has_intrinsics(a)) return 1; } return 0; @@ -396,7 +324,8 @@ static int jl_eval_with_compiler_p(jl_code_info_t *src, jl_array_t *body, int co } } } - if (jl_has_intrinsics(NULL, stmt, m)) return 1; + if (jl_has_intrinsics(stmt)) + return 1; } return 0; } @@ -405,7 +334,7 @@ static int jl_eval_expr_with_compiler_p(jl_value_t *e, int compileloops, jl_modu { if (jl_is_expr(e) && ((jl_expr_t*)e)->head == body_sym) return jl_eval_with_compiler_p(NULL, ((jl_expr_t*)e)->args, compileloops, m); - if (jl_has_intrinsics(NULL, e, m)) + if (jl_has_intrinsics(e)) return 1; return 0; } @@ -513,11 +442,17 @@ int jl_is_toplevel_only_expr(jl_value_t *e) ((jl_expr_t*)e)->head == toplevel_sym); } -static jl_method_instance_t *jl_new_thunk(jl_code_info_t *src) +jl_value_t *jl_resolve_globals(jl_value_t *expr, jl_module_t *module, jl_svec_t *sparam_vals); +static jl_method_instance_t *jl_new_thunk(jl_code_info_t *src, jl_module_t *module) { jl_method_instance_t *li = jl_new_method_instance_uninit(); li->inferred = (jl_value_t*)src; li->specTypes = jl_typeof(jl_emptytuple); + jl_array_t *stmts = (jl_array_t*)src->code; + size_t i, l; + for (i = 0, l = jl_array_len(stmts); i < l; i++) { + jl_array_ptr_set(stmts, i, jl_resolve_globals(jl_array_ptr_ref(stmts, i), module, NULL)); + } return li; } @@ -645,7 +580,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_value_t *e, int fast, int expanded) } if (ewc) { - li = jl_new_thunk(thk); + li = jl_new_thunk(thk, ptls->current_module); size_t world = jl_get_ptls_states()->world_age; jl_type_infer(&li, world, 0); jl_value_t *dummy_f_arg = NULL; diff --git a/test/ccall.jl b/test/ccall.jl index 3450ef2c2dbc5..9c5943616ebb1 100644 --- a/test/ccall.jl +++ b/test/ccall.jl @@ -15,7 +15,10 @@ ccall_test_func(x) = ccall((:testUcharX, libccalltest), Int32, (UInt8,), x % UIn # Test for proper round-trip of Ref{T} type -ccall_echo_func{T,U}(x, ::Type{T}, ::Type{U}) = ccall((:test_echo_p, libccalltest), T, (U,), x) +function ccall_echo_func(x::ANY, T::Type, U::Type) + f = (@eval (x) -> ccall((:test_echo_p, libccalltest), $T, 
($U,), x)) + return @eval $f($x) +end # Make sure object x is still valid (rooted as argument) # when loading the pointer. This works as long as we still keep the argument # rooted but might fail if we are smarter about eliminating dead root. @@ -63,7 +66,7 @@ let a, ci_ary, x x = ccall((:cptest_static, libccalltest), Ptr{Complex{Int}}, (Ptr{Complex{Int}},), &a) @test unsafe_load(x) == a - Libc.free(convert(Ptr{Void},x)) + Libc.free(convert(Ptr{Void}, x)) end let a, b, x @@ -123,7 +126,11 @@ function test_struct1{Struct}(::Type{Struct}) b = Float32(123.456) a2 = copy(a) - x = ccall((:test_1, libccalltest), Struct, (Struct, Float32), a2, b) + if Struct === Struct1 + x = ccall((:test_1, libccalltest), Struct1, (Struct1, Float32), a2, b) + else + x = ccall((:test_1, libccalltest), Struct1I, (Struct1I, Float32), a2, b) + end @test a2.x == a.x && a2.y == a.y @test !(a2 === x) @@ -202,7 +209,11 @@ function test_struct4{Struct}(::Type{Struct}) a = Struct(-512275808,882558299,-2133022131) b = Int32(42) - x = ccall((:test_4, libccalltest), Struct, (Struct, Int32), a, b) + if Struct === Struct4 + x = ccall((:test_4, libccalltest), Struct4, (Struct4, Int32), a, b) + else + x = ccall((:test_4, libccalltest), Struct4I, (Struct4I, Int32), a, b) + end @test x.x == a.x+b*1 @test x.y == a.y-b*2 @@ -228,7 +239,11 @@ function test_struct5{Struct}(::Type{Struct}) a = Struct(1771319039, 406394736, -1269509787, -745020976) b = Int32(42) - x = ccall((:test_5, libccalltest), Struct, (Struct, Int32), a, b) + if Struct === Struct5 + x = ccall((:test_5, libccalltest), Struct5, (Struct5, Int32), a, b) + else + x = ccall((:test_5, libccalltest), Struct5I, (Struct5I, Int32), a, b) + end @test x.x == a.x+b*1 @test x.y == a.y-b*2 @@ -253,7 +268,11 @@ function test_struct6{Struct}(::Type{Struct}) a = Struct(-654017936452753226, -5573248801240918230, -983717165097205098) b = Int64(42) - x = ccall((:test_6, libccalltest), Struct, (Struct, Int64), a, b) + if Struct === Struct6 + x = ccall((:test_6, libccalltest), Struct6, (Struct6, Int64), a, b) + else + x = ccall((:test_6, libccalltest), Struct6I, (Struct6I, Int64), a, b) + end @test x.x == a.x+b*1 @test x.y == a.y-b*2 @@ -275,7 +294,11 @@ function test_struct7{Struct}(::Type{Struct}) a = Struct(-384082741977533896, 'h') b = Int8(42) - x = ccall((:test_7, libccalltest), Struct, (Struct, Int8), a, b) + if Struct === Struct7 + x = ccall((:test_7, libccalltest), Struct7, (Struct7, Int8), a, b) + else + x = ccall((:test_7, libccalltest), Struct7I, (Struct7I, Int8), a, b) + end @test x.x == a.x+Int(b)*1 @test x.y == a.y-Int(b)*2 @@ -296,7 +319,11 @@ function test_struct8{Struct}(::Type{Struct}) a = Struct(-384082896, 'h') b = Int8(42) - r8 = ccall((:test_8, libccalltest), Struct, (Struct, Int8), a, b) + if Struct === Struct8 + r8 = ccall((:test_8, libccalltest), Struct8, (Struct8, Int8), a, b) + else + r8 = ccall((:test_8, libccalltest), Struct8I, (Struct8I, Int8), a, b) + end @test r8.x == a.x+b*1 @test r8.y == a.y-b*2 @@ -317,7 +344,11 @@ function test_struct9{Struct}(::Type{Struct}) a = Struct(-394092996, -3840) b = Int16(42) - x = ccall((:test_9, libccalltest), Struct, (Struct, Int16), a, b) + if Struct === Struct9 + x = ccall((:test_9, libccalltest), Struct9, (Struct9, Int16), a, b) + else + x = ccall((:test_9, libccalltest), Struct9I, (Struct9I, Int16), a, b) + end @test x.x == a.x+b*1 @test x.y == a.y-b*2 @@ -342,7 +373,11 @@ function test_struct10{Struct}(::Type{Struct}) a = Struct('0', '1', '2', '3') b = Int8(2) - x = ccall((:test_10, libccalltest), Struct, (Struct, 
Int8), a, b)
+    if Struct === Struct10
+        x = ccall((:test_10, libccalltest), Struct10, (Struct10, Int8), a, b)
+    else
+        x = ccall((:test_10, libccalltest), Struct10I, (Struct10I, Int8), a, b)
+    end

     @test x.x == a.x+b*1
     @test x.y == a.y-b*2
@@ -363,7 +398,11 @@ function test_struct11{Struct}(::Type{Struct})
     a = Struct(0.8877077f0 + 0.4591081f0im)
     b = Float32(42)

-    x = ccall((:test_11, libccalltest), Struct, (Struct, Float32), a, b)
+    if Struct === Struct11
+        x = ccall((:test_11, libccalltest), Struct11, (Struct11, Float32), a, b)
+    else
+        x = ccall((:test_11, libccalltest), Struct11I, (Struct11I, Float32), a, b)
+    end

     @test x.x ≈ a.x + b*1 - b*2im
 end
@@ -383,7 +422,11 @@ function test_struct12{Struct}(::Type{Struct})
     a = Struct(0.8877077f5 + 0.4591081f2im, 0.0004842868f0 - 6982.3265f3im)
     b = Float32(42)

-    x = ccall((:test_12, libccalltest), Struct, (Struct, Float32), a, b)
+    if Struct === Struct12
+        x = ccall((:test_12, libccalltest), Struct12, (Struct12, Float32), a, b)
+    else
+        x = ccall((:test_12, libccalltest), Struct12I, (Struct12I, Float32), a, b)
+    end

     @test x.x ≈ a.x + b*1 - b*2im
     @test x.y ≈ a.y + b*3 - b*4im
@@ -402,7 +445,11 @@ function test_struct13{Struct}(::Type{Struct})
     a = Struct(42968.97560380495 - 803.0576845153616im)
     b = Float64(42)

-    x = ccall((:test_13, libccalltest), Struct, (Struct, Float64), a, b)
+    if Struct === Struct13
+        x = ccall((:test_13, libccalltest), Struct13, (Struct13, Float64), a, b)
+    else
+        x = ccall((:test_13, libccalltest), Struct13I, (Struct13I, Float64), a, b)
+    end

     @test x.x ≈ a.x + b*1 - b*2im
 end
@@ -422,7 +469,11 @@ function test_struct14{Struct}(::Type{Struct})
     a = Struct(0.024138331f0, 0.89759064f32)
     b = Float32(42)

-    x = ccall((:test_14, libccalltest), Struct, (Struct, Float32), a, b)
+    if Struct === Struct14
+        x = ccall((:test_14, libccalltest), Struct14, (Struct14, Float32), a, b)
+    else
+        x = ccall((:test_14, libccalltest), Struct14I, (Struct14I, Float32), a, b)
+    end

     @test x.x ≈ a.x + b*1
     @test x.y ≈ a.y - b*2
@@ -443,7 +494,11 @@ function test_struct15{Struct}(::Type{Struct})
     a = Struct(4.180997967273657, -0.404218594294923)
     b = Float64(42)

-    x = ccall((:test_15, libccalltest), Struct, (Struct, Float64), a, b)
+    if Struct === Struct15
+        x = ccall((:test_15, libccalltest), Struct15, (Struct15, Float64), a, b)
+    else
+        x = ccall((:test_15, libccalltest), Struct15I, (Struct15I, Float64), a, b)
+    end

     @test x.x ≈ a.x + b*1
     @test x.y ≈ a.y - b*2
@@ -473,7 +528,11 @@ function test_struct16{Struct}(::Type{Struct})
                0.6460273620993535, 0.9472692581106656, 0.47328535437352093)
     b = Float32(42)

-    x = ccall((:test_16, libccalltest), Struct, (Struct, Float32), a, b)
+    if Struct === Struct16
+        x = ccall((:test_16, libccalltest), Struct16, (Struct16, Float32), a, b)
+    else
+        x = ccall((:test_16, libccalltest), Struct16I, (Struct16I, Float32), a, b)
+    end

     @test x.x ≈ a.x + b*1
     @test x.y ≈ a.y - b*2
@@ -498,7 +557,11 @@ function test_struct17{Struct}(::Type{Struct})
     a = Struct(2, 10)
     b = Int8(2)

-    x = ccall((:test_17, libccalltest), Struct, (Struct, Int8), a, b)
+    if Struct === Struct17
+        x = ccall((:test_17, libccalltest), Struct17, (Struct17, Int8), a, b)
+    else
+        x = ccall((:test_17, libccalltest), Struct17I, (Struct17I, Int8), a, b)
+    end

     @test x.a == a.a + b * 1
     @test x.b == a.b - b * 2
@@ -521,7 +584,11 @@ function test_struct18{Struct}(::Type{Struct})
     a = Struct(2, 10, -3)
     b = Int8(2)

-    x = ccall((:test_18, libccalltest), Struct, (Struct, Int8), a, b)
+    if Struct === Struct18
+        x = ccall((:test_18, libccalltest), Struct18, (Struct18, Int8), a, b)
+    else
+        x = ccall((:test_18, libccalltest), Struct18I, (Struct18I, Int8), a, b)
+    end

     @test x.a == a.a + b * 1
     @test x.b == a.b - b * 2
@@ -557,7 +624,11 @@ function test_struct_big{Struct}(::Type{Struct})
     a = Struct(424,-5,Int8('Z'))
     a2 = copy(a)

-    x = ccall((:test_big, libccalltest), Struct, (Struct,), a2)
+    if Struct == Struct_Big
+        x = ccall((:test_big, libccalltest), Struct_Big, (Struct_Big,), a2)
+    else
+        x = ccall((:test_big, libccalltest), Struct_BigI, (Struct_BigI,), a2)
+    end

     @test a2.x == a.x && a2.y == a.y && a2.z == a.z
     @test x.x == a.x + 1
@@ -890,28 +961,35 @@ type Struct_huge5_ppc64_hva
 end

 if Sys.ARCH === :x86_64
-    function test_sse(a1::V4xF32,a2::V4xF32,a3::V4xF32,a4::V4xF32)
-        ccall((:test_m128, libccalltest), V4xF32, (V4xF32,V4xF32,V4xF32,V4xF32), a1, a2, a3, a4)
+    function test_sse(a1::V4xF32, a2::V4xF32, a3::V4xF32, a4::V4xF32)
+        ccall((:test_m128, libccalltest), V4xF32, (V4xF32, V4xF32, V4xF32, V4xF32), a1, a2, a3, a4)
     end

-    function test_sse(a1::V4xI32,a2::V4xI32,a3::V4xI32,a4::V4xI32)
-        ccall((:test_m128i, libccalltest), V4xI32, (V4xI32,V4xI32,V4xI32,V4xI32), a1, a2, a3, a4)
+    function test_sse(a1::V4xI32, a2::V4xI32, a3::V4xI32, a4::V4xI32)
+        ccall((:test_m128i, libccalltest), V4xI32, (V4xI32, V4xI32, V4xI32, V4xI32), a1, a2, a3, a4)
     end

-    foo_ams(a1, a2, a3, a4) = VecReg(ntuple(i->VecElement(a1[i].value+a2[i].value*(a3[i].value-a4[i].value)),4))
+    foo_ams(a1, a2, a3, a4) = VecReg(ntuple(i -> VecElement(a1[i].value + a2[i].value * (a3[i].value - a4[i].value)), 4))

-    rt_sse{T}(a1::T,a2::T,a3::T,a4::T) = ccall(cfunction(foo_ams,T,(T,T,T,T)), T, (T,T,T,T), a1, a2, a3,a4)
+    for s in [Float32, Int32]
+        T = NTuple{4, VecElement{s}}
+        @eval function rt_sse(a1::$T, a2::$T, a3::$T, a4::$T)
+            return ccall(
+                cfunction(foo_ams, $T, ($T, $T, $T, $T)),
+                $T,
+                ($T, $T, $T, $T),
+                a1, a2, a3, a4)
+        end

-    for s in [Float32,Int32]
-        a1 = VecReg(ntuple(i->VecElement(s(1i)),4))
-        a2 = VecReg(ntuple(i->VecElement(s(2i)),4))
-        a3 = VecReg(ntuple(i->VecElement(s(3i)),4))
-        a4 = VecReg(ntuple(i->VecElement(s(4i)),4))
-        r = VecReg(ntuple(i->VecElement(s(1i+2i*(3i-4i))),4))
-        @test test_sse(a1,a2,a3,a4) == r
+        a1 = VecReg(ntuple(i -> VecElement(s(1i)), 4))
+        a2 = VecReg(ntuple(i -> VecElement(s(2i)), 4))
+        a3 = VecReg(ntuple(i -> VecElement(s(3i)), 4))
+        a4 = VecReg(ntuple(i -> VecElement(s(4i)), 4))
+        r = VecReg(ntuple(i -> VecElement(s(1i + 2i * (3i - 4i))), 4))
+        @test test_sse(a1, a2, a3, a4) == r

         # cfunction round-trip
-        @test rt_sse(a1,a2,a3,a4) == r
+        @test rt_sse(a1, a2, a3, a4) == r
     end
 elseif Sys.ARCH === :aarch64
diff --git a/test/choosetests.jl b/test/choosetests.jl
index b02f75e6437a6..67f32c8d0d190 100644
--- a/test/choosetests.jl
+++ b/test/choosetests.jl
@@ -17,7 +17,7 @@ function choosetests(choices = [])
     testnames = [
         "linalg", "subarray", "core", "inference", "worlds",
         "keywordargs", "numbers", "subtype",
-        "printf", "char", "strings", "triplequote", "unicode",
+        "printf", "char", "strings", "triplequote", "unicode", "intrinsics",
         "dates", "dict", "hashing", "iobuffer", "staged", "offsetarray",
         "arrayops", "tuple", "reduce", "reducedim", "random", "abstractarray",
         "intfuncs", "simdloop", "vecelement", "blas", "sparse",
@@ -29,7 +29,7 @@ function choosetests(choices = [])
         "floatapprox", "datafmt", "reflection", "regex", "float16",
         "combinatorics", "sysinfo", "env", "rounding", "ranges", "mod2pi",
         "euler", "show", "lineedit", "replcompletions", "repl",
-        "replutil", "sets", "test", "goto", "llvmcall", "grisu",
+        "replutil", "sets", "test", "goto", "llvmcall", "llvmcall2", "grisu",
         "nullable", "meta", "stacktraces", "profile", "libgit2", "docs",
         "markdown", "base64", "serialize", "misc", "threads",
         "enums", "cmdlineargs", "i18n", "workspace", "libdl", "int",
diff --git a/test/compile.jl b/test/compile.jl
index 0270dc56d13c1..718c24f6f7a24 100644
--- a/test/compile.jl
+++ b/test/compile.jl
@@ -106,7 +106,7 @@ try
           let some_method = @which Base.include("string")
               # global const some_method // FIXME: support for serializing a direct reference to an external Method not implemented
              global const some_linfo =
-                  ccall(:jl_specializations_get_linfo, Ref{MethodInstance}, (Any, Any, Any, UInt),
+                  ccall(:jl_specializations_get_linfo, Ref{Core.MethodInstance}, (Any, Any, Any, UInt),
                       some_method, Tuple{typeof(Base.include), String}, Core.svec(), typemax(UInt))
           end
       end
@@ -175,7 +175,7 @@ try
                       0:25)
         some_method = @which Base.include("string")
         some_linfo =
-            ccall(:jl_specializations_get_linfo, Ref{MethodInstance}, (Any, Any, Any, UInt),
+            ccall(:jl_specializations_get_linfo, Ref{Core.MethodInstance}, (Any, Any, Any, UInt),
                 some_method, Tuple{typeof(Base.include), String}, Core.svec(), typemax(UInt))

         @test Foo.some_linfo::Core.MethodInstance === some_linfo
diff --git a/test/core.jl b/test/core.jl
index e89a66c4a74bf..b1328be2f9fa6 100644
--- a/test/core.jl
+++ b/test/core.jl
@@ -209,10 +209,6 @@ let x = (2,3)
     @test +(x...) == 5
 end

-# bits types
-@test isa((()->Core.Intrinsics.box(Ptr{Int8}, Core.Intrinsics.unbox(Int, 0)))(), Ptr{Int8})
-@test isa(convert(Char,65), Char)
-
 # conversions
 function fooo()
     local x::Int8
@@ -348,13 +344,6 @@ glotest()
 @test glob_x == 88
 @test loc_x == 10

-# runtime intrinsics
-
-let f = Any[Core.Intrinsics.add_int, Core.Intrinsics.sub_int]
-    @test f[1](1, 1) == 2
-    @test f[2](1, 1) == 0
-end
-
 # issue #7234
 begin
     glob_x2 = 24
@@ -1317,25 +1306,6 @@ f4518(x, y::Union{Int32,Int64}) = 0
 f4518(x::String, y::Union{Int32,Int64}) = 1
 @test f4518("",1) == 1

-# issue #4581
-bitstype 64 Date4581{T}
-let
-    x = Core.Intrinsics.box(Date4581{Int}, Core.Intrinsics.unbox(Int64,Int64(1234)))
-    xs = Date4581[x]
-    ys = copy(xs)
-    @test ys !== xs
-    @test ys == xs
-end
-
-# issue #6591
-function f6591(d)
-    Core.Intrinsics.box(Int64, d)
-    (f->f(d))(identity)
-end
-let d = Core.Intrinsics.box(Date4581{Int}, Int64(1))
-    @test isa(f6591(d), Date4581)
-end
-
 # issue #4645
 i4645(x) = (println(zz); zz = x; zz)
 @test_throws UndefVarError i4645(4)
@@ -1447,7 +1417,7 @@ f5150(T) = Array{Rational{T}}(1)

 # issue #5165
 bitstype 64 T5165{S}
-make_t(x::Int64) = Base.box(T5165{Void}, Base.unbox(Int64, x))
+make_t(x::Int64) = Core.Intrinsics.bitcast(T5165{Void}, x)
 xs5165 = T5165[make_t(Int64(1))]
 b5165 = IOBuffer()
 for x in xs5165
@@ -1766,8 +1736,8 @@ obj6387 = ObjMember(DateRange6387{Int64}())

 function v6387{T}(r::Range{T})
     a = Array{T}(1)
-    a[1] = Core.Intrinsics.box(Date6387{Int64}, Core.Intrinsics.unbox(Int64,Int64(1)))
-    a
+    a[1] = Core.Intrinsics.bitcast(Date6387{Int64}, Int64(1))
+    return a
 end

 function day_in(obj::ObjMember)
@@ -2256,20 +2226,6 @@ f7221(::BitArray) = 2
 f7221(::AbstractVecOrMat) = 3
 @test f7221(trues(1)) == 2

-# test functionality of non-power-of-2 bitstype constants
-bitstype 24 Int24
-Int24(x::Int) = Core.Intrinsics.box(Int24,Core.Intrinsics.trunc_int(Int24,Core.Intrinsics.unbox(Int,x)))
-Int(x::Int24) = Core.Intrinsics.box(Int,Core.Intrinsics.zext_int(Int,Core.Intrinsics.unbox(Int24,x)))
-let x,y,f
-    x = Int24(Int(0x12345678)) # create something (via truncation)
-    @test Int(0x345678) === Int(x)
-    function f() Int24(Int(0x02468ace)) end
-    y = f() # invoke llvm constant folding
-    @test Int(0x468ace) === Int(y)
-    @test x !== y
-    @test string(y) == "$(curmod_prefix)Int24(0x468ace)"
-end
-
 # issue #10570
 immutable Array_512_Uint8
     d1::UInt8
diff --git a/test/inference.jl b/test/inference.jl
index 67c31880cbe02..e275f093d1ceb 100644
--- a/test/inference.jl
+++ b/test/inference.jl
@@ -429,24 +429,20 @@ end
 @inferred cat10880(Tuple{Int8,Int16}, Tuple{Int32})

 # issue #19348
-function is_intrinsic_expr(e::Expr)
-    if e.head === :call
-        return Base.is_intrinsic_expr(e.args[1])
-    elseif e.head == :invoke
-        return false
-    elseif e.head === :new
-        return false
-    elseif e.head === :copyast
-        return false
-    elseif e.head === :inert
-        return false
+function is_typed_expr(e::Expr)
+    if e.head === :call ||
+       e.head === :invoke ||
+       e.head === :new ||
+       e.head === :copyast ||
+       e.head === :inert
+        return true
     end
-    return true
+    return false
 end
 test_inferred_static(other::ANY) = true
 test_inferred_static(slot::TypedSlot) = @test isleaftype(slot.typ)
 function test_inferred_static(expr::Expr)
-    if !is_intrinsic_expr(expr)
+    if is_typed_expr(expr)
         @test isleaftype(expr.typ)
     end
     for a in expr.args
diff --git a/test/intrinsics.jl b/test/intrinsics.jl
new file mode 100644
index 0000000000000..dedc4662e1b1c
--- /dev/null
+++ b/test/intrinsics.jl
@@ -0,0 +1,84 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+# intrinsic functions
+const curmod = current_module()
+const curmod_name = fullname(curmod)
+const curmod_prefix = "$(["$m." for m in curmod_name]...)"
+
+# bits types
+@test isa((() -> Core.Intrinsics.bitcast(Ptr{Int8}, 0))(), Ptr{Int8})
+@test isa(convert(Char, 65), Char)
+
+# runtime intrinsics
+let f = Any[Core.Intrinsics.add_int, Core.Intrinsics.sub_int]
+    @test f[1](1, 1) == 2
+    @test f[2](1, 1) == 0
+end
+
+# issue #4581
+bitstype 64 Date4581{T}
+let
+    x = Core.Intrinsics.bitcast(Date4581{Int}, Int64(1234))
+    xs = Date4581[x]
+    ys = copy(xs)
+    @test ys !== xs
+    @test ys == xs
+end
+
+# issue #6591
+function f6591(d)
+    Core.Intrinsics.bitcast(Int64, d)
+    return (f -> f(d))(identity)
+end
+let d = Core.Intrinsics.bitcast(Date4581{Int}, Int64(1))
+    @test isa(f6591(d), Date4581)
+end
+
+# test functionality of non-power-of-2 bitstype constants
+bitstype 24 Int24
+Int24(x::Int) = Core.Intrinsics.trunc_int(Int24, x)
+Int(x::Int24) = Core.Intrinsics.zext_int(Int, x)
+let x, y, f
+    x = Int24(Int(0x12345678)) # create something (via truncation)
+    @test Int(0x345678) === Int(x)
+    f() = Int24(Int(0x02468ace))
+    y = f() # invoke llvm constant folding
+    @test Int(0x468ace) === Int(y)
+    @test x !== y
+    @test string(y) == "$(curmod_prefix)Int24(0x468ace)"
+end
+
+# test nonsensical valid conversions and errors
+
+compiled_addi(x, y) = Core.Intrinsics.add_int(x, y)
+@test compiled_addi(C_NULL, C_NULL) === C_NULL
+@test_throws ErrorException compiled_addi(C_NULL, 1)
+@test_throws ErrorException ((x)->compiled_addi(Float64(x), x))(1)
+@test ((x)->compiled_addi(Float64(x), Float64(x)))(2) === -0.0
+@test compiled_addi(0.5, 5.0e-323) === 0.5000000000000011
+@test_throws ErrorException compiled_addi(Int8(1), UInt8(1))
+@test compiled_addi(UInt8(1), UInt8(2)) === UInt8(3)
+@test_throws ErrorException compiled_addi(UInt8(1), UInt16(2))
+@test compiled_addi(Float32(.125), Float32(10)) === 2.1267648f38
+@test compiled_addi(true, true) === false
+
+compiled_addf(x, y) = Core.Intrinsics.add_float(x, y)
+@test compiled_addf(C_NULL, C_NULL) === C_NULL
+@test_throws ErrorException compiled_addf(C_NULL, 1)
+@test compiled_addf(0.5, 5.0e-323) === 0.5
+@test_throws ErrorException compiled_addf(im, im)
+@test_throws ErrorException compiled_addf(true, true)
+
+function compiled_conv{T}(::Type{T}, x)
+    t = Core.Intrinsics.trunc_int(T, x)
+    z = Core.Intrinsics.zext_int(typeof(x), t)
+    s = Core.Intrinsics.sext_int(typeof(x), t)
+    fpt = Core.Intrinsics.fptrunc(T, x)
+    fpe = Core.Intrinsics.fpext(typeof(x), fpt)
+    return (t, z, s, fpt, fpe)
+end
+@test compiled_conv(UInt32, Int64(0x8000_0000)) ==
+    (0x80000000, Int64(0x80000000), -Int64(0x80000000), 0x00000000, 0)
+@test compiled_conv(UInt32, UInt64(0xC000_BA98_8765_4321)) ==
+    (0x87654321, 0x0000000087654321, 0xffffffff87654321, 0xc005d4c4, 0xc000ba9880000000)
+@test_throws ErrorException compiled_conv(Bool, im)
diff --git a/test/llvmcall.jl b/test/llvmcall.jl
index 2b29c7e3d43ca..b17012ce38fd8 100644
--- a/test/llvmcall.jl
+++ b/test/llvmcall.jl
@@ -64,10 +64,10 @@ baremodule PlusTest
 end

 # issue #11800
-@test eval(Expr(:call,Core.Intrinsics.llvmcall,
+@test_throws ErrorException eval(Expr(:call,Core.Intrinsics.llvmcall,
     """%3 = add i32 %1, %0
        ret i32 %3""", Int32, Tuple{Int32, Int32},
-        Int32(1), Int32(2))) == 3
+        Int32(1), Int32(2))) # llvmcall must be compiled to be called

 # Test whether declarations work properly
 function undeclared_ceil(x::Float64)
diff --git a/test/llvmcall2.jl b/test/llvmcall2.jl
new file mode 100644
index 0000000000000..e7ad0e714e7c8
--- /dev/null
+++ b/test/llvmcall2.jl
@@ -0,0 +1,36 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+function declared_floor(x::Float64)
+    return ccall("llvm.floor.f64", llvmcall, Float64, (Float64,), x)
+end
+@test declared_floor(4.2) == 4.0
+ir = sprint(io->code_llvm(io, declared_floor, Tuple{Float64}))
+@test contains(ir, "call double @llvm.floor.f64") # should be inlined
+
+function doubly_declared_floor(x::Float64)
+    a = ccall("llvm.floor.f64", llvmcall, Float64, (Float64,), x)
+    b = ccall("llvm.floor.f64", llvmcall, Float64, (Float64,), x + 1) - 1
+    return a + b
+end
+@test doubly_declared_floor(4.2) == 8.0
+
+function doubly_declared2_trunc(x::Float64)
+    a = ccall("llvm.trunc.f64", llvmcall, Float64, (Float64,), x)
+    b = ccall("llvm.trunc.f64", llvmcall, Float64, (Float64,), x + 1) - 1
+    return a + b
+end
+@test doubly_declared2_trunc(4.2) == 8.0
+
+# Test for single line
+function declared_ceil(x::Float64)
+    return ccall("llvm.ceil.f64", llvmcall, Float64, (Float64,), x)
+end
+@test declared_ceil(4.2) == 5.0
+
+# Test for multiple lines
+function ceilfloor(x::Float64)
+    a = ccall("llvm.ceil.f64", llvmcall, Float64, (Float64,), x)
+    b = ccall("llvm.floor.f64", llvmcall, Float64, (Float64,), a)
+    return b
+end
+@test ceilfloor(7.4) == 8.0
diff --git a/test/parse.jl b/test/parse.jl
index 2b8d6c887cec7..437354b0e4df8 100644
--- a/test/parse.jl
+++ b/test/parse.jl
@@ -924,3 +924,7 @@ let
 end
 @test c8925 == 3 && isconst(:c8925)
 @test d8925 == 4 && isconst(:d8925)
+
+# issue #18754: parse ccall as a regular function
+@test parse("ccall([1], 2)[3]") == Expr(:ref, Expr(:call, :ccall, Expr(:vect, 1), 2), 3)
+@test parse("ccall(a).member") == Expr(:., Expr(:call, :ccall, :a), QuoteNode(:member))
diff --git a/test/reflection.jl b/test/reflection.jl
index 401ee3587c087..19530ff08c477 100644
--- a/test/reflection.jl
+++ b/test/reflection.jl
@@ -95,12 +95,11 @@ str = String(take!(iob))
 module ImportIntrinsics15819
 # Make sure changing the lookup path of an intrinsic doesn't break
 # the heuristic for type instability warning.
-# This can be any intrinsic that needs boxing
-import Core.Intrinsics: sqrt_llvm, box, unbox
+import Core.Intrinsics: sqrt_llvm, bitcast
 # Use import
-sqrt15819(x::Float64) = box(Float64, sqrt_llvm(unbox(Float64, x)))
+sqrt15819(x::Float64) = bitcast(Float64, sqrt_llvm(x))
 # Use fully qualified name
-sqrt15819(x::Float32) = box(Float32, Core.Intrinsics.sqrt_llvm(unbox(Float32, x)))
+sqrt15819(x::Float32) = bitcast(Float32, Core.Intrinsics.sqrt_llvm(x))
 end

 foo11122(x) = @fastmath x - 1.0