Skip to content

Commit

Permalink
optimization: hoist array metadata access out of loops (sometimes)
Browse files Browse the repository at this point in the history
this helps most matrix inner loop code, the laplace benchmark most of all.
  • Loading branch information
JeffBezanson committed Mar 25, 2013
1 parent 50258e5 commit 57e010e
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 28 deletions.
60 changes: 54 additions & 6 deletions src/cgutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -577,9 +577,18 @@ static Value *emit_arraysize(Value *t, Value *dim)
return builder.CreatePtrToInt(dbits, T_size);
}

static Value *emit_arraysize(Value *t, int dim)
{
return emit_arraysize(t, ConstantInt::get(T_int32, dim));
static jl_arrayvar_t *arrayvar_for(jl_value_t *ex, jl_codectx_t *ctx)
{
if (ex == NULL) return NULL;
char *aname=NULL;
if (jl_is_symbol(ex))
aname = ((jl_sym_t*)ex)->name;
else if (jl_is_symbolnode(ex))
aname = jl_symbolnode_sym(ex)->name;
if (aname && ctx->arrayvars->find(aname) != ctx->arrayvars->end()) {
return &(*ctx->arrayvars)[aname];
}
return NULL;
}

static Value *emit_arraylen(Value *t)
Expand All @@ -588,18 +597,57 @@ static Value *emit_arraylen(Value *t)
return builder.CreatePtrToInt(lenbits, T_size);
}

static Value *emit_arraylen(Value *t, jl_value_t *ex, jl_codectx_t *ctx)
{
jl_arrayvar_t *av = arrayvar_for(ex, ctx);
if (av!=NULL)
return builder.CreateLoad(av->len);
return emit_arraylen(t);
}

static Value *emit_arrayptr(Value *t)
{
return emit_nthptr(t, 1);
}

static Value *emit_arrayptr(Value *t, jl_value_t *ex, jl_codectx_t *ctx)
{
jl_arrayvar_t *av = arrayvar_for(ex, ctx);
if (av!=NULL)
return builder.CreateLoad(av->dataptr);
return emit_arrayptr(t);
}

static Value *emit_arraysize(Value *t, int dim)
{
return emit_arraysize(t, ConstantInt::get(T_int32, dim));
}

static Value *emit_arraysize(Value *t, jl_value_t *ex, int dim, jl_codectx_t *ctx)
{
if (dim == 1) {
jl_arrayvar_t *av = arrayvar_for(ex, ctx);
if (av!=NULL)
return builder.CreateLoad(av->nr);
}
return emit_arraysize(t, dim);
}

static void assign_arrayvar(jl_arrayvar_t &av, Value *ar)
{
builder.CreateStore(builder.CreateBitCast(emit_arrayptr(ar),T_pint8),
av.dataptr);
builder.CreateStore(emit_arraylen(ar), av.len);
builder.CreateStore(emit_arraysize(ar,1), av.nr);
}

static Value *data_pointer(Value *x)
{
return builder.CreateGEP(builder.CreateBitCast(x, jl_ppvalue_llvmt),
ConstantInt::get(T_size, 1));
}

static Value *emit_array_nd_index(Value *a, size_t nd, jl_value_t **args,
static Value *emit_array_nd_index(Value *a, jl_value_t *ex, size_t nd, jl_value_t **args,
size_t nidxs, jl_codectx_t *ctx)
{
Value *i = ConstantInt::get(T_size, 0);
Expand All @@ -614,7 +662,7 @@ static Value *emit_array_nd_index(Value *a, size_t nd, jl_value_t **args,
i = builder.CreateAdd(i, builder.CreateMul(ii, stride));
if (k < nidxs-1) {
Value *d =
k >= nd ? ConstantInt::get(T_size, 1) : emit_arraysize(a, k+1);
k >= nd ? ConstantInt::get(T_size, 1) : emit_arraysize(a, ex, k+1, ctx);
#if CHECK_BOUNDS==1
BasicBlock *okBB = BasicBlock::Create(getGlobalContext(), "ib");
// if !(i < d) goto error
Expand All @@ -626,7 +674,7 @@ static Value *emit_array_nd_index(Value *a, size_t nd, jl_value_t **args,
}
}
#if CHECK_BOUNDS==1
Value *alen = emit_arraylen(a);
Value *alen = emit_arraylen(a, ex, ctx);
// if !(i < alen) goto error
builder.CreateCondBr(builder.CreateICmpULT(i, alen), endBB, failBB);

Expand Down
89 changes: 67 additions & 22 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,12 @@ void *jl_function_ptr(jl_function_t *f, jl_value_t *rt, jl_value_t *argt)
return NULL;
}

typedef struct {
Value *dataptr;
Value *len;
Value *nr;
} jl_arrayvar_t;

// information about the context of a piece of code: its enclosing
// function and module, and visible local variables and labels.
typedef struct {
Expand All @@ -350,6 +356,7 @@ typedef struct {
std::map<std::string, bool> *isAssigned;
std::map<std::string, bool> *isCaptured;
std::map<std::string, bool> *escapes;
std::map<std::string, jl_arrayvar_t> *arrayvars;
std::set<jl_sym_t*> *volatilevars;
std::map<std::string, jl_value_t*> *declTypes;
std::map<int, BasicBlock*> *labels;
Expand Down Expand Up @@ -1262,7 +1269,7 @@ static Value *emit_known_call(jl_value_t *ff, jl_value_t **args, size_t nargs,
// todo: also allow e.g. Union of several array types
Value *arg1 = emit_expr(args[1], ctx);
JL_GC_POP();
return emit_arraylen(arg1);
return emit_arraylen(arg1, args[1], ctx);
}
}
else if (f->fptr == &jl_f_arraysize && nargs==2) {
Expand All @@ -1277,7 +1284,7 @@ static Value *emit_known_call(jl_value_t *ff, jl_value_t **args, size_t nargs,
uint32_t idx = (uint32_t)jl_unbox_long(args[2]);
if (idx > 0 && idx <= ndims) {
JL_GC_POP();
return emit_arraysize(ary, idx);
return emit_arraysize(ary, args[1], idx, ctx);
}
else if (idx > ndims) {
JL_GC_POP();
Expand Down Expand Up @@ -1331,14 +1338,14 @@ static Value *emit_known_call(jl_value_t *ff, jl_value_t **args, size_t nargs,
if (jl_is_long(ndp) || nargs==2) {
Value *ary = emit_expr(args[1], ctx);
size_t nd = jl_is_long(ndp) ? jl_unbox_long(ndp) : 1;
Value *idx = emit_array_nd_index(ary, nd, &args[2], nargs-1, ctx);
Value *idx = emit_array_nd_index(ary, args[1], nd, &args[2], nargs-1, ctx);
JL_GC_POP();
if (jl_array_store_unboxed(ety) &&
((jl_datatype_t*)ety)->size == 0) {
jl_new_struct_uninit((jl_datatype_t*)ety);
return literal_pointer_val(((jl_datatype_t*)ety)->instance);
}
return typed_load(emit_arrayptr(ary), idx, ety, ctx);
return typed_load(emit_arrayptr(ary, args[1], ctx), idx, ety, ctx);
}
}
}
Expand All @@ -1361,13 +1368,13 @@ static Value *emit_known_call(jl_value_t *ff, jl_value_t **args, size_t nargs,
if (jl_is_long(ndp) || nargs==3) {
Value *ary = emit_expr(args[1], ctx);
size_t nd = jl_is_long(ndp) ? jl_unbox_long(ndp) : 1;
Value *idx = emit_array_nd_index(ary, nd, &args[3], nargs-2, ctx);
Value *idx = emit_array_nd_index(ary, args[1], nd, &args[3], nargs-2, ctx);
if (jl_array_store_unboxed(ety) &&
((jl_datatype_t*)ety)->size == 0) {
// no-op
}
else {
typed_store(emit_arrayptr(ary), idx,
typed_store(emit_arrayptr(ary,args[1],ctx), idx,
ety==(jl_value_t*)jl_any_type ? emit_expr(args[2],ctx) : emit_unboxed(args[2],ctx),
ety, ctx);
}
Expand Down Expand Up @@ -1678,20 +1685,35 @@ static void emit_assignment(jl_value_t *l, jl_value_t *r, jl_codectx_t *ctx)
assert(false);
jl_binding_t *bnd=NULL;
Value *bp = var_binding_pointer(s, &bnd, true, ctx);
Value *rval;
if (bnd) {
rval = boxed(emit_expr(r, ctx, true));
builder.CreateCall2(jlcheckassign_func,
literal_pointer_val((void*)bnd),
boxed(emit_expr(r, ctx, true)));
rval);
}
else {
bool vol = ctx->volatilevars->find(s) != ctx->volatilevars->end();
Type *vt = bp->getType();
if (vt->isPointerTy() && vt->getContainedType(0)!=jl_pvalue_llvmt)
builder.CreateStore(emit_unbox(vt->getContainedType(0), vt,
emit_unboxed(r, ctx)),
bp, vol);
else
builder.CreateStore(boxed(emit_expr(r, ctx, true)), bp, vol);
jl_value_t *rt = expr_type(r,ctx);
if ((jl_is_symbol(r) || jl_is_symbolnode(r)) && rt == jl_bottom_type) {
// sometimes x = y::None occurs
if (builder.GetInsertBlock()->getTerminator() != NULL)
return;
}
if (vt->isPointerTy() && vt->getContainedType(0)!=jl_pvalue_llvmt) {
rval = emit_unbox(vt->getContainedType(0), vt, emit_unboxed(r, ctx));
}
else {
rval = boxed(emit_expr(r, ctx, true));
}
if (builder.GetInsertBlock()->getTerminator() == NULL) {
builder.CreateStore(rval, bp, vol);
jl_arrayvar_t *av = arrayvar_for(l, ctx);
if (av!=NULL) {
assign_arrayvar(*av, rval);
}
}
}
}

Expand Down Expand Up @@ -2018,6 +2040,23 @@ static AllocaInst *alloc_local(char *name, jl_codectx_t *ctx)
return lv;
}

static void maybe_alloc_arrayvar(char *name, jl_codectx_t *ctx)
{
jl_value_t *jt = (*ctx->declTypes)[name];
if (jl_is_array_type(jt) && jl_is_leaf_type(jt) &&
jl_unbox_long(jl_tparam1(jt)) != 1) {
// TODO: this optimization does not yet work with 1-d arrays, since the
// length and data pointer can change at any time via push!
// we could make it work by reloading the metadata when the array is
// passed to an external function (ideally only impure functions)
jl_arrayvar_t av;
av.dataptr = builder.CreateAlloca(T_pint8);
av.len = builder.CreateAlloca(T_size);
av.nr = builder.CreateAlloca(T_size);
(*ctx->arrayvars)[name] = av;
}
}

// --- generate function bodies ---

extern char *jl_stack_lo;
Expand Down Expand Up @@ -2088,6 +2127,7 @@ static Function *emit_function(jl_lambda_info_t *lam, bool cstyle)
std::map<std::string, bool> isAssigned;
std::map<std::string, bool> isCaptured;
std::map<std::string, bool> escapes;
std::map<std::string, jl_arrayvar_t> arrayvars;
std::set<jl_sym_t*> volvars;
std::map<std::string, jl_value_t*> declTypes;
std::map<int, BasicBlock*> labels;
Expand All @@ -2100,6 +2140,7 @@ static Function *emit_function(jl_lambda_info_t *lam, bool cstyle)
ctx.isAssigned = &isAssigned;
ctx.isCaptured = &isCaptured;
ctx.escapes = &escapes;
ctx.arrayvars = &arrayvars;
ctx.volatilevars = &volvars;
ctx.declTypes = &declTypes;
ctx.labels = &labels;
Expand Down Expand Up @@ -2296,6 +2337,7 @@ static Function *emit_function(jl_lambda_info_t *lam, bool cstyle)
else if (isAssigned[argname] || (va && i==largslen-1)) {
n_roots++;
}
maybe_alloc_arrayvar(argname, &ctx);
}
for(i=0; i < lvarslen; i++) {
char *varname = ((jl_sym_t*)jl_cellref(lvars,i))->name;
Expand All @@ -2305,6 +2347,7 @@ static Function *emit_function(jl_lambda_info_t *lam, bool cstyle)
else {
n_roots++;
}
maybe_alloc_arrayvar(varname, &ctx);
}

// fetch env out of function object if we need it
Expand Down Expand Up @@ -2454,22 +2497,19 @@ static Function *emit_function(jl_lambda_info_t *lam, bool cstyle)
argPtr = builder.CreateGEP(argArray, ConstantInt::get(T_size, i));
}

Value *theArg;
if (specsig)
theArg = argPtr;
else
theArg = builder.CreateLoad(argPtr, false);
Value *lv = localVars[argname];
if (lv == NULL) {
// if this argument hasn't been given space yet, we've decided
// to leave it in the input argument array.
if (specsig)
passedArgumentMap[argname] = argPtr;
else
passedArgumentMap[argname] = builder.CreateLoad(argPtr, false);
passedArgumentMap[argname] = theArg;
//argumentMap[argname] = argPtr;
}
else {
Value *theArg;
if (specsig)
theArg = argPtr;
else
theArg = builder.CreateLoad(argPtr, false);
// keep track of original (boxed) value to avoid re-boxing
passedArgumentMap[argname] = theArg;
if (isBoxed(argname, &ctx)) {
Expand All @@ -2487,6 +2527,11 @@ static Function *emit_function(jl_lambda_info_t *lam, bool cstyle)
theArg),
lv);
}
// get arrayvar data if applicable
if (arrayvars.find(argname) != arrayvars.end()) {
jl_arrayvar_t av = arrayvars[argname];
assign_arrayvar(av, theArg);
}
}

// step 13. allocate rest argument if necessary
Expand Down

0 comments on commit 57e010e

Please sign in to comment.