Skip to content

Commit

Permalink
A few cleanup/optimizations related to tag handling in codegen
Browse files Browse the repository at this point in the history
* Create `julia.typeof` and `julia.write_barrier` intrinsics

  So that tag access is easier for the llvm passes to handle.
  This also completely hides the tag access from LLVM before lowering of these intrinsics
  which makes it possible to mark the write barrier as `inaccessiblememonly` and enable
  store to load forwarding on mutable types.

* Mark `jl_gc_queue_root` as `inaccessiblemem_or_argmemonly`.

  Should also have a positive impact on LLVM memory access optimizations.

* In non-imaging mode, use the pointer literals directly for union type.

* Remove old code that check whether the child is NULL before running the write barrier

  This is not needed anymore.

* Mark functions returning julia object as `nonnull` return.

  This can avoid `NULL` check together with the improved store to load forwarding.

* Remove `jlnewbits_func` which is not used in codegen anymore.

* Fix `LateLowerGCFrame` initialization

  `runOnFunction` shouldn't modify the `Module`.

The change should also make it easier to do more allocation optmization in LLVM for objects
with object reference as fields.
  • Loading branch information
yuyichao committed Oct 23, 2017
1 parent fc6b2ba commit 06002c9
Show file tree
Hide file tree
Showing 6 changed files with 306 additions and 141 deletions.
141 changes: 53 additions & 88 deletions src/cgutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ static Function *function_proto(Function *F, Module *M = nullptr)
F->setPersonalityFn(nullptr);
}

// FunctionType does not include any attributes. Copy them over manually
// as codegen may make decisions based on the presence of certain attributes
NewF->copyAttributesFrom(F);
// FunctionType does not include any attributes. Copy them over manually
// as codegen may make decisions based on the presence of certain attributes
NewF->copyAttributesFrom(F);

if (OldPersonalityFn)
F->setPersonalityFn(OldPersonalityFn);
Expand Down Expand Up @@ -720,34 +720,12 @@ static Value *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, ssize_t n, MDNode
return tbaa_decorate(tbaa, ctx.builder.CreateLoad(emit_bitcast(ctx, vptr, ptype)));
}

static Value *emit_typeptr_addr(jl_codectx_t &ctx, Value *p)
{
ssize_t offset = (sizeof(jl_taggedvalue_t) -
offsetof(jl_taggedvalue_t, type)) / sizeof(jl_value_t*);
return emit_nthptr_addr(ctx, p, -offset);
}

static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v);

static Value* mask_gc_bits(jl_codectx_t &ctx, Value *tag)
{
return ctx.builder.CreateIntToPtr(ctx.builder.CreateAnd(
ctx.builder.CreatePtrToInt(tag, T_size),
ConstantInt::get(T_size, ~(uintptr_t)15)),
tag->getType());
}

static Value *emit_typeof(jl_codectx_t &ctx, Value *tt)
{
assert(tt != NULL && !isa<AllocaInst>(tt) && "expected a conditionally boxed value");
// given p, a jl_value_t*, compute its type tag
// The raw address is not GC-safe to load from as it may have mask bits set.
// Note that this gives the optimizer license to not root this value. That
// is fine however, since leaf types are not GCed at the moment. Should
// that ever change, this may have to go through a special intrinsic.
Value *addr = emit_bitcast(ctx, emit_typeptr_addr(ctx, tt), T_ppjlvalue);
tt = tbaa_decorate(tbaa_tag, ctx.builder.CreateLoad(T_pjlvalue, addr));
return maybe_decay_untracked(mask_gc_bits(ctx, tt));
return ctx.builder.CreateCall(prepare_call(jl_typeof_func), {tt});
}

static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p)
Expand All @@ -760,36 +738,57 @@ static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p)
}
if (p.TIndex) {
Value *tindex = ctx.builder.CreateAnd(p.TIndex, ConstantInt::get(T_int8, 0x7f));
Value *pdatatype;
unsigned counter;
counter = 0;
unsigned counter = 0;
bool allunboxed = for_each_uniontype_small(
[&](unsigned idx, jl_datatype_t *jt) { },
p.typ,
counter);
if (allunboxed)
pdatatype = decay_derived(Constant::getNullValue(T_ppjlvalue));
else {
// See note above in emit_typeof(Value*), we can't tell the system
// about this until we've cleared the GC bits.
pdatatype = emit_bitcast(ctx, emit_typeptr_addr(ctx, p.Vboxed), T_ppjlvalue);
}
Value *datatype_or_p = (imaging_mode ? Constant::getNullValue(T_ppjlvalue) :
Constant::getNullValue(T_prjlvalue));
counter = 0;
for_each_uniontype_small(
[&](unsigned idx, jl_datatype_t *jt) {
Value *cmp = ctx.builder.CreateICmpEQ(tindex, ConstantInt::get(T_int8, idx));
pdatatype = ctx.builder.CreateSelect(cmp,
decay_derived(literal_pointer_val_slot(ctx, (jl_value_t*)jt)),
pdatatype);
},
p.typ,
counter);
Value *datatype = tbaa_decorate(allunboxed ? tbaa_const : tbaa_tag,
ctx.builder.CreateLoad(T_pjlvalue, pdatatype));
if (!allunboxed)
datatype = mask_gc_bits(ctx, datatype);
datatype = maybe_decay_untracked(datatype);
return mark_julia_type(ctx, datatype, true, jl_datatype_type);
[&](unsigned idx, jl_datatype_t *jt) {
Value *cmp = ctx.builder.CreateICmpEQ(tindex, ConstantInt::get(T_int8, idx));
Value *ptr;
if (imaging_mode) {
ptr = literal_pointer_val_slot(ctx, (jl_value_t*)jt);
}
else {
ptr = maybe_decay_untracked(literal_pointer_val(ctx, (jl_value_t*)jt));
}
datatype_or_p = ctx.builder.CreateSelect(cmp, ptr, datatype_or_p);
},
p.typ,
counter);
auto emit_unboxty = [&] () -> Value* {
if (imaging_mode)
return maybe_decay_untracked(
tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(T_pjlvalue, datatype_or_p)));
return datatype_or_p;
};
Value *res;
if (!allunboxed) {
Value *isnull = ctx.builder.CreateIsNull(datatype_or_p);
BasicBlock *boxBB = BasicBlock::Create(jl_LLVMContext, "boxed", ctx.f);
BasicBlock *unboxBB = BasicBlock::Create(jl_LLVMContext, "unboxed", ctx.f);
BasicBlock *mergeBB = BasicBlock::Create(jl_LLVMContext, "merge", ctx.f);
ctx.builder.CreateCondBr(isnull, boxBB, unboxBB);
ctx.builder.SetInsertPoint(boxBB);
auto boxTy = emit_typeof(ctx, p.Vboxed);
ctx.builder.CreateBr(mergeBB);
ctx.builder.SetInsertPoint(unboxBB);
auto unboxTy = emit_unboxty();
ctx.builder.CreateBr(mergeBB);
ctx.builder.SetInsertPoint(mergeBB);
auto phi = ctx.builder.CreatePHI(T_prjlvalue, 2);
phi->addIncoming(boxTy, boxBB);
phi->addIncoming(unboxTy, unboxBB);
res = phi;
}
else {
res = emit_unboxty();
}
return mark_julia_type(ctx, res, true, jl_datatype_type);
}
jl_value_t *aty = p.typ;
if (jl_is_type_type(aty)) {
Expand Down Expand Up @@ -2281,43 +2280,9 @@ static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt)
// if ptr is NULL this emits a write barrier _back_
static void emit_write_barrier(jl_codectx_t &ctx, Value *parent, Value *ptr)
{
Value *parenttag = emit_bitcast(ctx, emit_typeptr_addr(ctx, parent), T_psize);
Value *parent_type = tbaa_decorate(tbaa_tag, ctx.builder.CreateLoad(parenttag));
Value *parent_bits = ctx.builder.CreateAnd(parent_type, 3);

// the branch hint does not seem to make it to the generated code
Value *parent_old_marked = ctx.builder.CreateICmpEQ(parent_bits,
ConstantInt::get(T_size, 3));

BasicBlock *cont = BasicBlock::Create(jl_LLVMContext, "cont");
BasicBlock *barrier_may_trigger = BasicBlock::Create(jl_LLVMContext, "wb_may_trigger", ctx.f);
BasicBlock *barrier_trigger = BasicBlock::Create(jl_LLVMContext, "wb_trigger", ctx.f);
ctx.builder.CreateCondBr(parent_old_marked, barrier_may_trigger, cont);

ctx.builder.SetInsertPoint(barrier_may_trigger);
Value *ptr_mark_bit = ctx.builder.CreateAnd(tbaa_decorate(tbaa_tag,
ctx.builder.CreateLoad(emit_bitcast(ctx, emit_typeptr_addr(ctx, ptr), T_psize))), 1);
Value *ptr_not_marked = ctx.builder.CreateICmpEQ(ptr_mark_bit, ConstantInt::get(T_size, 0));
ctx.builder.CreateCondBr(ptr_not_marked, barrier_trigger, cont);
ctx.builder.SetInsertPoint(barrier_trigger);
ctx.builder.CreateCall(prepare_call(queuerootfun), maybe_decay_untracked(emit_bitcast(ctx, parent, T_prjlvalue)));
ctx.builder.CreateBr(cont);
ctx.f->getBasicBlockList().push_back(cont);
ctx.builder.SetInsertPoint(cont);
}

static void emit_checked_write_barrier(jl_codectx_t &ctx, Value *parent, Value *ptr)
{
BasicBlock *cont;
Value *not_null = ctx.builder.CreateICmpNE(mark_callee_rooted(ptr), mark_callee_rooted(V_null));
BasicBlock *if_not_null = BasicBlock::Create(jl_LLVMContext, "wb_not_null", ctx.f);
cont = BasicBlock::Create(jl_LLVMContext, "cont");
ctx.builder.CreateCondBr(not_null, if_not_null, cont);
ctx.builder.SetInsertPoint(if_not_null);
emit_write_barrier(ctx, parent, ptr);
ctx.builder.CreateBr(cont);
ctx.f->getBasicBlockList().push_back(cont);
ctx.builder.SetInsertPoint(cont);
parent = maybe_decay_untracked(emit_bitcast(ctx, parent, T_prjlvalue));
ptr = maybe_decay_untracked(emit_bitcast(ctx, ptr, T_prjlvalue));
ctx.builder.CreateCall(prepare_call(jl_write_barrier_func), {parent, ptr});
}

static void emit_setfield(jl_codectx_t &ctx,
Expand All @@ -2339,7 +2304,7 @@ static void emit_setfield(jl_codectx_t &ctx,
tbaa_decorate(strct.tbaa, ctx.builder.CreateStore(r,
emit_bitcast(ctx, addr, T_pprjlvalue)));
if (wb && strct.isboxed)
emit_checked_write_barrier(ctx, boxed(ctx, strct), r);
emit_write_barrier(ctx, boxed(ctx, strct), r);
}
else if (jl_is_uniontype(jfty)) {
int fsz = jl_field_size(sty, idx0);
Expand Down
Loading

0 comments on commit 06002c9

Please sign in to comment.