Skip to content

Commit

Permalink
Implement MMTk write barrier (#11)
Browse files Browse the repository at this point in the history
* Implement MMTk write barrier
* Check which barrier to use in codegen
  • Loading branch information
qinsoon authored May 4, 2023
1 parent 9dbc8fc commit a760a7e
Show file tree
Hide file tree
Showing 8 changed files with 203 additions and 1 deletion.
2 changes: 2 additions & 0 deletions src/jl_exported_funcs.inc
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,8 @@
XX(jl_gc_pool_alloc) \
XX(jl_gc_queue_multiroot) \
XX(jl_gc_queue_root) \
XX(jl_gc_wb1_noinline) \
XX(jl_gc_wb2_noinline) \
XX(jl_gc_safepoint) \
XX(jl_gc_schedule_foreign_sweepfunc) \
XX(jl_gc_set_cb_notify_external_alloc) \
Expand Down
16 changes: 16 additions & 0 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -961,17 +961,23 @@ STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_
}

#else // MMTK_GC
// MMTk's write barrier method. This is the full write barrier, including both the fastpath and the slowpath.
// TODO: We should inline fastpath in the following functions, and only call slowpath.
STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSAFEPOINT;

// MMTk build of jl_gc_wb: hands `parent` and `ptr` through unchanged to
// mmtk_gc_wb_full, i.e. the complete barrier (no separately inlined fastpath).
STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
    mmtk_gc_wb_full(parent, ptr);
}

// MMTk build of jl_gc_wb_back: the object `ptr` is reported to the full
// barrier as the parent, with a null pointer in the written-reference slot
// because no specific target is available here.
STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t*
{
    const void *no_target = (void*)0;
    mmtk_gc_wb_full(ptr, no_target);
}

// MMTk build of jl_gc_multi_wb: only `parent` is passed to the full barrier;
// the written-reference slot is null and `ptr` is deliberately not forwarded.
STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
{
    (void)ptr; // intentionally unused in the MMTk build
    mmtk_gc_wb_full(parent, (void*)0);
}
#endif // MMTK_GC

Expand Down Expand Up @@ -2268,6 +2274,16 @@ typedef struct {
} jl_cgparams_t;
extern JL_DLLEXPORT int jl_default_debug_info_kind;

#ifdef MMTK_GC
// Entry point provided outside this header; receives the mutator handle plus
// the parent object and the reference involved in the write.
extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr);

// Full MMTk write barrier: looks up the current task's thread-local state and
// calls mmtk_object_reference_write_post with that thread's MMTk mutator
// pointer, forwarding `parent` and `ptr` unchanged.
STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
    jl_ptls_t ptls = jl_current_task->ptls;
    mmtk_object_reference_write_post(ptls->mmtk_mutator_ptr, parent, ptr);
}
#endif

#ifdef __cplusplus
}
#endif
Expand Down
5 changes: 5 additions & 0 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz);
JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int pool_offset, int osize, void* ty);
JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t allocsz);
extern void post_alloc(void* mutator, void* obj, size_t bytes, int allocator);
extern uint8_t mmtk_needs_write_barrier(void);
#endif // MMTK_GC
JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT;
extern uv_mutex_t gc_perm_lock;
Expand Down Expand Up @@ -616,12 +617,16 @@ STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOT

#else // MMTK_GC

// TODO: We should inline fastpath in the following functions, and only call slowpath.

// MMTk build of jl_gc_wb_binding: the binding is passed to the full barrier as
// the parent and `val` as the written reference.
STATIC_INLINE void jl_gc_wb_binding(jl_binding_t *bnd, void *val) JL_NOTSAFEPOINT // val isa jl_value_t*
{
    mmtk_gc_wb_full(bnd, val);
}

// MMTk build of jl_gc_wb_buf: only `parent` reaches the full barrier (with a
// null written reference); the buffer pointer and size are not consulted.
STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOTSAFEPOINT // parent isa jl_value_t*
{
    (void)bufptr; // intentionally unused in the MMTk build
    (void)minsz;  // intentionally unused in the MMTk build
    mmtk_gc_wb_full(parent, (void*)0);
}
#endif // MMTK_GC

Expand Down
49 changes: 48 additions & 1 deletion src/llvm-final-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ struct FinalLowerGC: private JuliaPassContext {
Function *queueRootFunc;
Function *poolAllocFunc;
Function *bigAllocFunc;
#ifdef MMTK_GC
Function *writeBarrier1Func;
Function *writeBarrier2Func;
#endif
Instruction *pgcstack;

// Lowers a `julia.new_gc_frame` intrinsic.
Expand All @@ -70,6 +74,11 @@ struct FinalLowerGC: private JuliaPassContext {

// Lowers a `julia.safepoint` intrinsic.
Value *lowerSafepoint(CallInst *target, Function &F);

#ifdef MMTK_GC
Value *lowerWriteBarrier1(CallInst *target, Function &F);
Value *lowerWriteBarrier2(CallInst *target, Function &F);
#endif
};

Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
Expand Down Expand Up @@ -204,6 +213,22 @@ Value *FinalLowerGC::lowerSafepoint(CallInst *target, Function &F)
return load;
}

#ifdef MMTK_GC
// Lowers a one-argument write-barrier intrinsic call in place: the call
// instruction is kept and only its callee is swapped for the runtime function
// cached in `writeBarrier1Func`. Returns the (same) call instruction.
Value *FinalLowerGC::lowerWriteBarrier1(CallInst *target, Function &F)
{
    // The intrinsic carries exactly one operand (the parent object).
    assert(target->arg_size() == 1);

    target->setCalledFunction(writeBarrier1Func);
    return target;
}

// Lowers a two-argument write-barrier intrinsic call in place: the call
// instruction is kept and only its callee is swapped for the runtime function
// cached in `writeBarrier2Func`. Returns the (same) call instruction.
Value *FinalLowerGC::lowerWriteBarrier2(CallInst *target, Function &F)
{
    // The intrinsic carries exactly two operands.
    assert(target->arg_size() == 2);

    target->setCalledFunction(writeBarrier2Func);
    return target;
}
#endif

Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
{
++GCAllocBytesCount;
Expand Down Expand Up @@ -311,8 +336,13 @@ bool FinalLowerGC::doInitialization(Module &M) {
queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot);
poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc);
bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc);

#ifdef MMTK_GC
writeBarrier1Func = getOrDeclare(jl_well_known::GCWriteBarrier1);
writeBarrier2Func = getOrDeclare(jl_well_known::GCWriteBarrier2);
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func};
#else
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc};
#endif
unsigned j = 0;
for (unsigned i = 0; i < sizeof(functionList) / sizeof(void*); i++) {
if (!functionList[i])
Expand All @@ -328,8 +358,13 @@ bool FinalLowerGC::doInitialization(Module &M) {

bool FinalLowerGC::doFinalization(Module &M)
{
#ifdef MMTK_GC
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func};
queueRootFunc = poolAllocFunc = bigAllocFunc = writeBarrier1Func = writeBarrier2Func = nullptr;
#else
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc};
queueRootFunc = poolAllocFunc = bigAllocFunc = nullptr;
#endif
auto used = M.getGlobalVariable("llvm.compiler.used");
if (!used)
return false;
Expand Down Expand Up @@ -399,6 +434,10 @@ bool FinalLowerGC::runOnFunction(Function &F)
auto GCAllocBytesFunc = getOrNull(jl_intrinsics::GCAllocBytes);
auto queueGCRootFunc = getOrNull(jl_intrinsics::queueGCRoot);
auto safepointFunc = getOrNull(jl_intrinsics::safepoint);
#ifdef MMTK_GC
auto writeBarrier1Func = getOrNull(jl_intrinsics::writeBarrier1);
auto writeBarrier2Func = getOrNull(jl_intrinsics::writeBarrier2);
#endif

// Lower all calls to supported intrinsics.
for (BasicBlock &BB : F) {
Expand Down Expand Up @@ -432,6 +471,14 @@ bool FinalLowerGC::runOnFunction(Function &F)
else if (callee == queueGCRootFunc) {
replaceInstruction(CI, lowerQueueGCRoot(CI, F), it);
}
#ifdef MMTK_GC
else if (callee == writeBarrier1Func) {
replaceInstruction(CI, lowerWriteBarrier1(CI, F), it);
}
else if (callee == writeBarrier2Func) {
replaceInstruction(CI, lowerWriteBarrier2(CI, F), it);
}
#endif
else if (callee == safepointFunc) {
lowerSafepoint(CI, F);
it = CI->eraseFromParent();
Expand Down
44 changes: 44 additions & 0 deletions src/llvm-late-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2512,6 +2512,50 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
}
IRBuilder<> builder(CI);
builder.SetCurrentDebugLocation(CI->getDebugLoc());
#ifndef MMTK_GC
auto parBits = builder.CreateAnd(EmitLoadTag(builder, parent), 3);
auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, 3));
auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false);
builder.SetInsertPoint(mayTrigTerm);
Value *anyChldNotMarked = NULL;
for (unsigned i = 1; i < CI->arg_size(); i++) {
Value *child = CI->getArgOperand(i);
Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, child), 1);
Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0));
anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked;
}
assert(anyChldNotMarked); // handled by all_of test above
MDBuilder MDB(parent->getContext());
SmallVector<uint32_t, 2> Weights{1, 9};
auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false,
MDB.createBranchWeights(Weights));
builder.SetInsertPoint(trigTerm);
if (CI->getCalledOperand() == write_barrier_func) {
builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
}
else {
assert(false);
}
#else
if (CI->getCalledOperand() == write_barrier_func) {
// if (CI->arg_size() == 2) {
// // parent, target
// Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier2);
// builder.CreateCall(wb_func, { parent, CI->getArgOperand(1) }); // We need to be careful about arg1, which may not match the type for wb_func. We probably need a bitcast
// } else {
// // parent and many targets
// Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier1);
// builder.CreateCall(wb_func, { parent });
// }
auto barrier = mmtk_needs_write_barrier();
if (barrier == 1) {
// We only care about parent
Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier1);
builder.CreateCall(wb_func, { parent });
}
}
#endif

auto parBits = builder.CreateAnd(EmitLoadTag(builder, parent), 3);
auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, 3));
auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false);
Expand Down
67 changes: 67 additions & 0 deletions src/llvm-pass-helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ namespace jl_intrinsics {
static const char *POP_GC_FRAME_NAME = "julia.pop_gc_frame";
static const char *QUEUE_GC_ROOT_NAME = "julia.queue_gc_root";
static const char *SAFEPOINT_NAME = "julia.safepoint";
#ifdef MMTK_GC
static const char *WRITE_BARRIER_1_NAME = "julia.write_barrier1_noinline";
static const char *WRITE_BARRIER_2_NAME = "julia.write_barrier2_noinline";
#endif

// Annotates a function with attributes suitable for GC allocation
// functions. Specifically, the return value is marked noalias and nonnull.
Expand Down Expand Up @@ -223,12 +227,45 @@ namespace jl_intrinsics {
intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
return intrinsic;
});

#ifdef MMTK_GC
// `julia.write_barrier1_noinline`: write-barrier intrinsic declared as
// `void (T_prjlvalue)` with external linkage.
const IntrinsicDescription writeBarrier1(
    WRITE_BARRIER_1_NAME,
    [](const JuliaPassContext &context) {
        auto *signature = FunctionType::get(
            Type::getVoidTy(context.getLLVMContext()),
            { context.T_prjlvalue },
            false);
        auto *intrinsic = Function::Create(
            signature,
            Function::ExternalLinkage,
            WRITE_BARRIER_1_NAME);
        // Same memory-effect attribute as the neighbouring GC declarations.
        intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
        return intrinsic;
    });
// `julia.write_barrier2_noinline`: write-barrier intrinsic declared as
// `void (T_prjlvalue, T_prjlvalue)` with external linkage.
const IntrinsicDescription writeBarrier2(
    WRITE_BARRIER_2_NAME,
    [](const JuliaPassContext &context) {
        auto *signature = FunctionType::get(
            Type::getVoidTy(context.getLLVMContext()),
            { context.T_prjlvalue, context.T_prjlvalue },
            false);
        auto *intrinsic = Function::Create(
            signature,
            Function::ExternalLinkage,
            WRITE_BARRIER_2_NAME);
        // Same memory-effect attribute as the neighbouring GC declarations.
        intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
        return intrinsic;
    });
#endif
}

namespace jl_well_known {
static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc);
static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc);
static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root);
#ifdef MMTK_GC
static const char *GC_WB_1_NAME = XSTR(jl_gc_wb1_noinline);
static const char *GC_WB_2_NAME = XSTR(jl_gc_wb2_noinline);
#endif

using jl_intrinsics::addGCAllocAttributes;

Expand Down Expand Up @@ -276,4 +313,34 @@ namespace jl_well_known {
func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
return func;
});

#ifdef MMTK_GC
// `jl_gc_wb1_noinline`: runtime write-barrier function declared as
// `void (T_prjlvalue)` with external linkage.
const WellKnownFunctionDescription GCWriteBarrier1(
    GC_WB_1_NAME,
    [](const JuliaPassContext &context) {
        auto *signature = FunctionType::get(
            Type::getVoidTy(context.getLLVMContext()),
            { context.T_prjlvalue },
            false);
        auto *func = Function::Create(
            signature,
            Function::ExternalLinkage,
            GC_WB_1_NAME);
        // Same memory-effect attribute as the neighbouring GC declarations.
        func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
        return func;
    });

// `jl_gc_wb2_noinline`: runtime write-barrier function declared as
// `void (T_prjlvalue, T_prjlvalue)` with external linkage.
const WellKnownFunctionDescription GCWriteBarrier2(
    GC_WB_2_NAME,
    [](const JuliaPassContext &context) {
        auto *signature = FunctionType::get(
            Type::getVoidTy(context.getLLVMContext()),
            { context.T_prjlvalue, context.T_prjlvalue },
            false);
        auto *func = Function::Create(
            signature,
            Function::ExternalLinkage,
            GC_WB_2_NAME);
        // Same memory-effect attribute as the neighbouring GC declarations.
        func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
        return func;
    });
#endif
}
10 changes: 10 additions & 0 deletions src/llvm-pass-helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,11 @@ namespace jl_intrinsics {

// `julia.safepoint`: an intrinsic that triggers a GC safepoint.
extern const IntrinsicDescription safepoint;

#ifdef MMTK_GC
extern const IntrinsicDescription writeBarrier1;
extern const IntrinsicDescription writeBarrier2;
#endif
}

// A namespace for well-known Julia runtime function descriptions.
Expand All @@ -149,6 +154,11 @@ namespace jl_well_known {

// `jl_gc_queue_root`: queues a GC root.
extern const WellKnownFunctionDescription GCQueueRoot;

#ifdef MMTK_GC
extern const WellKnownFunctionDescription GCWriteBarrier1;
extern const WellKnownFunctionDescription GCWriteBarrier2;
#endif
}

#endif
11 changes: 11 additions & 0 deletions src/mmtk-gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,17 @@ void objprofile_reset(void)
{
}

// Exported, out-of-line write barrier taking only the parent object.
// The final GC lowering pass declares this symbol as its one-argument
// write-barrier target, so generated code may call it directly.
// It simply delegates to jl_gc_wb_back.
JL_DLLEXPORT void jl_gc_wb1_noinline(const void *parent) JL_NOTSAFEPOINT
{
    jl_gc_wb_back(parent);
}

// Exported, out-of-line write barrier taking the parent object and the
// written reference. The final GC lowering pass declares this symbol as its
// two-argument write-barrier target. It simply delegates to jl_gc_wb.
JL_DLLEXPORT void jl_gc_wb2_noinline(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
    jl_gc_wb(parent, ptr);
}

void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset)
{
jl_ptls_t ptls = jl_current_task->ptls;
Expand Down

0 comments on commit a760a7e

Please sign in to comment.