Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement MMTk write barrier #11

Merged
merged 3 commits into from
May 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/jl_exported_funcs.inc
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,8 @@
XX(jl_gc_pool_alloc) \
XX(jl_gc_queue_multiroot) \
XX(jl_gc_queue_root) \
XX(jl_gc_wb1_noinline) \
XX(jl_gc_wb2_noinline) \
XX(jl_gc_safepoint) \
XX(jl_gc_schedule_foreign_sweepfunc) \
XX(jl_gc_set_cb_notify_external_alloc) \
Expand Down
16 changes: 16 additions & 0 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -961,17 +961,23 @@ STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_
}

#else // MMTK_GC
// MMTk's write barrier. This is the full write barrier, including both the fastpath and the slowpath.
// TODO: We should inline the fastpath in the following functions, and only call the slowpath.
// Forward declaration only: the wrappers below call it, and its definition appears further down in this header.
STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSAFEPOINT;

// MMTk build of jl_gc_wb: forwards `parent` and `ptr` unchanged to the full
// MMTk write barrier (mmtk_gc_wb_full).
STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
mmtk_gc_wb_full(parent, ptr);
}

// MMTk build of jl_gc_wb_back: invokes the full MMTk write barrier with `ptr`
// in the parent position and a null pointer in the target position.
STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t*
{
mmtk_gc_wb_full(ptr, (void*)0);
}

// MMTk build of jl_gc_multi_wb: invokes the full MMTk write barrier on `parent`
// with a null target. The `ptr` argument is accepted but not forwarded.
STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
{
mmtk_gc_wb_full(parent, (void*)0);
}
#endif // MMTK_GC

Expand Down Expand Up @@ -2268,6 +2274,16 @@ typedef struct {
} jl_cgparams_t;
extern JL_DLLEXPORT int jl_default_debug_info_kind;

#ifdef MMTK_GC
// Barrier routine invoked by mmtk_gc_wb_full; only declared here, its
// definition is not part of this header.
extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr);

// Full MMTk write barrier: hands `parent` and `ptr` to
// mmtk_object_reference_write_post together with the mutator pointer stored in
// the current task's thread-local state.
STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
    // current task -> its ptls -> the MMTk mutator pointer kept there
    mmtk_object_reference_write_post(jl_current_task->ptls->mmtk_mutator_ptr, parent, ptr);
}
#endif

#ifdef __cplusplus
}
#endif
Expand Down
5 changes: 5 additions & 0 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz);
JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int pool_offset, int osize, void* ty);
JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t allocsz);
extern void post_alloc(void* mutator, void* obj, size_t bytes, int allocator);
extern uint8_t mmtk_needs_write_barrier(void);
#endif // MMTK_GC
JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT;
extern uv_mutex_t gc_perm_lock;
Expand Down Expand Up @@ -616,12 +617,16 @@ STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOT

#else // MMTK_GC

// TODO: We should inline fastpath in the following functions, and only call slowpath.

// MMTk build of jl_gc_wb_binding: forwards the binding (as the parent) and
// `val` (as the target) to the full MMTk write barrier.
STATIC_INLINE void jl_gc_wb_binding(jl_binding_t *bnd, void *val) JL_NOTSAFEPOINT // val isa jl_value_t*
{
mmtk_gc_wb_full(bnd, val);
}

// MMTk build of jl_gc_wb_buf: invokes the full MMTk write barrier on `parent`
// with a null target. `bufptr` and `minsz` are accepted but not used here.
STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOTSAFEPOINT // parent isa jl_value_t*
{
mmtk_gc_wb_full(parent, (void*)0);
}
#endif // MMTK_GC

Expand Down
49 changes: 48 additions & 1 deletion src/llvm-final-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ struct FinalLowerGC: private JuliaPassContext {
Function *queueRootFunc;
Function *poolAllocFunc;
Function *bigAllocFunc;
#ifdef MMTK_GC
Function *writeBarrier1Func;
Function *writeBarrier2Func;
#endif
Instruction *pgcstack;

// Lowers a `julia.new_gc_frame` intrinsic.
Expand All @@ -70,6 +74,11 @@ struct FinalLowerGC: private JuliaPassContext {

// Lowers a `julia.safepoint` intrinsic.
Value *lowerSafepoint(CallInst *target, Function &F);

#ifdef MMTK_GC
Value *lowerWriteBarrier1(CallInst *target, Function &F);
Value *lowerWriteBarrier2(CallInst *target, Function &F);
#endif
};

Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
Expand Down Expand Up @@ -204,6 +213,22 @@ Value *FinalLowerGC::lowerSafepoint(CallInst *target, Function &F)
return load;
}

#ifdef MMTK_GC
// Lowers a one-argument write barrier intrinsic call by retargeting it, in
// place, to the function held in `writeBarrier1Func`.
// Returns `target` itself: the existing call instruction is rewritten rather
// than replaced by a new one. `F` is not used.
Value *FinalLowerGC::lowerWriteBarrier1(CallInst *target, Function &F)
{
assert(target->arg_size() == 1);
target->setCalledFunction(writeBarrier1Func);
return target;
}

// Lowers a two-argument write barrier intrinsic call by retargeting it, in
// place, to the function held in `writeBarrier2Func`.
// Returns `target` itself: the existing call instruction is rewritten rather
// than replaced by a new one. `F` is not used.
Value *FinalLowerGC::lowerWriteBarrier2(CallInst *target, Function &F)
{
assert(target->arg_size() == 2);
target->setCalledFunction(writeBarrier2Func);
return target;
}
#endif

Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
{
++GCAllocBytesCount;
Expand Down Expand Up @@ -311,8 +336,13 @@ bool FinalLowerGC::doInitialization(Module &M) {
queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot);
poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc);
bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc);

#ifdef MMTK_GC
writeBarrier1Func = getOrDeclare(jl_well_known::GCWriteBarrier1);
writeBarrier2Func = getOrDeclare(jl_well_known::GCWriteBarrier2);
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func};
#else
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc};
#endif
unsigned j = 0;
for (unsigned i = 0; i < sizeof(functionList) / sizeof(void*); i++) {
if (!functionList[i])
Expand All @@ -328,8 +358,13 @@ bool FinalLowerGC::doInitialization(Module &M) {

bool FinalLowerGC::doFinalization(Module &M)
{
#ifdef MMTK_GC
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func};
queueRootFunc = poolAllocFunc = bigAllocFunc = writeBarrier1Func = writeBarrier2Func = nullptr;
#else
GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc};
queueRootFunc = poolAllocFunc = bigAllocFunc = nullptr;
#endif
auto used = M.getGlobalVariable("llvm.compiler.used");
if (!used)
return false;
Expand Down Expand Up @@ -399,6 +434,10 @@ bool FinalLowerGC::runOnFunction(Function &F)
auto GCAllocBytesFunc = getOrNull(jl_intrinsics::GCAllocBytes);
auto queueGCRootFunc = getOrNull(jl_intrinsics::queueGCRoot);
auto safepointFunc = getOrNull(jl_intrinsics::safepoint);
#ifdef MMTK_GC
auto writeBarrier1Func = getOrNull(jl_intrinsics::writeBarrier1);
auto writeBarrier2Func = getOrNull(jl_intrinsics::writeBarrier2);
#endif

// Lower all calls to supported intrinsics.
for (BasicBlock &BB : F) {
Expand Down Expand Up @@ -432,6 +471,14 @@ bool FinalLowerGC::runOnFunction(Function &F)
else if (callee == queueGCRootFunc) {
replaceInstruction(CI, lowerQueueGCRoot(CI, F), it);
}
#ifdef MMTK_GC
else if (callee == writeBarrier1Func) {
replaceInstruction(CI, lowerWriteBarrier1(CI, F), it);
}
else if (callee == writeBarrier2Func) {
replaceInstruction(CI, lowerWriteBarrier2(CI, F), it);
}
#endif
else if (callee == safepointFunc) {
lowerSafepoint(CI, F);
it = CI->eraseFromParent();
Expand Down
44 changes: 44 additions & 0 deletions src/llvm-late-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2512,6 +2512,50 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
}
IRBuilder<> builder(CI);
builder.SetCurrentDebugLocation(CI->getDebugLoc());
#ifndef MMTK_GC
// Non-MMTk build: expand the write barrier inline at the call site.
// NOTE(review): the statements immediately following the closing #endif appear to
// repeat this same sequence (parBits / parOldMarked / mayTrigTerm) unconditionally.
// If so, this branch redeclares those locals in the same scope, and the MMTk build
// still emits the stock barrier after its own. Confirm against the full function.
// Outer test: the low two tag bits of the parent must both be set.
auto parBits = builder.CreateAnd(EmitLoadTag(builder, parent), 3);
auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, 3));
auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false);
builder.SetInsertPoint(mayTrigTerm);
// Inner test: OR together "lowest tag bit is clear" over every child operand
// (call operands 1 .. arg_size()-1; operand 0 is not visited by this loop).
Value *anyChldNotMarked = NULL;
for (unsigned i = 1; i < CI->arg_size(); i++) {
Value *child = CI->getArgOperand(i);
Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, child), 1);
Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0));
anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked;
}
assert(anyChldNotMarked); // handled by all_of test above
MDBuilder MDB(parent->getContext());
// Branch weights {1, 9} are attached to the conditional branch created for the inner test.
SmallVector<uint32_t, 2> Weights{1, 9};
auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false,
MDB.createBranchWeights(Weights));
builder.SetInsertPoint(trigTerm);
// Only reached when both tests pass: queue the parent via the queueGCRoot intrinsic.
if (CI->getCalledOperand() == write_barrier_func) {
builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
}
else {
assert(false);
}
#else
// MMTk build: no inline expansion; emit a call to a write barrier intrinsic instead.
// Unlike the non-MMTk branch, a callee other than write_barrier_func is silently
// ignored here (there is no assert).
if (CI->getCalledOperand() == write_barrier_func) {
// Disabled alternative kept for reference: choose the one- or two-argument
// intrinsic based on the number of call operands.
// if (CI->arg_size() == 2) {
// // parent, target
// Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier2);
// builder.CreateCall(wb_func, { parent, CI->getArgOperand(1) }); // We need to be careful about arg1, which may not match the type for wb_func. We probably need a bitcast
// } else {
// // parent and many targets
// Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier1);
// builder.CreateCall(wb_func, { parent });
// }
// mmtk_needs_write_barrier() is called here, while this pass runs, so its result
// decides whether a barrier call is emitted into the IR at all.
// NOTE(review): the meaning of the value 1 is not visible here; presumably it
// identifies a barrier kind that only needs the parent object. Confirm against
// the MMTk binding.
auto barrier = mmtk_needs_write_barrier();
if (barrier == 1) {
// We only care about parent
Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier1);
builder.CreateCall(wb_func, { parent });
}
}
#endif

auto parBits = builder.CreateAnd(EmitLoadTag(builder, parent), 3);
auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, 3));
auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false);
Expand Down
67 changes: 67 additions & 0 deletions src/llvm-pass-helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ namespace jl_intrinsics {
static const char *POP_GC_FRAME_NAME = "julia.pop_gc_frame";
static const char *QUEUE_GC_ROOT_NAME = "julia.queue_gc_root";
static const char *SAFEPOINT_NAME = "julia.safepoint";
#ifdef MMTK_GC
static const char *WRITE_BARRIER_1_NAME = "julia.write_barrier1_noinline";
static const char *WRITE_BARRIER_2_NAME = "julia.write_barrier2_noinline";
#endif

// Annotates a function with attributes suitable for GC allocation
// functions. Specifically, the return value is marked noalias and nonnull.
Expand Down Expand Up @@ -223,12 +227,45 @@ namespace jl_intrinsics {
intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
return intrinsic;
});

#ifdef MMTK_GC
// `julia.write_barrier1_noinline`: a void intrinsic taking a single
// `T_prjlvalue` argument.
const IntrinsicDescription writeBarrier1(
    WRITE_BARRIER_1_NAME,
    [](const JuliaPassContext &context) {
        // Signature: void (T_prjlvalue), not variadic.
        auto *voidTy = Type::getVoidTy(context.getLLVMContext());
        auto *signature = FunctionType::get(voidTy, { context.T_prjlvalue }, false);
        auto *decl = Function::Create(signature, Function::ExternalLinkage, WRITE_BARRIER_1_NAME);
        decl->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
        return decl;
    });
// `julia.write_barrier2_noinline`: a void intrinsic taking two `T_prjlvalue`
// arguments.
const IntrinsicDescription writeBarrier2(
    WRITE_BARRIER_2_NAME,
    [](const JuliaPassContext &context) {
        // Signature: void (T_prjlvalue, T_prjlvalue), not variadic.
        auto *voidTy = Type::getVoidTy(context.getLLVMContext());
        auto *signature = FunctionType::get(
            voidTy, { context.T_prjlvalue, context.T_prjlvalue }, false);
        auto *decl = Function::Create(signature, Function::ExternalLinkage, WRITE_BARRIER_2_NAME);
        decl->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
        return decl;
    });
#endif
}

namespace jl_well_known {
static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc);
static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc);
static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root);
#ifdef MMTK_GC
static const char *GC_WB_1_NAME = XSTR(jl_gc_wb1_noinline);
static const char *GC_WB_2_NAME = XSTR(jl_gc_wb2_noinline);
#endif

using jl_intrinsics::addGCAllocAttributes;

Expand Down Expand Up @@ -276,4 +313,34 @@ namespace jl_well_known {
func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
return func;
});

#ifdef MMTK_GC
// `jl_gc_wb1_noinline`: declared as a void function taking a single
// `T_prjlvalue` argument.
const WellKnownFunctionDescription GCWriteBarrier1(
    GC_WB_1_NAME,
    [](const JuliaPassContext &context) {
        // Signature: void (T_prjlvalue), not variadic.
        auto *voidTy = Type::getVoidTy(context.getLLVMContext());
        auto *signature = FunctionType::get(voidTy, { context.T_prjlvalue }, false);
        auto *decl = Function::Create(signature, Function::ExternalLinkage, GC_WB_1_NAME);
        decl->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
        return decl;
    });

// `jl_gc_wb2_noinline`: declared as a void function taking two `T_prjlvalue`
// arguments.
const WellKnownFunctionDescription GCWriteBarrier2(
    GC_WB_2_NAME,
    [](const JuliaPassContext &context) {
        // Signature: void (T_prjlvalue, T_prjlvalue), not variadic.
        auto *voidTy = Type::getVoidTy(context.getLLVMContext());
        auto *signature = FunctionType::get(
            voidTy, { context.T_prjlvalue, context.T_prjlvalue }, false);
        auto *decl = Function::Create(signature, Function::ExternalLinkage, GC_WB_2_NAME);
        decl->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
        return decl;
    });
#endif
}
10 changes: 10 additions & 0 deletions src/llvm-pass-helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,11 @@ namespace jl_intrinsics {

// `julia.safepoint`: an intrinsic that triggers a GC safepoint.
extern const IntrinsicDescription safepoint;

#ifdef MMTK_GC
// `julia.write_barrier1_noinline`: a write barrier intrinsic that takes one object argument.
extern const IntrinsicDescription writeBarrier1;
// `julia.write_barrier2_noinline`: a write barrier intrinsic that takes two object arguments.
extern const IntrinsicDescription writeBarrier2;
#endif
}

// A namespace for well-known Julia runtime function descriptions.
Expand All @@ -149,6 +154,11 @@ namespace jl_well_known {

// `jl_gc_queue_root`: queues a GC root.
extern const WellKnownFunctionDescription GCQueueRoot;

#ifdef MMTK_GC
// `jl_gc_wb1_noinline`: out-of-line write barrier taking one object argument.
extern const WellKnownFunctionDescription GCWriteBarrier1;
// `jl_gc_wb2_noinline`: out-of-line write barrier taking two object arguments.
extern const WellKnownFunctionDescription GCWriteBarrier2;
#endif
}

#endif
11 changes: 11 additions & 0 deletions src/mmtk-gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,17 @@ void objprofile_reset(void)
{
}

// Out-of-line write barrier taking only the parent object; forwards to jl_gc_wb_back.
// NOTE(review): this was previously described as "only used for debugging", but the
// final GC lowering pass declares `jl_gc_wb1_noinline` as a well-known function and
// retargets write barrier intrinsic calls to it, so generated code appears to call
// it as well. Confirm.
JL_DLLEXPORT void jl_gc_wb1_noinline(const void *parent) JL_NOTSAFEPOINT
{
jl_gc_wb_back(parent);
}

// Out-of-line write barrier taking a parent and a target pointer; forwards both
// unchanged to jl_gc_wb.
JL_DLLEXPORT void jl_gc_wb2_noinline(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
jl_gc_wb(parent, ptr);
}

void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset)
{
jl_ptls_t ptls = jl_current_task->ptls;
Expand Down