From 7e60d460c2cca6aff53917f1ffe8d94107cbed2d Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Tue, 2 May 2023 03:23:02 +0000 Subject: [PATCH 1/2] Implement MMTk write barrier --- src/jl_exported_funcs.inc | 2 + src/julia.h | 16 ++++++++ src/julia_internal.h | 4 ++ src/llvm-final-gc-lowering.cpp | 49 ++++++++++++++++++++++++- src/llvm-late-gc-lowering.cpp | 41 +++++++++++++++++++++ src/llvm-pass-helpers.cpp | 67 ++++++++++++++++++++++++++++++++++ src/llvm-pass-helpers.h | 10 +++++ src/mmtk-gc.c | 11 ++++++ 8 files changed, 199 insertions(+), 1 deletion(-) diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index c475184573faa..b51e55510e172 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -186,6 +186,8 @@ XX(jl_gc_pool_alloc) \ XX(jl_gc_queue_multiroot) \ XX(jl_gc_queue_root) \ + XX(jl_gc_wb1_noinline) \ + XX(jl_gc_wb2_noinline) \ XX(jl_gc_safepoint) \ XX(jl_gc_schedule_foreign_sweepfunc) \ XX(jl_gc_set_cb_notify_external_alloc) \ diff --git a/src/julia.h b/src/julia.h index 8a8624360fc7a..710fd11cf9372 100644 --- a/src/julia.h +++ b/src/julia.h @@ -961,17 +961,23 @@ STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_ } #else // MMTK_GC +// MMTk's write barrier method. This is the full write barrier including fastpath and slowpath. +// TODO: We should inline fastpath in the following functions, and only call slowpath. 
+STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSAFEPOINT; STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT { + mmtk_gc_wb_full(parent, ptr); } STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t* { + mmtk_gc_wb_full(ptr, (void*)0); } STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT { + mmtk_gc_wb_full(parent, (void*)0); } #endif // MMTK_GC @@ -2268,6 +2274,16 @@ typedef struct { } jl_cgparams_t; extern JL_DLLEXPORT int jl_default_debug_info_kind; +#ifdef MMTK_GC +extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr); +STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + mmtk_object_reference_write_post(ptls->mmtk_mutator_ptr, parent, ptr); +} +#endif + #ifdef __cplusplus } #endif diff --git a/src/julia_internal.h b/src/julia_internal.h index b921c63444e86..1d17c957623ab 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -608,12 +608,16 @@ STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOT #else // MMTK_GC +// TODO: We should inline fastpath in the following functions, and only call slowpath. 
+ STATIC_INLINE void jl_gc_wb_binding(jl_binding_t *bnd, void *val) JL_NOTSAFEPOINT // val isa jl_value_t* { + mmtk_gc_wb_full(bnd, val); } STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOTSAFEPOINT // parent isa jl_value_t* { + mmtk_gc_wb_full(parent, (void*)0); } #endif // MMTK_GC diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 5b8eeb49f60ad..a41f69d74b1e5 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -48,6 +48,10 @@ struct FinalLowerGC: private JuliaPassContext { Function *queueRootFunc; Function *poolAllocFunc; Function *bigAllocFunc; +#ifdef MMTK_GC + Function *writeBarrier1Func; + Function *writeBarrier2Func; +#endif Instruction *pgcstack; // Lowers a `julia.new_gc_frame` intrinsic. @@ -70,6 +74,11 @@ struct FinalLowerGC: private JuliaPassContext { // Lowers a `julia.safepoint` intrinsic. Value *lowerSafepoint(CallInst *target, Function &F); + +#ifdef MMTK_GC + Value *lowerWriteBarrier1(CallInst *target, Function &F); + Value *lowerWriteBarrier2(CallInst *target, Function &F); +#endif }; Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F) @@ -204,6 +213,22 @@ Value *FinalLowerGC::lowerSafepoint(CallInst *target, Function &F) return load; } +#ifdef MMTK_GC +Value *FinalLowerGC::lowerWriteBarrier1(CallInst *target, Function &F) +{ + assert(target->arg_size() == 1); + target->setCalledFunction(writeBarrier1Func); + return target; +} + +Value *FinalLowerGC::lowerWriteBarrier2(CallInst *target, Function &F) +{ + assert(target->arg_size() == 2); + target->setCalledFunction(writeBarrier2Func); + return target; +} +#endif + Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) { ++GCAllocBytesCount; @@ -311,8 +336,13 @@ bool FinalLowerGC::doInitialization(Module &M) { queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot); poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc); bigAllocFunc = 
getOrDeclare(jl_well_known::GCBigAlloc); - +#ifdef MMTK_GC + writeBarrier1Func = getOrDeclare(jl_well_known::GCWriteBarrier1); + writeBarrier2Func = getOrDeclare(jl_well_known::GCWriteBarrier2); + GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func}; +#else GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc}; +#endif unsigned j = 0; for (unsigned i = 0; i < sizeof(functionList) / sizeof(void*); i++) { if (!functionList[i]) @@ -328,8 +358,13 @@ bool FinalLowerGC::doInitialization(Module &M) { bool FinalLowerGC::doFinalization(Module &M) { +#ifdef MMTK_GC + GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, writeBarrier1Func, writeBarrier2Func}; + queueRootFunc = poolAllocFunc = bigAllocFunc = writeBarrier1Func = writeBarrier2Func = nullptr; +#else GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc}; queueRootFunc = poolAllocFunc = bigAllocFunc = nullptr; +#endif auto used = M.getGlobalVariable("llvm.compiler.used"); if (!used) return false; @@ -399,6 +434,10 @@ bool FinalLowerGC::runOnFunction(Function &F) auto GCAllocBytesFunc = getOrNull(jl_intrinsics::GCAllocBytes); auto queueGCRootFunc = getOrNull(jl_intrinsics::queueGCRoot); auto safepointFunc = getOrNull(jl_intrinsics::safepoint); +#ifdef MMTK_GC + auto writeBarrier1Func = getOrNull(jl_intrinsics::writeBarrier1); + auto writeBarrier2Func = getOrNull(jl_intrinsics::writeBarrier2); +#endif // Lower all calls to supported intrinsics. 
for (BasicBlock &BB : F) { @@ -432,6 +471,14 @@ bool FinalLowerGC::runOnFunction(Function &F) else if (callee == queueGCRootFunc) { replaceInstruction(CI, lowerQueueGCRoot(CI, F), it); } +#ifdef MMTK_GC + else if (callee == writeBarrier1Func) { + replaceInstruction(CI, lowerWriteBarrier1(CI, F), it); + } + else if (callee == writeBarrier2Func) { + replaceInstruction(CI, lowerWriteBarrier2(CI, F), it); + } +#endif else if (callee == safepointFunc) { lowerSafepoint(CI, F); it = CI->eraseFromParent(); diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 6837dc505a503..cfb7870afc5ef 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2512,6 +2512,47 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { } IRBuilder<> builder(CI); builder.SetCurrentDebugLocation(CI->getDebugLoc()); +#ifndef MMTK_GC + auto parBits = builder.CreateAnd(EmitLoadTag(builder, parent), 3); + auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, 3)); + auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false); + builder.SetInsertPoint(mayTrigTerm); + Value *anyChldNotMarked = NULL; + for (unsigned i = 1; i < CI->arg_size(); i++) { + Value *child = CI->getArgOperand(i); + Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, child), 1); + Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0)); + anyChldNotMarked = anyChldNotMarked ? 
builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked; + } + assert(anyChldNotMarked); // handled by all_of test above + MDBuilder MDB(parent->getContext()); + SmallVector<uint32_t, 2> Weights{1, 9}; + auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false, + MDB.createBranchWeights(Weights)); + builder.SetInsertPoint(trigTerm); + if (CI->getCalledOperand() == write_barrier_func) { + builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent); + } + else { + assert(false); + } +#else + if (CI->getCalledOperand() == write_barrier_func) { + // if (CI->arg_size() == 2) { + // // parent, target + // Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier2); + // builder.CreateCall(wb_func, { parent, CI->getArgOperand(1) }); // We need to be careful about arg1, which may not match the type for wb_func. We probably need a bitcast + // } else { + // // parent and many targets + // Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier1); + // builder.CreateCall(wb_func, { parent }); + // } + // We only care about parent + Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier1); + builder.CreateCall(wb_func, { parent }); + } +#endif + auto parBits = builder.CreateAnd(EmitLoadTag(builder, parent), 3); auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, 3)); auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false); diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index ea390f01010fd..ff65ec7de3aab 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -117,6 +117,10 @@ namespace jl_intrinsics { static const char *POP_GC_FRAME_NAME = "julia.pop_gc_frame"; static const char *QUEUE_GC_ROOT_NAME = "julia.queue_gc_root"; static const char *SAFEPOINT_NAME = "julia.safepoint"; +#ifdef MMTK_GC + static const char *WRITE_BARRIER_1_NAME = "julia.write_barrier1_noinline"; + static const char *WRITE_BARRIER_2_NAME = "julia.write_barrier2_noinline"; +#endif // 
Annotates a function with attributes suitable for GC allocation // functions. Specifically, the return value is marked noalias and nonnull. @@ -223,12 +227,45 @@ namespace jl_intrinsics { intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); return intrinsic; }); + +#ifdef MMTK_GC + const IntrinsicDescription writeBarrier1( + WRITE_BARRIER_1_NAME, + [](const JuliaPassContext &context) { + auto intrinsic = Function::Create( + FunctionType::get( + Type::getVoidTy(context.getLLVMContext()), + { context.T_prjlvalue }, + false), + Function::ExternalLinkage, + WRITE_BARRIER_1_NAME); + intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + return intrinsic; + }); + const IntrinsicDescription writeBarrier2( + WRITE_BARRIER_2_NAME, + [](const JuliaPassContext &context) { + auto intrinsic = Function::Create( + FunctionType::get( + Type::getVoidTy(context.getLLVMContext()), + { context.T_prjlvalue, context.T_prjlvalue }, + false), + Function::ExternalLinkage, + WRITE_BARRIER_2_NAME); + intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + return intrinsic; + }); +#endif } namespace jl_well_known { static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc); static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc); static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root); +#ifdef MMTK_GC + static const char *GC_WB_1_NAME = XSTR(jl_gc_wb1_noinline); + static const char *GC_WB_2_NAME = XSTR(jl_gc_wb2_noinline); +#endif using jl_intrinsics::addGCAllocAttributes; @@ -276,4 +313,34 @@ namespace jl_well_known { func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); return func; }); + +#ifdef MMTK_GC + const WellKnownFunctionDescription GCWriteBarrier1( + GC_WB_1_NAME, + [](const JuliaPassContext &context) { + auto func = Function::Create( + FunctionType::get( + Type::getVoidTy(context.getLLVMContext()), + { context.T_prjlvalue }, + false), + Function::ExternalLinkage, + GC_WB_1_NAME); + 
func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + return func; + }); + + const WellKnownFunctionDescription GCWriteBarrier2( + GC_WB_2_NAME, + [](const JuliaPassContext &context) { + auto func = Function::Create( + FunctionType::get( + Type::getVoidTy(context.getLLVMContext()), + { context.T_prjlvalue, context.T_prjlvalue }, + false), + Function::ExternalLinkage, + GC_WB_2_NAME); + func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + return func; + }); +#endif } diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h index 2b2bd50cd0e4d..7f4d7646829f3 100644 --- a/src/llvm-pass-helpers.h +++ b/src/llvm-pass-helpers.h @@ -129,6 +129,11 @@ namespace jl_intrinsics { // `julia.safepoint`: an intrinsic that triggers a GC safepoint. extern const IntrinsicDescription safepoint; + +#ifdef MMTK_GC + extern const IntrinsicDescription writeBarrier1; + extern const IntrinsicDescription writeBarrier2; +#endif } // A namespace for well-known Julia runtime function descriptions. @@ -149,6 +154,11 @@ namespace jl_well_known { // `jl_gc_queue_root`: queues a GC root. 
extern const WellKnownFunctionDescription GCQueueRoot; + +#ifdef MMTK_GC + extern const WellKnownFunctionDescription GCWriteBarrier1; + extern const WellKnownFunctionDescription GCWriteBarrier2; +#endif } #endif diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 00cd54c9df920..2fc18d991c5d9 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -480,6 +480,17 @@ void objprofile_reset(void) { } +// No inline write barrier -- only used for debugging +JL_DLLEXPORT void jl_gc_wb1_noinline(const void *parent) JL_NOTSAFEPOINT +{ + jl_gc_wb_back(parent); +} + +JL_DLLEXPORT void jl_gc_wb2_noinline(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + jl_gc_wb(parent, ptr); +} + #ifdef __cplusplus } #endif From 9b0c32245da1e02262cabd71b4bb548e3c30695a Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Thu, 4 May 2023 00:40:17 +0000 Subject: [PATCH 2/2] Check which barrier to use in codegen --- src/julia_internal.h | 1 + src/llvm-late-gc-lowering.cpp | 9 ++++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/julia_internal.h b/src/julia_internal.h index bc3516dedc493..65e1966385039 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -335,6 +335,7 @@ jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz); JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int pool_offset, int osize, void* ty); JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t allocsz); extern void post_alloc(void* mutator, void* obj, size_t bytes, int allocator); +extern uint8_t mmtk_needs_write_barrier(void); #endif // MMTK_GC JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT; extern uv_mutex_t gc_perm_lock; diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index cfb7870afc5ef..11f807bdca33f 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2547,9 +2547,12 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { // Function *wb_func 
= getOrDeclare(jl_intrinsics::writeBarrier1); // builder.CreateCall(wb_func, { parent }); // } - // We only care about parent - Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier1); - builder.CreateCall(wb_func, { parent }); + auto barrier = mmtk_needs_write_barrier(); + if (barrier == 1) { + // We only care about parent + Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier1); + builder.CreateCall(wb_func, { parent }); + } } #endif