From 980534db6729e4a8b83623673e3c41bfcd9570cd Mon Sep 17 00:00:00 2001 From: gbaraldi Date: Wed, 11 Sep 2024 12:49:52 -0300 Subject: [PATCH] Bump LLVM and GPUCompiler versions + related fixes: Drop abi_call for a plain invoke call Change to use string api for pass manager Drop support for versions before 1.10 --- Project.toml | 6 ++--- src/AllocCheck.jl | 23 +++++++++++++++---- src/compiler.jl | 53 ++++++++++++++----------------------------- src/compiler_utils.jl | 26 +-------------------- test/runtests.jl | 25 ++++++++++++++++---- 5 files changed, 61 insertions(+), 72 deletions(-) diff --git a/Project.toml b/Project.toml index a7959b8..15919a1 100644 --- a/Project.toml +++ b/Project.toml @@ -10,11 +10,11 @@ LLVM = "929cbde3-209d-540e-8aea-75f648917ca0" MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" [compat] -GPUCompiler = "0.24, 0.25, 0.26" -LLVM = "6.3" +GPUCompiler = "0.27" +LLVM = "9.1" ExprTools = "0.1" MacroTools = "0.5" -julia = "1.9" +julia = "1.10" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/src/AllocCheck.jl b/src/AllocCheck.jl index 6229dcd..c1f187f 100644 --- a/src/AllocCheck.jl +++ b/src/AllocCheck.jl @@ -93,12 +93,27 @@ Find all static allocation sites in the provided LLVM IR. This function modifies the LLVM module in-place, effectively trashing it. """ -function find_allocs!(mod::LLVM.Module, meta; ignore_throw=true) +function find_allocs!(mod::LLVM.Module, meta; ignore_throw=true, invoke_entry=false) (; entry, compiled) = meta errors = [] worklist = LLVM.Function[ entry ] seen = LLVM.Function[ entry ] + if invoke_entry + @assert startswith(name(entry), "jfptr") + f = pop!(worklist) + for block in blocks(f) + for inst in instructions(block) + if isa(inst, LLVM.CallInst) + decl = called_operand(inst) + if decl isa LLVM.Function && length(blocks(decl)) > 0 && !in(decl, seen) + push!(worklist, decl) + push!(seen, decl) + end + end + end + end + end while !isempty(worklist) f = pop!(worklist) @@ -202,12 +217,12 @@ function check_allocs(@nospecialize(func), @nospecialize(types); ignore_throw=tr end source = GPUCompiler.methodinstance(Base._stable_typeof(func), Base.to_tuple_type(types)) target = DefaultCompilerTarget() - job = CompilerJob(source, config) + job = CompilerJob(source, alloc_config(:specfunc)) allocs = JuliaContext() do ctx mod, meta = GPUCompiler.compile(:llvm, job, validate=false, optimize=false, cleanup=false) - optimize!(job, mod) + optimize!(mod) - allocs = find_allocs!(mod, meta; ignore_throw) + allocs = find_allocs!(mod, meta; ignore_throw, invoke_entry=false) # display(mod) # dispose(mod) allocs diff --git a/src/compiler.jl b/src/compiler.jl index ecba319..a6501f7 100644 --- a/src/compiler.jl +++ b/src/compiler.jl @@ -10,7 +10,7 @@ function __init__() tm[] = LLVM.JITTargetMachine(LLVM.triple(), cpu_name(), cpu_features(); optlevel = llvm_codegen_level(opt_level)) LLVM.asm_verbosity!(tm[], true) - lljit = LLVM.has_julia_ojit() ? LLVM.JuliaOJIT() : LLVM.LLJIT(; tm=tm[]) + lljit = LLVM.JuliaOJIT() jd_main = LLVM.JITDylib(lljit) @@ -35,20 +35,11 @@ function __init__() end end -@static if LLVM.has_julia_ojit() - struct CompilerInstance - jit::LLVM.JuliaOJIT - lctm::Union{LLVM.LazyCallThroughManager, Nothing} - ism::Union{LLVM.IndirectStubsManager, Nothing} - end -else - struct CompilerInstance - jit::LLVM.LLJIT - lctm::Union{LLVM.LazyCallThroughManager, Nothing} - ism::Union{LLVM.IndirectStubsManager, Nothing} - end +struct CompilerInstance + jit::LLVM.JuliaOJIT + lctm::Union{LLVM.LazyCallThroughManager, Nothing} + ism::Union{LLVM.IndirectStubsManager, Nothing} end - struct CompileResult{Success, F, TT, RT} f_ptr::Ptr{Cvoid} arg_types::Type{TT} @@ -65,29 +56,16 @@ const tm = Ref{TargetMachine}() # for opt pipeline # cache of kernel instances const _kernel_instances = Dict{Any, Any}() const compiler_cache = Dict{Any, CompileResult}() -const config = CompilerConfig(DefaultCompilerTarget(), NativeParams(); - kernel=false, entry_abi = :specfunc, always_inline=false) +alloc_config(func_abi::Symbol) = CompilerConfig(DefaultCompilerTarget(), NativeParams(); + kernel=false, entry_abi = func_abi, always_inline=false) const NativeCompilerJob = CompilerJob{NativeCompilerTarget,NativeParams} GPUCompiler.can_safepoint(@nospecialize(job::NativeCompilerJob)) = true GPUCompiler.runtime_module(::NativeCompilerJob) = Runtime -function optimize!(@nospecialize(job::CompilerJob), mod::LLVM.Module) - triple = GPUCompiler.llvm_triple(job.config.target) - tm = GPUCompiler.llvm_machine(job.config.target) - if VERSION >= v"1.10-beta3" - @dispose pb = LLVM.PassBuilder(tm) begin - @dispose mpm = LLVM.NewPMModulePassManager(pb) begin - build_newpm_pipeline!(pb, mpm) - run!(mpm, mod, tm) - end - end - else - @dispose pm=LLVM.ModulePassManager() begin - build_oldpm_pipeline!(pm) - run!(pm, mod) - end - end +function optimize!(mod::LLVM.Module) + pipeline = LLVM.Interop.JuliaPipeline(opt_level=Base.JLOptions().opt_level) + run!(pipeline, mod) end """ @@ -112,10 +90,10 @@ function compile_callable(f::F, tt::TT=Tuple{}; ignore_throw=true) where {F, TT} function compile(@nospecialize(job::CompilerJob)) return JuliaContext() do ctx mod, meta = GPUCompiler.compile(:llvm, job, validate=false) - optimize!(job, mod) + optimize!(mod) clone = copy(mod) - analysis = find_allocs!(mod, meta; ignore_throw) + analysis = find_allocs!(mod, meta; ignore_throw, invoke_entry=true) # TODO: This is the wrong meta return clone, meta, analysis end @@ -142,7 +120,7 @@ function compile_callable(f::F, tt::TT=Tuple{}; ignore_throw=true) where {F, TT} end end end - fun = GPUCompiler.cached_compilation(cache, source, config, compile, link) + fun = GPUCompiler.cached_compilation(cache, source, alloc_config(:func), compile, link) # create a callable object that captures the function instance. we don't need to think # about world age here, as GPUCompiler already does and will return a different object @@ -153,7 +131,10 @@ end function (f::CompileResult{Success, F, TT, RT})(args...) where {Success, F, TT, RT} if Success - return abi_call(f.f_ptr, RT, TT, f.func, args...) + argsv = Any[args...] + GC.@preserve argsv begin + return ccall(f.f_ptr, Any, (Any, Ptr{Any}, UInt32), f.func, pointer(argsv), length(args)) + end else error("@check_allocs function contains ", length(f.analysis), " allocations.") end diff --git a/src/compiler_utils.jl b/src/compiler_utils.jl index 15a0e6e..0337d5b 100644 --- a/src/compiler_utils.jl +++ b/src/compiler_utils.jl @@ -20,29 +20,5 @@ function cpu_name() end function cpu_features() - if VERSION >= v"1.10.0-beta1" - return ccall(:jl_get_cpu_features, String, ()) - end - - @static if Sys.ARCH == :x86_64 || - Sys.ARCH == :x86 - return "+mmx,+sse,+sse2,+fxsr,+cx8" # mandated by Julia - else - return "" - end -end - -if VERSION >= v"1.10-beta3" - function build_newpm_pipeline!(pb::LLVM.PassBuilder, mpm::LLVM.NewPMModulePassManager, speedup=2, size=0, lower_intrinsics=true, - dump_native=false, external_use=false, llvm_only=false,) - ccall(:jl_build_newpm_pipeline, Cvoid, - (LLVM.API.LLVMModulePassManagerRef, LLVM.API.LLVMPassBuilderRef, Cint, Cint, Cint, Cint, Cint, Cint), - mpm, pb, speedup, size, lower_intrinsics, dump_native, external_use, llvm_only) - end -else - function build_oldpm_pipeline!(pm::LLVM.ModulePassManager, opt_level=2, lower_intrinsics=true) - ccall(:jl_add_optimization_passes, Cvoid, - (LLVM.API.LLVMPassManagerRef, Cint, Cint), - pm, opt_level, lower_intrinsics) - end + return ccall(:jl_get_cpu_features, String, ()) end diff --git a/test/runtests.jl b/test/runtests.jl index 4377483..189760b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -327,8 +327,25 @@ Documentation for `issue64`. v[i], v[j] = v[j], v[i] v end -let io = IOBuffer() - print(io, @doc issue64) - s = String(take!(io)) - @test occursin("Documentation for `issue64`.", s) +@check_allocs function foo_with_union_rt(t::Tuple{Float64, Float64}) + if rand((1, -1)) == 1 + return t + else + return nothing + end end + +@testset "issues" begin + # issue #64 + let io = IOBuffer() + print(io, @doc issue64) + s = String(take!(io)) + @test occursin("Documentation for `issue64`.", s) + end + + # issue #70 + x = foo_with_union_rt((1.0, 1.5)) + @test x === nothing || x === (1.0, 1.5) + x = foo_with_union_rt((1.0, 1.5)) + @test x === nothing || x === (1.0, 1.5) +end \ No newline at end of file