Skip to content

Commit

Permalink
Bump LLVM and GPUCompiler versions + related fixes:
Browse files Browse the repository at this point in the history
Drop abi_call in favor of a plain invoke call
Switch to the string-based API for the pass manager
Drop support for Julia versions before 1.10
  • Loading branch information
gbaraldi committed Sep 11, 2024
1 parent 481adc4 commit 980534d
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 72 deletions.
6 changes: 3 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"

[compat]
GPUCompiler = "0.24, 0.25, 0.26"
LLVM = "6.3"
GPUCompiler = "0.27"
LLVM = "9.1"
ExprTools = "0.1"
MacroTools = "0.5"
julia = "1.9"
julia = "1.10"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Expand Down
23 changes: 19 additions & 4 deletions src/AllocCheck.jl
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,27 @@ Find all static allocation sites in the provided LLVM IR.
This function modifies the LLVM module in-place, effectively trashing it.
"""
function find_allocs!(mod::LLVM.Module, meta; ignore_throw=true)
function find_allocs!(mod::LLVM.Module, meta; ignore_throw=true, invoke_entry=false)
(; entry, compiled) = meta

errors = []
worklist = LLVM.Function[ entry ]
seen = LLVM.Function[ entry ]
if invoke_entry
@assert startswith(name(entry), "jfptr")
f = pop!(worklist)
for block in blocks(f)
for inst in instructions(block)
if isa(inst, LLVM.CallInst)
decl = called_operand(inst)
if decl isa LLVM.Function && length(blocks(decl)) > 0 && !in(decl, seen)
push!(worklist, decl)
push!(seen, decl)
end
end
end
end
end
while !isempty(worklist)
f = pop!(worklist)

Expand Down Expand Up @@ -202,12 +217,12 @@ function check_allocs(@nospecialize(func), @nospecialize(types); ignore_throw=tr
end
source = GPUCompiler.methodinstance(Base._stable_typeof(func), Base.to_tuple_type(types))
target = DefaultCompilerTarget()
job = CompilerJob(source, config)
job = CompilerJob(source, alloc_config(:specfunc))
allocs = JuliaContext() do ctx
mod, meta = GPUCompiler.compile(:llvm, job, validate=false, optimize=false, cleanup=false)
optimize!(job, mod)
optimize!(mod)

allocs = find_allocs!(mod, meta; ignore_throw)
allocs = find_allocs!(mod, meta; ignore_throw, invoke_entry=false)
# display(mod)
# dispose(mod)
allocs
Expand Down
53 changes: 17 additions & 36 deletions src/compiler.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ function __init__()
tm[] = LLVM.JITTargetMachine(LLVM.triple(), cpu_name(), cpu_features();
optlevel = llvm_codegen_level(opt_level))
LLVM.asm_verbosity!(tm[], true)
lljit = LLVM.has_julia_ojit() ? LLVM.JuliaOJIT() : LLVM.LLJIT(; tm=tm[])
lljit = LLVM.JuliaOJIT()

jd_main = LLVM.JITDylib(lljit)

Expand All @@ -35,20 +35,11 @@ function __init__()
end
end

@static if LLVM.has_julia_ojit()
struct CompilerInstance
jit::LLVM.JuliaOJIT
lctm::Union{LLVM.LazyCallThroughManager, Nothing}
ism::Union{LLVM.IndirectStubsManager, Nothing}
end
else
struct CompilerInstance
jit::LLVM.LLJIT
lctm::Union{LLVM.LazyCallThroughManager, Nothing}
ism::Union{LLVM.IndirectStubsManager, Nothing}
end
# Groups the JIT handle with two optional helper objects.
struct CompilerInstance
# The JIT instance itself.
jit::LLVM.JuliaOJIT
# Lazy call-through manager, or `nothing` when there is none.
lctm::Union{LLVM.LazyCallThroughManager, Nothing}
# Indirect stubs manager, or `nothing` when there is none.
ism::Union{LLVM.IndirectStubsManager, Nothing}
end

struct CompileResult{Success, F, TT, RT}
f_ptr::Ptr{Cvoid}
arg_types::Type{TT}
Expand All @@ -65,29 +56,16 @@ const tm = Ref{TargetMachine}() # for opt pipeline
# cache of kernel instances
const _kernel_instances = Dict{Any, Any}()
const compiler_cache = Dict{Any, CompileResult}()
const config = CompilerConfig(DefaultCompilerTarget(), NativeParams();
kernel=false, entry_abi = :specfunc, always_inline=false)
"""
    alloc_config(func_abi::Symbol)

Build the `CompilerConfig` used for allocation checking.

The configuration pairs `DefaultCompilerTarget()` with `NativeParams()`, is never a
kernel, never forces inlining, and uses `func_abi` as the entry ABI.
"""
function alloc_config(func_abi::Symbol)
    target = DefaultCompilerTarget()
    params = NativeParams()
    return CompilerConfig(
        target, params; kernel=false, entry_abi=func_abi, always_inline=false
    )
end

# Alias for compiler jobs parameterized by `NativeCompilerTarget` and `NativeParams`.
const NativeCompilerJob = CompilerJob{NativeCompilerTarget,NativeParams}
# GPUCompiler method overrides for these jobs: `can_safepoint` always answers `true`,
# and `runtime_module` returns `Runtime`.
GPUCompiler.can_safepoint(@nospecialize(job::NativeCompilerJob)) = true
GPUCompiler.runtime_module(::NativeCompilerJob) = Runtime

function optimize!(@nospecialize(job::CompilerJob), mod::LLVM.Module)
triple = GPUCompiler.llvm_triple(job.config.target)
tm = GPUCompiler.llvm_machine(job.config.target)
if VERSION >= v"1.10-beta3"
@dispose pb = LLVM.PassBuilder(tm) begin
@dispose mpm = LLVM.NewPMModulePassManager(pb) begin
build_newpm_pipeline!(pb, mpm)
run!(mpm, mod, tm)
end
end
else
@dispose pm=LLVM.ModulePassManager() begin
build_oldpm_pipeline!(pm)
run!(pm, mod)
end
end
"""
    optimize!(mod::LLVM.Module)

Run the Julia optimization pipeline over `mod`, modifying it in place.

The pipeline's optimization level is read from the options of the running Julia
process (`Base.JLOptions().opt_level`).
"""
function optimize!(mod::LLVM.Module)
    level = Base.JLOptions().opt_level
    return run!(LLVM.Interop.JuliaPipeline(; opt_level=level), mod)
end

"""
Expand All @@ -112,10 +90,10 @@ function compile_callable(f::F, tt::TT=Tuple{}; ignore_throw=true) where {F, TT}
function compile(@nospecialize(job::CompilerJob))
return JuliaContext() do ctx
mod, meta = GPUCompiler.compile(:llvm, job, validate=false)
optimize!(job, mod)
optimize!(mod)

clone = copy(mod)
analysis = find_allocs!(mod, meta; ignore_throw)
analysis = find_allocs!(mod, meta; ignore_throw, invoke_entry=true)
# TODO: This is the wrong meta
return clone, meta, analysis
end
Expand All @@ -142,7 +120,7 @@ function compile_callable(f::F, tt::TT=Tuple{}; ignore_throw=true) where {F, TT}
end
end
end
fun = GPUCompiler.cached_compilation(cache, source, config, compile, link)
fun = GPUCompiler.cached_compilation(cache, source, alloc_config(:func), compile, link)

# create a callable object that captures the function instance. we don't need to think
# about world age here, as GPUCompiler already does and will return a different object
Expand All @@ -153,7 +131,10 @@ end

function (f::CompileResult{Success, F, TT, RT})(args...) where {Success, F, TT, RT}
if Success
return abi_call(f.f_ptr, RT, TT, f.func, args...)
argsv = Any[args...]
GC.@preserve argsv begin
return ccall(f.f_ptr, Any, (Any, Ptr{Any}, UInt32), f.func, pointer(argsv), length(args))
end
else
error("@check_allocs function contains ", length(f.analysis), " allocations.")
end
Expand Down
26 changes: 1 addition & 25 deletions src/compiler_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,29 +20,5 @@ function cpu_name()
end

function cpu_features()
if VERSION >= v"1.10.0-beta1"
return ccall(:jl_get_cpu_features, String, ())
end

@static if Sys.ARCH == :x86_64 ||
Sys.ARCH == :x86
return "+mmx,+sse,+sse2,+fxsr,+cx8" # mandated by Julia
else
return ""
end
end

if VERSION >= v"1.10-beta3"
function build_newpm_pipeline!(pb::LLVM.PassBuilder, mpm::LLVM.NewPMModulePassManager, speedup=2, size=0, lower_intrinsics=true,
dump_native=false, external_use=false, llvm_only=false,)
ccall(:jl_build_newpm_pipeline, Cvoid,
(LLVM.API.LLVMModulePassManagerRef, LLVM.API.LLVMPassBuilderRef, Cint, Cint, Cint, Cint, Cint, Cint),
mpm, pb, speedup, size, lower_intrinsics, dump_native, external_use, llvm_only)
end
else
function build_oldpm_pipeline!(pm::LLVM.ModulePassManager, opt_level=2, lower_intrinsics=true)
ccall(:jl_add_optimization_passes, Cvoid,
(LLVM.API.LLVMPassManagerRef, Cint, Cint),
pm, opt_level, lower_intrinsics)
end
return ccall(:jl_get_cpu_features, String, ())
end
25 changes: 21 additions & 4 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -327,8 +327,25 @@ Documentation for `issue64`.
v[i], v[j] = v[j], v[i]
v
end
let io = IOBuffer()
print(io, @doc issue64)
s = String(take!(io))
@test occursin("Documentation for `issue64`.", s)
# Fixture for the issue #70 test: returns either its tuple argument or `nothing`,
# chosen at random on every call.
@check_allocs function foo_with_union_rt(t::Tuple{Float64, Float64})
    coin = rand((1, -1))
    return coin == 1 ? t : nothing
end

@testset "issues" begin
    # issue #64: looking up the documentation of `issue64` yields the expected text
    rendered = sprint(print, @doc issue64)
    @test occursin("Documentation for `issue64`.", rendered)

    # issue #70: `foo_with_union_rt` picks its branch at random, so it is called
    # twice and either outcome is accepted each time
    expected = (1.0, 1.5)
    for _ in 1:2
        result = foo_with_union_rt(expected)
        @test result === nothing || result === expected
    end
end

0 comments on commit 980534d

Please sign in to comment.