From 49572a549983c8d84575a379ccf764558e1893c3 Mon Sep 17 00:00:00 2001 From: Sari Sakse Dalum Date: Fri, 19 Aug 2022 11:56:38 +0200 Subject: [PATCH 001/290] Add mutating `stat!` function for non-allocating filesystem `stat` --- base/stat.jl | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/base/stat.jl b/base/stat.jl index 13dbca7780b61..99c24608daca0 100644 --- a/base/stat.jl +++ b/base/stat.jl @@ -144,14 +144,13 @@ show(io::IO, ::MIME"text/plain", st::StatStruct) = show_statstruct(io, st, false # stat & lstat functions -macro stat_call(sym, arg1type, arg) +macro stat_call!(stat_buf, sym, arg1type, arg) return quote - stat_buf = zeros(UInt8, Int(ccall(:jl_sizeof_stat, Int32, ()))) - r = ccall($(Expr(:quote, sym)), Int32, ($(esc(arg1type)), Ptr{UInt8}), $(esc(arg)), stat_buf) + r = ccall($(Expr(:quote, sym)), Int32, ($(esc(arg1type)), Ptr{UInt8}), $(esc(arg)), $(esc(stat_buf))) if !(r in (0, Base.UV_ENOENT, Base.UV_ENOTDIR, Base.UV_EINVAL)) uv_error(string("stat(", repr($(esc(arg))), ")"), r) end - st = StatStruct($(esc(arg)), stat_buf) + st = StatStruct($(esc(arg)), $(esc(stat_buf))) if ispath(st) != (r == 0) error("stat returned zero type for a valid path") end @@ -159,13 +158,18 @@ macro stat_call(sym, arg1type, arg) end end -stat(fd::OS_HANDLE) = @stat_call jl_fstat OS_HANDLE fd -stat(path::AbstractString) = @stat_call jl_stat Cstring path -lstat(path::AbstractString) = @stat_call jl_lstat Cstring path +stat!(stat_buf::Vector{UInt8}, fd::OS_HANDLE) = @stat_call! stat_buf jl_fstat OS_HANDLE fd +stat!(stat_buf::Vector{UInt8}, path::AbstractString) = @stat_call! stat_buf jl_stat Cstring path +lstat!(stat_buf::Vector{UInt8}, path::AbstractString) = @stat_call! 
stat_buf jl_lstat Cstring path if RawFD !== OS_HANDLE - global stat(fd::RawFD) = stat(Libc._get_osfhandle(fd)) + global stat!(stat_buf::Vector{UInt8}, fd::RawFD) = stat!(stat_buf, Libc._get_osfhandle(fd)) end -stat(fd::Integer) = stat(RawFD(fd)) +stat!(stat_buf::Vector{UInt8}, fd::Integer) = stat!(stat_buf, RawFD(fd)) + +stat(x) = stat!(get_stat_buf(), x) +lstat(x) = lstat!(get_stat_buf(), x) + +get_stat_buf() = zeros(UInt8, Int(ccall(:jl_sizeof_stat, Int32, ()))) """ stat(file) From 4ed7aec910cb8ef96c9bdbf510cd5d1cad391e61 Mon Sep 17 00:00:00 2001 From: Sari Sakse Dalum Date: Fri, 19 Aug 2022 11:57:16 +0200 Subject: [PATCH 002/290] Add docstrings --- base/stat.jl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/base/stat.jl b/base/stat.jl index 99c24608daca0..c12feb888f0bd 100644 --- a/base/stat.jl +++ b/base/stat.jl @@ -158,6 +158,13 @@ macro stat_call!(stat_buf, sym, arg1type, arg) end end +""" + stat!(stat_buf::Vector{UInt8}, file) + +Like [`stat`](@ref), but avoids internal allocations by using a pre-allocated buffer, +`stat_buf`. For a small performance gain over `stat`, consecutive calls to `stat!` can use +the same `stat_buf`. See also [`Base.Filesystem.get_stat_buf`](@ref). +""" stat!(stat_buf::Vector{UInt8}, fd::OS_HANDLE) = @stat_call! stat_buf jl_fstat OS_HANDLE fd stat!(stat_buf::Vector{UInt8}, path::AbstractString) = @stat_call! stat_buf jl_stat Cstring path lstat!(stat_buf::Vector{UInt8}, path::AbstractString) = @stat_call! stat_buf jl_lstat Cstring path @@ -169,6 +176,11 @@ stat!(stat_buf::Vector{UInt8}, fd::Integer) = stat!(stat_buf, RawFD(fd stat(x) = stat!(get_stat_buf(), x) lstat(x) = lstat!(get_stat_buf(), x) +""" + get_stat_buf() + +Return a buffer of bytes of the right size for [`stat!`](@ref). 
+""" get_stat_buf() = zeros(UInt8, Int(ccall(:jl_sizeof_stat, Int32, ()))) """ From ce2275c2abe33446c29cdfa3fe55d703fcf8a3f9 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Thu, 19 Aug 2021 22:09:51 +0900 Subject: [PATCH 003/290] introduce `@nospecializeinfer` macro to tell the compiler to avoid excess inference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit introduces a new compiler annotation called `@nospecializeinfer`, which allows us to request the compiler to avoid excessive inference. \## `@nospecialize` mechanism To discuss `@nospecializeinfer`, let's first understand the behavior of `@nospecialize`. Its docstring says that > This is only a hint for the compiler to avoid excess code generation. , and it works by suppressing dispatches with complex runtime occurrences of the annotated arguments. This can be understood with the example below: ```julia julia> function call_func_itr(func, itr) local r = 0 r += func(itr[1]) r += func(itr[2]) r += func(itr[3]) r end; julia> _isa = isa; # just for the sake of explanation, global variable to prevent inlining julia> func_specialize(a) = _isa(a, Function); julia> func_nospecialize(@nospecialize a) = _isa(a, Function); julia> dispatchonly = Any[sin, muladd, nothing]; # untyped container can cause excessive runtime dispatch julia> @code_typed call_func_itr(func_specialize, dispatchonly) CodeInfo( 1 ─ %1 = π (0, Int64) │ %2 = Base.arrayref(true, itr, 1)::Any │ %3 = (func)(%2)::Any │ %4 = (%1 + %3)::Any │ %5 = Base.arrayref(true, itr, 2)::Any │ %6 = (func)(%5)::Any │ %7 = (%4 + %6)::Any │ %8 = Base.arrayref(true, itr, 3)::Any │ %9 = (func)(%8)::Any │ %10 = (%7 + %9)::Any └── return %10 ) => Any julia> @code_typed call_func_itr(func_nospecialize, dispatchonly) CodeInfo( 1 ─ %1 = π (0, Int64) │ %2 = Base.arrayref(true, itr, 1)::Any │ %3 = invoke func(%2::Any)::Any │ %4 = (%1 + %3)::Any │ %5 = Base.arrayref(true, itr, 2)::Any │ %6 = invoke func(%5::Any)::Any │ %7 
= (%4 + %6)::Any │ %8 = Base.arrayref(true, itr, 3)::Any │ %9 = invoke func(%8::Any)::Any │ %10 = (%7 + %9)::Any └── return %10 ) => Any ``` The calls of `func_specialize` remain `:call` expressions (so that they are dispatched and compiled at runtime) while the calls of `func_nospecialize` are resolved as `:invoke` expressions. This is because `@nospecialize` requests the compiler to give up compiling `func_nospecialize` with runtime argument types and to compile it with the declared argument types instead, allowing `call_func_itr(func_nospecialize, dispatchonly)` to avoid runtime dispatches and accompanying JIT compilations (i.e. "excess code generation"). The difference is evident when checking `specializations`: ```julia julia> call_func_itr(func_specialize, dispatchonly) 2 julia> length(Base.specializations(only(methods(func_specialize)))) 3 # w/ runtime dispatch, multiple specializations julia> call_func_itr(func_nospecialize, dispatchonly) 2 julia> length(Base.specializations(only(methods(func_nospecialize)))) 1 # w/o runtime dispatch, the single specialization ``` The problem here is that it influences dispatch only, and does not intervene in inference in any way. 
So there is still a possibility of "excess inference" when the compiler sees a considerable complexity of argument types during inference: ```julia julia> func_specialize(a) = _isa(a, Function); # redefine func to clear the specializations julia> @assert length(Base.specializations(only(methods(func_specialize)))) == 0; julia> func_nospecialize(@nospecialize a) = _isa(a, Function); # redefine func to clear the specializations julia> @assert length(Base.specializations(only(methods(func_nospecialize)))) == 0; julia> withinfernce = tuple(sin, muladd, "foo"); # typed container can cause excessive inference julia> @time @code_typed call_func_itr(func_specialize, withinfernce); 0.000812 seconds (3.77 k allocations: 217.938 KiB, 94.34% compilation time) julia> length(Base.specializations(only(methods(func_specialize)))) 4 # multiple method instances inferred julia> @time @code_typed call_func_itr(func_nospecialize, withinfernce); 0.000753 seconds (3.77 k allocations: 218.047 KiB, 92.42% compilation time) julia> length(Base.specializations(only(methods(func_nospecialize)))) 4 # multiple method instances inferred ``` The purpose of this PR is to implement a mechanism that allows us to avoid excessive inference to reduce the compilation latency when inference sees a considerable complexity of argument types. \## Design Here are some ideas to implement the functionality: 1. make `@nospecialize` block inference 2. add nospecializeinfer effect when `@nospecialize`d method is annotated as `@noinline` 3. implement as `@pure`-like boolean annotation to request nospecializeinfer effect on top of `@nospecialize` 4. implement as annotation that is orthogonal to `@nospecialize` After trying 1 ~ 3., I decided to submit 3. \### 1. make `@nospecialize` block inference This is almost same as what Jameson has done at . It turned out that this approach performs very badly because some of `@nospecialize`'d arguments still need inference to perform reasonably. 
For example, it's obvious that the following definition of `getindex(@nospecialize(t::Tuple), i::Int)` would perform very badly if `@nospecialize` blocks inference, because of a lack of useful type information for succeeding optimizations: \### 2. add nospecializeinfer effect when `@nospecialize`d method is annotated as `@noinline` The important observation is that we often use `@nospecialize` even when we expect inference to forward type and constant information. Adversely, we may be able to exploit the fact that we usually don't expect inference to forward information to a callee when we annotate it with `@noinline` (i.e. when adding `@noinline`, we're usually fine with disabling inter-procedural optimizations other than resolving dispatch). So the idea is to enable the inference suppression when `@nospecialize`'d method is annotated as `@noinline` too. It's a reasonable choice and can be efficiently implemented with #41922. But it sounds a bit weird to me to associate no infer effect with `@noinline`, and I also think there may be some cases we want to inline a method while partly avoiding inference, e.g.: ```julia \# the compiler will always infer with `f::Any` @noinline function twof(@nospecialize(f), n) # this method body is very simple and should be eligible for inlining if occursin('+', string(typeof(f).name.name::Symbol)) 2 + n elseif occursin('*', string(typeof(f).name.name::Symbol)) 2n else zero(n) end end ``` \### 3. implement as `@pure`-like boolean annotation to request nospecializeinfer effect on top of `@nospecialize` This is what this commit implements. It basically replaces the previous `@noinline` flag with a newly-introduced annotation named `@nospecializeinfer`. 
It is still associated with `@nospecialize` and it only has an effect when used together with `@nospecialize`, but now it is not associated with `@noinline`, and it would help us reason about the behavior of `@nospecializeinfer` and experiment with its effect more safely: ```julia \# the compiler will always infer with `f::Any` Base.@nospecializeinfer function twof(@nospecialize(f), n) # the compiler may or may not inline this method if occursin('+', string(typeof(f).name.name::Symbol)) 2 + n elseif occursin('*', string(typeof(f).name.name::Symbol)) 2n else zero(n) end end ``` \### 4. implement as annotation that is orthogonal to `@nospecialize` Actually, we can have `@nospecialize` and `@nospecializeinfer` separately, and it would allow us to configure compilation strategies in a more fine-grained way. ```julia function noinfspec(Base.@nospecializeinfer(f), @nospecialize(g)) ... end ``` I'm fine with this approach but at the same time I'm afraid to have too many annotations that are related in some way (I expect we will annotate both `@nospecializeinfer` and `@nospecialize` in this scheme). 
Co-authored-by: Mosè Giordano Co-authored-by: Tim Holy --- base/compiler/abstractinterpretation.jl | 4 + base/compiler/utilities.jl | 19 +++- base/essentials.jl | 3 +- base/expr.jl | 39 +++++++- doc/src/base/base.md | 2 + src/ast.c | 2 + src/gf.c | 9 +- src/ircode.c | 10 +- src/jltypes.c | 14 ++- src/julia.h | 2 + src/julia_internal.h | 2 + src/method.c | 5 + stdlib/Serialization/src/Serialization.jl | 11 ++- test/compiler/inference.jl | 107 ++++++++++++++++++++-- test/compiler/irutils.jl | 18 +++- 15 files changed, 216 insertions(+), 31 deletions(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 0f2011fd07c3c..097bd56d913ce 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -521,6 +521,10 @@ function abstract_call_method(interp::AbstractInterpreter, sigtuple = unwrap_unionall(sig) sigtuple isa DataType || return MethodCallResult(Any, false, false, nothing, Effects()) + if is_nospecializeinfer(method) + sig = get_nospecializeinfer_sig(method, sig, sparams) + end + # Limit argument type tuple growth of functions: # look through the parents list to see if there's a call to the same method # and from the same method. 
diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl index 836c370b98bd4..cb5f916e76914 100644 --- a/base/compiler/utilities.jl +++ b/base/compiler/utilities.jl @@ -107,6 +107,10 @@ function is_inlineable_constant(@nospecialize(x)) return count_const_size(x) <= MAX_INLINE_CONST_SIZE end +is_nospecialized(method::Method) = method.nospecialize ≠ 0 + +is_nospecializeinfer(method::Method) = method.nospecializeinfer && is_nospecialized(method) + ########################### # MethodInstance/CodeInfo # ########################### @@ -154,8 +158,16 @@ function get_compileable_sig(method::Method, @nospecialize(atype), sparams::Simp isa(atype, DataType) || return nothing mt = ccall(:jl_method_get_table, Any, (Any,), method) mt === nothing && return nothing - return ccall(:jl_normalize_to_compilable_sig, Any, (Any, Any, Any, Any), - mt, atype, sparams, method) + return ccall(:jl_normalize_to_compilable_sig, Any, (Any, Any, Any, Any, Cint), + mt, atype, sparams, method, #=int return_if_compileable=#1) +end + +function get_nospecializeinfer_sig(method::Method, @nospecialize(atype), sparams::SimpleVector) + isa(atype, DataType) || return method.sig + mt = ccall(:jl_method_table_for, Any, (Any,), atype) + mt === nothing && return method.sig + return ccall(:jl_normalize_to_compilable_sig, Any, (Any, Any, Any, Any, Cint), + mt, atype, sparams, method, #=int return_if_compileable=#0) end isa_compileable_sig(@nospecialize(atype), sparams::SimpleVector, method::Method) = @@ -203,6 +215,9 @@ function specialize_method(method::Method, @nospecialize(atype), sparams::Simple if isa(atype, UnionAll) atype, sparams = normalize_typevars(method, atype, sparams) end + if is_nospecializeinfer(method) + atype = get_nospecializeinfer_sig(method, atype, sparams) + end if preexisting # check cached specializations # for an existing result stored there diff --git a/base/essentials.jl b/base/essentials.jl index e2035601f4fb5..63e209331b6f0 100644 --- a/base/essentials.jl +++ 
b/base/essentials.jl @@ -85,7 +85,8 @@ f(y) = [x for x in y] !!! note `@nospecialize` affects code generation but not inference: it limits the diversity of the resulting native code, but it does not impose any limitations (beyond the - standard ones) on type-inference. + standard ones) on type-inference. Use [`Base.@nospecializeinfer`](@ref) together with + `@nospecialize` to additionally suppress inference. # Example diff --git a/base/expr.jl b/base/expr.jl index e45684f95a34f..5952904b3d17b 100644 --- a/base/expr.jl +++ b/base/expr.jl @@ -342,7 +342,6 @@ macro noinline(x) return annotate_meta_def_or_block(x, :noinline) end - """ @constprop setting [ex] @@ -763,6 +762,44 @@ function compute_assumed_setting(@nospecialize(setting), val::Bool=true) end end +""" + Base.@nospecializeinfer function f(args...) + @nospecialize ... + ... + end + Base.@nospecializeinfer f(@nospecialize args...) = ... + +Tells the compiler to infer `f` using the declared types of `@nospecialize`d arguments. +This can be used to limit the number of compiler-generated specializations during inference. + +# Example + +```julia +julia> f(A::AbstractArray) = g(A) +f (generic function with 1 method) + +julia> @noinline Base.@nospecializeinfer g(@nospecialize(A::AbstractArray)) = A[1] +g (generic function with 1 method) + +julia> @code_typed f([1.0]) +CodeInfo( +1 ─ %1 = invoke Main.g(_2::AbstractArray)::Any +└── return %1 +) => Any +``` + +In this example, `f` will be inferred for each specific type of `A`, +but `g` will only be inferred once with the declared argument type `A::AbstractArray`, +meaning that the compiler will not likely see the excessive inference time on it +while it can not infer the concrete return type of it. +Without the `@nospecializeinfer`, `f([1.0])` would infer the return type of `g` as `Float64`, +indicating that inference ran for `g(::Vector{Float64})` despite the prohibition on +specialized code generation. +""" +macro nospecializeinfer(ex) + esc(isa(ex, Expr) ? 
pushmeta!(ex, :nospecializeinfer) : ex) +end + """ @propagate_inbounds diff --git a/doc/src/base/base.md b/doc/src/base/base.md index 7e45e2176478d..5556578bcc245 100644 --- a/doc/src/base/base.md +++ b/doc/src/base/base.md @@ -285,6 +285,8 @@ Base.@inline Base.@noinline Base.@nospecialize Base.@specialize +Base.@nospecializeinfer +Base.@constprop Base.gensym Base.@gensym var"name" diff --git a/src/ast.c b/src/ast.c index 97bbc6e8227ba..9da3cd6dfe995 100644 --- a/src/ast.c +++ b/src/ast.c @@ -83,6 +83,7 @@ JL_DLLEXPORT jl_sym_t *jl_aggressive_constprop_sym; JL_DLLEXPORT jl_sym_t *jl_no_constprop_sym; JL_DLLEXPORT jl_sym_t *jl_purity_sym; JL_DLLEXPORT jl_sym_t *jl_nospecialize_sym; +JL_DLLEXPORT jl_sym_t *jl_nospecializeinfer_sym; JL_DLLEXPORT jl_sym_t *jl_macrocall_sym; JL_DLLEXPORT jl_sym_t *jl_colon_sym; JL_DLLEXPORT jl_sym_t *jl_hygienicscope_sym; @@ -342,6 +343,7 @@ void jl_init_common_symbols(void) jl_isdefined_sym = jl_symbol("isdefined"); jl_nospecialize_sym = jl_symbol("nospecialize"); jl_specialize_sym = jl_symbol("specialize"); + jl_nospecializeinfer_sym = jl_symbol("nospecializeinfer"); jl_optlevel_sym = jl_symbol("optlevel"); jl_compile_sym = jl_symbol("compile"); jl_force_compile_sym = jl_symbol("force_compile"); diff --git a/src/gf.c b/src/gf.c index 6d55e479babfe..35bea787f5355 100644 --- a/src/gf.c +++ b/src/gf.c @@ -2565,7 +2565,8 @@ JL_DLLEXPORT int32_t jl_invoke_api(jl_code_instance_t *codeinst) return -1; } -JL_DLLEXPORT jl_value_t *jl_normalize_to_compilable_sig(jl_methtable_t *mt, jl_tupletype_t *ti, jl_svec_t *env, jl_method_t *m) +JL_DLLEXPORT jl_value_t *jl_normalize_to_compilable_sig(jl_methtable_t *mt, jl_tupletype_t *ti, jl_svec_t *env, jl_method_t *m, + int return_if_compileable) { jl_tupletype_t *tt = NULL; jl_svec_t *newparams = NULL; @@ -2589,7 +2590,7 @@ JL_DLLEXPORT jl_value_t *jl_normalize_to_compilable_sig(jl_methtable_t *mt, jl_t if (!is_compileable) is_compileable = jl_isa_compileable_sig(tt, env, m); JL_GC_POP(); - return 
is_compileable ? (jl_value_t*)tt : jl_nothing; + return (!return_if_compileable || is_compileable) ? (jl_value_t*)tt : jl_nothing; } jl_method_instance_t *jl_normalize_to_compilable_mi(jl_method_instance_t *mi JL_PROPAGATES_ROOT) @@ -2600,7 +2601,7 @@ jl_method_instance_t *jl_normalize_to_compilable_mi(jl_method_instance_t *mi JL_ jl_methtable_t *mt = jl_method_get_table(def); if ((jl_value_t*)mt == jl_nothing) return mi; - jl_value_t *compilationsig = jl_normalize_to_compilable_sig(mt, (jl_datatype_t*)mi->specTypes, mi->sparam_vals, def); + jl_value_t *compilationsig = jl_normalize_to_compilable_sig(mt, (jl_datatype_t*)mi->specTypes, mi->sparam_vals, def, 1); if (compilationsig == jl_nothing || jl_egal(compilationsig, mi->specTypes)) return mi; jl_svec_t *env = NULL; @@ -2633,7 +2634,7 @@ jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t wor JL_UNLOCK(&mt->writelock); } else { - jl_value_t *tt = jl_normalize_to_compilable_sig(mt, ti, env, m); + jl_value_t *tt = jl_normalize_to_compilable_sig(mt, ti, env, m, 1); if (tt != jl_nothing) { JL_GC_PUSH2(&tt, &env); if (!jl_egal(tt, (jl_value_t*)ti)) { diff --git a/src/ircode.c b/src/ircode.c index 4121d6691aa5b..bc5cc61e7f892 100644 --- a/src/ircode.c +++ b/src/ircode.c @@ -434,13 +434,14 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) } } -static jl_code_info_flags_t code_info_flags(uint8_t inferred, uint8_t propagate_inbounds, - uint8_t has_fcall, uint8_t inlining, uint8_t constprop) +static jl_code_info_flags_t code_info_flags(uint8_t inferred, uint8_t propagate_inbounds, uint8_t has_fcall, + uint8_t nospecializeinfer, uint8_t inlining, uint8_t constprop) { jl_code_info_flags_t flags; flags.bits.inferred = inferred; flags.bits.propagate_inbounds = propagate_inbounds; flags.bits.has_fcall = has_fcall; + flags.bits.nospecializeinfer = nospecializeinfer; flags.bits.inlining = inlining; flags.bits.constprop = constprop; return flags; @@ -785,8 +786,8 @@ 
JL_DLLEXPORT jl_string_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code) 1 }; - jl_code_info_flags_t flags = code_info_flags(code->inferred, code->propagate_inbounds, - code->has_fcall, code->inlining, code->constprop); + jl_code_info_flags_t flags = code_info_flags(code->inferred, code->propagate_inbounds, code->has_fcall, + code->nospecializeinfer, code->inlining, code->constprop); write_uint8(s.s, flags.packed); write_uint8(s.s, code->purity.bits); write_uint16(s.s, code->inlining_cost); @@ -885,6 +886,7 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t code->inferred = flags.bits.inferred; code->propagate_inbounds = flags.bits.propagate_inbounds; code->has_fcall = flags.bits.has_fcall; + code->nospecializeinfer = flags.bits.nospecializeinfer; code->purity.bits = read_uint8(s.s); code->inlining_cost = read_uint16(s.s); diff --git a/src/jltypes.c b/src/jltypes.c index 1a30df637a706..810e1b954633d 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -2903,7 +2903,7 @@ void jl_init_types(void) JL_GC_DISABLED jl_code_info_type = jl_new_datatype(jl_symbol("CodeInfo"), core, jl_any_type, jl_emptysvec, - jl_perm_symsvec(21, + jl_perm_symsvec(22, "code", "codelocs", "ssavaluetypes", @@ -2921,11 +2921,12 @@ void jl_init_types(void) JL_GC_DISABLED "inferred", "propagate_inbounds", "has_fcall", + "nospecializeinfer", "inlining", "constprop", "purity", "inlining_cost"), - jl_svec(21, + jl_svec(22, jl_array_any_type, jl_array_int32_type, jl_any_type, @@ -2943,17 +2944,18 @@ void jl_init_types(void) JL_GC_DISABLED jl_bool_type, jl_bool_type, jl_bool_type, + jl_bool_type, jl_uint8_type, jl_uint8_type, jl_uint8_type, jl_uint16_type), jl_emptysvec, - 0, 1, 20); + 0, 1, 22); jl_method_type = jl_new_datatype(jl_symbol("Method"), core, jl_any_type, jl_emptysvec, - jl_perm_symsvec(29, + jl_perm_symsvec(30, "name", "module", "file", @@ -2980,10 +2982,11 @@ void jl_init_types(void) JL_GC_DISABLED "nkw", "isva", "is_for_opaque_closure", + 
"nospecializeinfer", "constprop", "max_varargs", "purity"), - jl_svec(29, + jl_svec(30, jl_symbol_type, jl_module_type, jl_symbol_type, @@ -3010,6 +3013,7 @@ void jl_init_types(void) JL_GC_DISABLED jl_int32_type, jl_bool_type, jl_bool_type, + jl_bool_type, jl_uint8_type, jl_uint8_type, jl_uint8_type), diff --git a/src/julia.h b/src/julia.h index 286bef615c92d..d214509c7d0b6 100644 --- a/src/julia.h +++ b/src/julia.h @@ -302,6 +302,7 @@ typedef struct _jl_code_info_t { uint8_t inferred; uint8_t propagate_inbounds; uint8_t has_fcall; + uint8_t nospecializeinfer; // uint8 settings uint8_t inlining; // 0 = default; 1 = @inline; 2 = @noinline uint8_t constprop; // 0 = use heuristic; 1 = aggressive; 2 = none @@ -359,6 +360,7 @@ typedef struct _jl_method_t { // various boolean properties uint8_t isva; uint8_t is_for_opaque_closure; + uint8_t nospecializeinfer; // uint8 settings uint8_t constprop; // 0x00 = use heuristic; 0x01 = aggressive; 0x02 = none uint8_t max_varargs; // 0xFF = use heuristic; otherwise, max # of args to expand diff --git a/src/julia_internal.h b/src/julia_internal.h index 49f0b19ec4209..1dcf40b3d920b 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -607,6 +607,7 @@ typedef struct { uint8_t inferred:1; uint8_t propagate_inbounds:1; uint8_t has_fcall:1; + uint8_t nospecializeinfer:1; uint8_t inlining:2; // 0 = use heuristic; 1 = aggressive; 2 = none uint8_t constprop:2; // 0 = use heuristic; 1 = aggressive; 2 = none } jl_code_info_flags_bitfield_t; @@ -1552,6 +1553,7 @@ extern JL_DLLEXPORT jl_sym_t *jl_aggressive_constprop_sym; extern JL_DLLEXPORT jl_sym_t *jl_no_constprop_sym; extern JL_DLLEXPORT jl_sym_t *jl_purity_sym; extern JL_DLLEXPORT jl_sym_t *jl_nospecialize_sym; +extern JL_DLLEXPORT jl_sym_t *jl_nospecializeinfer_sym; extern JL_DLLEXPORT jl_sym_t *jl_macrocall_sym; extern JL_DLLEXPORT jl_sym_t *jl_colon_sym; extern JL_DLLEXPORT jl_sym_t *jl_hygienicscope_sym; diff --git a/src/method.c b/src/method.c index 
c207149032fb9..9583ead272dca 100644 --- a/src/method.c +++ b/src/method.c @@ -321,6 +321,8 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) li->inlining = 2; else if (ma == (jl_value_t*)jl_propagate_inbounds_sym) li->propagate_inbounds = 1; + else if (ma == (jl_value_t*)jl_nospecializeinfer_sym) + li->nospecializeinfer = 1; else if (ma == (jl_value_t*)jl_aggressive_constprop_sym) li->constprop = 1; else if (ma == (jl_value_t*)jl_no_constprop_sym) @@ -477,6 +479,7 @@ JL_DLLEXPORT jl_code_info_t *jl_new_code_info_uninit(void) src->inferred = 0; src->propagate_inbounds = 0; src->has_fcall = 0; + src->nospecializeinfer = 0; src->edges = jl_nothing; src->constprop = 0; src->inlining = 0; @@ -682,6 +685,7 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src) } } m->called = called; + m->nospecializeinfer = src->nospecializeinfer; m->constprop = src->constprop; m->purity.bits = src->purity.bits; jl_add_function_to_lineinfo(src, (jl_value_t*)m->name); @@ -811,6 +815,7 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module) m->primary_world = 1; m->deleted_world = ~(size_t)0; m->is_for_opaque_closure = 0; + m->nospecializeinfer = 0; m->constprop = 0; m->purity.bits = 0; m->max_varargs = UINT8_MAX; diff --git a/stdlib/Serialization/src/Serialization.jl b/stdlib/Serialization/src/Serialization.jl index dd901d6910abf..7c1043f33bdfe 100644 --- a/stdlib/Serialization/src/Serialization.jl +++ b/stdlib/Serialization/src/Serialization.jl @@ -80,7 +80,7 @@ const TAGS = Any[ const NTAGS = length(TAGS) @assert NTAGS == 255 -const ser_version = 23 # do not make changes without bumping the version #! +const ser_version = 24 # do not make changes without bumping the version #! 
format_version(::AbstractSerializer) = ser_version format_version(s::Serializer) = s.version @@ -418,6 +418,7 @@ function serialize(s::AbstractSerializer, meth::Method) serialize(s, meth.nargs) serialize(s, meth.isva) serialize(s, meth.is_for_opaque_closure) + serialize(s, meth.nospecializeinfer) serialize(s, meth.constprop) serialize(s, meth.purity) if isdefined(meth, :source) @@ -1026,10 +1027,14 @@ function deserialize(s::AbstractSerializer, ::Type{Method}) nargs = deserialize(s)::Int32 isva = deserialize(s)::Bool is_for_opaque_closure = false + nospecializeinfer = false constprop = purity = 0x00 template_or_is_opaque = deserialize(s) if isa(template_or_is_opaque, Bool) is_for_opaque_closure = template_or_is_opaque + if format_version(s) >= 24 + nospecializeinfer = deserialize(s)::Bool + end if format_version(s) >= 14 constprop = deserialize(s)::UInt8 end @@ -1054,6 +1059,7 @@ function deserialize(s::AbstractSerializer, ::Type{Method}) meth.nargs = nargs meth.isva = isva meth.is_for_opaque_closure = is_for_opaque_closure + meth.nospecializeinfer = nospecializeinfer meth.constprop = constprop meth.purity = purity if template !== nothing @@ -1195,6 +1201,9 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo}) if format_version(s) >= 20 ci.has_fcall = deserialize(s) end + if format_version(s) >= 24 + ci.nospecializeinfer = deserialize(s)::Bool + end if format_version(s) >= 21 ci.inlining = deserialize(s)::UInt8 end diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl index 5987e10401bc8..385315d614de2 100644 --- a/test/compiler/inference.jl +++ b/test/compiler/inference.jl @@ -1167,25 +1167,18 @@ let typeargs = Tuple{Type{Int},Type{Int},Type{Int},Type{Int},Type{Int},Type{Int} @test only(Base.return_types(promote_type, typeargs)) === Type{Int} end -function count_specializations(method::Method) - specs = method.specializations - specs isa Core.MethodInstance && return 1 - n = count(!isnothing, specs::Core.SimpleVector) - return n -end - # 
demonstrate that inference can complete without waiting for MAX_TYPE_DEPTH copy_dims_out(out) = () copy_dims_out(out, dim::Int, tail...) = copy_dims_out((out..., dim), tail...) copy_dims_out(out, dim::Colon, tail...) = copy_dims_out((out..., dim), tail...) @test Base.return_types(copy_dims_out, (Tuple{}, Vararg{Union{Int,Colon}})) == Any[Tuple{}, Tuple{}, Tuple{}] -@test all(m -> 4 < count_specializations(m) < 15, methods(copy_dims_out)) # currently about 5 +@test all(m -> 4 < length(Base.specializations(m)) < 15, methods(copy_dims_out)) # currently about 5 copy_dims_pair(out) = () copy_dims_pair(out, dim::Int, tail...) = copy_dims_pair(out => dim, tail...) copy_dims_pair(out, dim::Colon, tail...) = copy_dims_pair(out => dim, tail...) @test Base.return_types(copy_dims_pair, (Tuple{}, Vararg{Union{Int,Colon}})) == Any[Tuple{}, Tuple{}, Tuple{}] -@test all(m -> 3 < count_specializations(m) < 15, methods(copy_dims_pair)) # currently about 5 +@test all(m -> 3 < length(Base.specializations(m)) < 15, methods(copy_dims_pair)) # currently about 5 # splatting an ::Any should still allow inference to use types of parameters preceding it f22364(::Int, ::Any...) 
= 0 @@ -4160,6 +4153,102 @@ Base.getproperty(x::Interface41024Extended, sym::Symbol) = x.x end |> only === Int +function call_func_itr(func, itr) + local r = 0 + r += func(itr[1]) + r += func(itr[2]) + r += func(itr[3]) + r += func(itr[4]) + r += func(itr[5]) + r +end + +global inline_checker = c -> c # untyped global, a call of this func will prevent inlining +# if `f` is inlined, `GlobalRef(m, :inline_checker)` should appear within the body of `invokef` +function is_inline_checker(@nospecialize stmt) + isa(stmt, GlobalRef) && stmt.name === :inline_checker +end + +function func_nospecialized(@nospecialize a) + c = isa(a, Function) + inline_checker(c) # dynamic dispatch, preventing inlining +end + +@inline function func_nospecialized_inline(@nospecialize a) + c = isa(a, Function) + inline_checker(c) # dynamic dispatch, preventing inlining (but forced by the annotation) +end + +Base.@nospecializeinfer function func_nospecializeinfer(@nospecialize a) + c = isa(a, Function) + inline_checker(c) # dynamic dispatch, preventing inlining +end + +Base.@nospecializeinfer @inline function func_nospecializeinfer_inline(@nospecialize a) + c = isa(a, Function) + inline_checker(c) # dynamic dispatch, preventing inlining (but forced by the annotation) +end + +Base.@nospecializeinfer Base.@constprop :aggressive function func_nospecializeinfer_constprop(c::Bool, @nospecialize a) + if c + return inline_checker(a) # dynamic dispatch, preventing inlining/constprop (but forced by the annotation) + end + return false +end +Base.@nospecializeinfer func_nospecializeinfer_constprop(@nospecialize a) = func_nospecializeinfer_constprop(false, a) + +itr_dispatchonly = Any[sin, muladd, "foo", nothing, missing] # untyped container can cause excessive runtime dispatch +itr_withinfernce = tuple(sin, muladd, "foo", nothing, missing) # typed container can cause excessive inference + +@testset "compilation annotations" begin + @testset "@nospecialize" begin + # `@nospecialize` should suppress runtime 
dispatches of `nospecialize` + @test call_func_itr(func_nospecialized, itr_dispatchonly) == 2 + @test length(Base.specializations(only(methods((func_nospecialized))))) == 1 + # `@nospecialize` should allow inference to happen + @test call_func_itr(func_nospecialized, itr_withinfernce) == 2 + @test length(Base.specializations(only(methods((func_nospecialized))))) == 6 + @test count(is_inline_checker, @get_code call_func_itr(func_nospecialized, itr_dispatchonly)) == 0 + + # `@nospecialize` should allow inlinining + @test call_func_itr(func_nospecialized_inline, itr_dispatchonly) == 2 + @test length(Base.specializations(only(methods((func_nospecialized_inline))))) == 1 + @test call_func_itr(func_nospecialized_inline, itr_withinfernce) == 2 + @test length(Base.specializations(only(methods((func_nospecialized_inline))))) == 6 + @test count(is_inline_checker, @get_code call_func_itr(func_nospecialized_inline, itr_dispatchonly)) == 5 + end + + @testset "@nospecializeinfer" begin + # `@nospecialize` should suppress runtime dispatches of `nospecialize` + @test call_func_itr(func_nospecializeinfer, itr_dispatchonly) == 2 + @test length(Base.specializations(only(methods((func_nospecializeinfer))))) == 1 + # `@nospecializeinfer` suppresses inference also + @test call_func_itr(func_nospecializeinfer, itr_withinfernce) == 2 + @test length(Base.specializations(only(methods((func_nospecializeinfer))))) == 1 + @test !any(is_inline_checker, @get_code call_func_itr(func_nospecializeinfer, itr_dispatchonly)) + + # `@nospecializeinfer` should allow inlinining + @test call_func_itr(func_nospecializeinfer_inline, itr_dispatchonly) == 2 + @test length(Base.specializations(only(methods((func_nospecializeinfer_inline))))) == 1 + @test call_func_itr(func_nospecializeinfer_inline, itr_withinfernce) == 2 + @test length(Base.specializations(only(methods((func_nospecializeinfer_inline))))) == 1 + @test any(is_inline_checker, @get_code call_func_itr(func_nospecializeinfer_inline, 
itr_dispatchonly)) + + # `@nospecializeinfer` should allow constprop + @test Base.return_types((Any,)) do x + Val(func_nospecializeinfer_constprop(x)) + end |> only == Val{false} + @test call_func_itr(func_nospecializeinfer_constprop, itr_dispatchonly) == 0 + for m = methods(func_nospecializeinfer_constprop) + @test length(Base.specializations(m)) == 1 + end + @test call_func_itr(func_nospecializeinfer_constprop, itr_withinfernce) == 0 + for m = methods(func_nospecializeinfer_constprop) + @test length(Base.specializations(m)) == 1 + end + end +end + @testset "fieldtype for unions" begin # e.g. issue #40177 f40177(::Type{T}) where {T} = fieldtype(T, 1) for T in [ diff --git a/test/compiler/irutils.jl b/test/compiler/irutils.jl index 95ac0d555ef88..00de9b2472de4 100644 --- a/test/compiler/irutils.jl +++ b/test/compiler/irutils.jl @@ -1,10 +1,17 @@ -import Core: CodeInfo, ReturnNode, MethodInstance -import Core.Compiler: IRCode, IncrementalCompact, VarState, argextype, singleton_type -import Base.Meta: isexpr +using Core: CodeInfo, ReturnNode, MethodInstance +using Core.Compiler: IRCode, IncrementalCompact, singleton_type, VarState +using Base.Meta: isexpr +using InteractiveUtils: gen_call_with_extracted_types_and_kwargs -argextype(@nospecialize args...) = argextype(args..., VarState[]) +argextype(@nospecialize args...) = Core.Compiler.argextype(args..., VarState[]) code_typed1(args...; kwargs...) = first(only(code_typed(args...; kwargs...)))::CodeInfo +macro code_typed1(ex0...) + return gen_call_with_extracted_types_and_kwargs(__module__, :code_typed1, ex0) +end get_code(args...; kwargs...) = code_typed1(args...; kwargs...).code +macro get_code(ex0...) + return gen_call_with_extracted_types_and_kwargs(__module__, :get_code, ex0) +end # check if `x` is a statement with a given `head` isnew(@nospecialize x) = isexpr(x, :new) @@ -45,3 +52,6 @@ function fully_eliminated(@nospecialize args...; retval=(@__FILE__), kwargs...) 
return length(code) == 1 && isreturn(code[1]) end end +macro fully_eliminated(ex0...) + return gen_call_with_extracted_types_and_kwargs(__module__, :fully_eliminated, ex0) +end From 1dc2ed644597ad5e8c8cf61ec7b7735155028fd1 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Wed, 12 Apr 2023 19:19:35 +0900 Subject: [PATCH 004/290] experiment `@nospecializeinfer` on `Core.Compiler` This commit adds `@nospecializeinfer` macro on various `Core.Compiler` functions and achieves the following sysimage size reduction: | | this commit | master | % | | --------------------------------- | ----------- | ----------- | ------- | | `Core.Compiler` compilation (sec) | `66.4551` | `71.0846` | `0.935` | | `corecompiler.jl` (KB) | `17638080` | `18407248` | `0.958` | | `sys.jl` (KB) | `88736432` | `89361280` | `0.993` | | `sys-o.a` (KB) | `189484400` | `189907096` | `0.998` | --- base/compiler/abstractinterpretation.jl | 30 +++++------ base/compiler/abstractlattice.jl | 50 +++++++++---------- base/compiler/typelattice.jl | 66 ++++++++++++------------- base/compiler/typelimits.jl | 23 +++++---- base/compiler/utilities.jl | 4 +- 5 files changed, 86 insertions(+), 87 deletions(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 097bd56d913ce..35ffcac8f4279 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -2645,18 +2645,18 @@ struct BestguessInfo{Interp<:AbstractInterpreter} end end -function widenreturn(@nospecialize(rt), info::BestguessInfo) +@nospecializeinfer function widenreturn(@nospecialize(rt), info::BestguessInfo) return widenreturn(typeinf_lattice(info.interp), rt, info) end -function widenreturn(𝕃ᵢ::AbstractLattice, @nospecialize(rt), info::BestguessInfo) +@nospecializeinfer function widenreturn(𝕃ᵢ::AbstractLattice, @nospecialize(rt), info::BestguessInfo) return widenreturn(widenlattice(𝕃ᵢ), rt, info) end -function widenreturn_noslotwrapper(𝕃ᵢ::AbstractLattice, 
@nospecialize(rt), info::BestguessInfo) +@nospecializeinfer function widenreturn_noslotwrapper(𝕃ᵢ::AbstractLattice, @nospecialize(rt), info::BestguessInfo) return widenreturn_noslotwrapper(widenlattice(𝕃ᵢ), rt, info) end -function widenreturn(𝕃ᵢ::MustAliasesLattice, @nospecialize(rt), info::BestguessInfo) +@nospecializeinfer function widenreturn(𝕃ᵢ::MustAliasesLattice, @nospecialize(rt), info::BestguessInfo) if isa(rt, MustAlias) if 1 ≤ rt.slot ≤ info.nargs rt = InterMustAlias(rt) @@ -2668,7 +2668,7 @@ function widenreturn(𝕃ᵢ::MustAliasesLattice, @nospecialize(rt), info::Bestg return widenreturn(widenlattice(𝕃ᵢ), rt, info) end -function widenreturn(𝕃ᵢ::ConditionalsLattice, @nospecialize(rt), info::BestguessInfo) +@nospecializeinfer function widenreturn(𝕃ᵢ::ConditionalsLattice, @nospecialize(rt), info::BestguessInfo) ⊑ᵢ = ⊑(𝕃ᵢ) if !(⊑(ipo_lattice(info.interp), info.bestguess, Bool)) || info.bestguess === Bool # give up inter-procedural constraint back-propagation @@ -2705,7 +2705,7 @@ function widenreturn(𝕃ᵢ::ConditionalsLattice, @nospecialize(rt), info::Best isa(rt, InterConditional) && return rt return widenreturn(widenlattice(𝕃ᵢ), rt, info) end -function bool_rt_to_conditional(@nospecialize(rt), info::BestguessInfo) +@nospecializeinfer function bool_rt_to_conditional(@nospecialize(rt), info::BestguessInfo) bestguess = info.bestguess if isa(bestguess, InterConditional) # if the bestguess so far is already `Conditional`, try to convert @@ -2723,7 +2723,7 @@ function bool_rt_to_conditional(@nospecialize(rt), info::BestguessInfo) end return rt end -function bool_rt_to_conditional(@nospecialize(rt), slot_id::Int, info::BestguessInfo) +@nospecializeinfer function bool_rt_to_conditional(@nospecialize(rt), slot_id::Int, info::BestguessInfo) ⊑ᵢ = ⊑(typeinf_lattice(info.interp)) old = info.slottypes[slot_id] new = widenslotwrapper(info.changes[slot_id].typ) # avoid nested conditional @@ -2742,13 +2742,13 @@ function bool_rt_to_conditional(@nospecialize(rt), slot_id::Int, 
info::Bestguess return rt end -function widenreturn(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo) +@nospecializeinfer function widenreturn(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo) return widenreturn_partials(𝕃ᵢ, rt, info) end -function widenreturn_noslotwrapper(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo) +@nospecializeinfer function widenreturn_noslotwrapper(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo) return widenreturn_partials(𝕃ᵢ, rt, info) end -function widenreturn_partials(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo) +@nospecializeinfer function widenreturn_partials(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo) if isa(rt, PartialStruct) fields = copy(rt.fields) local anyrefine = false @@ -2771,21 +2771,21 @@ function widenreturn_partials(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info: return widenreturn(widenlattice(𝕃ᵢ), rt, info) end -function widenreturn(::ConstsLattice, @nospecialize(rt), ::BestguessInfo) +@nospecializeinfer function widenreturn(::ConstsLattice, @nospecialize(rt), ::BestguessInfo) return widenreturn_consts(rt) end -function widenreturn_noslotwrapper(::ConstsLattice, @nospecialize(rt), ::BestguessInfo) +@nospecializeinfer function widenreturn_noslotwrapper(::ConstsLattice, @nospecialize(rt), ::BestguessInfo) return widenreturn_consts(rt) end -function widenreturn_consts(@nospecialize(rt)) +@nospecializeinfer function widenreturn_consts(@nospecialize(rt)) isa(rt, Const) && return rt return widenconst(rt) end -function widenreturn(::JLTypeLattice, @nospecialize(rt), ::BestguessInfo) +@nospecializeinfer function widenreturn(::JLTypeLattice, @nospecialize(rt), ::BestguessInfo) return widenconst(rt) end -function widenreturn_noslotwrapper(::JLTypeLattice, @nospecialize(rt), ::BestguessInfo) +@nospecializeinfer function widenreturn_noslotwrapper(::JLTypeLattice, @nospecialize(rt), ::BestguessInfo) return widenconst(rt) end diff --git 
a/base/compiler/abstractlattice.jl b/base/compiler/abstractlattice.jl index a84050816cb21..719b5fcf325e4 100644 --- a/base/compiler/abstractlattice.jl +++ b/base/compiler/abstractlattice.jl @@ -161,7 +161,7 @@ If `𝕃` is `JLTypeLattice`, this is equivalent to subtyping. """ function ⊑ end -⊑(::JLTypeLattice, @nospecialize(a::Type), @nospecialize(b::Type)) = a <: b +@nospecializeinfer ⊑(::JLTypeLattice, @nospecialize(a::Type), @nospecialize(b::Type)) = a <: b """ ⊏(𝕃::AbstractLattice, a, b) -> Bool @@ -169,7 +169,7 @@ function ⊑ end The strict partial order over the type inference lattice. This is defined as the irreflexive kernel of `⊑`. """ -⊏(𝕃::AbstractLattice, @nospecialize(a), @nospecialize(b)) = ⊑(𝕃, a, b) && !⊑(𝕃, b, a) +@nospecializeinfer ⊏(𝕃::AbstractLattice, @nospecialize(a), @nospecialize(b)) = ⊑(𝕃, a, b) && !⊑(𝕃, b, a) """ ⋤(𝕃::AbstractLattice, a, b) -> Bool @@ -177,7 +177,7 @@ This is defined as the irreflexive kernel of `⊑`. This order could be used as a slightly more efficient version of the strict order `⊏`, where we can safely assume `a ⊑ b` holds. """ -⋤(𝕃::AbstractLattice, @nospecialize(a), @nospecialize(b)) = !⊑(𝕃, b, a) +@nospecializeinfer ⋤(𝕃::AbstractLattice, @nospecialize(a), @nospecialize(b)) = !⊑(𝕃, b, a) """ is_lattice_equal(𝕃::AbstractLattice, a, b) -> Bool @@ -186,7 +186,7 @@ Check if two lattice elements are partial order equivalent. This is basically `a ⊑ b && b ⊑ a` in the lattice of `𝕃` but (optionally) with extra performance optimizations. """ -function is_lattice_equal(𝕃::AbstractLattice, @nospecialize(a), @nospecialize(b)) +@nospecializeinfer function is_lattice_equal(𝕃::AbstractLattice, @nospecialize(a), @nospecialize(b)) a === b && return true return ⊑(𝕃, a, b) && ⊑(𝕃, b, a) end @@ -197,14 +197,14 @@ end Determines whether the given lattice element `t` of `𝕃` has non-trivial extended lattice information that would not be available from the type itself. 
""" -has_nontrivial_extended_info(𝕃::AbstractLattice, @nospecialize t) = +@nospecializeinfer has_nontrivial_extended_info(𝕃::AbstractLattice, @nospecialize t) = has_nontrivial_extended_info(widenlattice(𝕃), t) -function has_nontrivial_extended_info(𝕃::PartialsLattice, @nospecialize t) +@nospecializeinfer function has_nontrivial_extended_info(𝕃::PartialsLattice, @nospecialize t) isa(t, PartialStruct) && return true isa(t, PartialOpaque) && return true return has_nontrivial_extended_info(widenlattice(𝕃), t) end -function has_nontrivial_extended_info(𝕃::ConstsLattice, @nospecialize t) +@nospecializeinfer function has_nontrivial_extended_info(𝕃::ConstsLattice, @nospecialize t) isa(t, PartialTypeVar) && return true if isa(t, Const) val = t.val @@ -212,7 +212,7 @@ function has_nontrivial_extended_info(𝕃::ConstsLattice, @nospecialize t) end return has_nontrivial_extended_info(widenlattice(𝕃), t) end -has_nontrivial_extended_info(::JLTypeLattice, @nospecialize(t)) = false +@nospecializeinfer has_nontrivial_extended_info(::JLTypeLattice, @nospecialize(t)) = false """ is_const_prop_profitable_arg(𝕃::AbstractLattice, t) -> Bool @@ -220,9 +220,9 @@ has_nontrivial_extended_info(::JLTypeLattice, @nospecialize(t)) = false Determines whether the given lattice element `t` of `𝕃` has new extended lattice information that should be forwarded along with constant propagation. 
""" -is_const_prop_profitable_arg(𝕃::AbstractLattice, @nospecialize t) = +@nospecializeinfer is_const_prop_profitable_arg(𝕃::AbstractLattice, @nospecialize t) = is_const_prop_profitable_arg(widenlattice(𝕃), t) -function is_const_prop_profitable_arg(𝕃::PartialsLattice, @nospecialize t) +@nospecializeinfer function is_const_prop_profitable_arg(𝕃::PartialsLattice, @nospecialize t) if isa(t, PartialStruct) return true # might be a bit aggressive, may want to enable some check like follows: # for i = 1:length(t.fields) @@ -236,7 +236,7 @@ function is_const_prop_profitable_arg(𝕃::PartialsLattice, @nospecialize t) isa(t, PartialOpaque) && return true return is_const_prop_profitable_arg(widenlattice(𝕃), t) end -function is_const_prop_profitable_arg(𝕃::ConstsLattice, @nospecialize t) +@nospecializeinfer function is_const_prop_profitable_arg(𝕃::ConstsLattice, @nospecialize t) if isa(t, Const) # don't consider mutable values useful constants val = t.val @@ -245,24 +245,24 @@ function is_const_prop_profitable_arg(𝕃::ConstsLattice, @nospecialize t) isa(t, PartialTypeVar) && return false # this isn't forwardable return is_const_prop_profitable_arg(widenlattice(𝕃), t) end -is_const_prop_profitable_arg(::JLTypeLattice, @nospecialize t) = false +@nospecializeinfer is_const_prop_profitable_arg(::JLTypeLattice, @nospecialize t) = false -is_forwardable_argtype(𝕃::AbstractLattice, @nospecialize(x)) = +@nospecializeinfer is_forwardable_argtype(𝕃::AbstractLattice, @nospecialize(x)) = is_forwardable_argtype(widenlattice(𝕃), x) -function is_forwardable_argtype(𝕃::ConditionalsLattice, @nospecialize x) +@nospecializeinfer function is_forwardable_argtype(𝕃::ConditionalsLattice, @nospecialize x) isa(x, Conditional) && return true return is_forwardable_argtype(widenlattice(𝕃), x) end -function is_forwardable_argtype(𝕃::PartialsLattice, @nospecialize x) +@nospecializeinfer function is_forwardable_argtype(𝕃::PartialsLattice, @nospecialize x) isa(x, PartialStruct) && return true isa(x, 
PartialOpaque) && return true return is_forwardable_argtype(widenlattice(𝕃), x) end -function is_forwardable_argtype(𝕃::ConstsLattice, @nospecialize x) +@nospecializeinfer function is_forwardable_argtype(𝕃::ConstsLattice, @nospecialize x) isa(x, Const) && return true return is_forwardable_argtype(widenlattice(𝕃), x) end -function is_forwardable_argtype(::JLTypeLattice, @nospecialize x) +@nospecializeinfer function is_forwardable_argtype(::JLTypeLattice, @nospecialize x) return false end @@ -281,9 +281,9 @@ External lattice `𝕃ᵢ::ExternalLattice` may overload: """ function widenreturn end, function widenreturn_noslotwrapper end -is_valid_lattice(𝕃::AbstractLattice, @nospecialize(elem)) = +@nospecializeinfer is_valid_lattice(𝕃::AbstractLattice, @nospecialize(elem)) = is_valid_lattice_norec(𝕃, elem) && is_valid_lattice(widenlattice(𝕃), elem) -is_valid_lattice(𝕃::JLTypeLattice, @nospecialize(elem)) = is_valid_lattice_norec(𝕃, elem) +@nospecializeinfer is_valid_lattice(𝕃::JLTypeLattice, @nospecialize(elem)) = is_valid_lattice_norec(𝕃, elem) has_conditional(𝕃::AbstractLattice) = has_conditional(widenlattice(𝕃)) has_conditional(::AnyConditionalsLattice) = true @@ -306,12 +306,12 @@ has_extended_unionsplit(::JLTypeLattice) = false const fallback_lattice = InferenceLattice(BaseInferenceLattice.instance) const fallback_ipo_lattice = InferenceLattice(IPOResultLattice.instance) -⊑(@nospecialize(a), @nospecialize(b)) = ⊑(fallback_lattice, a, b) -tmeet(@nospecialize(a), @nospecialize(b)) = tmeet(fallback_lattice, a, b) -tmerge(@nospecialize(a), @nospecialize(b)) = tmerge(fallback_lattice, a, b) -⊏(@nospecialize(a), @nospecialize(b)) = ⊏(fallback_lattice, a, b) -⋤(@nospecialize(a), @nospecialize(b)) = ⋤(fallback_lattice, a, b) -is_lattice_equal(@nospecialize(a), @nospecialize(b)) = is_lattice_equal(fallback_lattice, a, b) +@nospecializeinfer @nospecialize(a) ⊑ @nospecialize(b) = ⊑(fallback_lattice, a, b) +@nospecializeinfer @nospecialize(a) ⊏ @nospecialize(b) = ⊏(fallback_lattice, 
a, b) +@nospecializeinfer @nospecialize(a) ⋤ @nospecialize(b) = ⋤(fallback_lattice, a, b) +@nospecializeinfer tmeet(@nospecialize(a), @nospecialize(b)) = tmeet(fallback_lattice, a, b) +@nospecializeinfer tmerge(@nospecialize(a), @nospecialize(b)) = tmerge(fallback_lattice, a, b) +@nospecializeinfer is_lattice_equal(@nospecialize(a), @nospecialize(b)) = is_lattice_equal(fallback_lattice, a, b) # Widenlattice with argument widenlattice(::JLTypeLattice, @nospecialize(t)) = widenconst(t) diff --git a/base/compiler/typelattice.jl b/base/compiler/typelattice.jl index 700a6d333cbc4..75071d2a8a2e0 100644 --- a/base/compiler/typelattice.jl +++ b/base/compiler/typelattice.jl @@ -244,7 +244,7 @@ const CompilerTypes = Union{MaybeUndef, Const, Conditional, MustAlias, NotFound, # slot wrappers # ============= -function assert_nested_slotwrapper(@nospecialize t) +@nospecializeinfer function assert_nested_slotwrapper(@nospecialize t) @assert !(t isa Conditional) "found nested Conditional" @assert !(t isa InterConditional) "found nested InterConditional" @assert !(t isa MustAlias) "found nested MustAlias" @@ -252,7 +252,7 @@ function assert_nested_slotwrapper(@nospecialize t) return t end -function widenslotwrapper(@nospecialize typ) +@nospecializeinfer function widenslotwrapper(@nospecialize typ) if isa(typ, AnyConditional) return widenconditional(typ) elseif isa(typ, AnyMustAlias) @@ -261,7 +261,7 @@ function widenslotwrapper(@nospecialize typ) return typ end -function widenwrappedslotwrapper(@nospecialize typ) +@nospecializeinfer function widenwrappedslotwrapper(@nospecialize typ) if isa(typ, LimitedAccuracy) return LimitedAccuracy(widenslotwrapper(typ.typ), typ.causes) end @@ -271,7 +271,7 @@ end # Conditional # =========== -function widenconditional(@nospecialize typ) +@nospecializeinfer function widenconditional(@nospecialize typ) if isa(typ, AnyConditional) if typ.thentype === Union{} return Const(false) @@ -285,7 +285,7 @@ function widenconditional(@nospecialize typ) end 
return typ end -function widenwrappedconditional(@nospecialize typ) +@nospecializeinfer function widenwrappedconditional(@nospecialize typ) if isa(typ, LimitedAccuracy) return LimitedAccuracy(widenconditional(typ.typ), typ.causes) end @@ -294,7 +294,7 @@ end # `Conditional` and `InterConditional` are valid in opposite contexts # (i.e. local inference and inter-procedural call), as such they will never be compared -function issubconditional(lattice::AbstractLattice, a::C, b::C) where {C<:AnyConditional} +@nospecializeinfer function issubconditional(lattice::AbstractLattice, a::C, b::C) where {C<:AnyConditional} if is_same_conditionals(a, b) if ⊑(lattice, a.thentype, b.thentype) if ⊑(lattice, a.elsetype, b.elsetype) @@ -307,7 +307,7 @@ end is_same_conditionals(a::C, b::C) where C<:AnyConditional = a.slot == b.slot -is_lattice_bool(lattice::AbstractLattice, @nospecialize(typ)) = typ !== Bottom && ⊑(lattice, typ, Bool) +@nospecializeinfer is_lattice_bool(lattice::AbstractLattice, @nospecialize(typ)) = typ !== Bottom && ⊑(lattice, typ, Bool) maybe_extract_const_bool(c::Const) = (val = c.val; isa(val, Bool)) ? 
val : nothing function maybe_extract_const_bool(c::AnyConditional) @@ -315,12 +315,12 @@ function maybe_extract_const_bool(c::AnyConditional) (c.elsetype === Bottom && !(c.thentype === Bottom)) && return true nothing end -maybe_extract_const_bool(@nospecialize c) = nothing +@nospecializeinfer maybe_extract_const_bool(@nospecialize c) = nothing # MustAlias # ========= -function widenmustalias(@nospecialize typ) +@nospecializeinfer function widenmustalias(@nospecialize typ) if isa(typ, AnyMustAlias) return typ.fldtyp elseif isa(typ, LimitedAccuracy) @@ -329,13 +329,13 @@ function widenmustalias(@nospecialize typ) return typ end -function isalreadyconst(@nospecialize t) +@nospecializeinfer function isalreadyconst(@nospecialize t) isa(t, Const) && return true isa(t, DataType) && isdefined(t, :instance) && return true return isconstType(t) end -function maybe_const_fldidx(@nospecialize(objtyp), @nospecialize(fldval)) +@nospecializeinfer function maybe_const_fldidx(@nospecialize(objtyp), @nospecialize(fldval)) t = widenconst(objtyp) if isa(fldval, Int) fldidx = fldval @@ -352,7 +352,7 @@ function maybe_const_fldidx(@nospecialize(objtyp), @nospecialize(fldval)) return fldidx end -function form_mustalias_conditional(alias::MustAlias, @nospecialize(thentype), @nospecialize(elsetype)) +@nospecializeinfer function form_mustalias_conditional(alias::MustAlias, @nospecialize(thentype), @nospecialize(elsetype)) (; slot, vartyp, fldidx) = alias if isa(vartyp, PartialStruct) fields = vartyp.fields @@ -401,7 +401,7 @@ ignorelimited(typ::LimitedAccuracy) = typ.typ # lattice order # ============= -function ⊑(lattice::InferenceLattice, @nospecialize(a), @nospecialize(b)) +@nospecializeinfer function ⊑(lattice::InferenceLattice, @nospecialize(a), @nospecialize(b)) r = ⊑(widenlattice(lattice), ignorelimited(a), ignorelimited(b)) r || return false isa(b, LimitedAccuracy) || return true @@ -420,7 +420,7 @@ function ⊑(lattice::InferenceLattice, @nospecialize(a), @nospecialize(b)) return 
b.causes ⊆ a.causes end -function ⊑(lattice::OptimizerLattice, @nospecialize(a), @nospecialize(b)) +@nospecializeinfer function ⊑(lattice::OptimizerLattice, @nospecialize(a), @nospecialize(b)) if isa(a, MaybeUndef) isa(b, MaybeUndef) || return false a, b = a.typ, b.typ @@ -430,7 +430,7 @@ function ⊑(lattice::OptimizerLattice, @nospecialize(a), @nospecialize(b)) return ⊑(widenlattice(lattice), a, b) end -function ⊑(lattice::AnyConditionalsLattice, @nospecialize(a), @nospecialize(b)) +@nospecializeinfer function ⊑(lattice::AnyConditionalsLattice, @nospecialize(a), @nospecialize(b)) # Fast paths for common cases b === Any && return true a === Any && return false @@ -450,7 +450,7 @@ function ⊑(lattice::AnyConditionalsLattice, @nospecialize(a), @nospecialize(b) return ⊑(widenlattice(lattice), a, b) end -function ⊑(𝕃::AnyMustAliasesLattice, @nospecialize(a), @nospecialize(b)) +@nospecializeinfer function ⊑(𝕃::AnyMustAliasesLattice, @nospecialize(a), @nospecialize(b)) MustAliasT = isa(𝕃, MustAliasesLattice) ? 
MustAlias : InterMustAlias if isa(a, MustAliasT) if isa(b, MustAliasT) @@ -463,7 +463,7 @@ function ⊑(𝕃::AnyMustAliasesLattice, @nospecialize(a), @nospecialize(b)) return ⊑(widenlattice(𝕃), a, b) end -function ⊑(lattice::PartialsLattice, @nospecialize(a), @nospecialize(b)) +@nospecializeinfer function ⊑(lattice::PartialsLattice, @nospecialize(a), @nospecialize(b)) if isa(a, PartialStruct) if isa(b, PartialStruct) if !(length(a.fields) == length(b.fields) && a.typ <: b.typ) @@ -526,7 +526,7 @@ function ⊑(lattice::PartialsLattice, @nospecialize(a), @nospecialize(b)) return ⊑(widenlattice(lattice), a, b) end -function ⊑(lattice::ConstsLattice, @nospecialize(a), @nospecialize(b)) +@nospecializeinfer function ⊑(lattice::ConstsLattice, @nospecialize(a), @nospecialize(b)) if isa(a, Const) if isa(b, Const) return a.val === b.val @@ -548,7 +548,7 @@ function ⊑(lattice::ConstsLattice, @nospecialize(a), @nospecialize(b)) return ⊑(widenlattice(lattice), a, b) end -function is_lattice_equal(lattice::InferenceLattice, @nospecialize(a), @nospecialize(b)) +@nospecializeinfer function is_lattice_equal(lattice::InferenceLattice, @nospecialize(a), @nospecialize(b)) if isa(a, LimitedAccuracy) isa(b, LimitedAccuracy) || return false a.causes == b.causes || return false @@ -560,7 +560,7 @@ function is_lattice_equal(lattice::InferenceLattice, @nospecialize(a), @nospecia return is_lattice_equal(widenlattice(lattice), a, b) end -function is_lattice_equal(lattice::OptimizerLattice, @nospecialize(a), @nospecialize(b)) +@nospecializeinfer function is_lattice_equal(lattice::OptimizerLattice, @nospecialize(a), @nospecialize(b)) if isa(a, MaybeUndef) || isa(b, MaybeUndef) # TODO: Unwrap these and recurse to is_lattice_equal return ⊑(lattice, a, b) && ⊑(lattice, b, a) @@ -568,7 +568,7 @@ function is_lattice_equal(lattice::OptimizerLattice, @nospecialize(a), @nospecia return is_lattice_equal(widenlattice(lattice), a, b) end -function is_lattice_equal(lattice::AnyConditionalsLattice, 
@nospecialize(a), @nospecialize(b)) +@nospecializeinfer function is_lattice_equal(lattice::AnyConditionalsLattice, @nospecialize(a), @nospecialize(b)) ConditionalT = isa(lattice, ConditionalsLattice) ? Conditional : InterConditional if isa(a, ConditionalT) || isa(b, ConditionalT) # TODO: Unwrap these and recurse to is_lattice_equal @@ -577,7 +577,7 @@ function is_lattice_equal(lattice::AnyConditionalsLattice, @nospecialize(a), @no return is_lattice_equal(widenlattice(lattice), a, b) end -function is_lattice_equal(lattice::PartialsLattice, @nospecialize(a), @nospecialize(b)) +@nospecializeinfer function is_lattice_equal(lattice::PartialsLattice, @nospecialize(a), @nospecialize(b)) if isa(a, PartialStruct) isa(b, PartialStruct) || return false length(a.fields) == length(b.fields) || return false @@ -600,7 +600,7 @@ function is_lattice_equal(lattice::PartialsLattice, @nospecialize(a), @nospecial return is_lattice_equal(widenlattice(lattice), a, b) end -function is_lattice_equal(lattice::ConstsLattice, @nospecialize(a), @nospecialize(b)) +@nospecializeinfer function is_lattice_equal(lattice::ConstsLattice, @nospecialize(a), @nospecialize(b)) a === b && return true if a isa Const if issingletontype(b) @@ -625,7 +625,7 @@ end # lattice operations # ================== -function tmeet(lattice::PartialsLattice, @nospecialize(v), @nospecialize(t::Type)) +@nospecializeinfer function tmeet(lattice::PartialsLattice, @nospecialize(v), @nospecialize(t::Type)) if isa(v, PartialStruct) has_free_typevars(t) && return v widev = widenconst(v) @@ -663,7 +663,7 @@ function tmeet(lattice::PartialsLattice, @nospecialize(v), @nospecialize(t::Type return tmeet(widenlattice(lattice), v, t) end -function tmeet(lattice::ConstsLattice, @nospecialize(v), @nospecialize(t::Type)) +@nospecializeinfer function tmeet(lattice::ConstsLattice, @nospecialize(v), @nospecialize(t::Type)) if isa(v, Const) if !has_free_typevars(t) && !isa(v.val, t) return Bottom @@ -673,7 +673,7 @@ function 
tmeet(lattice::ConstsLattice, @nospecialize(v), @nospecialize(t::Type)) tmeet(widenlattice(lattice), widenconst(v), t) end -function tmeet(lattice::ConditionalsLattice, @nospecialize(v), @nospecialize(t::Type)) +@nospecializeinfer function tmeet(lattice::ConditionalsLattice, @nospecialize(v), @nospecialize(t::Type)) if isa(v, Conditional) if !(Bool <: t) return Bottom @@ -683,33 +683,33 @@ function tmeet(lattice::ConditionalsLattice, @nospecialize(v), @nospecialize(t:: tmeet(widenlattice(lattice), v, t) end -function tmeet(𝕃::MustAliasesLattice, @nospecialize(v), @nospecialize(t::Type)) +@nospecializeinfer function tmeet(𝕃::MustAliasesLattice, @nospecialize(v), @nospecialize(t::Type)) if isa(v, MustAlias) v = widenmustalias(v) end return tmeet(widenlattice(𝕃), v, t) end -function tmeet(lattice::InferenceLattice, @nospecialize(v), @nospecialize(t::Type)) +@nospecializeinfer function tmeet(lattice::InferenceLattice, @nospecialize(v), @nospecialize(t::Type)) # TODO: This can probably happen and should be handled @assert !isa(v, LimitedAccuracy) tmeet(widenlattice(lattice), v, t) end -function tmeet(lattice::InterConditionalsLattice, @nospecialize(v), @nospecialize(t::Type)) +@nospecializeinfer function tmeet(lattice::InterConditionalsLattice, @nospecialize(v), @nospecialize(t::Type)) # TODO: This can probably happen and should be handled @assert !isa(v, AnyConditional) tmeet(widenlattice(lattice), v, t) end -function tmeet(𝕃::InterMustAliasesLattice, @nospecialize(v), @nospecialize(t::Type)) +@nospecializeinfer function tmeet(𝕃::InterMustAliasesLattice, @nospecialize(v), @nospecialize(t::Type)) if isa(v, InterMustAlias) v = widenmustalias(v) end return tmeet(widenlattice(𝕃), v, t) end -function tmeet(lattice::OptimizerLattice, @nospecialize(v), @nospecialize(t::Type)) +@nospecializeinfer function tmeet(lattice::OptimizerLattice, @nospecialize(v), @nospecialize(t::Type)) # TODO: This can probably happen and should be handled @assert !isa(v, MaybeUndef) 
tmeet(widenlattice(lattice), v, t) @@ -727,7 +727,7 @@ widenconst(m::MaybeUndef) = widenconst(m.typ) widenconst(::PartialTypeVar) = TypeVar widenconst(t::PartialStruct) = t.typ widenconst(t::PartialOpaque) = t.typ -widenconst(t::Type) = t +@nospecializeinfer widenconst(@nospecialize t::Type) = t widenconst(::TypeVar) = error("unhandled TypeVar") widenconst(::TypeofVararg) = error("unhandled Vararg") widenconst(::LimitedAccuracy) = error("unhandled LimitedAccuracy") @@ -743,7 +743,7 @@ function smerge(lattice::AbstractLattice, sa::Union{NotFound,VarState}, sb::Unio return VarState(tmerge(lattice, sa.typ, sb.typ), sa.undef | sb.undef) end -@inline schanged(lattice::AbstractLattice, @nospecialize(n), @nospecialize(o)) = +@nospecializeinfer @inline schanged(lattice::AbstractLattice, @nospecialize(n), @nospecialize(o)) = (n !== o) && (o === NOT_FOUND || (n !== NOT_FOUND && !(n.undef <= o.undef && ⊑(lattice, n.typ, o.typ)))) # remove any lattice elements that wrap the reassigned slot object from the vartable diff --git a/base/compiler/typelimits.jl b/base/compiler/typelimits.jl index 191820951fae1..957796f6f5c49 100644 --- a/base/compiler/typelimits.jl +++ b/base/compiler/typelimits.jl @@ -304,7 +304,7 @@ end # A simplified type_more_complex query over the extended lattice # (assumes typeb ⊑ typea) -function issimplertype(𝕃::AbstractLattice, @nospecialize(typea), @nospecialize(typeb)) +@nospecializeinfer function issimplertype(𝕃::AbstractLattice, @nospecialize(typea), @nospecialize(typeb)) typea isa MaybeUndef && (typea = typea.typ) # n.b. does not appear in inference typeb isa MaybeUndef && (typeb = typeb.typ) # n.b. does not appear in inference @assert !isa(typea, LimitedAccuracy) && !isa(typeb, LimitedAccuracy) "LimitedAccuracy not supported by simplertype lattice" # n.b. 
the caller was supposed to handle these @@ -415,7 +415,7 @@ function merge_causes(causesa::IdSet{InferenceState}, causesb::IdSet{InferenceSt end end -@noinline function tmerge_limited(lattice::InferenceLattice, @nospecialize(typea), @nospecialize(typeb)) +@nospecializeinfer @noinline function tmerge_limited(lattice::InferenceLattice, @nospecialize(typea), @nospecialize(typeb)) typea === Union{} && return typeb typeb === Union{} && return typea @@ -466,7 +466,7 @@ end return LimitedAccuracy(tmerge(widenlattice(lattice), typea, typeb), causes) end -function tmerge(lattice::InferenceLattice, @nospecialize(typea), @nospecialize(typeb)) +@nospecializeinfer function tmerge(lattice::InferenceLattice, @nospecialize(typea), @nospecialize(typeb)) if isa(typea, LimitedAccuracy) || isa(typeb, LimitedAccuracy) return tmerge_limited(lattice, typea, typeb) end @@ -476,7 +476,7 @@ function tmerge(lattice::InferenceLattice, @nospecialize(typea), @nospecialize(t return tmerge(widenlattice(lattice), typea, typeb) end -function tmerge(lattice::ConditionalsLattice, @nospecialize(typea), @nospecialize(typeb)) +@nospecializeinfer function tmerge(lattice::ConditionalsLattice, @nospecialize(typea), @nospecialize(typeb)) # type-lattice for Conditional wrapper (NOTE never be merged with InterConditional) if isa(typea, Conditional) && isa(typeb, Const) if typeb.val === true @@ -511,7 +511,7 @@ function tmerge(lattice::ConditionalsLattice, @nospecialize(typea), @nospecializ return tmerge(widenlattice(lattice), typea, typeb) end -function tmerge(lattice::InterConditionalsLattice, @nospecialize(typea), @nospecialize(typeb)) +@nospecializeinfer function tmerge(lattice::InterConditionalsLattice, @nospecialize(typea), @nospecialize(typeb)) # type-lattice for InterConditional wrapper (NOTE never be merged with Conditional) if isa(typea, InterConditional) && isa(typeb, Const) if typeb.val === true @@ -546,7 +546,7 @@ function tmerge(lattice::InterConditionalsLattice, @nospecialize(typea), @nospec 
return tmerge(widenlattice(lattice), typea, typeb) end -function tmerge(𝕃::AnyMustAliasesLattice, @nospecialize(typea), @nospecialize(typeb)) +@nospecializeinfer function tmerge(𝕃::AnyMustAliasesLattice, @nospecialize(typea), @nospecialize(typeb)) typea = widenmustalias(typea) typeb = widenmustalias(typeb) return tmerge(widenlattice(𝕃), typea, typeb) @@ -554,7 +554,7 @@ end # N.B. This can also be called with both typea::Const and typeb::Const to # to recover PartialStruct from `Const`s with overlapping fields. -function tmerge_partial_struct(lattice::PartialsLattice, @nospecialize(typea), @nospecialize(typeb)) +@nospecializeinfer function tmerge_partial_struct(lattice::PartialsLattice, @nospecialize(typea), @nospecialize(typeb)) aty = widenconst(typea) bty = widenconst(typeb) if aty === bty @@ -612,7 +612,7 @@ function tmerge_partial_struct(lattice::PartialsLattice, @nospecialize(typea), @ return nothing end -function tmerge(lattice::PartialsLattice, @nospecialize(typea), @nospecialize(typeb)) +@nospecializeinfer function tmerge(lattice::PartialsLattice, @nospecialize(typea), @nospecialize(typeb)) # type-lattice for Const and PartialStruct wrappers aps = isa(typea, PartialStruct) bps = isa(typeb, PartialStruct) @@ -655,8 +655,7 @@ function tmerge(lattice::PartialsLattice, @nospecialize(typea), @nospecialize(ty return tmerge(wl, typea, typeb) end - -function tmerge(lattice::ConstsLattice, @nospecialize(typea), @nospecialize(typeb)) +@nospecializeinfer function tmerge(lattice::ConstsLattice, @nospecialize(typea), @nospecialize(typeb)) acp = isa(typea, Const) || isa(typea, PartialTypeVar) bcp = isa(typeb, Const) || isa(typeb, PartialTypeVar) if acp && bcp @@ -668,7 +667,7 @@ function tmerge(lattice::ConstsLattice, @nospecialize(typea), @nospecialize(type return tmerge(wl, typea, typeb) end -function tmerge(::JLTypeLattice, @nospecialize(typea::Type), @nospecialize(typeb::Type)) +@nospecializeinfer function tmerge(::JLTypeLattice, @nospecialize(typea::Type), 
@nospecialize(typeb::Type)) # it's always ok to form a Union of two concrete types act = isconcretetype(typea) bct = isconcretetype(typeb) @@ -684,7 +683,7 @@ function tmerge(::JLTypeLattice, @nospecialize(typea::Type), @nospecialize(typeb return tmerge_types_slow(typea, typeb) end -@noinline function tmerge_types_slow(@nospecialize(typea::Type), @nospecialize(typeb::Type)) +@nospecializeinfer @noinline function tmerge_types_slow(@nospecialize(typea::Type), @nospecialize(typeb::Type)) # collect the list of types from past tmerge calls returning Union # and then reduce over that list types = Any[] diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl index cb5f916e76914..e7ce41a3be92a 100644 --- a/base/compiler/utilities.jl +++ b/base/compiler/utilities.jl @@ -327,7 +327,7 @@ end # types # ######### -function singleton_type(@nospecialize(ft)) +@nospecializeinfer function singleton_type(@nospecialize(ft)) ft = widenslotwrapper(ft) if isa(ft, Const) return ft.val @@ -339,7 +339,7 @@ function singleton_type(@nospecialize(ft)) return nothing end -function maybe_singleton_const(@nospecialize(t)) +@nospecializeinfer function maybe_singleton_const(@nospecialize(t)) if isa(t, DataType) if issingletontype(t) return Const(t.instance) From 2e7f2ef6c029d1e1ca152d8cd40c04e77cf77a0f Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Fri, 5 May 2023 02:08:23 -0400 Subject: [PATCH 005/290] timing: Introduce `JL_TIMING_CREATE_BLOCK` to separate alloc/init This includes several changes to the TIMING API: - Adds `JL_TIMING_CREATE_BLOCK(block, subsystem, event)` to create a timing block _without_ starting it - Adds `jl_timing_block_start` to start a timing block which was created with JL_TIMING_CREATE_BLOCK - Removes the C++-specific RAII implementation for JL_TIMING. Although it'd be nice to support JL_TIMING without GCC/Clang, the reality is that the C API prevents that from being achievable. 
- Renames `JL_TIMING_CURRENT_BLOCK` to `JL_TIMING_DEFAULT_BLOCK` To summarize, `JL_TIMING(subsystem, event)` is now equivalent to: ``` JL_TIMING_CREATE_BLOCK(__timing_block, subsystem, event); jl_timing_block_start(&__timing_block); ``` which also means that conditional events can be supported with: ``` JL_TIMING_CREATE_BLOCK(__timing_block, subsystem, event); if (condition) jl_timing_block_start(&__timing_block); ``` --- src/ast.c | 2 +- src/codegen.cpp | 4 +- src/dlload.c | 6 +- src/gc.c | 6 +- src/gf.c | 4 +- src/jitlayers.cpp | 7 +- src/method.c | 2 +- src/safepoint.c | 2 +- src/staticdata.c | 2 +- src/timing.c | 14 ++-- src/timing.h | 165 ++++++++++++++++++++-------------------------- src/toplevel.c | 2 +- 12 files changed, 98 insertions(+), 118 deletions(-) diff --git a/src/ast.c b/src/ast.c index 97bbc6e8227ba..7333d856bd177 100644 --- a/src/ast.c +++ b/src/ast.c @@ -784,7 +784,7 @@ JL_DLLEXPORT jl_value_t *jl_fl_parse(const char *text, size_t text_len, size_t offset, jl_value_t *options) { JL_TIMING(PARSING, PARSING); - jl_timing_show_filename(jl_string_data(filename), JL_TIMING_CURRENT_BLOCK); + jl_timing_show_filename(jl_string_data(filename), JL_TIMING_DEFAULT_BLOCK); if (offset > text_len) { jl_value_t *textstr = jl_pchar_to_string(text, text_len); JL_GC_PUSH1(&textstr); diff --git a/src/codegen.cpp b/src/codegen.cpp index a5d54f16ed2e6..cfc2d8ff24b0c 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -8517,7 +8517,7 @@ jl_llvm_functions_t jl_emit_code( jl_codegen_params_t &params) { JL_TIMING(CODEGEN, CODEGEN_LLVM); - jl_timing_show_func_sig((jl_value_t *)li->specTypes, JL_TIMING_CURRENT_BLOCK); + jl_timing_show_func_sig((jl_value_t *)li->specTypes, JL_TIMING_DEFAULT_BLOCK); // caller must hold codegen_lock jl_llvm_functions_t decls = {}; assert((params.params == &jl_default_cgparams /* fast path */ || !params.cache || @@ -8579,7 +8579,7 @@ jl_llvm_functions_t jl_emit_codeinst( jl_codegen_params_t &params) { JL_TIMING(CODEGEN, CODEGEN_Codeinst); -
jl_timing_show_method_instance(codeinst->def, JL_TIMING_CURRENT_BLOCK); + jl_timing_show_method_instance(codeinst->def, JL_TIMING_DEFAULT_BLOCK); JL_GC_PUSH1(&src); if (!src) { src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred); diff --git a/src/dlload.c b/src/dlload.c index 3fb5a08ba2438..4e9e9c9ce48fc 100644 --- a/src/dlload.c +++ b/src/dlload.c @@ -284,7 +284,7 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags, JL_TIMING(DL_OPEN, DL_OPEN); if (!(flags & JL_RTLD_NOLOAD)) - jl_timing_puts(JL_TIMING_CURRENT_BLOCK, modname); + jl_timing_puts(JL_TIMING_DEFAULT_BLOCK, modname); // Detect if our `modname` is something like `@rpath/libfoo.dylib` #ifdef _OS_DARWIN_ @@ -342,7 +342,7 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags, #endif handle = jl_dlopen(path, flags); if (handle && !(flags & JL_RTLD_NOLOAD)) - jl_timing_puts(JL_TIMING_CURRENT_BLOCK, jl_pathname_for_handle(handle)); + jl_timing_puts(JL_TIMING_DEFAULT_BLOCK, jl_pathname_for_handle(handle)); if (handle) return handle; #ifdef _OS_WINDOWS_ @@ -364,7 +364,7 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags, snprintf(path, PATHBUF, "%s%s", modname, ext); handle = jl_dlopen(path, flags); if (handle && !(flags & JL_RTLD_NOLOAD)) - jl_timing_puts(JL_TIMING_CURRENT_BLOCK, jl_pathname_for_handle(handle)); + jl_timing_puts(JL_TIMING_DEFAULT_BLOCK, jl_pathname_for_handle(handle)); if (handle) return handle; #ifdef _OS_WINDOWS_ diff --git a/src/gc.c b/src/gc.c index fee39b9e7db19..f124de3f1e9c2 100644 --- a/src/gc.c +++ b/src/gc.c @@ -205,7 +205,7 @@ void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads) { JL_TIMING(GC, GC_Stop); #ifdef USE_TRACY - TracyCZoneCtx ctx = *(JL_TIMING_CURRENT_BLOCK->tracy_ctx); + TracyCZoneCtx ctx = JL_TIMING_DEFAULT_BLOCK->tracy_ctx; TracyCZoneColor(ctx, 0x696969); #endif assert(gc_n_threads); @@ -3310,7 +3310,7 @@ static int _jl_gc_collect(jl_ptls_t 
ptls, jl_gc_collection_t collection) JL_TIMING(GC, GC_Sweep); #ifdef USE_TRACY if (sweep_full) { - TracyCZoneCtx ctx = *(JL_TIMING_CURRENT_BLOCK->tracy_ctx); + TracyCZoneCtx ctx = *(JL_TIMING_DEFAULT_BLOCK->tracy_ctx); TracyCZoneColor(ctx, 0xFFA500); } #endif @@ -3456,7 +3456,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) return; } - JL_TIMING_SUSPEND(GC, ct); + JL_TIMING_SUSPEND_TASK(GC, ct); JL_TIMING(GC, GC); int last_errno = errno; diff --git a/src/gf.c b/src/gf.c index 431443dbbf451..8bfbad4b0f7ca 100644 --- a/src/gf.c +++ b/src/gf.c @@ -367,7 +367,7 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force) fargs[1] = (jl_value_t*)mi; fargs[2] = jl_box_ulong(world); - jl_timing_show_method_instance(mi, JL_TIMING_CURRENT_BLOCK); + jl_timing_show_method_instance(mi, JL_TIMING_DEFAULT_BLOCK); #ifdef TRACE_INFERENCE if (mi->specTypes != (jl_value_t*)jl_emptytuple_type) { jl_printf(JL_STDERR,"inference on "); @@ -1986,7 +1986,7 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method JL_TIMING(ADD_METHOD, ADD_METHOD); assert(jl_is_method(method)); assert(jl_is_mtable(mt)); - jl_timing_show_method(method, JL_TIMING_CURRENT_BLOCK); + jl_timing_show_method(method, JL_TIMING_DEFAULT_BLOCK); jl_value_t *type = method->sig; jl_value_t *oldvalue = NULL; jl_array_t *oldmi = NULL; diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 895fed9c056e2..15131ebeae4ae 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -190,8 +190,7 @@ static jl_callptr_t _jl_compile_codeinst( JL_TIMING(CODEINST_COMPILE, CODEINST_COMPILE); #ifdef USE_TRACY if (is_recompile) { - TracyCZoneCtx ctx = *(JL_TIMING_CURRENT_BLOCK->tracy_ctx); - TracyCZoneColor(ctx, 0xFFA500); + TracyCZoneColor(JL_TIMING_DEFAULT_BLOCK->tracy_ctx, 0xFFA500); } #endif jl_callptr_t fptr = NULL; @@ -252,7 +251,7 @@ static jl_callptr_t _jl_compile_codeinst( for (auto &def : emitted) { jl_code_instance_t *this_code = def.first; if (i < 
jl_timing_print_limit) - jl_timing_show_func_sig(this_code->def->specTypes, JL_TIMING_CURRENT_BLOCK); + jl_timing_show_func_sig(this_code->def->specTypes, JL_TIMING_DEFAULT_BLOCK); jl_llvm_functions_t decls = std::get<1>(def.second); jl_callptr_t addr; @@ -301,7 +300,7 @@ static jl_callptr_t _jl_compile_codeinst( i++; } if (i > jl_timing_print_limit) - jl_timing_printf(JL_TIMING_CURRENT_BLOCK, "... <%d methods truncated>", i - 10); + jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, "... <%d methods truncated>", i - 10); uint64_t end_time = 0; if (timed) diff --git a/src/method.c b/src/method.c index c207149032fb9..be18ead18eadf 100644 --- a/src/method.c +++ b/src/method.c @@ -569,7 +569,7 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, siz JL_TIMING(STAGED_FUNCTION, STAGED_FUNCTION); jl_value_t *tt = linfo->specTypes; jl_method_t *def = linfo->def.method; - jl_timing_show_method_instance(linfo, JL_TIMING_CURRENT_BLOCK); + jl_timing_show_method_instance(linfo, JL_TIMING_DEFAULT_BLOCK); jl_value_t *generator = def->generator; assert(generator != NULL); assert(jl_is_method(def)); diff --git a/src/safepoint.c b/src/safepoint.c index 19eca4bf6f00d..d64df084b0349 100644 --- a/src/safepoint.c +++ b/src/safepoint.c @@ -151,7 +151,7 @@ void jl_safepoint_end_gc(void) void jl_safepoint_wait_gc(void) { jl_task_t *ct = jl_current_task; (void)ct; - JL_TIMING_SUSPEND(GC_SAFEPOINT, ct); + JL_TIMING_SUSPEND_TASK(GC_SAFEPOINT, ct); // The thread should have set this is already assert(jl_atomic_load_relaxed(&ct->ptls->gc_state) != 0); // Use normal volatile load in the loop for speed until GC finishes. 
diff --git a/src/staticdata.c b/src/staticdata.c index 29d745770439b..435148581f4fc 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -3367,7 +3367,7 @@ static jl_value_t *jl_validate_cache_file(ios_t *f, jl_array_t *depmods, uint64_ static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *image, jl_array_t *depmods, int completeinfo, const char *pkgname) { JL_TIMING(LOAD_IMAGE, LOAD_Pkgimg); - jl_timing_printf(JL_TIMING_CURRENT_BLOCK, pkgname); + jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, pkgname); uint64_t checksum = 0; int64_t dataendpos = 0; int64_t datastartpos = 0; diff --git a/src/timing.c b/src/timing.c index 3290e68ee9169..9753187c7565d 100644 --- a/src/timing.c +++ b/src/timing.c @@ -44,7 +44,7 @@ JL_DLLEXPORT uint32_t jl_timing_print_limit = 10; const char *jl_timing_names[(int)JL_TIMING_LAST] = { #define X(name) #name, - JL_TIMING_OWNERS + JL_TIMING_SUBSYSTEMS #undef X }; @@ -187,7 +187,7 @@ JL_DLLEXPORT void jl_timing_show(jl_value_t *v, jl_timing_block_t *cur_block) if (buf.size == buf.maxsize) memset(&buf.buf[IOS_INLSIZE - 3], '.', 3); - TracyCZoneText(*(cur_block->tracy_ctx), buf.buf, buf.size); + TracyCZoneText(cur_block->tracy_ctx, buf.buf, buf.size); #endif } @@ -197,7 +197,7 @@ JL_DLLEXPORT void jl_timing_show_module(jl_module_t *m, jl_timing_block_t *cur_b jl_module_t *root = jl_module_root(m); if (root == m || root == jl_main_module) { const char *module_name = jl_symbol_name(m->name); - TracyCZoneText(*(cur_block->tracy_ctx), module_name, strlen(module_name)); + TracyCZoneText(cur_block->tracy_ctx, module_name, strlen(module_name)); } else { jl_timing_printf(cur_block, "%s.%s", jl_symbol_name(root->name), jl_symbol_name(m->name)); } @@ -208,7 +208,7 @@ JL_DLLEXPORT void jl_timing_show_filename(const char *path, jl_timing_block_t *c { #ifdef USE_TRACY const char *filename = gnu_basename(path); - TracyCZoneText(*(cur_block->tracy_ctx), filename, strlen(filename)); + TracyCZoneText(cur_block->tracy_ctx, filename, 
strlen(filename)); #endif } @@ -243,7 +243,7 @@ JL_DLLEXPORT void jl_timing_show_func_sig(jl_value_t *v, jl_timing_block_t *cur_ if (buf.size == buf.maxsize) memset(&buf.buf[IOS_INLSIZE - 3], '.', 3); - TracyCZoneText(*(cur_block->tracy_ctx), buf.buf, buf.size); + TracyCZoneText(cur_block->tracy_ctx, buf.buf, buf.size); #endif } @@ -261,7 +261,7 @@ JL_DLLEXPORT void jl_timing_printf(jl_timing_block_t *cur_block, const char *for if (buf.size == buf.maxsize) memset(&buf.buf[IOS_INLSIZE - 3], '.', 3); - TracyCZoneText(*(cur_block->tracy_ctx), buf.buf, buf.size); + TracyCZoneText(cur_block->tracy_ctx, buf.buf, buf.size); #endif va_end(args); } @@ -269,7 +269,7 @@ JL_DLLEXPORT void jl_timing_printf(jl_timing_block_t *cur_block, const char *for JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str) { #ifdef USE_TRACY - TracyCZoneText(*(cur_block->tracy_ctx), str, strlen(str)); + TracyCZoneText(cur_block->tracy_ctx, str, strlen(str)); #endif } diff --git a/src/timing.h b/src/timing.h index 73614864ea733..ec20ca2cfbf6a 100644 --- a/src/timing.h +++ b/src/timing.h @@ -55,9 +55,7 @@ extern JL_DLLEXPORT uint32_t jl_timing_print_limit; } #endif -#ifdef __cplusplus -#define HAVE_TIMING_SUPPORT -#elif defined(_COMPILER_CLANG_) +#if defined(_COMPILER_CLANG_) #define HAVE_TIMING_SUPPORT #elif defined(_COMPILER_GCC_) #define HAVE_TIMING_SUPPORT @@ -70,7 +68,9 @@ extern JL_DLLEXPORT uint32_t jl_timing_print_limit; #if !defined( ENABLE_TIMINGS ) || !defined( HAVE_TIMING_SUPPORT ) #define JL_TIMING(subsystem, event) -#define JL_TIMING_SUSPEND(subsystem, ct) +#define JL_TIMING_CREATE_BLOCK(new_block_name, subsystem, event) + +#define JL_TIMING_SUSPEND_TASK(subsystem, ct) #define jl_timing_show(v, b) #define jl_timing_show_module(m, b) @@ -81,6 +81,7 @@ extern JL_DLLEXPORT uint32_t jl_timing_print_limit; #define jl_timing_printf(b, f, ...) 
#define jl_timing_puts(b, s) #define jl_timing_init_task(t) +#define jl_timing_block_start(blk) #define jl_timing_block_enter_task(ct, ptls, blk) #define jl_timing_block_exit_task(ct, ptls) ((jl_timing_block_t *)NULL) #define jl_pop_timing_block(blk) @@ -127,17 +128,14 @@ JL_DLLEXPORT void jl_timing_show_method(jl_method_t *method, jl_timing_block_t * JL_DLLEXPORT void jl_timing_show_func_sig(jl_value_t *v, jl_timing_block_t *cur_block); JL_DLLEXPORT void jl_timing_printf(jl_timing_block_t *cur_block, const char *format, ...); JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str); + #ifdef __cplusplus } #endif -#ifdef __cplusplus -#define JL_TIMING_CURRENT_BLOCK (&__timing_block.block) -#else -#define JL_TIMING_CURRENT_BLOCK (&__timing_block) -#endif +#define JL_TIMING_DEFAULT_BLOCK (&__timing_block) -#define JL_TIMING_OWNERS \ +#define JL_TIMING_SUBSYSTEMS \ X(ROOT) \ X(GC) \ X(LOWERING) \ @@ -171,7 +169,7 @@ JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str); #define JL_TIMING_EVENTS \ - JL_TIMING_OWNERS \ + JL_TIMING_SUBSYSTEMS \ X(GC_Stop) \ X(GC_Mark) \ X(GC_Sweep) \ @@ -200,9 +198,9 @@ JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str); X(ImageSize) \ -enum jl_timing_owners { +enum jl_timing_subsystems { #define X(name) JL_TIMING_ ## name, - JL_TIMING_OWNERS + JL_TIMING_SUBSYSTEMS #undef X JL_TIMING_LAST }; @@ -232,14 +230,14 @@ enum jl_timing_counter_types { #ifdef USE_TIMING_COUNTS #define _COUNTS_CTX_MEMBER jl_timing_counts_t counts_ctx; -#define _COUNTS_CTOR(block, owner) _jl_timing_counts_ctor(block, owner) -#define _COUNTS_DESTROY(block) _jl_timing_counts_destroy(block) +#define _COUNTS_CTOR(block) _jl_timing_counts_ctor(block) +#define _COUNTS_DESTROY(block, subsystem) _jl_timing_counts_destroy(block, subsystem) #define _COUNTS_START(block, t) _jl_timing_counts_start(block, t) #define _COUNTS_STOP(block, t) _jl_timing_counts_stop(block, t) #else #define 
_COUNTS_CTX_MEMBER -#define _COUNTS_CTOR(block, owner) -#define _COUNTS_DESTROY(block) +#define _COUNTS_CTOR(block) +#define _COUNTS_DESTROY(block, subsystem) #define _COUNTS_START(block, t) #define _COUNTS_STOP(block, t) #endif @@ -249,24 +247,24 @@ enum jl_timing_counter_types { **/ #ifdef USE_TRACY -#define _TRACY_CTX_MEMBER TracyCZoneCtx *tracy_ctx; -#define _TRACY_CTOR(context, name, enable) TracyCZoneN(__tracy_ctx, name, (enable)); \ - (context) = &__tracy_ctx -#define _TRACY_DESTROY(ctx) TracyCZoneEnd(*ctx) +#define _TRACY_CTX_MEMBER TracyCZoneCtx tracy_ctx; const struct ___tracy_source_location_data *tracy_srcloc; +#define _TRACY_CTOR(block, name) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; \ + (block)->tracy_srcloc = &TracyConcat(__tracy_source_location,TracyLine) +#define _TRACY_START(block) (block)->tracy_ctx = ___tracy_emit_zone_begin( (block)->tracy_srcloc, 1 ); +#define _TRACY_STOP(ctx) TracyCZoneEnd(*ctx) #else #define _TRACY_CTX_MEMBER -#define _TRACY_CTOR(context, name, enable) -#define _TRACY_DESTROY(block) +#define _TRACY_CTOR(block, name) +#define _TRACY_START(block) +#define _TRACY_STOP(block) #endif #ifdef USE_ITTAPI -#define _ITTAPI_CTX_MEMBER int owner; int event; -#define _ITTAPI_CTOR(block, owner, event) block->owner = owner; block->event = event -#define _ITTAPI_START(block) if (_jl_timing_enabled(block->owner)) __itt_event_start(jl_timing_ittapi_events[block->event]) -#define _ITTAPI_STOP(block) if (_jl_timing_enabled(block->owner)) __itt_event_end(jl_timing_ittapi_events[block->event]) +#define _ITTAPI_CTX_MEMBER +#define _ITTAPI_START(block) __itt_event_start(jl_timing_ittapi_events[block->event]) +#define _ITTAPI_STOP(block) __itt_event_end(jl_timing_ittapi_events[block->event]) #else #define _ITTAPI_CTX_MEMBER -#define _ITTAPI_CTOR(block, owner, event) #define _ITTAPI_START(block) #define _ITTAPI_STOP(block) #endif @@ -279,7 
+277,6 @@ extern JL_DLLEXPORT uint64_t jl_timing_counts[(int)JL_TIMING_LAST]; typedef struct _jl_timing_counts_t { uint64_t total; uint64_t t0; - int owner; #ifdef JL_DEBUG_BUILD uint8_t running; #endif @@ -301,16 +298,15 @@ STATIC_INLINE void _jl_timing_counts_start(jl_timing_counts_t *block, uint64_t t block->t0 = t; } -STATIC_INLINE void _jl_timing_counts_ctor(jl_timing_counts_t *block, int owner) JL_NOTSAFEPOINT { - block->owner = owner; +STATIC_INLINE void _jl_timing_counts_ctor(jl_timing_counts_t *block) JL_NOTSAFEPOINT { block->total = 0; #ifdef JL_DEBUG_BUILD block->running = 0; #endif } -STATIC_INLINE void _jl_timing_counts_destroy(jl_timing_counts_t *block) JL_NOTSAFEPOINT { - jl_timing_counts[block->owner] += block->total; +STATIC_INLINE void _jl_timing_counts_destroy(jl_timing_counts_t *block, int subsystem) JL_NOTSAFEPOINT { + jl_timing_counts[subsystem] += block->total; } /** @@ -325,46 +321,62 @@ extern JL_DLLEXPORT __itt_event jl_timing_ittapi_events[(int)JL_TIMING_EVENT_LAS struct _jl_timing_block_t { // typedef in julia.h struct _jl_timing_block_t *prev; + _TRACY_CTX_MEMBER _ITTAPI_CTX_MEMBER _COUNTS_CTX_MEMBER + + int subsystem; + int event; + int8_t is_running; }; -STATIC_INLINE int _jl_timing_enabled(int event) JL_NOTSAFEPOINT { - return !!(jl_timing_enable_mask & (1 << event)); +STATIC_INLINE int _jl_timing_enabled(int subsystem) JL_NOTSAFEPOINT { + return (jl_timing_enable_mask & (1 << subsystem)) != 0; } -STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int owner, int event) JL_NOTSAFEPOINT { +STATIC_INLINE void jl_timing_block_start(jl_timing_block_t *block) { + assert(!block->is_running); + if (!_jl_timing_enabled(block->subsystem)) return; + uint64_t t = cycleclock(); (void)t; - _COUNTS_CTOR(&block->counts_ctx, owner); _COUNTS_START(&block->counts_ctx, t); - _ITTAPI_CTOR(block, owner, event); _ITTAPI_START(block); + _TRACY_START(block); - jl_task_t *ct = jl_current_task; - jl_timing_block_t **prevp = 
&ct->ptls->timing_stack; + jl_timing_block_t **prevp = &jl_current_task->ptls->timing_stack; block->prev = *prevp; + block->is_running = 1; if (block->prev) { _COUNTS_STOP(&block->prev->counts_ctx, t); } *prevp = block; } -STATIC_INLINE void _jl_timing_block_destroy(jl_timing_block_t *block) JL_NOTSAFEPOINT { - uint64_t t = cycleclock(); (void)t; - - _ITTAPI_STOP(block); - _COUNTS_STOP(&block->counts_ctx, t); - _COUNTS_DESTROY(&block->counts_ctx); - _TRACY_DESTROY(block->tracy_ctx); +STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int subsystem, int event) JL_NOTSAFEPOINT { + block->subsystem = subsystem; + block->event = event; + block->is_running = 0; + _COUNTS_CTOR(&block->counts_ctx); +} - jl_task_t *ct = jl_current_task; - jl_timing_block_t **pcur = &ct->ptls->timing_stack; - assert(*pcur == block); - *pcur = block->prev; - if (block->prev) { - _COUNTS_START(&block->prev->counts_ctx, t); +STATIC_INLINE void _jl_timing_block_destroy(jl_timing_block_t *block) JL_NOTSAFEPOINT { + if (block->is_running) { + uint64_t t = cycleclock(); (void)t; + _ITTAPI_STOP(block); + _COUNTS_STOP(&block->counts_ctx, t); + _TRACY_STOP(&block->tracy_ctx); + + jl_task_t *ct = jl_current_task; + jl_timing_block_t **pcur = &ct->ptls->timing_stack; + assert(*pcur == block); + *pcur = block->prev; + if (block->prev) { + _COUNTS_START(&block->prev->counts_ctx, t); + } } + + _COUNTS_DESTROY(&block->counts_ctx, block->subsystem); } typedef struct _jl_timing_suspend_t { @@ -384,51 +396,20 @@ STATIC_INLINE void _jl_timing_suspend_destroy(jl_timing_suspend_t *suspend) JL_N #endif } -#ifdef __cplusplus -struct jl_timing_block_cpp_t { - jl_timing_block_t block; - jl_timing_block_cpp_t(int owner, int event) JL_NOTSAFEPOINT { - _jl_timing_block_ctor(&block, owner, event); - } - ~jl_timing_block_cpp_t() JL_NOTSAFEPOINT { - _jl_timing_block_destroy(&block); - } - jl_timing_block_cpp_t(const jl_timing_block_cpp_t&) = delete; - jl_timing_block_cpp_t(const jl_timing_block_cpp_t&&) = 
delete; - jl_timing_block_cpp_t& operator=(const jl_timing_block_cpp_t &) = delete; - jl_timing_block_cpp_t& operator=(const jl_timing_block_cpp_t &&) = delete; -}; -#define JL_TIMING(subsystem, event) jl_timing_block_cpp_t __timing_block(JL_TIMING_ ## subsystem, JL_TIMING_EVENT_ ## event); \ - _TRACY_CTOR(__timing_block.block.tracy_ctx, #event, (jl_timing_enable_mask >> (JL_TIMING_ ## subsystem)) & 1) -#else #define JL_TIMING(subsystem, event) \ + JL_TIMING_CREATE_BLOCK(__timing_block, subsystem, event); \ + jl_timing_block_start(&__timing_block) + +#define JL_TIMING_CREATE_BLOCK(new_block_name, subsystem, event) \ __attribute__((cleanup(_jl_timing_block_destroy))) \ - jl_timing_block_t __timing_block; \ - _jl_timing_block_ctor(&__timing_block, JL_TIMING_ ## subsystem, JL_TIMING_EVENT_ ## event); \ - _TRACY_CTOR(__timing_block.tracy_ctx, #event, (jl_timing_enable_mask >> (JL_TIMING_ ## subsystem)) & 1) -#endif + jl_timing_block_t new_block_name; \ + _jl_timing_block_ctor(&new_block_name, JL_TIMING_ ## subsystem, JL_TIMING_EVENT_ ## event); \ + _TRACY_CTOR(&new_block_name, #event) -#ifdef __cplusplus -struct jl_timing_suspend_cpp_t { - jl_timing_suspend_t suspend; - jl_timing_suspend_cpp_t(const char *subsystem, jl_task_t *ct) JL_NOTSAFEPOINT { - _jl_timing_suspend_ctor(&suspend, subsystem, ct); - } - ~jl_timing_suspend_cpp_t() JL_NOTSAFEPOINT { - _jl_timing_suspend_destroy(&suspend); - } - jl_timing_suspend_cpp_t(const jl_timing_suspend_cpp_t &) = delete; - jl_timing_suspend_cpp_t(jl_timing_suspend_cpp_t &&) = delete; - jl_timing_suspend_cpp_t& operator=(const jl_timing_suspend_cpp_t &) = delete; - jl_timing_suspend_cpp_t& operator=(jl_timing_suspend_cpp_t &&) = delete; -}; -#define JL_TIMING_SUSPEND(subsystem, ct) jl_timing_suspend_cpp_t __suspend_block(#subsystem, ct) -#else -#define JL_TIMING_SUSPEND(subsystem, ct) \ +#define JL_TIMING_SUSPEND_TASK(subsystem, ct) \ __attribute__((cleanup(_jl_timing_suspend_destroy))) \ jl_timing_suspend_t __timing_suspend; \ 
_jl_timing_suspend_ctor(&__timing_suspend, #subsystem, ct) -#endif // Counting #ifdef USE_ITTAPI diff --git a/src/toplevel.c b/src/toplevel.c index 200d0ad220231..8f148727e0249 100644 --- a/src/toplevel.c +++ b/src/toplevel.c @@ -65,7 +65,7 @@ static jl_function_t *jl_module_get_initializer(jl_module_t *m JL_PROPAGATES_ROO void jl_module_run_initializer(jl_module_t *m) { JL_TIMING(INIT_MODULE, INIT_MODULE); - jl_timing_show_module(m, JL_TIMING_CURRENT_BLOCK); + jl_timing_show_module(m, JL_TIMING_DEFAULT_BLOCK); jl_function_t *f = jl_module_get_initializer(m); if (f == NULL) return; From f9c9d2239e01572464ac0c78c39a6501d2b545c3 Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Fri, 5 May 2023 13:00:17 -0400 Subject: [PATCH 006/290] Split GC_Sweep JL_TIMING event into incremental/full versions --- src/gc.c | 11 ++++++----- src/timing.h | 3 ++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/gc.c b/src/gc.c index f124de3f1e9c2..586653f7b0b91 100644 --- a/src/gc.c +++ b/src/gc.c @@ -3307,12 +3307,13 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) uint64_t start_sweep_time = jl_hrtime(); JL_PROBE_GC_SWEEP_BEGIN(sweep_full); { - JL_TIMING(GC, GC_Sweep); + JL_TIMING_CREATE_BLOCK(incremental_timing_block, + GC, GC_IncrementalSweep); + JL_TIMING_CREATE_BLOCK(full_timing_block, + GC, GC_FullSweep); + jl_timing_block_start(sweep_full ? 
&full_timing_block : &incremental_timing_block); #ifdef USE_TRACY - if (sweep_full) { - TracyCZoneCtx ctx = *(JL_TIMING_DEFAULT_BLOCK->tracy_ctx); - TracyCZoneColor(ctx, 0xFFA500); - } + TracyCZoneColor(full_timing_block.tracy_ctx, 0xFFA500); #endif sweep_weak_refs(); sweep_stack_pools(); diff --git a/src/timing.h b/src/timing.h index ec20ca2cfbf6a..d779c2d4148a0 100644 --- a/src/timing.h +++ b/src/timing.h @@ -172,7 +172,8 @@ JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str); JL_TIMING_SUBSYSTEMS \ X(GC_Stop) \ X(GC_Mark) \ - X(GC_Sweep) \ + X(GC_FullSweep) \ + X(GC_IncrementalSweep) \ X(GC_Finalizers) \ X(CODEGEN_LLVM) \ X(CODEGEN_Codeinst) \ From edf55b99f67513a4e29e6473821f12a3089eca96 Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Thu, 11 May 2023 15:40:35 -0400 Subject: [PATCH 007/290] timing: Create ITTAPI events on the fly Instead of initializing all ITTAPI events during init, this change makes ITTAPI events use a statically-allocated object to track whether the event has been created. This makes our generation of events more similar to the Tracy API, where source locations are generated statically, in-line at each macro call-site instead of constructing them all up front. 
--- src/timing.c | 7 ------- src/timing.h | 37 +++++++++++++++++++++---------------- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/src/timing.c b/src/timing.c index 9753187c7565d..0b573551ec246 100644 --- a/src/timing.c +++ b/src/timing.c @@ -50,10 +50,6 @@ const char *jl_timing_names[(int)JL_TIMING_LAST] = JL_DLLEXPORT jl_timing_counter_t jl_timing_counters[JL_TIMING_COUNTER_LAST]; -#ifdef USE_ITTAPI -JL_DLLEXPORT __itt_event jl_timing_ittapi_events[(int)JL_TIMING_EVENT_LAST]; -#endif - void jl_print_timings(void) { #ifdef USE_TIMING_COUNTS @@ -91,9 +87,6 @@ void jl_init_timing(void) int i __attribute__((unused)) = 0; #ifdef USE_ITTAPI -#define X(name) jl_timing_ittapi_events[i++] = __itt_event_create(#name, strlen(#name)); - JL_TIMING_EVENTS -#undef X i = 0; #define X(name) jl_timing_counters[i++].ittapi_counter = __itt_counter_create(#name, "julia.runtime"); JL_TIMING_COUNTERS diff --git a/src/timing.h b/src/timing.h index d779c2d4148a0..667ce4e749d46 100644 --- a/src/timing.h +++ b/src/timing.h @@ -220,10 +220,8 @@ enum jl_timing_counter_types { JL_TIMING_COUNTER_LAST }; -/** - * Timing back-ends differ in terms of whether they support nested - * and asynchronous events. 
- **/ +#define TIMING_XCONCAT(x1, x2) x1##x2 +#define TIMING_CONCAT(x1, x2) TIMING_XCONCAT(x1, x2) /** * Timing Backend: Aggregated timing counts (implemented in timing.c) @@ -249,8 +247,8 @@ enum jl_timing_counter_types { #ifdef USE_TRACY #define _TRACY_CTX_MEMBER TracyCZoneCtx tracy_ctx; const struct ___tracy_source_location_data *tracy_srcloc; -#define _TRACY_CTOR(block, name) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; \ - (block)->tracy_srcloc = &TracyConcat(__tracy_source_location,TracyLine) +#define _TRACY_CTOR(block, name) static const struct ___tracy_source_location_data TIMING_CONCAT(__tracy_source_location,__LINE__) = { name, __func__, TracyFile, (uint32_t)__LINE__, 0 }; \ + (block)->tracy_srcloc = &TIMING_CONCAT(__tracy_source_location,__LINE__) #define _TRACY_START(block) (block)->tracy_ctx = ___tracy_emit_zone_begin( (block)->tracy_srcloc, 1 ); #define _TRACY_STOP(ctx) TracyCZoneEnd(*ctx) #else @@ -260,12 +258,21 @@ enum jl_timing_counter_types { #define _TRACY_STOP(block) #endif +/** + * Timing Backend: Intel VTune (ITTAPI) + **/ + #ifdef USE_ITTAPI -#define _ITTAPI_CTX_MEMBER -#define _ITTAPI_START(block) __itt_event_start(jl_timing_ittapi_events[block->event]) -#define _ITTAPI_STOP(block) __itt_event_end(jl_timing_ittapi_events[block->event]) +#define _ITTAPI_CTX_MEMBER __itt_event ittapi_event; +#define _ITTAPI_CTOR(block, name) static __itt_event TIMING_CONCAT(__itt_event,__LINE__) = INT_MAX; \ + if (TIMING_CONCAT(__itt_event,__LINE__) == INT_MAX) \ + TIMING_CONCAT(__itt_event,__LINE__) = __itt_event_create(name, strlen(name)); \ + (block)->ittapi_event = TIMING_CONCAT(__itt_event,__LINE__) +#define _ITTAPI_START(block) __itt_event_start((block)->ittapi_event) +#define _ITTAPI_STOP(block) __itt_event_end((block)->ittapi_event) #else #define _ITTAPI_CTX_MEMBER +#define _ITTAPI_CTOR(block, name) #define _ITTAPI_START(block) #define 
_ITTAPI_STOP(block) #endif @@ -316,9 +323,6 @@ STATIC_INLINE void _jl_timing_counts_destroy(jl_timing_counts_t *block, int subs extern JL_DLLEXPORT uint64_t jl_timing_enable_mask; extern const char *jl_timing_names[(int)JL_TIMING_LAST]; -#ifdef USE_ITTAPI -extern JL_DLLEXPORT __itt_event jl_timing_ittapi_events[(int)JL_TIMING_EVENT_LAST]; -#endif struct _jl_timing_block_t { // typedef in julia.h struct _jl_timing_block_t *prev; @@ -401,11 +405,12 @@ STATIC_INLINE void _jl_timing_suspend_destroy(jl_timing_suspend_t *suspend) JL_N JL_TIMING_CREATE_BLOCK(__timing_block, subsystem, event); \ jl_timing_block_start(&__timing_block) -#define JL_TIMING_CREATE_BLOCK(new_block_name, subsystem, event) \ +#define JL_TIMING_CREATE_BLOCK(block, subsystem, event) \ __attribute__((cleanup(_jl_timing_block_destroy))) \ - jl_timing_block_t new_block_name; \ - _jl_timing_block_ctor(&new_block_name, JL_TIMING_ ## subsystem, JL_TIMING_EVENT_ ## event); \ - _TRACY_CTOR(&new_block_name, #event) + jl_timing_block_t block; \ + _jl_timing_block_ctor(&block, JL_TIMING_ ## subsystem, JL_TIMING_EVENT_ ## event); \ + _TRACY_CTOR(&block, #event); \ + _ITTAPI_CTOR(&block, #event) #define JL_TIMING_SUSPEND_TASK(subsystem, ct) \ __attribute__((cleanup(_jl_timing_suspend_destroy))) \ From 3cadb6cbe31b28d06cf93832bc6521c51c84442f Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Mon, 15 May 2023 12:29:12 -0400 Subject: [PATCH 008/290] Remove pointer indirection in `_TRACY_STOP` --- src/timing.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/timing.h b/src/timing.h index 667ce4e749d46..32e372bce271e 100644 --- a/src/timing.h +++ b/src/timing.h @@ -250,12 +250,12 @@ enum jl_timing_counter_types { #define _TRACY_CTOR(block, name) static const struct ___tracy_source_location_data TIMING_CONCAT(__tracy_source_location,__LINE__) = { name, __func__, TracyFile, (uint32_t)__LINE__, 0 }; \ (block)->tracy_srcloc = &TIMING_CONCAT(__tracy_source_location,__LINE__) #define 
_TRACY_START(block) (block)->tracy_ctx = ___tracy_emit_zone_begin( (block)->tracy_srcloc, 1 ); -#define _TRACY_STOP(ctx) TracyCZoneEnd(*ctx) +#define _TRACY_STOP(ctx) TracyCZoneEnd(ctx) #else #define _TRACY_CTX_MEMBER #define _TRACY_CTOR(block, name) #define _TRACY_START(block) -#define _TRACY_STOP(block) +#define _TRACY_STOP(ctx) #endif /** @@ -370,7 +370,7 @@ STATIC_INLINE void _jl_timing_block_destroy(jl_timing_block_t *block) JL_NOTSAFE uint64_t t = cycleclock(); (void)t; _ITTAPI_STOP(block); _COUNTS_STOP(&block->counts_ctx, t); - _TRACY_STOP(&block->tracy_ctx); + _TRACY_STOP(block->tracy_ctx); jl_task_t *ct = jl_current_task; jl_timing_block_t **pcur = &ct->ptls->timing_stack; From 74addd3582202338d602b67151a011f19043250f Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Mon, 15 May 2023 15:14:50 -0400 Subject: [PATCH 009/290] Reset `active` status for timing zone upon block entry --- src/timing.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/timing.h b/src/timing.h index 32e372bce271e..8eda28c22c89c 100644 --- a/src/timing.h +++ b/src/timing.h @@ -248,7 +248,8 @@ enum jl_timing_counter_types { #ifdef USE_TRACY #define _TRACY_CTX_MEMBER TracyCZoneCtx tracy_ctx; const struct ___tracy_source_location_data *tracy_srcloc; #define _TRACY_CTOR(block, name) static const struct ___tracy_source_location_data TIMING_CONCAT(__tracy_source_location,__LINE__) = { name, __func__, TracyFile, (uint32_t)__LINE__, 0 }; \ - (block)->tracy_srcloc = &TIMING_CONCAT(__tracy_source_location,__LINE__) + (block)->tracy_srcloc = &TIMING_CONCAT(__tracy_source_location,__LINE__); \ + (block)->tracy_ctx.active = 0 #define _TRACY_START(block) (block)->tracy_ctx = ___tracy_emit_zone_begin( (block)->tracy_srcloc, 1 ); #define _TRACY_STOP(ctx) TracyCZoneEnd(ctx) #else From dfbcc45f254b1604395b4e28a3bf12696e90ccea Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Tue, 16 May 2023 11:10:13 +0900 Subject: [PATCH 010/290] 
ensure all `isequal` methods to be inferred to return `Bool` (#49800) This would help inference on `Core.Compiler.return_type(isequal, tt)` when `tt` is not well inferred (e.g. `tt` is inferred to `Tuple{Any,Any}`). (although JuliaLang/julia#46810 may disable this `Core.Compiler.return_type` improvement for good reasons). Anyway, it is explicitly stated in the documentation that the `isequal` method should always return a value of `Bool`. So, not only does this annotation assist inference, it also serves to ensure the correctness of our code base, and therefore should be beneficial. We may need to take similar measures for `isless` and `isgreater` (in separate PRs). --- base/complex.jl | 6 +++--- base/operators.jl | 2 +- base/pair.jl | 2 +- test/missing.jl | 2 +- test/testhelpers/Furlongs.jl | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/base/complex.jl b/base/complex.jl index a0473c90d5c17..97b47eac91a5a 100644 --- a/base/complex.jl +++ b/base/complex.jl @@ -245,9 +245,9 @@ bswap(z::Complex) = Complex(bswap(real(z)), bswap(imag(z))) ==(z::Complex, x::Real) = isreal(z) && real(z) == x ==(x::Real, z::Complex) = isreal(z) && real(z) == x -isequal(z::Complex, w::Complex) = isequal(real(z),real(w)) & isequal(imag(z),imag(w)) -isequal(z::Complex, w::Real) = isequal(real(z),w) & isequal(imag(z),zero(w)) -isequal(z::Real, w::Complex) = isequal(z,real(w)) & isequal(zero(z),imag(w)) +isequal(z::Complex, w::Complex) = isequal(real(z),real(w))::Bool & isequal(imag(z),imag(w))::Bool +isequal(z::Complex, w::Real) = isequal(real(z),w)::Bool & isequal(imag(z),zero(w))::Bool +isequal(z::Real, w::Complex) = isequal(z,real(w))::Bool & isequal(zero(z),imag(w))::Bool in(x::Complex, r::AbstractRange{<:Real}) = isreal(x) && real(x) in r diff --git a/base/operators.jl b/base/operators.jl index 5893c5944a3a0..3f51be737ca5c 100644 --- a/base/operators.jl +++ b/base/operators.jl @@ -79,7 +79,7 @@ also implement [`<`](@ref) to ensure consistency of comparisons. 
== """ - isequal(x, y) + isequal(x, y) -> Bool Similar to [`==`](@ref), except for the treatment of floating point numbers and of missing values. `isequal` treats all floating-point `NaN` values as equal diff --git a/base/pair.jl b/base/pair.jl index f34ebb89c80da..1953dc2886053 100644 --- a/base/pair.jl +++ b/base/pair.jl @@ -44,7 +44,7 @@ indexed_iterate(p::Pair, i::Int, state=1) = (getfield(p, i), i + 1) hash(p::Pair, h::UInt) = hash(p.second, hash(p.first, h)) ==(p::Pair, q::Pair) = (p.first==q.first) & (p.second==q.second) -isequal(p::Pair, q::Pair) = isequal(p.first,q.first) & isequal(p.second,q.second) +isequal(p::Pair, q::Pair) = isequal(p.first,q.first)::Bool & isequal(p.second,q.second)::Bool isless(p::Pair, q::Pair) = ifelse(!isequal(p.first,q.first), isless(p.first,q.first), isless(p.second,q.second)) diff --git a/test/missing.jl b/test/missing.jl index f06d1aad7a6b1..489d264192fef 100644 --- a/test/missing.jl +++ b/test/missing.jl @@ -80,7 +80,7 @@ end @test isapprox(missing, 1.0, atol=1e-6) === missing @test isapprox(1.0, missing, rtol=1e-6) === missing - @test !any(T -> T === Union{Missing,Bool}, Base.return_types(isequal, Tuple{Any,Any})) + @test all(==(Bool), Base.return_types(isequal, Tuple{Any,Any})) end @testset "arithmetic operators" begin diff --git a/test/testhelpers/Furlongs.jl b/test/testhelpers/Furlongs.jl index 17970f0b0572e..f63b5460c7c16 100644 --- a/test/testhelpers/Furlongs.jl +++ b/test/testhelpers/Furlongs.jl @@ -74,7 +74,7 @@ for op in (:+, :-) end end for op in (:(==), :(!=), :<, :<=, :isless, :isequal) - @eval $op(x::Furlong{p}, y::Furlong{p}) where {p} = $op(x.val, y.val) + @eval $op(x::Furlong{p}, y::Furlong{p}) where {p} = $op(x.val, y.val)::Bool end for (f,op) in ((:_plus,:+),(:_minus,:-),(:_times,:*),(:_div,://)) @eval function $f(v::T, ::Furlong{p}, ::Union{Furlong{q},Val{q}}) where {T,p,q} From 78fbf1bd74ded2a8a830f9f4b35c98666f2a7e16 Mon Sep 17 00:00:00 2001 From: "Navid C. 
Constantinou" Date: Tue, 16 May 2023 16:11:38 +1000 Subject: [PATCH 011/290] docs: fix code formatting and add some spaces (#49814) --- doc/src/manual/arrays.md | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/doc/src/manual/arrays.md b/doc/src/manual/arrays.md index f9e60d83ff052..0b4532e1b423d 100644 --- a/doc/src/manual/arrays.md +++ b/doc/src/manual/arrays.md @@ -107,7 +107,7 @@ where no arguments are given. [Array literal can be typed](@ref man-array-typed- the syntax `T[A, B, C, ...]` where `T` is a type. ```jldoctest -julia> [1,2,3] # An array of `Int`s +julia> [1, 2, 3] # An array of `Int`s 3-element Vector{Int64}: 1 2 @@ -326,8 +326,8 @@ These syntaxes are shorthands for function calls that themselves are convenience | Syntax | Function | Description | |:---------------------- |:---------------- |:---------------------------------------------------------------------------------------------------------- | | | [`cat`](@ref) | concatenate input arrays along dimension(s) `k` | -| `[A; B; C; ...]` | [`vcat`](@ref) | shorthand for `cat(A...; dims=1) | -| `[A B C ...]` | [`hcat`](@ref) | shorthand for `cat(A...; dims=2) | +| `[A; B; C; ...]` | [`vcat`](@ref) | shorthand for `cat(A...; dims=1)` | +| `[A B C ...]` | [`hcat`](@ref) | shorthand for `cat(A...; dims=2)` | | `[A B; C D; ...]` | [`hvcat`](@ref) | simultaneous vertical and horizontal concatenation | | `[A; C;; B; D;;; ...]` | [`hvncat`](@ref) | simultaneous n-dimensional concatenation, where number of semicolons indicate the dimension to concatenate | @@ -356,7 +356,7 @@ Comprehensions provide a general and powerful way to construct arrays. Comprehen similar to set construction notation in mathematics: ``` -A = [ F(x,y,...) for x=rx, y=ry, ... ] +A = [ F(x, y, ...) for x=rx, y=ry, ... ] ``` The meaning of this form is that `F(x,y,...)` is evaluated with the variables `x`, `y`, etc. 
taking @@ -440,7 +440,7 @@ Ranges in generators and comprehensions can depend on previous ranges by writing keywords: ```jldoctest -julia> [(i,j) for i=1:3 for j=1:i] +julia> [(i, j) for i=1:3 for j=1:i] 6-element Vector{Tuple{Int64, Int64}}: (1, 1) (2, 1) @@ -455,7 +455,7 @@ In such cases, the result is always 1-d. Generated values can be filtered using the `if` keyword: ```jldoctest -julia> [(i,j) for i=1:3 for j=1:i if i+j == 4] +julia> [(i, j) for i=1:3 for j=1:i if i+j == 4] 2-element Vector{Tuple{Int64, Int64}}: (2, 2) (3, 1) @@ -740,17 +740,17 @@ that is sometimes referred to as pointwise indexing. For example, it enables accessing the diagonal elements from the first "page" of `A` from above: ```jldoctest cartesianindex -julia> page = A[:,:,1] +julia> page = A[:, :, 1] 4×4 Matrix{Int64}: 1 5 9 13 2 6 10 14 3 7 11 15 4 8 12 16 -julia> page[[CartesianIndex(1,1), - CartesianIndex(2,2), - CartesianIndex(3,3), - CartesianIndex(4,4)]] +julia> page[[CartesianIndex(1, 1), + CartesianIndex(2, 2), + CartesianIndex(3, 3), + CartesianIndex(4, 4)]] 4-element Vector{Int64}: 1 6 @@ -964,7 +964,7 @@ construct, `i` will be an `Int` if `A` is an array type with fast linear indexin it will be a `CartesianIndex`: ```jldoctest -julia> A = rand(4,3); +julia> A = rand(4, 3); julia> B = view(A, 1:3, 2:3); @@ -1029,9 +1029,9 @@ sizes, such as adding a vector to each column of a matrix. An inefficient way to be to replicate the vector to the size of the matrix: ```julia-repl -julia> a = rand(2,1); A = rand(2,3); +julia> a = rand(2, 1); A = rand(2, 3); -julia> repeat(a,1,3)+A +julia> repeat(a, 1, 3) + A 2×3 Array{Float64,2}: 1.20813 1.82068 1.25387 1.56851 1.86401 1.67846 @@ -1153,9 +1153,9 @@ arranged contiguously in column major order. 
This means that the stride of the f dimension — the spacing between elements in the same column — is `1`: ```julia-repl -julia> A = rand(5,7,2); +julia> A = rand(5, 7, 2); -julia> stride(A,1) +julia> stride(A, 1) 1 ``` From c55000ab9c34228613abba5746e22aa6069a17ed Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 16 May 2023 14:09:57 -0400 Subject: [PATCH 012/290] add a hash value to Typeofwrapper objects (#49725) We probably should not do this in full correctness, but the performance gain is too great to ignore. --- src/jltypes.c | 5 ++++- test/hashing.jl | 8 ++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/jltypes.c b/src/jltypes.c index 1a30df637a706..ff40b7e93092d 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -1672,11 +1672,14 @@ void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable) } assert(dt->isconcretetype || dt->isdispatchtuple ? dt->maybe_subtype_of_cache : 1); if (dt->name == jl_type_typename) { - cacheable = 0; // n.b. the cache for Type ignores parameter normalization, so it can't be used to make a stable hash value jl_value_t *p = jl_tparam(dt, 0); if (!jl_is_type(p) && !jl_is_typevar(p)) // Type{v} has no subtypes, if v is not a Type dt->has_concrete_subtype = 0; dt->maybe_subtype_of_cache = 1; + jl_value_t *uw = jl_unwrap_unionall(p); + // n.b. 
the cache for Type ignores parameter normalization except for Typeofwrapper, so it can't be used to make a stable hash value + if (!jl_is_datatype(uw) || ((jl_datatype_t*)uw)->name->wrapper != p) + cacheable = 0; } dt->hash = typekey_hash(dt->name, jl_svec_data(dt->parameters), l, cacheable); } diff --git a/test/hashing.jl b/test/hashing.jl index 943109924f280..5230442354d99 100644 --- a/test/hashing.jl +++ b/test/hashing.jl @@ -302,3 +302,11 @@ let t1 = Tuple{AbstractVector,AbstractVector{<:Integer},UnitRange{<:Integer}}, @test hash(t1) == hash(t2) @test length(Set{Type}([t1, t2])) == 1 end + +struct AUnionParam{T<:Union{Nothing,Float32,Float64}} end +@test AUnionParam.body.hash == 0 +@test Type{AUnionParam}.hash != 0 +@test Type{AUnionParam{<:Union{Float32,Float64}}}.hash == 0 +@test Type{AUnionParam{<:Union{Nothing,Float32,Float64}}} === Type{AUnionParam} +@test Type{AUnionParam.body}.hash == 0 +@test Type{Base.Broadcast.Broadcasted}.hash != 0 From 4d0f35d444b73ad5ad2b0c040dfd1740827d2153 Mon Sep 17 00:00:00 2001 From: Elliot Saba Date: Tue, 16 May 2023 11:35:37 -0700 Subject: [PATCH 013/290] Don't depend on `CompilerSupportLibraries_jll` from `OpenBLAS_jll` This is important because CSL_jll loads in many other libraries that we may or may not care that much about, such as `libstdc++` and `libgomp`. We load `libstdc++` eagerly on Linux, so that will already be loaded in all cases that we care about, however on macOS we don't generally want that loaded, and this suppresses that. `libgomp` is needed by BB-provided software that uses OpenMP during compilation, however it can conflict with software compiled by the Intel compilers, such as `MKL`. It's best to allow MKL to load its OpenMP libraries first, so delaying loading `libgomp` until someone actually calls `using CompilerSupportLibraries_jll` is the right thing to do. 
In the future, we want to rework JLLs such that libraries aren't eagerly loaded at JLL `__init__()` time, but rather they should be JIT loaded upon first usage of the library handle itself. This would allow BB to emit much more fine-grained dependency structures, so that the distribution of a set of libraries can happen together, but the loading of said libraries would be independent. --- stdlib/OpenBLAS_jll/Project.toml | 1 + stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/stdlib/OpenBLAS_jll/Project.toml b/stdlib/OpenBLAS_jll/Project.toml index 6d953327003be..529c9945e65f1 100644 --- a/stdlib/OpenBLAS_jll/Project.toml +++ b/stdlib/OpenBLAS_jll/Project.toml @@ -3,6 +3,7 @@ uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" version = "0.3.23+0" [deps] +# See note in `src/OpenBLAS_jll.jl` about this dependency. CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae" Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" diff --git a/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl b/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl index 4f1c57a7d06be..a0c11ab047142 100644 --- a/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl +++ b/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl @@ -2,7 +2,17 @@ ## dummy stub for https://github.com/JuliaBinaryWrappers/OpenBLAS_jll.jl baremodule OpenBLAS_jll -using Base, Libdl, CompilerSupportLibraries_jll, Base.BinaryPlatforms +using Base, Libdl, Base.BinaryPlatforms + +# We are explicitly NOT loading this at runtime, as it contains `libgomp` +# which conflicts with `libiomp5`, breaking things like MKL. In the future, +# we hope to transition to a JLL interface that provides a more granular +# interface than eagerly dlopen'ing all libraries provided in the JLL +# which will eliminate issues like this, where we avoid loading a JLL +# because we don't want to load a library that we don't even use yet. 
+# using CompilerSupportLibraries_jll +# Because of this however, we have to manually load the libraries we +# _do_ care about, namely libgfortran Base.Experimental.@compiler_options compile=min optimize=0 infer=false const PATH_list = String[] @@ -25,10 +35,13 @@ end if Sys.iswindows() const libopenblas = "libopenblas$(libsuffix).dll" + const _libgfortran = string("libgfortran-", libgfortran_version(HostPlatform()).major, ".dll") elseif Sys.isapple() const libopenblas = "@rpath/libopenblas$(libsuffix).dylib" + const _libgfortran = string("@rpath/", "libgfortran.", libgfortran_version(HostPlatform()).major, ".dylib") else const libopenblas = "libopenblas$(libsuffix).so" + const _libgfortran = string("libgfortran.so.", libgfortran_version(HostPlatform()).major) end function __init__() @@ -50,6 +63,10 @@ function __init__() ENV["OPENBLAS_DEFAULT_NUM_THREADS"] = "1" end + # As mentioned above, we are sneaking this in here so that we don't have to + # depend on CSL_jll and load _all_ of its libraries. + dlopen(_libgfortran) + global libopenblas_handle = dlopen(libopenblas) global libopenblas_path = dlpath(libopenblas_handle) global artifact_dir = dirname(Sys.BINDIR) From ee0199fa96cf99b7df87d0212ab4d09b19c26966 Mon Sep 17 00:00:00 2001 From: "Viral B. 
Shah" Date: Tue, 16 May 2023 15:44:55 -0400 Subject: [PATCH 014/290] Various improvements to peakflops() (#49833) * Various improvements to peakflops Use 4096 as the default matrix size Add kwarg to pick the type of elements in the matrix Add kwarg for number of trials and pick best time --- .../InteractiveUtils/src/InteractiveUtils.jl | 8 +++--- stdlib/LinearAlgebra/src/LinearAlgebra.jl | 26 ++++++++++++------- stdlib/LinearAlgebra/test/generic.jl | 2 +- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/stdlib/InteractiveUtils/src/InteractiveUtils.jl b/stdlib/InteractiveUtils/src/InteractiveUtils.jl index b0bf24e0d1379..48fc2b7dafe8f 100644 --- a/stdlib/InteractiveUtils/src/InteractiveUtils.jl +++ b/stdlib/InteractiveUtils/src/InteractiveUtils.jl @@ -301,7 +301,7 @@ end # TODO: @deprecate peakflops to LinearAlgebra export peakflops """ - peakflops(n::Integer=2000; parallel::Bool=false) + peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false) `peakflops` computes the peak flop rate of the computer by using double precision [`gemm!`](@ref LinearAlgebra.BLAS.gemm!). For more information see @@ -311,12 +311,12 @@ export peakflops This function will be moved from `InteractiveUtils` to `LinearAlgebra` in the future. In Julia 1.1 and later it is available as `LinearAlgebra.peakflops`. 
""" -function peakflops(n::Integer=2000; parallel::Bool=false) - # Base.depwarn("`peakflop`s have moved to the LinearAlgebra module, " * +function peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false) + # Base.depwarn("`peakflops` has moved to the LinearAlgebra module, " * # "add `using LinearAlgebra` to your imports.", :peakflops) let LinearAlgebra = Base.require(Base.PkgId( Base.UUID((0x37e2e46d_f89d_539d,0xb4ee_838fcccc9c8e)), "LinearAlgebra")) - return LinearAlgebra.peakflops(n; parallel = parallel) + return LinearAlgebra.peakflops(n, eltype=eltype, ntrials=ntrials, parallel=parallel) end end diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl index a29c259dae607..5cda4af366814 100644 --- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl +++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl @@ -557,14 +557,20 @@ end ldiv(F, B) """ - LinearAlgebra.peakflops(n::Integer=2000; parallel::Bool=false) + LinearAlgebra.peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false) `peakflops` computes the peak flop rate of the computer by using double precision [`gemm!`](@ref LinearAlgebra.BLAS.gemm!). By default, if no arguments are specified, it -multiplies a matrix of size `n x n`, where `n = 2000`. If the underlying BLAS is using +multiplies two `Float64` matrices of size `n x n`, where `n = 4096`. If the underlying BLAS is using multiple threads, higher flop rates are realized. The number of BLAS threads can be set with [`BLAS.set_num_threads(n)`](@ref). +If the keyword argument `eltype` is provided, `peakflops` will construct matrices with elements +of type `eltype` for calculating the peak flop rate. + +By default, `peakflops` will use the best timing from 3 trials. If the `ntrials` keyword argument +is provided, `peakflops` will use those many trials for picking the best timing. 
+ If the keyword argument `parallel` is set to `true`, `peakflops` is run in parallel on all the worker processors. The flop rate of the entire parallel computer is returned. When running in parallel, only 1 BLAS thread is used. The argument `n` still refers to the size @@ -574,19 +580,21 @@ of the problem that is solved on each processor. This function requires at least Julia 1.1. In Julia 1.0 it is available from the standard library `InteractiveUtils`. """ -function peakflops(n::Integer=2000; parallel::Bool=false) - a = fill(1.,100,100) - t = @elapsed a2 = a*a - a = fill(1.,n,n) - t = @elapsed a2 = a*a - @assert a2[1,1] == n +function peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false) + t = zeros(Float64, ntrials) + for i=1:ntrials + a = ones(eltype,n,n) + t[i] = @elapsed a2 = a*a + @assert a2[1,1] == n + end + if parallel let Distributed = Base.require(Base.PkgId( Base.UUID((0x8ba89e20_285c_5b6f, 0x9357_94700520ee1b)), "Distributed")) return sum(Distributed.pmap(peakflops, fill(n, Distributed.nworkers()))) end else - return 2*Float64(n)^3 / t + return 2*Float64(n)^3 / minimum(t) end end diff --git a/stdlib/LinearAlgebra/test/generic.jl b/stdlib/LinearAlgebra/test/generic.jl index 3ebaf38e84945..33eb50d58836a 100644 --- a/stdlib/LinearAlgebra/test/generic.jl +++ b/stdlib/LinearAlgebra/test/generic.jl @@ -558,7 +558,7 @@ end end @testset "peakflops" begin - @test LinearAlgebra.peakflops() > 0 + @test LinearAlgebra.peakflops(1024, eltype=Float32, ntrials=2) > 0 end @testset "NaN handling: Issue 28972" begin From 45748b84ac74b01eb6080d92f79948a817a50005 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 16 May 2023 16:14:14 -0400 Subject: [PATCH 015/290] [Profile] fix overhead counts in format=:flat (#49824) Regression caused by #41742, which inverted the loop without inverting the logic. And fix a number of related formatting mistakes. 
Fix #49732 --- stdlib/Profile/src/Profile.jl | 80 ++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 34 deletions(-) diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index 4bce0c4fecd88..71bbfc70ee937 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -227,11 +227,15 @@ function print(io::IO, elseif Sys.iswindows() && in(groupby, [:thread, [:task, :thread], [:thread, :task]]) @warn "Profiling on windows is limited to the main thread. Other threads have not been sampled and will not show in the report" end - any_nosamples = false - println(io, "Overhead ╎ [+additional indent] Count File:Line; Function") - println(io, "=========================================================") + any_nosamples = true + if format === :tree + Base.print(io, "Overhead ╎ [+additional indent] Count File:Line; Function\n") + Base.print(io, "=========================================================\n") + end if groupby == [:task, :thread] - for taskid in intersect(get_task_ids(data), tasks) + taskids = intersect(get_task_ids(data), tasks) + isempty(taskids) && (any_nosamples = true) + for taskid in taskids threadids = intersect(get_thread_ids(data, taskid), threads) if length(threadids) == 0 any_nosamples = true @@ -247,7 +251,9 @@ function print(io::IO, end end elseif groupby == [:thread, :task] - for threadid in intersect(get_thread_ids(data), threads) + threadids = intersect(get_thread_ids(data), threads) + isempty(threadids) && (any_nosamples = true) + for threadid in threadids taskids = intersect(get_task_ids(data, threadid), tasks) if length(taskids) == 0 any_nosamples = true @@ -264,7 +270,9 @@ function print(io::IO, end elseif groupby === :task threads = 1:typemax(Int) - for taskid in intersect(get_task_ids(data), tasks) + taskids = intersect(get_task_ids(data), tasks) + isempty(taskids) && (any_nosamples = true) + for taskid in taskids printstyled(io, "Task $(Base.repr(taskid)) "; bold=true, 
color=Base.debug_color()) nosamples = print(io, data, lidict, pf, format, threads, taskid, true) nosamples && (any_nosamples = true) @@ -272,7 +280,9 @@ function print(io::IO, end elseif groupby === :thread tasks = 1:typemax(UInt) - for threadid in intersect(get_thread_ids(data), threads) + threadids = intersect(get_thread_ids(data), threads) + isempty(threadids) && (any_nosamples = true) + for threadid in threadids printstyled(io, "Thread $threadid "; bold=true, color=Base.info_color()) nosamples = print(io, data, lidict, pf, format, threadid, tasks, true) nosamples && (any_nosamples = true) @@ -387,6 +397,7 @@ function getdict!(dict::LineInfoDict, data::Vector{UInt}) n_unique_ips = length(unique_ips) n_unique_ips == 0 && return dict iplookups = similar(unique_ips, Vector{StackFrame}) + sort!(unique_ips) # help each thread to get a disjoint set of libraries, as much as possible @sync for indexes_part in Iterators.partition(eachindex(unique_ips), div(n_unique_ips, Threads.threadpoolsize(), RoundUp)) Threads.@spawn begin for i in indexes_part @@ -653,7 +664,7 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, m = Int[] lilist_idx = Dict{T, Int}() recursive = Set{T}() - first = true + leaf = 0 totalshots = 0 startframe = length(data) skip = false @@ -677,12 +688,16 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, skip = false totalshots += 1 empty!(recursive) - first = true + if leaf != 0 + m[leaf] += 1 + end + leaf = 0 startframe = i elseif !skip frames = lidict[ip] nframes = (frames isa Vector ? length(frames) : 1) - for j = 1:nframes + # the last lookup is the non-inlined root frame, the first is the inlined leaf frame + for j = nframes:-1:1 frame = (frames isa Vector ? frames[j] : frames) !C && frame.from_c && continue key = (T === UInt64 ? 
ip : frame) @@ -696,10 +711,7 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, push!(recursive, key) n[idx] += 1 end - if first - m[idx] += 1 - first = false - end + leaf = idx end end end @@ -710,30 +722,31 @@ end function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, cols::Int, fmt::ProfileFormat, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool) lilist, n, m, totalshots, nsleeping = parse_flat(fmt.combine ? StackFrame : UInt64, data, lidict, fmt.C, threads, tasks) + if false # optional: drop the "non-interpretable" ones + keep = map(frame -> frame != UNKNOWN && frame.line != 0, lilist) + lilist = lilist[keep] + n = n[keep] + m = m[keep] + end util_perc = (1 - (nsleeping / totalshots)) * 100 + filenamemap = Dict{Symbol,String}() if isempty(lilist) if is_subsection Base.print(io, "Total snapshots: ") printstyled(io, "$(totalshots)", color=Base.warn_color()) - Base.println(io, " (", round(Int, util_perc), "% utilization)") + Base.print(io, ". Utilization: ", round(Int, util_perc), "%\n") else warning_empty() end return true end - if false # optional: drop the "non-interpretable" ones - keep = map(frame -> frame != UNKNOWN && frame.line != 0, lilist) - lilist = lilist[keep] - n = n[keep] - m = m[keep] - end - filenamemap = Dict{Symbol,String}() - print_flat(io, lilist, n, m, cols, filenamemap, fmt) - Base.print(io, "Total snapshots: ", totalshots, " (", round(Int, util_perc), "% utilization") + is_subsection || print_flat(io, lilist, n, m, cols, filenamemap, fmt) + Base.print(io, "Total snapshots: ", totalshots, ". Utilization: ", round(Int, util_perc), "%") if is_subsection - println(io, ")") + println(io) + print_flat(io, lilist, n, m, cols, filenamemap, fmt) else - println(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task)") + Base.print(io, " across all threads and tasks. 
Use the `groupby` kwarg to break down by thread and/or task.\n") end return false end @@ -1054,8 +1067,8 @@ function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat filenamemap = Dict{Symbol,String}() worklist = [(bt, 0, 0, "")] if !is_subsection - println(io, "Overhead ╎ [+additional indent] Count File:Line; Function") - println(io, "=========================================================") + Base.print(io, "Overhead ╎ [+additional indent] Count File:Line; Function\n") + Base.print(io, "=========================================================\n") end while !isempty(worklist) (bt, level, noisefloor, str) = popfirst!(worklist) @@ -1101,24 +1114,23 @@ function tree(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoFlatDict, Line root, nsleeping = tree!(StackFrameTree{UInt64}(), data, lidict, fmt.C, fmt.recur, threads, tasks) end util_perc = (1 - (nsleeping / root.count)) * 100 - !is_subsection && print_tree(io, root, cols, fmt, is_subsection) + is_subsection || print_tree(io, root, cols, fmt, is_subsection) if isempty(root.down) if is_subsection Base.print(io, "Total snapshots: ") printstyled(io, "$(root.count)", color=Base.warn_color()) - Base.println(io, ". Utilization: ", round(Int, util_perc), "%") + Base.print(io, ". Utilization: ", round(Int, util_perc), "%\n") else warning_empty() end return true - else - Base.print(io, "Total snapshots: ", root.count, ". Utilization: ", round(Int, util_perc), "%") end + Base.print(io, "Total snapshots: ", root.count, ". Utilization: ", round(Int, util_perc), "%") if is_subsection - println(io) + Base.println(io) print_tree(io, root, cols, fmt, is_subsection) else - println(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task") + Base.print(io, " across all threads and tasks. 
Use the `groupby` kwarg to break down by thread and/or task.\n") end return false end From c24517918dd7ea33df4eb0c965dd7d45d530d7b0 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 16 May 2023 16:14:58 -0400 Subject: [PATCH 016/290] fix missing gc root on store to iparams (#49820) Try to optimize the order of this code a bit more, given that these checks are needed somewhat infrequently. Fix #49762 --- src/jltypes.c | 75 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 59 insertions(+), 16 deletions(-) diff --git a/src/jltypes.c b/src/jltypes.c index ff40b7e93092d..5fc98194775b5 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -1844,13 +1844,8 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value jl_typename_t *tn = dt->name; int istuple = (tn == jl_tuple_typename); int isnamedtuple = (tn == jl_namedtuple_typename); - if (check && tn != jl_type_typename) { - size_t i; - for (i = 0; i < ntp; i++) - iparams[i] = normalize_unionalls(iparams[i]); - } - // check type cache, if applicable + // check if type cache will be applicable int cacheable = 1; if (istuple) { size_t i; @@ -1886,7 +1881,15 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value if (jl_has_free_typevars(iparams[i])) cacheable = 0; } + // if applicable, check the cache first for a match if (cacheable) { + jl_value_t *lkup = (jl_value_t*)lookup_type(tn, iparams, ntp); + if (lkup != NULL) + return lkup; + } + // if some normalization might be needed, do that now + // it is probably okay to mutate iparams, and we only store globally rooted objects here + if (check && cacheable) { size_t i; for (i = 0; i < ntp; i++) { jl_value_t *pi = iparams[i]; @@ -1894,18 +1897,16 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value continue; if (jl_is_datatype(pi)) continue; - if (jl_is_vararg(pi)) { - pi = jl_unwrap_vararg(pi); - if (jl_has_free_typevars(pi)) - continue; - } - // normalize types 
equal to wrappers (prepare for wrapper_id) + if (jl_is_vararg(pi)) + // This would require some special handling, but is not needed + // at the moment (and might be better handled in jl_wrap_vararg instead). + continue; + if (!cacheable && jl_has_free_typevars(pi)) + continue; + // normalize types equal to wrappers (prepare for Typeofwrapper) jl_value_t *tw = extract_wrapper(pi); if (tw && tw != pi && (tn != jl_type_typename || jl_typeof(pi) == jl_typeof(tw)) && jl_types_equal(pi, tw)) { - // This would require some special handling, but is never used at - // the moment. - assert(!jl_is_vararg(iparams[i])); iparams[i] = tw; if (p) jl_gc_wb(p, tw); } @@ -1915,6 +1916,9 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value // normalize Type{Type{Union{}}} to Type{TypeofBottom} iparams[0] = (jl_value_t*)jl_typeofbottom_type; } + } + // then check the cache again, if applicable + if (cacheable) { jl_value_t *lkup = (jl_value_t*)lookup_type(tn, iparams, ntp); if (lkup != NULL) return lkup; @@ -1923,12 +1927,15 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value if (stack_lkup) return stack_lkup; + // check parameters against bounds in type definition + // for whether this is even valid if (check && !istuple) { - // check parameters against bounds in type definition + assert(ntp > 0); check_datatype_parameters(tn, iparams, ntp); } else if (ntp == 0 && jl_emptytuple_type != NULL) { // empty tuple type case + assert(istuple); return (jl_value_t*)jl_emptytuple_type; } @@ -1974,6 +1981,42 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value jl_svecset(p, i, iparams[i]); } + // try to simplify some type parameters + if (check && tn != jl_type_typename) { + size_t i; + int changed = 0; + if (istuple) // normalization might change Tuple's, but not other types', cacheable status + cacheable = 1; + for (i = 0; i < ntp; i++) { + jl_value_t *newp = normalize_unionalls(iparams[i]); + if 
(newp != iparams[i]) { + iparams[i] = newp; + jl_svecset(p, i, newp); + changed = 1; + } + if (istuple && cacheable && !jl_is_concrete_type(newp)) + cacheable = 0; + } + if (changed) { + // If this changed something, we need to check the cache again, in + // case we missed the match earlier before the normalizations + // + // e.g. return inst_datatype_inner(dt, p, iparams, ntp, stack, env, 0); + if (cacheable) { + jl_value_t *lkup = (jl_value_t*)lookup_type(tn, iparams, ntp); + if (lkup != NULL) { + JL_GC_POP(); + return lkup; + } + } + jl_value_t *stack_lkup = lookup_type_stack(stack, dt, ntp, iparams); + if (stack_lkup) { + JL_GC_POP(); + return stack_lkup; + } + } + } + // acquire the write lock now that we know we need a new object // since we're going to immediately leak it globally via the instantiation stack if (cacheable) { From ff012aa05f03a28cebd0b436669668aa781bf311 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Wed, 17 May 2023 17:52:36 +0900 Subject: [PATCH 017/290] improve inferrability of loading.jl (#49812) --- base/loading.jl | 13 ++++++++----- base/reflection.jl | 3 ++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 9cc2f13752dfb..27de1df7f81b5 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -627,7 +627,7 @@ function manifest_deps_get(env::String, where::PkgId, name::String)::Union{Nothi exts = get(d, "extensions", nothing)::Union{Dict{String, Any}, Nothing} if exts !== nothing # Check if `where` is an extension of the project - if where.name in keys(exts) && where.uuid == uuid5(proj.uuid, where.name) + if where.name in keys(exts) && where.uuid == uuid5(proj.uuid::String, where.name) # Extensions can load weak deps... 
weakdeps = get(d, "weakdeps", nothing)::Union{Dict{String, Any}, Nothing} if weakdeps !== nothing @@ -1209,7 +1209,9 @@ function insert_extension_triggers(env::String, pkg::PkgId)::Union{Nothing,Missi extensions = get(d_proj, "extensions", nothing)::Union{Nothing, Dict{String, Any}} extensions === nothing && return weakdeps === nothing && return - return _insert_extension_triggers(pkg, extensions, weakdeps) + if weakdeps isa Dict{String, Any} + return _insert_extension_triggers(pkg, extensions, weakdeps) + end end # Now look in manifest @@ -1231,7 +1233,7 @@ function insert_extension_triggers(env::String, pkg::PkgId)::Union{Nothing,Missi return _insert_extension_triggers(pkg, extensions, weakdeps) end - d_weakdeps = Dict{String, String}() + d_weakdeps = Dict{String, Any}() for (dep_name, entries) in d dep_name in weakdeps || continue entries::Vector{Any} @@ -1251,8 +1253,9 @@ function insert_extension_triggers(env::String, pkg::PkgId)::Union{Nothing,Missi return nothing end -function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, <:Any}, weakdeps::Dict{String, <:Any}) - for (ext::String, triggers::Union{String, Vector{String}}) in extensions +function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, Any}, weakdeps::Dict{String, Any}) + for (ext, triggers) in extensions + triggers = triggers::Union{String, Vector{String}} triggers isa String && (triggers = [triggers]) id = PkgId(uuid5(parent.uuid, ext), ext) if id in keys(EXT_PRIMED) || haskey(Base.loaded_modules, id) diff --git a/base/reflection.jl b/base/reflection.jl index 97f1ed14c6729..0ae644d5d41af 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -1248,7 +1248,8 @@ function may_invoke_generator(method::Method, @nospecialize(atype), sparams::Sim generator = method.generator isa(generator, Core.GeneratedFunctionStub) || return false gen_mthds = _methods_by_ftype(Tuple{typeof(generator.gen), Vararg{Any}}, 1, method.primary_world) - (gen_mthds isa Vector && 
length(gen_mthds) == 1) || return false + gen_mthds isa Vector || return false + length(gen_mthds) == 1 || return false generator_method = first(gen_mthds).method nsparams = length(sparams) From 10dc33e983e0b570d5850308d56f0501071361d4 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 17 May 2023 11:59:10 +0200 Subject: [PATCH 018/290] Revert "Dark and light images for README.md" (#49819) --- README.md | 10 +- doc/src/assets/julialogoheaderimage_dark.svg | 209 ------------------ doc/src/assets/julialogoheaderimage_light.svg | 209 ------------------ 3 files changed, 4 insertions(+), 424 deletions(-) delete mode 100644 doc/src/assets/julialogoheaderimage_dark.svg delete mode 100644 doc/src/assets/julialogoheaderimage_light.svg diff --git a/README.md b/README.md index 26fbb21a8a6a7..a4480ecf482cd 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,8 @@ + diff --git a/doc/src/assets/julialogoheaderimage_dark.svg b/doc/src/assets/julialogoheaderimage_dark.svg deleted file mode 100644 index 04e06d2665633..0000000000000 --- a/doc/src/assets/julialogoheaderimage_dark.svg +++ /dev/null @@ -1,209 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/doc/src/assets/julialogoheaderimage_light.svg b/doc/src/assets/julialogoheaderimage_light.svg deleted file mode 100644 index 892ca1bd08701..0000000000000 --- a/doc/src/assets/julialogoheaderimage_light.svg +++ /dev/null @@ -1,209 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - From 0b599ce75d1e25628ddbcb06498237634a13295b Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Wed, 17 May 2023 13:16:35 +0000 Subject: [PATCH 019/290] Fix --image-codegen (#49631) --- src/cgutils.cpp | 3 +- src/codegen.cpp | 2 +- src/jitlayers.cpp | 61 +++++++++++++++++++------------- test/llvmpasses/image-codegen.jl | 22 ++++++++++++ 4 files changed, 61 insertions(+), 27 deletions(-) create mode 100644 test/llvmpasses/image-codegen.jl diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 9e42a6b246e9b..21d58cc200e48 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -333,7 +333,8 @@ static Constant *julia_pgv(jl_codectx_t &ctx, const char *cname, void *addr) StringRef localname; std::string gvname; if (!gv) { - raw_string_ostream(gvname) << cname << ctx.global_targets.size(); + uint64_t id = ctx.emission_context.imaging ? 
jl_atomic_fetch_add(&globalUniqueGeneratedNames, 1) : ctx.global_targets.size(); + raw_string_ostream(gvname) << cname << id; localname = StringRef(gvname); } else { diff --git a/src/codegen.cpp b/src/codegen.cpp index cfc2d8ff24b0c..9e97c819a5911 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1284,7 +1284,7 @@ static const auto &builtin_func_map() { static const auto jl_new_opaque_closure_jlcall_func = new JuliaFunction<>{XSTR(jl_new_opaque_closure_jlcall), get_func_sig, get_func_attrs}; -static _Atomic(int) globalUniqueGeneratedNames{1}; +static _Atomic(uint64_t) globalUniqueGeneratedNames{1}; // --- code generation --- extern "C" { diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 15131ebeae4ae..fde7ba9e30130 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -212,35 +212,46 @@ static jl_callptr_t _jl_compile_codeinst( if (params._shared_module) jl_ExecutionEngine->addModule(orc::ThreadSafeModule(std::move(params._shared_module), params.tsctx)); - StringMap NewExports; - StringMap NewGlobals; - for (auto &global : params.globals) { - NewGlobals[global.second->getName()] = global.first; - } - for (auto &def : emitted) { - orc::ThreadSafeModule &TSM = std::get<0>(def.second); - //The underlying context object is still locked because params is not destroyed yet - auto M = TSM.getModuleUnlocked(); - for (auto &F : M->global_objects()) { - if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { - NewExports[F.getName()] = &TSM; + if (!params.imaging) { + StringMap NewExports; + StringMap NewGlobals; + for (auto &global : params.globals) { + NewGlobals[global.second->getName()] = global.first; + } + for (auto &def : emitted) { + orc::ThreadSafeModule &TSM = std::get<0>(def.second); + //The underlying context object is still locked because params is not destroyed yet + auto M = TSM.getModuleUnlocked(); + for (auto &F : M->global_objects()) { + if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { + 
NewExports[F.getName()] = &TSM; + } + } + // Let's link all globals here also (for now) + for (auto &GV : M->globals()) { + auto InitValue = NewGlobals.find(GV.getName()); + if (InitValue != NewGlobals.end()) { + jl_link_global(&GV, InitValue->second); + } } } - // Let's link all globals here also (for now) - for (auto &GV : M->globals()) { - auto InitValue = NewGlobals.find(GV.getName()); - if (InitValue != NewGlobals.end()) { - jl_link_global(&GV, InitValue->second); + DenseMap Queued; + std::vector Stack; + for (auto &def : emitted) { + // Add the results to the execution engine now + orc::ThreadSafeModule &M = std::get<0>(def.second); + jl_add_to_ee(M, NewExports, Queued, Stack); + assert(Queued.empty() && Stack.empty() && !M); + } + } else { + jl_jit_globals(params.globals); + auto main = std::move(emitted[codeinst].first); + for (auto &def : emitted) { + if (def.first != codeinst) { + jl_merge_module(main, std::move(def.second.first)); } } - } - DenseMap Queued; - std::vector Stack; - for (auto &def : emitted) { - // Add the results to the execution engine now - orc::ThreadSafeModule &M = std::get<0>(def.second); - jl_add_to_ee(M, NewExports, Queued, Stack); - assert(Queued.empty() && Stack.empty() && !M); + jl_ExecutionEngine->addModule(std::move(main)); } ++CompiledCodeinsts; MaxWorkqueueSize.updateMax(emitted.size()); diff --git a/test/llvmpasses/image-codegen.jl b/test/llvmpasses/image-codegen.jl new file mode 100644 index 0000000000000..8132dc4faa22a --- /dev/null +++ b/test/llvmpasses/image-codegen.jl @@ -0,0 +1,22 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license +# RUN: export JULIA_LLVM_ARGS="--print-before=loop-vectorize --print-module-scope" +# RUN: rm -rf %t +# RUN: mkdir %t +# RUN: julia --image-codegen --startup-file=no %s 2> %t/output.txt +# RUN: FileCheck %s < %t/output.txt + +# COM: checks that global variables compiled in imaging codegen +# COM: are marked as external and not internal +# COM: Also makes sure that --imaging-codegen doesn't crash + +# CHECK: *** IR Dump Before +# CHECK-NOT: internal global +# CHECK-NOT: private global +# CHECK: jl_global +# CHECK-SAME: = global +# CHECK: julia_f_ +# CHECK-NOT: internal global +# CHECK-NOT: private global + +f() = "abcd" +f() From becaa78fabb9ce2b5664a4099a3213a1e5742e1f Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Wed, 17 May 2023 15:31:14 +0000 Subject: [PATCH 020/290] Add optnone to invoke wrappers (#44590) --- src/codegen.cpp | 2 ++ src/passes.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/codegen.cpp b/src/codegen.cpp index 9e97c819a5911..3992631c8ac76 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -6659,6 +6659,8 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret Function *w = Function::Create(get_func_sig(M->getContext()), GlobalVariable::ExternalLinkage, funcName, M); jl_init_function(w, params.TargetTriple); w->setAttributes(AttributeList::get(M->getContext(), {get_func_attrs(M->getContext()), w->getAttributes()})); + w->addFnAttr(Attribute::OptimizeNone); + w->addFnAttr(Attribute::NoInline); Function::arg_iterator AI = w->arg_begin(); Value *funcArg = &*AI++; Value *argArray = &*AI++; diff --git a/src/passes.h b/src/passes.h index acbfcd9538106..7894fa103d043 100644 --- a/src/passes.h +++ b/src/passes.h @@ -54,10 +54,12 @@ struct CPUFeatures : PassInfoMixin { struct RemoveNI : PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; + static bool isRequired() { return 
true; } }; struct LowerSIMDLoop : PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; + static bool isRequired() { return true; } }; struct FinalLowerGCPass : PassInfoMixin { From 34a243627cd78a35999f5e6e4fb40e45f6bcb2e8 Mon Sep 17 00:00:00 2001 From: Elliot Saba Date: Wed, 17 May 2023 09:12:36 -0700 Subject: [PATCH 021/290] Remove CSL from the test suite --- test/precompile.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/precompile.jl b/test/precompile.jl index 606ee1087e51e..9ed3654437675 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -394,7 +394,7 @@ precompile_test_harness(false) do dir Dict(let m = Base.root_module(Base, s) Base.PkgId(m) => Base.module_build_id(m) end for s in - [:ArgTools, :Artifacts, :Base64, :CompilerSupportLibraries_jll, :CRC32c, :Dates, + [:ArgTools, :Artifacts, :Base64, :CRC32c, :Dates, :Downloads, :FileWatching, :Future, :InteractiveUtils, :libblastrampoline_jll, :LibCURL, :LibCURL_jll, :LibGit2, :Libdl, :LinearAlgebra, :Logging, :Markdown, :Mmap, :MozillaCACerts_jll, :NetworkOptions, :OpenBLAS_jll, :Pkg, :Printf, From 869c70eda52e22ea1c67950c30456a603486e102 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Thu, 18 May 2023 02:47:43 +0900 Subject: [PATCH 022/290] follow up #49812, fix the wrong type declaration (#49854) JuliaLang/julia#49812 introduced a bug and broke the CI. This commit fixes it up. 
--- base/loading.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/loading.jl b/base/loading.jl index 27de1df7f81b5..a7d05e5940ef3 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -627,7 +627,7 @@ function manifest_deps_get(env::String, where::PkgId, name::String)::Union{Nothi exts = get(d, "extensions", nothing)::Union{Dict{String, Any}, Nothing} if exts !== nothing # Check if `where` is an extension of the project - if where.name in keys(exts) && where.uuid == uuid5(proj.uuid::String, where.name) + if where.name in keys(exts) && where.uuid == uuid5(proj.uuid::UUID, where.name) # Extensions can load weak deps... weakdeps = get(d, "weakdeps", nothing)::Union{Dict{String, Any}, Nothing} if weakdeps !== nothing From 3583fae0dee412d4ac14ef9b25df387593054028 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Wed, 17 May 2023 20:14:39 +0200 Subject: [PATCH 023/290] Tracy: add source-code information to lowering and macro zones. (#49802) --- src/ast.c | 3 +++ src/timing.c | 39 +++++++++++++++++++++++++++++++-------- src/timing.h | 4 ++++ 3 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/ast.c b/src/ast.c index 7333d856bd177..b6d88ab62dcfe 100644 --- a/src/ast.c +++ b/src/ast.c @@ -1031,6 +1031,7 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule jl_method_error(margs[0], &margs[1], nargs, ct->world_age); // unreachable } + jl_timing_show_macro(mfunc, margs[1], inmodule, JL_TIMING_DEFAULT_BLOCK); *ctx = mfunc->def.method->module; result = jl_invoke(margs[0], &margs[1], nargs - 1, mfunc); } @@ -1178,6 +1179,7 @@ JL_DLLEXPORT jl_value_t *jl_expand_in_world(jl_value_t *expr, jl_module_t *inmod const char *file, int line, size_t world) { JL_TIMING(LOWERING, LOWERING); + jl_timing_show_location(file, line, inmodule, JL_TIMING_DEFAULT_BLOCK); JL_GC_PUSH1(&expr); expr = jl_copy_ast(expr); expr = jl_expand_macros(expr, inmodule, NULL, 0, world, 1); @@ -1191,6 +1193,7 @@ JL_DLLEXPORT jl_value_t 
*jl_expand_with_loc_warn(jl_value_t *expr, jl_module_t * const char *file, int line) { JL_TIMING(LOWERING, LOWERING); + jl_timing_show_location(file, line, inmodule, JL_TIMING_DEFAULT_BLOCK); jl_array_t *kwargs = NULL; JL_GC_PUSH2(&expr, &kwargs); expr = jl_copy_ast(expr); diff --git a/src/timing.c b/src/timing.c index 0b573551ec246..eca29d5191c07 100644 --- a/src/timing.c +++ b/src/timing.c @@ -205,23 +205,37 @@ JL_DLLEXPORT void jl_timing_show_filename(const char *path, jl_timing_block_t *c #endif } +JL_DLLEXPORT void jl_timing_show_location(const char *file, int line, jl_module_t* mod, jl_timing_block_t *cur_block) +{ +#ifdef USE_TRACY + jl_module_t *root = jl_module_root(mod); + if (root == mod || root == jl_main_module) { + jl_timing_printf(cur_block, "%s:%d in %s", + gnu_basename(file), + line, + jl_symbol_name(mod->name)); + } else { + // TODO: generalize to print the entire module hierarchy + jl_timing_printf(cur_block, "%s:%d in %s.%s", + gnu_basename(file), + line, + jl_symbol_name(root->name), + jl_symbol_name(mod->name)); + } +#endif +} + JL_DLLEXPORT void jl_timing_show_method_instance(jl_method_instance_t *mi, jl_timing_block_t *cur_block) { jl_timing_show_func_sig(mi->specTypes, cur_block); jl_method_t *def = mi->def.method; - jl_timing_printf(cur_block, "%s:%d in %s", - gnu_basename(jl_symbol_name(def->file)), - def->line, - jl_symbol_name(def->module->name)); + jl_timing_show_location(jl_symbol_name(def->file), def->line, def->module, cur_block); } JL_DLLEXPORT void jl_timing_show_method(jl_method_t *method, jl_timing_block_t *cur_block) { jl_timing_show((jl_value_t *)method, cur_block); - jl_timing_printf(cur_block, "%s:%d in %s", - gnu_basename(jl_symbol_name(method->file)), - method->line, - jl_symbol_name(method->module->name)); + jl_timing_show_location(jl_symbol_name(method->file), method->line, method->module, cur_block); } JL_DLLEXPORT void jl_timing_show_func_sig(jl_value_t *v, jl_timing_block_t *cur_block) @@ -240,6 +254,15 @@ 
JL_DLLEXPORT void jl_timing_show_func_sig(jl_value_t *v, jl_timing_block_t *cur_ #endif } +JL_DLLEXPORT void jl_timing_show_macro(jl_method_instance_t *macro, jl_value_t* lno, jl_module_t* mod, jl_timing_block_t *cur_block) +{ + jl_timing_printf(cur_block, "%s", jl_symbol_name(macro->def.method->name)); + assert(jl_typetagis(lno, jl_linenumbernode_type)); + jl_timing_show_location(jl_symbol_name((jl_sym_t*)jl_fieldref(lno, 1)), + jl_unbox_int64(jl_fieldref(lno, 0)), + mod, cur_block); +} + JL_DLLEXPORT void jl_timing_printf(jl_timing_block_t *cur_block, const char *format, ...) { va_list args; diff --git a/src/timing.h b/src/timing.h index 8eda28c22c89c..a832a3644f9f4 100644 --- a/src/timing.h +++ b/src/timing.h @@ -78,6 +78,8 @@ extern JL_DLLEXPORT uint32_t jl_timing_print_limit; #define jl_timing_show_method_instance(mi, b) #define jl_timing_show_method(mi, b) #define jl_timing_show_func_sig(tt, b) +#define jl_timing_show_location(file, line, mod, b) +#define jl_timing_show_macro(macro, lno, mod, b) #define jl_timing_printf(b, f, ...) 
#define jl_timing_puts(b, s) #define jl_timing_init_task(t) @@ -126,6 +128,8 @@ JL_DLLEXPORT void jl_timing_show_filename(const char *path, jl_timing_block_t *c JL_DLLEXPORT void jl_timing_show_method_instance(jl_method_instance_t *mi, jl_timing_block_t *cur_block); JL_DLLEXPORT void jl_timing_show_method(jl_method_t *method, jl_timing_block_t *cur_block); JL_DLLEXPORT void jl_timing_show_func_sig(jl_value_t *v, jl_timing_block_t *cur_block); +JL_DLLEXPORT void jl_timing_show_location(const char *file, int line, jl_module_t* mod, jl_timing_block_t *cur_block); +JL_DLLEXPORT void jl_timing_show_macro(jl_method_instance_t *macro, jl_value_t* lno, jl_module_t* mod, jl_timing_block_t *cur_block); JL_DLLEXPORT void jl_timing_printf(jl_timing_block_t *cur_block, const char *format, ...); JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str); From d50e25edbcf8ef2f448ba02fb374bf6a7ce159eb Mon Sep 17 00:00:00 2001 From: Prem Chintalapudi Date: Wed, 17 May 2023 14:31:19 -0400 Subject: [PATCH 024/290] Make jl_timing_counts atomic --- src/timing.c | 11 ++++++----- src/timing.h | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/timing.c b/src/timing.c index eca29d5191c07..4b97dd8bf31c2 100644 --- a/src/timing.c +++ b/src/timing.c @@ -35,7 +35,7 @@ JL_DLLEXPORT uint64_t jl_timing_enable_mask = ~((1ull << JL_TIMING_ROOT) | JL_DLLEXPORT uint64_t jl_timing_enable_mask = ~0ull; #endif -JL_DLLEXPORT uint64_t jl_timing_counts[(int)JL_TIMING_LAST] = {0}; +JL_DLLEXPORT _Atomic(uint64_t) jl_timing_counts[(int)JL_TIMING_LAST] = {0}; // Used to as an item limit when several strings of metadata can // potentially be associated with a single timing zone. 
@@ -56,14 +56,15 @@ void jl_print_timings(void) uint64_t total_time = cycleclock() - t0; uint64_t root_time = total_time; for (int i = 0; i < JL_TIMING_LAST; i++) { - root_time -= jl_timing_counts[i]; + root_time -= jl_atomic_load_relaxed(jl_timing_counts + i); } - jl_timing_counts[0] = root_time; + jl_atomic_store_relaxed(jl_timing_counts, root_time); fprintf(stderr, "\nJULIA TIMINGS\n"); for (int i = 0; i < JL_TIMING_LAST; i++) { - if (jl_timing_counts[i] != 0) + uint64_t counts = jl_atomic_load_relaxed(jl_timing_counts + i); + if (counts != 0) fprintf(stderr, "%-25s : %5.2f %% %" PRIu64 "\n", jl_timing_names[i], - 100 * (((double)jl_timing_counts[i]) / total_time), jl_timing_counts[i]); + 100 * (((double)counts) / total_time), counts); } fprintf(stderr, "\nJULIA COUNTERS\n"); diff --git a/src/timing.h b/src/timing.h index a832a3644f9f4..e9c64cd87cc91 100644 --- a/src/timing.h +++ b/src/timing.h @@ -286,7 +286,7 @@ enum jl_timing_counter_types { * Implementation: Aggregated counts back-end **/ -extern JL_DLLEXPORT uint64_t jl_timing_counts[(int)JL_TIMING_LAST]; +extern JL_DLLEXPORT _Atomic(uint64_t) jl_timing_counts[(int)JL_TIMING_LAST]; typedef struct _jl_timing_counts_t { uint64_t total; uint64_t t0; @@ -319,7 +319,7 @@ STATIC_INLINE void _jl_timing_counts_ctor(jl_timing_counts_t *block) JL_NOTSAFEP } STATIC_INLINE void _jl_timing_counts_destroy(jl_timing_counts_t *block, int subsystem) JL_NOTSAFEPOINT { - jl_timing_counts[subsystem] += block->total; + jl_atomic_fetch_add_relaxed(jl_timing_counts + subsystem, block->total); } /** From 84d4b92929a3a1884b5bbb20128e24e605313651 Mon Sep 17 00:00:00 2001 From: Prem Chintalapudi Date: Wed, 17 May 2023 14:49:17 -0400 Subject: [PATCH 025/290] Print timing outputs as CSV --- src/timing.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/timing.c b/src/timing.c index 4b97dd8bf31c2..9b6c5be0929d3 100644 --- a/src/timing.c +++ b/src/timing.c @@ -60,18 +60,19 @@ void jl_print_timings(void) } 
jl_atomic_store_relaxed(jl_timing_counts, root_time); fprintf(stderr, "\nJULIA TIMINGS\n"); + fprintf(stderr, "%-25s, %-30s\n", "Event", "Cycles (%% of total)"); for (int i = 0; i < JL_TIMING_LAST; i++) { uint64_t counts = jl_atomic_load_relaxed(jl_timing_counts + i); if (counts != 0) - fprintf(stderr, "%-25s : %5.2f %% %" PRIu64 "\n", jl_timing_names[i], - 100 * (((double)counts) / total_time), counts); + fprintf(stderr, "%-25s, %20" PRIu64 " (%5.2f %%)\n", jl_timing_names[i], counts, 100 * (((double)counts) / total_time)); } fprintf(stderr, "\nJULIA COUNTERS\n"); + fprintf(stderr, "%-25s, %-20s\n", "Counter", "Value"); #define X(name) do { \ int64_t val = (int64_t) jl_atomic_load_relaxed(&jl_timing_counters[(int)JL_TIMING_COUNTER_##name].basic_counter); \ if (val != 0) \ - fprintf(stderr, "%-25s : %" PRIi64 "\n", #name, val); \ + fprintf(stderr, "%-25s, %20" PRIi64 "\n", #name, val); \ } while (0); JL_TIMING_COUNTERS From 4dc683bef2f3ddc1b29a120c55839f1469d7ed5b Mon Sep 17 00:00:00 2001 From: Prem Chintalapudi Date: Wed, 17 May 2023 14:55:40 -0400 Subject: [PATCH 026/290] Make jl_timing_enable_mask atomic --- src/timing.c | 8 ++++---- src/timing.h | 6 ++++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/timing.c b/src/timing.c index 9b6c5be0929d3..66b6df28c7a0d 100644 --- a/src/timing.c +++ b/src/timing.c @@ -25,14 +25,14 @@ static uint64_t t0; * can generally keep up with that, those events also bloat the saved ".tracy" * files, so we disable them by default. 
**/ -JL_DLLEXPORT uint64_t jl_timing_enable_mask = ~((1ull << JL_TIMING_ROOT) | +JL_DLLEXPORT _Atomic(uint64_t) jl_timing_enable_mask = ~((1ull << JL_TIMING_ROOT) | (1ull << JL_TIMING_TYPE_CACHE_LOOKUP) | (1ull << JL_TIMING_METHOD_MATCH) | (1ull << JL_TIMING_METHOD_LOOKUP_FAST) | (1ull << JL_TIMING_AST_COMPRESS) | (1ull << JL_TIMING_AST_UNCOMPRESS)); #else -JL_DLLEXPORT uint64_t jl_timing_enable_mask = ~0ull; +JL_DLLEXPORT _Atomic(uint64_t) jl_timing_enable_mask = ~0ull; #endif JL_DLLEXPORT _Atomic(uint64_t) jl_timing_counts[(int)JL_TIMING_LAST] = {0}; @@ -333,9 +333,9 @@ JL_DLLEXPORT int jl_timing_set_enable(const char *subsystem, uint8_t enabled) if (strcmp(subsystem, jl_timing_names[i]) == 0) { uint64_t subsystem_bit = (1ul << i); if (enabled) { - jl_timing_enable_mask |= subsystem_bit; + jl_atomic_fetch_or_relaxed(&jl_timing_enable_mask, subsystem_bit); } else { - jl_timing_enable_mask &= ~subsystem_bit; + jl_atomic_fetch_and_relaxed(&jl_timing_enable_mask, ~subsystem_bit); } return 0; } diff --git a/src/timing.h b/src/timing.h index e9c64cd87cc91..87286da05c11b 100644 --- a/src/timing.h +++ b/src/timing.h @@ -5,6 +5,8 @@ #include "julia.h" +#define USE_TIMING_COUNTS + static inline const char *gnu_basename(const char *path) { const char *base = strrchr(path, '/'); @@ -326,7 +328,7 @@ STATIC_INLINE void _jl_timing_counts_destroy(jl_timing_counts_t *block, int subs * Top-level jl_timing implementation **/ -extern JL_DLLEXPORT uint64_t jl_timing_enable_mask; +extern JL_DLLEXPORT _Atomic(uint64_t) jl_timing_enable_mask; extern const char *jl_timing_names[(int)JL_TIMING_LAST]; struct _jl_timing_block_t { // typedef in julia.h @@ -342,7 +344,7 @@ struct _jl_timing_block_t { // typedef in julia.h }; STATIC_INLINE int _jl_timing_enabled(int subsystem) JL_NOTSAFEPOINT { - return (jl_timing_enable_mask & (1 << subsystem)) != 0; + return (jl_atomic_load_relaxed(&jl_timing_enable_mask) & (1 << subsystem)) != 0; } STATIC_INLINE void jl_timing_block_start(jl_timing_block_t 
*block) { From 8e3a756a89f9ddb10f6d74b07fdd433d50207984 Mon Sep 17 00:00:00 2001 From: Prem Chintalapudi Date: Wed, 17 May 2023 15:04:17 -0400 Subject: [PATCH 027/290] jl_timing_enable_mask -> jl_timing_disable_mask --- src/timing.c | 41 ++++++++++++++++++++++++----------------- src/timing.h | 6 ++---- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/src/timing.c b/src/timing.c index 66b6df28c7a0d..d94015cc8cdc7 100644 --- a/src/timing.c +++ b/src/timing.c @@ -6,6 +6,10 @@ #include "options.h" #include "stdio.h" +#if defined(USE_TRACY) || defined(USE_ITTAPI) +#define DISABLE_FREQUENT_EVENTS +#endif + jl_module_t *jl_module_root(jl_module_t *m); #ifdef __cplusplus @@ -19,21 +23,8 @@ extern "C" { #endif static uint64_t t0; -#if defined(USE_TRACY) || defined(USE_ITTAPI) -/** - * These sources often generate millions of events / minute. Although Tracy - * can generally keep up with that, those events also bloat the saved ".tracy" - * files, so we disable them by default. - **/ -JL_DLLEXPORT _Atomic(uint64_t) jl_timing_enable_mask = ~((1ull << JL_TIMING_ROOT) | - (1ull << JL_TIMING_TYPE_CACHE_LOOKUP) | - (1ull << JL_TIMING_METHOD_MATCH) | - (1ull << JL_TIMING_METHOD_LOOKUP_FAST) | - (1ull << JL_TIMING_AST_COMPRESS) | - (1ull << JL_TIMING_AST_UNCOMPRESS)); -#else -JL_DLLEXPORT _Atomic(uint64_t) jl_timing_enable_mask = ~0ull; -#endif + +JL_DLLEXPORT _Atomic(uint64_t) jl_timing_disable_mask; JL_DLLEXPORT _Atomic(uint64_t) jl_timing_counts[(int)JL_TIMING_LAST] = {0}; @@ -108,6 +99,22 @@ void jl_init_timing(void) TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_JITDataSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0); TracyCPlotConfig(jl_timing_counters[JL_TIMING_COUNTER_ImageSize].tracy_counter.name, TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0); #endif + +/** + * These sources often generate millions of events / minute. 
Although Tracy + * can generally keep up with that, those events also bloat the saved ".tracy" + * files, so we disable them by default. + **/ +#ifdef DISABLE_FREQUENT_EVENTS +#define DISABLE_EVENT(event) jl_atomic_fetch_or_relaxed(&jl_timing_disable_mask, JL_TIMING_##event) + DISABLE_EVENT(ROOT); + DISABLE_EVENT(TYPE_CACHE_LOOKUP); + DISABLE_EVENT(METHOD_MATCH); + DISABLE_EVENT(METHOD_LOOKUP_FAST); + DISABLE_EVENT(AST_COMPRESS); + DISABLE_EVENT(AST_UNCOMPRESS); +#endif + } void jl_destroy_timing(void) @@ -333,9 +340,9 @@ JL_DLLEXPORT int jl_timing_set_enable(const char *subsystem, uint8_t enabled) if (strcmp(subsystem, jl_timing_names[i]) == 0) { uint64_t subsystem_bit = (1ul << i); if (enabled) { - jl_atomic_fetch_or_relaxed(&jl_timing_enable_mask, subsystem_bit); + jl_atomic_fetch_and_relaxed(&jl_timing_disable_mask, ~subsystem_bit); } else { - jl_atomic_fetch_and_relaxed(&jl_timing_enable_mask, ~subsystem_bit); + jl_atomic_fetch_or_relaxed(&jl_timing_disable_mask, subsystem_bit); } return 0; } diff --git a/src/timing.h b/src/timing.h index 87286da05c11b..5ef1669501aca 100644 --- a/src/timing.h +++ b/src/timing.h @@ -5,8 +5,6 @@ #include "julia.h" -#define USE_TIMING_COUNTS - static inline const char *gnu_basename(const char *path) { const char *base = strrchr(path, '/'); @@ -328,7 +326,7 @@ STATIC_INLINE void _jl_timing_counts_destroy(jl_timing_counts_t *block, int subs * Top-level jl_timing implementation **/ -extern JL_DLLEXPORT _Atomic(uint64_t) jl_timing_enable_mask; +extern JL_DLLEXPORT _Atomic(uint64_t) jl_timing_disable_mask; extern const char *jl_timing_names[(int)JL_TIMING_LAST]; struct _jl_timing_block_t { // typedef in julia.h @@ -344,7 +342,7 @@ struct _jl_timing_block_t { // typedef in julia.h }; STATIC_INLINE int _jl_timing_enabled(int subsystem) JL_NOTSAFEPOINT { - return (jl_atomic_load_relaxed(&jl_timing_enable_mask) & (1 << subsystem)) != 0; + return (jl_atomic_load_relaxed(&jl_timing_disable_mask) & (1 << subsystem)) == 0; } STATIC_INLINE 
void jl_timing_block_start(jl_timing_block_t *block) { From 8b4bb8909e795708f6ce13fba6124a492df05572 Mon Sep 17 00:00:00 2001 From: Prem Chintalapudi Date: Wed, 17 May 2023 15:15:15 -0400 Subject: [PATCH 028/290] Allow number of subsystems to exceed 64 --- src/timing.c | 23 +++++++++++------------ src/timing.h | 4 ++-- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/timing.c b/src/timing.c index d94015cc8cdc7..e1f92d5ccaff1 100644 --- a/src/timing.c +++ b/src/timing.c @@ -24,7 +24,7 @@ extern "C" { static uint64_t t0; -JL_DLLEXPORT _Atomic(uint64_t) jl_timing_disable_mask; +JL_DLLEXPORT _Atomic(uint64_t) jl_timing_disable_mask[(JL_TIMING_LAST + sizeof(uint64_t) * CHAR_BIT - 1) / (sizeof(uint64_t) * CHAR_BIT)]; JL_DLLEXPORT _Atomic(uint64_t) jl_timing_counts[(int)JL_TIMING_LAST] = {0}; @@ -75,7 +75,6 @@ void jl_init_timing(void) { t0 = cycleclock(); - _Static_assert(JL_TIMING_EVENT_LAST < sizeof(uint64_t) * CHAR_BIT, "Too many timing events!"); _Static_assert((int)JL_TIMING_LAST <= (int)JL_TIMING_EVENT_LAST, "More owners than events!"); int i __attribute__((unused)) = 0; @@ -106,13 +105,13 @@ void jl_init_timing(void) * files, so we disable them by default. 
**/ #ifdef DISABLE_FREQUENT_EVENTS -#define DISABLE_EVENT(event) jl_atomic_fetch_or_relaxed(&jl_timing_disable_mask, JL_TIMING_##event) - DISABLE_EVENT(ROOT); - DISABLE_EVENT(TYPE_CACHE_LOOKUP); - DISABLE_EVENT(METHOD_MATCH); - DISABLE_EVENT(METHOD_LOOKUP_FAST); - DISABLE_EVENT(AST_COMPRESS); - DISABLE_EVENT(AST_UNCOMPRESS); +#define DISABLE_SUBSYSTEM(subsystem) jl_atomic_fetch_or_relaxed(jl_timing_disable_mask + (JL_TIMING_##subsystem / (sizeof(uint64_t) * CHAR_BIT)), 1 << (JL_TIMING_##subsystem % (sizeof(uint64_t) * CHAR_BIT))) + DISABLE_SUBSYSTEM(ROOT); + DISABLE_SUBSYSTEM(TYPE_CACHE_LOOKUP); + DISABLE_SUBSYSTEM(METHOD_MATCH); + DISABLE_SUBSYSTEM(METHOD_LOOKUP_FAST); + DISABLE_SUBSYSTEM(AST_COMPRESS); + DISABLE_SUBSYSTEM(AST_UNCOMPRESS); #endif } @@ -338,11 +337,11 @@ JL_DLLEXPORT int jl_timing_set_enable(const char *subsystem, uint8_t enabled) { for (int i = 0; i < JL_TIMING_LAST; i++) { if (strcmp(subsystem, jl_timing_names[i]) == 0) { - uint64_t subsystem_bit = (1ul << i); + uint64_t subsystem_bit = 1ul << (i % (sizeof(uint64_t) * CHAR_BIT)); if (enabled) { - jl_atomic_fetch_and_relaxed(&jl_timing_disable_mask, ~subsystem_bit); + jl_atomic_fetch_and_relaxed(jl_timing_disable_mask + (i / (sizeof(uint64_t) * CHAR_BIT)), ~subsystem_bit); } else { - jl_atomic_fetch_or_relaxed(&jl_timing_disable_mask, subsystem_bit); + jl_atomic_fetch_or_relaxed(jl_timing_disable_mask + (i / (sizeof(uint64_t) * CHAR_BIT)), subsystem_bit); } return 0; } diff --git a/src/timing.h b/src/timing.h index 5ef1669501aca..5e3daf310a36a 100644 --- a/src/timing.h +++ b/src/timing.h @@ -326,7 +326,7 @@ STATIC_INLINE void _jl_timing_counts_destroy(jl_timing_counts_t *block, int subs * Top-level jl_timing implementation **/ -extern JL_DLLEXPORT _Atomic(uint64_t) jl_timing_disable_mask; +extern JL_DLLEXPORT _Atomic(uint64_t) jl_timing_disable_mask[(JL_TIMING_LAST + sizeof(uint64_t) * CHAR_BIT - 1) / (sizeof(uint64_t) * CHAR_BIT)]; extern const char *jl_timing_names[(int)JL_TIMING_LAST]; struct 
_jl_timing_block_t { // typedef in julia.h @@ -342,7 +342,7 @@ struct _jl_timing_block_t { // typedef in julia.h }; STATIC_INLINE int _jl_timing_enabled(int subsystem) JL_NOTSAFEPOINT { - return (jl_atomic_load_relaxed(&jl_timing_disable_mask) & (1 << subsystem)) == 0; + return (jl_atomic_load_relaxed(jl_timing_disable_mask + subsystem / (sizeof(uint64_t) * CHAR_BIT)) & (1 << (subsystem % (sizeof(uint64_t) * CHAR_BIT)))) == 0; } STATIC_INLINE void jl_timing_block_start(jl_timing_block_t *block) { From 57740294f24f838e4dab45804023d4281afc5423 Mon Sep 17 00:00:00 2001 From: Prem Chintalapudi Date: Wed, 17 May 2023 15:30:37 -0400 Subject: [PATCH 029/290] Sort timing names --- src/timing.c | 45 +++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/src/timing.c b/src/timing.c index e1f92d5ccaff1..e255f5eaf8079 100644 --- a/src/timing.c +++ b/src/timing.c @@ -39,6 +39,8 @@ const char *jl_timing_names[(int)JL_TIMING_LAST] = #undef X }; +int jl_timing_names_sorted[(int)JL_TIMING_LAST]; + JL_DLLEXPORT jl_timing_counter_t jl_timing_counters[JL_TIMING_COUNTER_LAST]; void jl_print_timings(void) @@ -51,11 +53,12 @@ void jl_print_timings(void) } jl_atomic_store_relaxed(jl_timing_counts, root_time); fprintf(stderr, "\nJULIA TIMINGS\n"); - fprintf(stderr, "%-25s, %-30s\n", "Event", "Cycles (%% of total)"); + fprintf(stderr, "%-25s, %-30s\n", "Event", "Cycles (% of total)"); for (int i = 0; i < JL_TIMING_LAST; i++) { - uint64_t counts = jl_atomic_load_relaxed(jl_timing_counts + i); + int j = jl_timing_names_sorted[i]; + uint64_t counts = jl_atomic_load_relaxed(jl_timing_counts + j); if (counts != 0) - fprintf(stderr, "%-25s, %20" PRIu64 " (%5.2f %%)\n", jl_timing_names[i], counts, 100 * (((double)counts) / total_time)); + fprintf(stderr, "%-25s, %20" PRIu64 " (%5.2f %%)\n", jl_timing_names[j], counts, 100 * (((double)counts) / total_time)); } fprintf(stderr, "\nJULIA COUNTERS\n"); @@ -71,12 +74,23 @@ void jl_print_timings(void) 
#endif } +int cmp_names(const void *a, const void *b) { + int ia = *(const int*)a; + int ib = *(const int*)b; + return strcmp(jl_timing_names[ia], jl_timing_names[ib]); +} + void jl_init_timing(void) { t0 = cycleclock(); _Static_assert((int)JL_TIMING_LAST <= (int)JL_TIMING_EVENT_LAST, "More owners than events!"); + for (int i = 0; i < JL_TIMING_LAST; i++) { + jl_timing_names_sorted[i] = i; + } + qsort(jl_timing_names_sorted, JL_TIMING_LAST, sizeof(int), cmp_names); + int i __attribute__((unused)) = 0; #ifdef USE_ITTAPI i = 0; @@ -333,20 +347,23 @@ void jl_timing_init_task(jl_task_t *t) #endif } +int cmp_name_idx(const void *name, const void *idx) { + return strcmp((const char *)name, jl_timing_names[*(const int *)idx]); +} + JL_DLLEXPORT int jl_timing_set_enable(const char *subsystem, uint8_t enabled) { - for (int i = 0; i < JL_TIMING_LAST; i++) { - if (strcmp(subsystem, jl_timing_names[i]) == 0) { - uint64_t subsystem_bit = 1ul << (i % (sizeof(uint64_t) * CHAR_BIT)); - if (enabled) { - jl_atomic_fetch_and_relaxed(jl_timing_disable_mask + (i / (sizeof(uint64_t) * CHAR_BIT)), ~subsystem_bit); - } else { - jl_atomic_fetch_or_relaxed(jl_timing_disable_mask + (i / (sizeof(uint64_t) * CHAR_BIT)), subsystem_bit); - } - return 0; - } + const int *idx = (const int *)bsearch(subsystem, jl_timing_names_sorted, JL_TIMING_LAST, sizeof(int), cmp_name_idx); + if (idx == NULL) + return -1; + int i = *idx; + uint64_t subsystem_bit = 1ul << (i % (sizeof(uint64_t) * CHAR_BIT)); + if (enabled) { + jl_atomic_fetch_and_relaxed(jl_timing_disable_mask + (i / (sizeof(uint64_t) * CHAR_BIT)), ~subsystem_bit); + } else { + jl_atomic_fetch_or_relaxed(jl_timing_disable_mask + (i / (sizeof(uint64_t) * CHAR_BIT)), subsystem_bit); } - return -1; + return 0; } static void jl_timing_set_enable_from_env(void) From 5a503b0d83cab3fd35d230cf30fc01435c32772c Mon Sep 17 00:00:00 2001 From: Prem Chintalapudi Date: Wed, 17 May 2023 15:48:24 -0400 Subject: [PATCH 030/290] Print total time and self timing 
with timing counts --- src/timing.c | 16 +++++++++------- src/timing.h | 28 ++++++++++++++++------------ 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/timing.c b/src/timing.c index e255f5eaf8079..7eec80f7a2fa3 100644 --- a/src/timing.c +++ b/src/timing.c @@ -26,7 +26,8 @@ static uint64_t t0; JL_DLLEXPORT _Atomic(uint64_t) jl_timing_disable_mask[(JL_TIMING_LAST + sizeof(uint64_t) * CHAR_BIT - 1) / (sizeof(uint64_t) * CHAR_BIT)]; -JL_DLLEXPORT _Atomic(uint64_t) jl_timing_counts[(int)JL_TIMING_LAST] = {0}; +JL_DLLEXPORT _Atomic(uint64_t) jl_timing_self_counts[(int)JL_TIMING_LAST]; +JL_DLLEXPORT _Atomic(uint64_t) jl_timing_full_counts[(int)JL_TIMING_LAST]; // Used to as an item limit when several strings of metadata can // potentially be associated with a single timing zone. @@ -49,16 +50,17 @@ void jl_print_timings(void) uint64_t total_time = cycleclock() - t0; uint64_t root_time = total_time; for (int i = 0; i < JL_TIMING_LAST; i++) { - root_time -= jl_atomic_load_relaxed(jl_timing_counts + i); + root_time -= jl_atomic_load_relaxed(jl_timing_self_counts + i); } - jl_atomic_store_relaxed(jl_timing_counts, root_time); + jl_atomic_store_relaxed(jl_timing_self_counts, root_time); fprintf(stderr, "\nJULIA TIMINGS\n"); - fprintf(stderr, "%-25s, %-30s\n", "Event", "Cycles (% of total)"); + fprintf(stderr, "%-25s, %-30s, %-30s\n", "Event", "Self Cycles (% of Total)", "Total Cycles (% of Total)"); for (int i = 0; i < JL_TIMING_LAST; i++) { int j = jl_timing_names_sorted[i]; - uint64_t counts = jl_atomic_load_relaxed(jl_timing_counts + j); - if (counts != 0) - fprintf(stderr, "%-25s, %20" PRIu64 " (%5.2f %%)\n", jl_timing_names[j], counts, 100 * (((double)counts) / total_time)); + uint64_t self = jl_atomic_load_relaxed(jl_timing_self_counts + j); + uint64_t total = jl_atomic_load_relaxed(jl_timing_full_counts + j); + if (total != 0) + fprintf(stderr, "%-25s, %20" PRIu64 " (%5.2f %%), %20" PRIu64 " (%5.2f %%)\n", jl_timing_names[j], self, 100 * 
(((double)self) / total_time), total, 100 * (((double)total) / total_time)); } fprintf(stderr, "\nJULIA COUNTERS\n"); diff --git a/src/timing.h b/src/timing.h index 5e3daf310a36a..d0a28bf830151 100644 --- a/src/timing.h +++ b/src/timing.h @@ -233,14 +233,14 @@ enum jl_timing_counter_types { #ifdef USE_TIMING_COUNTS #define _COUNTS_CTX_MEMBER jl_timing_counts_t counts_ctx; -#define _COUNTS_CTOR(block) _jl_timing_counts_ctor(block) -#define _COUNTS_DESTROY(block, subsystem) _jl_timing_counts_destroy(block, subsystem) +#define _COUNTS_CTOR(block, t) _jl_timing_counts_ctor(block, t) +#define _COUNTS_DESTROY(block, subsystem, t) _jl_timing_counts_destroy(block, subsystem, t) #define _COUNTS_START(block, t) _jl_timing_counts_start(block, t) #define _COUNTS_STOP(block, t) _jl_timing_counts_stop(block, t) #else #define _COUNTS_CTX_MEMBER -#define _COUNTS_CTOR(block) -#define _COUNTS_DESTROY(block, subsystem) +#define _COUNTS_CTOR(block, t) +#define _COUNTS_DESTROY(block, subsystem, t) #define _COUNTS_START(block, t) #define _COUNTS_STOP(block, t) #endif @@ -286,9 +286,11 @@ enum jl_timing_counter_types { * Implementation: Aggregated counts back-end **/ -extern JL_DLLEXPORT _Atomic(uint64_t) jl_timing_counts[(int)JL_TIMING_LAST]; +extern JL_DLLEXPORT _Atomic(uint64_t) jl_timing_self_counts[(int)JL_TIMING_LAST]; +extern JL_DLLEXPORT _Atomic(uint64_t) jl_timing_full_counts[(int)JL_TIMING_LAST]; typedef struct _jl_timing_counts_t { uint64_t total; + uint64_t start; uint64_t t0; #ifdef JL_DEBUG_BUILD uint8_t running; @@ -300,7 +302,7 @@ STATIC_INLINE void _jl_timing_counts_stop(jl_timing_counts_t *block, uint64_t t) assert(block->running); block->running = 0; #endif - block->total += t - block->t0; + block->total += t - block->start; } STATIC_INLINE void _jl_timing_counts_start(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { @@ -308,18 +310,20 @@ STATIC_INLINE void _jl_timing_counts_start(jl_timing_counts_t *block, uint64_t t assert(!block->running); block->running = 
1; #endif - block->t0 = t; + block->start = t; } -STATIC_INLINE void _jl_timing_counts_ctor(jl_timing_counts_t *block) JL_NOTSAFEPOINT { +STATIC_INLINE void _jl_timing_counts_ctor(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { block->total = 0; + block->t0 = t; #ifdef JL_DEBUG_BUILD block->running = 0; #endif } -STATIC_INLINE void _jl_timing_counts_destroy(jl_timing_counts_t *block, int subsystem) JL_NOTSAFEPOINT { - jl_atomic_fetch_add_relaxed(jl_timing_counts + subsystem, block->total); +STATIC_INLINE void _jl_timing_counts_destroy(jl_timing_counts_t *block, int subsystem, uint64_t t) JL_NOTSAFEPOINT { + jl_atomic_fetch_add_relaxed(jl_timing_self_counts + subsystem, block->total); + jl_atomic_fetch_add_relaxed(jl_timing_full_counts + subsystem, t - block->t0); } /** @@ -367,7 +371,7 @@ STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int subsystem block->subsystem = subsystem; block->event = event; block->is_running = 0; - _COUNTS_CTOR(&block->counts_ctx); + _COUNTS_CTOR(&block->counts_ctx, cycleclock()); } STATIC_INLINE void _jl_timing_block_destroy(jl_timing_block_t *block) JL_NOTSAFEPOINT { @@ -386,7 +390,7 @@ STATIC_INLINE void _jl_timing_block_destroy(jl_timing_block_t *block) JL_NOTSAFE } } - _COUNTS_DESTROY(&block->counts_ctx, block->subsystem); + _COUNTS_DESTROY(&block->counts_ctx, block->subsystem, cycleclock()); } typedef struct _jl_timing_suspend_t { From cb7d14144055b2d7aaac521a47c58ffa90867682 Mon Sep 17 00:00:00 2001 From: Prem Chintalapudi Date: Wed, 17 May 2023 15:54:18 -0400 Subject: [PATCH 031/290] Overwrite random value in root timing slot --- src/timing.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/timing.c b/src/timing.c index 7eec80f7a2fa3..e12260608e77f 100644 --- a/src/timing.c +++ b/src/timing.c @@ -52,7 +52,8 @@ void jl_print_timings(void) for (int i = 0; i < JL_TIMING_LAST; i++) { root_time -= jl_atomic_load_relaxed(jl_timing_self_counts + i); } - 
jl_atomic_store_relaxed(jl_timing_self_counts, root_time); + jl_atomic_store_relaxed(jl_timing_self_counts + JL_TIMING_ROOT, root_time); + jl_atomic_store_relaxed(jl_timing_total_counts + JL_TIMING_ROOT, total_time); fprintf(stderr, "\nJULIA TIMINGS\n"); fprintf(stderr, "%-25s, %-30s, %-30s\n", "Event", "Self Cycles (% of Total)", "Total Cycles (% of Total)"); for (int i = 0; i < JL_TIMING_LAST; i++) { From a6123889a127a3a2fb5989a49bed9b2013531113 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Thu, 18 May 2023 10:27:57 +0900 Subject: [PATCH 032/290] reflection: declare keyword arguments types for reflection methods (#49783) --- base/compiler/compiler.jl | 1 + base/compiler/types.jl | 3 +- base/reflection.jl | 88 ++++++++++++++++++-------------------- test/compiler/inference.jl | 2 +- 4 files changed, 46 insertions(+), 48 deletions(-) diff --git a/base/compiler/compiler.jl b/base/compiler/compiler.jl index 0a1b852b052f9..74814733f088d 100644 --- a/base/compiler/compiler.jl +++ b/base/compiler/compiler.jl @@ -33,6 +33,7 @@ convert(::Type{T}, x::T) where {T} = x # mostly used by compiler/methodtable.jl, but also by reflection.jl abstract type MethodTableView end +abstract type AbstractInterpreter end # essential files and libraries include("essentials.jl") diff --git a/base/compiler/types.jl b/base/compiler/types.jl index 4a4f27c9c27c2..71ec3670ad688 100644 --- a/base/compiler/types.jl +++ b/base/compiler/types.jl @@ -16,7 +16,8 @@ the following methods to satisfy the `AbstractInterpreter` API requirement: - `get_inference_cache(interp::NewInterpreter)` - return the local inference cache - `code_cache(interp::NewInterpreter)` - return the global inference cache """ -abstract type AbstractInterpreter end +:(AbstractInterpreter) + abstract type AbstractLattice end struct ArgInfo diff --git a/base/reflection.jl b/base/reflection.jl index 0ae644d5d41af..bcfc39d2bd3a8 100644 --- a/base/reflection.jl +++ 
b/base/reflection.jl @@ -1247,7 +1247,8 @@ function may_invoke_generator(method::Method, @nospecialize(atype), sparams::Sim # generator only has one method generator = method.generator isa(generator, Core.GeneratedFunctionStub) || return false - gen_mthds = _methods_by_ftype(Tuple{typeof(generator.gen), Vararg{Any}}, 1, method.primary_world) + tt = Tuple{typeof(generator.gen), Vararg{Any}} + gen_mthds = _methods_by_ftype(tt, #=lim=#1, method.primary_world) gen_mthds isa Vector || return false length(gen_mthds) == 1 || return false @@ -1308,19 +1309,20 @@ generic function and type signature. # Keyword Arguments -- `optimize=true`: controls whether additional optimizations, such as inlining, are also applied. -- `debuginfo=:default`: controls the amount of code metadata present in the output, -possible options are `:source` or `:none`. +- `optimize::Bool = true`: optional, controls whether additional optimizations, + such as inlining, are also applied. +- `debuginfo::Symbol = :default`: optional, controls the amount of code metadata present + in the output, possible options are `:source` or `:none`. # Internal Keyword Arguments This section should be considered internal, and is only for who understands Julia compiler internals. -- `world=Base.get_world_counter()`: optional, controls the world age to use when looking up methods, -use current world age if not specified. -- `interp=Core.Compiler.NativeInterpreter(world)`: optional, controls the interpreter to use, -use the native interpreter Julia uses if not specified. +- `world::UInt = Base.get_world_counter()`: optional, controls the world age to use + when looking up methods, use current world age if not specified. +- `interp::Core.Compiler.AbstractInterpreter = Core.Compiler.NativeInterpreter(world)`: + optional, controls the abstract interpreter to use, use the native interpreter if not specified. 
# Example @@ -1335,16 +1337,12 @@ julia> code_typed(+, (Float64, Float64)) ) => Float64 ``` """ -function code_typed(@nospecialize(f), @nospecialize(types=default_tt(f)); - optimize=true, - debuginfo::Symbol=:default, - world = get_world_counter(), - interp = Core.Compiler.NativeInterpreter(world)) +function code_typed(@nospecialize(f), @nospecialize(types=default_tt(f)); kwargs...) if isa(f, Core.OpaqueClosure) - return code_typed_opaque_closure(f; optimize, debuginfo, interp) + return code_typed_opaque_closure(f; kwargs...) end tt = signature_type(f, types) - return code_typed_by_type(tt; optimize, debuginfo, world, interp) + return code_typed_by_type(tt; kwargs...) end # returns argument tuple type which is supposed to be used for `code_typed` and its family; @@ -1366,10 +1364,10 @@ Similar to [`code_typed`](@ref), except the argument is a tuple type describing a full signature to query. """ function code_typed_by_type(@nospecialize(tt::Type); - optimize=true, + optimize::Bool=true, debuginfo::Symbol=:default, - world = get_world_counter(), - interp = Core.Compiler.NativeInterpreter(world)) + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) && error("code reflection cannot be used from generated functions") if @isdefined(IRShow) @@ -1381,7 +1379,7 @@ function code_typed_by_type(@nospecialize(tt::Type); throw(ArgumentError("'debuginfo' must be either :source or :none")) end tt = to_tuple_type(tt) - matches = _methods_by_ftype(tt, -1, world)::Vector + matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector asts = [] for match in matches match = match::Core.MethodMatch @@ -1398,7 +1396,7 @@ function code_typed_by_type(@nospecialize(tt::Type); end function code_typed_opaque_closure(@nospecialize(oc::Core.OpaqueClosure); - debuginfo::Symbol=:default, __...) + debuginfo::Symbol=:default, _...) 
ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions") m = oc.source if isa(m, Method) @@ -1425,14 +1423,15 @@ See also: [`code_typed`](@ref) This section should be considered internal, and is only for who understands Julia compiler internals. -- `world=Base.get_world_counter()`: optional, controls the world age to use when looking up - methods, use current world age if not specified. -- `interp=Core.Compiler.NativeInterpreter(world)`: optional, controls the interpreter to - use, use the native interpreter Julia uses if not specified. -- `optimize_until`: optional, controls the optimization passes to run. If it is a string, - it specifies the name of the pass up to which the optimizer is run. If it is an integer, - it specifies the number of passes to run. If it is `nothing` (default), all passes are - run. +- `world::UInt = Base.get_world_counter()`: optional, controls the world age to use + when looking up methods, use current world age if not specified. +- `interp::Core.Compiler.AbstractInterpreter = Core.Compiler.NativeInterpreter(world)`: + optional, controls the abstract interpreter to use, use the native interpreter if not specified. +- `optimize_until::Union{Integer,AbstractString,Nothing} = nothing`: optional, + controls the optimization passes to run. + If it is a string, it specifies the name of the pass up to which the optimizer is run. + If it is an integer, it specifies the number of passes to run. + If it is `nothing` (default), all passes are run. # Example @@ -1454,18 +1453,12 @@ julia> Base.code_ircode(+, (Float64, Int64); optimize_until = "compact 1") => Float64 ``` """ -function code_ircode( - @nospecialize(f), - @nospecialize(types = default_tt(f)); - world = get_world_counter(), - interp = Core.Compiler.NativeInterpreter(world), - optimize_until::Union{Integer,AbstractString,Nothing} = nothing, -) +function code_ircode(@nospecialize(f), @nospecialize(types = default_tt(f)); kwargs...) 
if isa(f, Core.OpaqueClosure) error("OpaqueClosure not supported") end tt = signature_type(f, types) - return code_ircode_by_type(tt; world, interp, optimize_until) + return code_ircode_by_type(tt; kwargs...) end """ @@ -1476,14 +1469,14 @@ a full signature to query. """ function code_ircode_by_type( @nospecialize(tt::Type); - world = get_world_counter(), - interp = Core.Compiler.NativeInterpreter(world), - optimize_until::Union{Integer,AbstractString,Nothing} = nothing, + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world), + optimize_until::Union{Integer,AbstractString,Nothing}=nothing, ) (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) && error("code reflection cannot be used from generated functions") tt = to_tuple_type(tt) - matches = _methods_by_ftype(tt, -1, world)::Vector + matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector asts = [] for match in matches match = match::Core.MethodMatch @@ -1538,8 +1531,8 @@ julia> Base.return_types(sum, (Union{Vector{Int},UnitRange{Int}},)) doing so will result in an error. 
""" function return_types(@nospecialize(f), @nospecialize(types=default_tt(f)); - world = get_world_counter(), - interp = Core.Compiler.NativeInterpreter(world)) + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) (ccall(:jl_is_in_pure_context, Bool, ()) || world == typemax(UInt)) && error("code reflection cannot be used from generated functions") if isa(f, Core.OpaqueClosure) @@ -1553,7 +1546,9 @@ function return_types(@nospecialize(f), @nospecialize(types=default_tt(f)); return Any[Core.Compiler.widenconst(rt)] end rts = [] - for match in _methods(f, types, -1, world)::Vector + tt = signature_type(f, types) + matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector + for match in matches match = match::Core.MethodMatch meth = func_for_method_checked(match.method, types, match.sparams) ty = Core.Compiler.typeinf_type(interp, meth, match.spec_types, match.sparams) @@ -1644,9 +1639,10 @@ function print_statement_costs(io::IO, @nospecialize(f), @nospecialize(t); kwarg end function print_statement_costs(io::IO, @nospecialize(tt::Type); - world = get_world_counter(), - interp = Core.Compiler.NativeInterpreter(world)) - matches = _methods_by_ftype(tt, -1, world)::Vector + world::UInt=get_world_counter(), + interp::Core.Compiler.AbstractInterpreter=Core.Compiler.NativeInterpreter(world)) + tt = to_tuple_type(tt) + matches = _methods_by_ftype(tt, #=lim=#-1, world)::Vector params = Core.Compiler.OptimizationParams(interp) cst = Int[] for match in matches diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl index 5987e10401bc8..b1c690207eb93 100644 --- a/test/compiler/inference.jl +++ b/test/compiler/inference.jl @@ -362,7 +362,7 @@ code_llvm(devnull, invoke_g10878, ()) # issue #10930 -@test isa(code_typed(promote,(Any,Any,Vararg{Any})), Array) +@test isa(Base.return_types(promote, (Any,Any,Vararg{Any})), Vector) find_tvar10930(sig::Type{T}) where {T<:Tuple} = 1 function find_tvar10930(arg) 
if isa(arg, Type) && arg<:Tuple From ce3909cc8ce6dd7ee3d11ef98ce7ff075a5604d6 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Thu, 18 May 2023 11:54:39 +0900 Subject: [PATCH 033/290] inference: prioritize `SlotNumber`-constraint over `MustAlias`-constraint (#49856) Currently external `AbstractInterpreter` that uses `MustAliasesLattice` can fail to propagate type constraint on `SlotNumber` in the call-site refinement, e.g. fail to infer the return type of `firstitem(::ItrList)` in the following code: ```julia struct ItrList list::Union{Tuple{},Vector{Int}} end hasitems(list) = length(list) >= 1 function firstitem(ilist::ItrList) list = ilist.list if hasitems(list) return list end error("list is empty") end ``` (xref: https://github.com/aviatesk/JET.jl/issues/509) This commit fixes it up as well as fixes the implementation of `from_interprocedural!` so that it uses the correct lattice. --- base/compiler/abstractinterpretation.jl | 60 ++++++++++++------------- test/compiler/inference.jl | 14 ++++++ 2 files changed, 44 insertions(+), 30 deletions(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 097eb7a5d098e..7de4577c1f42a 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -156,7 +156,7 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f), all_effects = Effects(all_effects; nothrow=false) end - rettype = from_interprocedural!(𝕃ₚ, rettype, sv, arginfo, conditionals) + rettype = from_interprocedural!(interp, rettype, sv, arginfo, conditionals) # Also considering inferring the compilation signature for this method, so # it is available to the compiler in case it ends up needing it. 
@@ -303,7 +303,8 @@ function find_matching_methods(𝕃::AbstractLattice, end """ - from_interprocedural!(𝕃ₚ::AbstractLattice, rt, sv::AbsIntState, arginfo::ArgInfo, maybecondinfo) -> newrt + from_interprocedural!(interp::AbstractInterpreter, rt, sv::AbsIntState, + arginfo::ArgInfo, maybecondinfo) -> newrt Converts inter-procedural return type `rt` into a local lattice element `newrt`, that is appropriate in the context of current local analysis frame `sv`, especially: @@ -322,15 +323,16 @@ In such cases `maybecondinfo` should be either of: When we deal with multiple `MethodMatch`es, it's better to precompute `maybecondinfo` by `tmerge`ing argument signature type of each method call. """ -function from_interprocedural!(𝕃ₚ::AbstractLattice, @nospecialize(rt), sv::AbsIntState, arginfo::ArgInfo, @nospecialize(maybecondinfo)) +function from_interprocedural!(interp::AbstractInterpreter, @nospecialize(rt), sv::AbsIntState, + arginfo::ArgInfo, @nospecialize(maybecondinfo)) rt = collect_limitations!(rt, sv) if isa(rt, InterMustAlias) rt = from_intermustalias(rt, arginfo) - elseif is_lattice_bool(𝕃ₚ, rt) + elseif is_lattice_bool(ipo_lattice(interp), rt) if maybecondinfo === nothing rt = widenconditional(rt) else - rt = from_interconditional(𝕃ₚ, rt, sv, arginfo, maybecondinfo) + rt = from_interconditional(typeinf_lattice(interp), rt, sv, arginfo, maybecondinfo) end end @assert !(rt isa InterConditional || rt isa InterMustAlias) "invalid lattice element returned from inter-procedural context" @@ -361,34 +363,32 @@ function from_intermustalias(rt::InterMustAlias, arginfo::ArgInfo) return widenmustalias(rt) end -function from_interconditional(𝕃ₚ::AbstractLattice, - typ, sv::AbsIntState, arginfo::ArgInfo, maybecondinfo) - @nospecialize typ maybecondinfo - has_conditional(𝕃ₚ, sv) || return widenconditional(typ) +function from_interconditional(𝕃ᵢ::AbstractLattice, @nospecialize(rt), sv::AbsIntState, + arginfo::ArgInfo, @nospecialize(maybecondinfo)) + has_conditional(𝕃ᵢ, sv) || return 
widenconditional(rt) (; fargs, argtypes) = arginfo - fargs === nothing && return widenconditional(typ) - 𝕃 = widenlattice(𝕃ₚ) + fargs === nothing && return widenconditional(rt) slot = 0 alias = nothing thentype = elsetype = Any - condval = maybe_extract_const_bool(typ) + condval = maybe_extract_const_bool(rt) for i in 1:length(fargs) # find the first argument which supports refinement, # and intersect all equivalent arguments with it argtyp = argtypes[i] if alias === nothing - if argtyp isa MustAlias - old = argtyp.fldtyp - id = argtyp.slot - elseif alias === nothing && argtyp isa Type - arg = ssa_def_slot(fargs[i], sv) - arg isa SlotNumber || continue # can't refine + arg = ssa_def_slot(fargs[i], sv) + if isa(arg, SlotNumber) && widenslotwrapper(argtyp) isa Type old = argtyp id = slot_id(arg) + elseif argtyp isa MustAlias + old = argtyp.fldtyp + id = argtyp.slot else continue # unlikely to refine end elseif argtyp isa MustAlias && issubalias(argtyp, alias) + arg = nothing old = alias.fldtyp id = alias.slot else @@ -401,32 +401,32 @@ function from_interconditional(𝕃ₚ::AbstractLattice, new_elsetype = maybecondinfo[2][i] else # otherwise compute it on the fly - cnd = conditional_argtype(typ, maybecondinfo, argtypes, i) + cnd = conditional_argtype(rt, maybecondinfo, argtypes, i) new_thentype = cnd.thentype new_elsetype = cnd.elsetype end if condval === false thentype = Bottom - elseif ⊑(𝕃, new_thentype, thentype) + elseif ⊑(𝕃ᵢ, new_thentype, thentype) thentype = new_thentype else - thentype = tmeet(𝕃, thentype, widenconst(new_thentype)) + thentype = tmeet(𝕃ᵢ, thentype, widenconst(new_thentype)) end if condval === true elsetype = Bottom - elseif ⊑(𝕃, new_elsetype, elsetype) + elseif ⊑(𝕃ᵢ, new_elsetype, elsetype) elsetype = new_elsetype else - elsetype = tmeet(𝕃, elsetype, widenconst(new_elsetype)) + elsetype = tmeet(𝕃ᵢ, elsetype, widenconst(new_elsetype)) end - if (slot > 0 || condval !== false) && ⋤(𝕃, thentype, old) + if (slot > 0 || condval !== false) && ⋤(𝕃ᵢ, 
thentype, old) slot = id - if argtyp isa MustAlias + if !(arg isa SlotNumber) && argtyp isa MustAlias alias = argtyp end - elseif (slot > 0 || condval !== true) && ⋤(𝕃, elsetype, old) + elseif (slot > 0 || condval !== true) && ⋤(𝕃ᵢ, elsetype, old) slot = id - if argtyp isa MustAlias + if !(arg isa SlotNumber) && argtyp isa MustAlias alias = argtyp end else # reset: no new useful information for this slot @@ -444,7 +444,7 @@ function from_interconditional(𝕃ₚ::AbstractLattice, end return Conditional(slot, thentype, elsetype) # record a Conditional improvement to this slot end - return widenconditional(typ) + return widenconditional(rt) end function conditional_argtype(@nospecialize(rt), @nospecialize(sig), argtypes::Vector{Any}, i::Int) @@ -1906,7 +1906,7 @@ function abstract_invoke(interp::AbstractInterpreter, (; fargs, argtypes)::ArgIn (; rt, effects, const_result, edge) = const_call_result end end - rt = from_interprocedural!(𝕃ₚ, rt, sv, arginfo, sig) + rt = from_interprocedural!(interp, rt, sv, arginfo, sig) effects = Effects(effects; nonoverlayed=!overlayed) info = InvokeCallInfo(match, const_result) edge !== nothing && add_invoke_backedge!(sv, lookupsig, edge) @@ -2053,7 +2053,7 @@ function abstract_call_opaque_closure(interp::AbstractInterpreter, effects = Effects(effects; nothrow=false) end end - rt = from_interprocedural!(𝕃ₚ, rt, sv, arginfo, match.spec_types) + rt = from_interprocedural!(interp, rt, sv, arginfo, match.spec_types) info = OpaqueClosureCallInfo(match, const_result) edge !== nothing && add_backedge!(sv, edge) return CallMeta(rt, effects, info) diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl index b1c690207eb93..0bd14f94784d0 100644 --- a/test/compiler/inference.jl +++ b/test/compiler/inference.jl @@ -2403,6 +2403,20 @@ from_interconditional_check22(::Union{Int,String}, y) = isa(y, Int) return 0 end |> only === Int +# prioritize constraints on slot objects +# https://github.com/aviatesk/JET.jl/issues/509 +struct JET509 + 
list::Union{Tuple{},Vector{Int}} +end +jet509_hasitems(list) = length(list) >= 1 +@test Base.return_types((JET509,); interp=MustAliasInterpreter()) do ilist::JET509 + list = ilist.list + if jet509_hasitems(list) + return list + end + error("list is empty") +end |> only == Vector{Int} + # === constraint # -------------- From 71115975bebfc82952309712a465e665527787d3 Mon Sep 17 00:00:00 2001 From: Elliot Saba Date: Wed, 17 May 2023 10:25:10 -0700 Subject: [PATCH 034/290] [cli] Ensure that probed `libstdc++` path is NULL-terminated It appears that we were assuming our path was initialized with zeros, but that is not a safe assumption. --- cli/loader_lib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cli/loader_lib.c b/cli/loader_lib.c index e2f615c684637..a344910478ccd 100644 --- a/cli/loader_lib.c +++ b/cli/loader_lib.c @@ -345,6 +345,8 @@ static char *libstdcxxprobe(void) free(path); return NULL; } + // Ensure that `path` is zero-terminated. + path[pathlen] = '\0'; return path; } } From 863e131f6f811d5ddd44aab689711934cdb0f47a Mon Sep 17 00:00:00 2001 From: Prem Chintalapudi Date: Thu, 18 May 2023 21:27:01 -0400 Subject: [PATCH 035/290] Time events instead of subsystems --- src/timing.c | 26 +++++++++++++++----------- src/timing.h | 17 ++++++++--------- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/src/timing.c b/src/timing.c index e12260608e77f..d120432f2f56f 100644 --- a/src/timing.c +++ b/src/timing.c @@ -26,21 +26,21 @@ static uint64_t t0; JL_DLLEXPORT _Atomic(uint64_t) jl_timing_disable_mask[(JL_TIMING_LAST + sizeof(uint64_t) * CHAR_BIT - 1) / (sizeof(uint64_t) * CHAR_BIT)]; -JL_DLLEXPORT _Atomic(uint64_t) jl_timing_self_counts[(int)JL_TIMING_LAST]; -JL_DLLEXPORT _Atomic(uint64_t) jl_timing_full_counts[(int)JL_TIMING_LAST]; +JL_DLLEXPORT _Atomic(uint64_t) jl_timing_self_counts[(int)JL_TIMING_EVENT_LAST]; +JL_DLLEXPORT _Atomic(uint64_t) jl_timing_full_counts[(int)JL_TIMING_EVENT_LAST]; // Used to as an item limit when several strings of 
metadata can // potentially be associated with a single timing zone. JL_DLLEXPORT uint32_t jl_timing_print_limit = 10; -const char *jl_timing_names[(int)JL_TIMING_LAST] = +static const char *jl_timing_names[(int)JL_TIMING_EVENT_LAST] = { #define X(name) #name, - JL_TIMING_SUBSYSTEMS + JL_TIMING_EVENTS #undef X }; -int jl_timing_names_sorted[(int)JL_TIMING_LAST]; +static int jl_timing_names_sorted[(int)JL_TIMING_EVENT_LAST]; JL_DLLEXPORT jl_timing_counter_t jl_timing_counters[JL_TIMING_COUNTER_LAST]; @@ -49,14 +49,14 @@ void jl_print_timings(void) #ifdef USE_TIMING_COUNTS uint64_t total_time = cycleclock() - t0; uint64_t root_time = total_time; - for (int i = 0; i < JL_TIMING_LAST; i++) { + for (int i = 0; i < JL_TIMING_EVENT_LAST; i++) { root_time -= jl_atomic_load_relaxed(jl_timing_self_counts + i); } jl_atomic_store_relaxed(jl_timing_self_counts + JL_TIMING_ROOT, root_time); - jl_atomic_store_relaxed(jl_timing_total_counts + JL_TIMING_ROOT, total_time); + jl_atomic_store_relaxed(jl_timing_full_counts + JL_TIMING_ROOT, total_time); fprintf(stderr, "\nJULIA TIMINGS\n"); fprintf(stderr, "%-25s, %-30s, %-30s\n", "Event", "Self Cycles (% of Total)", "Total Cycles (% of Total)"); - for (int i = 0; i < JL_TIMING_LAST; i++) { + for (int i = 0; i < JL_TIMING_EVENT_LAST; i++) { int j = jl_timing_names_sorted[i]; uint64_t self = jl_atomic_load_relaxed(jl_timing_self_counts + j); uint64_t total = jl_atomic_load_relaxed(jl_timing_full_counts + j); @@ -89,10 +89,10 @@ void jl_init_timing(void) _Static_assert((int)JL_TIMING_LAST <= (int)JL_TIMING_EVENT_LAST, "More owners than events!"); - for (int i = 0; i < JL_TIMING_LAST; i++) { + for (int i = 0; i < JL_TIMING_EVENT_LAST; i++) { jl_timing_names_sorted[i] = i; } - qsort(jl_timing_names_sorted, JL_TIMING_LAST, sizeof(int), cmp_names); + qsort(jl_timing_names_sorted, JL_TIMING_EVENT_LAST, sizeof(int), cmp_names); int i __attribute__((unused)) = 0; #ifdef USE_ITTAPI @@ -356,10 +356,14 @@ int cmp_name_idx(const void *name, const 
void *idx) { JL_DLLEXPORT int jl_timing_set_enable(const char *subsystem, uint8_t enabled) { - const int *idx = (const int *)bsearch(subsystem, jl_timing_names_sorted, JL_TIMING_LAST, sizeof(int), cmp_name_idx); + const int *idx = (const int *)bsearch(subsystem, jl_timing_names_sorted, JL_TIMING_EVENT_LAST, sizeof(int), cmp_name_idx); if (idx == NULL) return -1; int i = *idx; + // sorted names include events, so skip if we're looking at an event instead of a subsystem + // events are always at least JL_TIMING_LAST + if (i >= JL_TIMING_LAST) + return -1; uint64_t subsystem_bit = 1ul << (i % (sizeof(uint64_t) * CHAR_BIT)); if (enabled) { jl_atomic_fetch_and_relaxed(jl_timing_disable_mask + (i / (sizeof(uint64_t) * CHAR_BIT)), ~subsystem_bit); diff --git a/src/timing.h b/src/timing.h index d0a28bf830151..d9d471ed270d2 100644 --- a/src/timing.h +++ b/src/timing.h @@ -234,13 +234,13 @@ enum jl_timing_counter_types { #ifdef USE_TIMING_COUNTS #define _COUNTS_CTX_MEMBER jl_timing_counts_t counts_ctx; #define _COUNTS_CTOR(block, t) _jl_timing_counts_ctor(block, t) -#define _COUNTS_DESTROY(block, subsystem, t) _jl_timing_counts_destroy(block, subsystem, t) +#define _COUNTS_DESTROY(block, event, t) _jl_timing_counts_destroy(block, event, t) #define _COUNTS_START(block, t) _jl_timing_counts_start(block, t) #define _COUNTS_STOP(block, t) _jl_timing_counts_stop(block, t) #else #define _COUNTS_CTX_MEMBER #define _COUNTS_CTOR(block, t) -#define _COUNTS_DESTROY(block, subsystem, t) +#define _COUNTS_DESTROY(block, event, t) #define _COUNTS_START(block, t) #define _COUNTS_STOP(block, t) #endif @@ -286,8 +286,8 @@ enum jl_timing_counter_types { * Implementation: Aggregated counts back-end **/ -extern JL_DLLEXPORT _Atomic(uint64_t) jl_timing_self_counts[(int)JL_TIMING_LAST]; -extern JL_DLLEXPORT _Atomic(uint64_t) jl_timing_full_counts[(int)JL_TIMING_LAST]; +extern JL_DLLEXPORT _Atomic(uint64_t) jl_timing_self_counts[(int)JL_TIMING_EVENT_LAST]; +extern JL_DLLEXPORT _Atomic(uint64_t) 
jl_timing_full_counts[(int)JL_TIMING_EVENT_LAST]; typedef struct _jl_timing_counts_t { uint64_t total; uint64_t start; @@ -321,9 +321,9 @@ STATIC_INLINE void _jl_timing_counts_ctor(jl_timing_counts_t *block, uint64_t t) #endif } -STATIC_INLINE void _jl_timing_counts_destroy(jl_timing_counts_t *block, int subsystem, uint64_t t) JL_NOTSAFEPOINT { - jl_atomic_fetch_add_relaxed(jl_timing_self_counts + subsystem, block->total); - jl_atomic_fetch_add_relaxed(jl_timing_full_counts + subsystem, t - block->t0); +STATIC_INLINE void _jl_timing_counts_destroy(jl_timing_counts_t *block, int event, uint64_t t) JL_NOTSAFEPOINT { + jl_atomic_fetch_add_relaxed(jl_timing_self_counts + event, block->total); + jl_atomic_fetch_add_relaxed(jl_timing_full_counts + event, t - block->t0); } /** @@ -331,7 +331,6 @@ STATIC_INLINE void _jl_timing_counts_destroy(jl_timing_counts_t *block, int subs **/ extern JL_DLLEXPORT _Atomic(uint64_t) jl_timing_disable_mask[(JL_TIMING_LAST + sizeof(uint64_t) * CHAR_BIT - 1) / (sizeof(uint64_t) * CHAR_BIT)]; -extern const char *jl_timing_names[(int)JL_TIMING_LAST]; struct _jl_timing_block_t { // typedef in julia.h struct _jl_timing_block_t *prev; @@ -390,7 +389,7 @@ STATIC_INLINE void _jl_timing_block_destroy(jl_timing_block_t *block) JL_NOTSAFE } } - _COUNTS_DESTROY(&block->counts_ctx, block->subsystem, cycleclock()); + _COUNTS_DESTROY(&block->counts_ctx, block->event, cycleclock()); } typedef struct _jl_timing_suspend_t { From 1acec74b4e9876dd8635c3c1477681f1b91fb6ee Mon Sep 17 00:00:00 2001 From: Martin Holters Date: Fri, 19 May 2023 12:04:55 +0200 Subject: [PATCH 036/290] Make `apply_type_nothrow` robust against `TypeVar`s in upper bounds (#49863) For types like `Foo{S, T<:S}`, `apply_type_nothrow` could in some situations check whether the argument is a subtype of the upper bound of `T`, i.e. `S`, but subtyping against a plain `TypeVar` would fail. Instead return `false` in this case. Fixes #49785.
--- base/compiler/tfuncs.jl | 2 +- test/compiler/inference.jl | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl index f894d4ab3f4a5..20543b207895e 100644 --- a/base/compiler/tfuncs.jl +++ b/base/compiler/tfuncs.jl @@ -1665,7 +1665,7 @@ function apply_type_nothrow(𝕃::AbstractLattice, argtypes::Vector{Any}, @nospe end else istype || return false - if !(T <: u.var.ub) + if isa(u.var.ub, TypeVar) || !(T <: u.var.ub) return false end if exact ? !(u.var.lb <: T) : !(u.var.lb === Bottom) diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl index 0bd14f94784d0..4a7bdca8a8951 100644 --- a/test/compiler/inference.jl +++ b/test/compiler/inference.jl @@ -4918,3 +4918,17 @@ let src = code_typed1((Bool,Base.RefValue{String}, Base.RefValue{Any},Int,)) do end @test count(@nospecialize(x)->isa(x, Core.PhiNode), src.code) == 0 end + +struct Issue49785{S, T<:S} end +let 𝕃 = Core.Compiler.OptimizerLattice() + argtypes = Any[Core.Compiler.Const(Issue49785), + Union{Type{String},Type{Int}}, + Union{Type{String},Type{Int}}] + rt = Type{Issue49785{<:Any, Int}} + # the following should not throw + @test !Core.Compiler.apply_type_nothrow(𝕃, argtypes, rt) + @test code_typed() do + S = Union{Type{String},Type{Int}}[Int][1] + map(T -> Issue49785{S,T}, (a = S,)) + end isa Vector +end From a43ca052f2c9c29c812f02701600a9f6533507cb Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Fri, 19 May 2023 16:14:26 -0400 Subject: [PATCH 037/290] limit printing depth of argument types in stack traces (#49795) Co-authored-by: Tim Holy --- NEWS.md | 2 + base/errorshow.jl | 16 +++++-- base/show.jl | 111 +++++++++++++++++++++++++++++++++++++++++++- base/stacktraces.jl | 5 ++ test/stacktraces.jl | 54 +++++++++++++++++++++ 5 files changed, 181 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index 5c42c469e4051..ad39de5de7d61 100644 --- a/NEWS.md +++ b/NEWS.md @@ -94,6 +94,8 @@ Standard library changes 
#### REPL +* When stack traces are printed, the printed depth of types in function signatures will be limited + to avoid overly verbose output ([#49795]). #### SuiteSparse diff --git a/base/errorshow.jl b/base/errorshow.jl index 03650920aae57..176cae4b5251a 100644 --- a/base/errorshow.jl +++ b/base/errorshow.jl @@ -761,6 +761,9 @@ function show_backtrace(io::IO, t::Vector) if haskey(io, :last_shown_line_infos) empty!(io[:last_shown_line_infos]) end + # this will be set to true if types in the stacktrace are truncated + limitflag = Ref(false) + io = IOContext(io, :stacktrace_types_limited => limitflag) # t is a pre-processed backtrace (ref #12856) if t isa Vector{Any} @@ -781,12 +784,15 @@ function show_backtrace(io::IO, t::Vector) if length(filtered) > BIG_STACKTRACE_SIZE show_reduced_backtrace(IOContext(io, :backtrace => true), filtered) return + else + try invokelatest(update_stackframes_callback[], filtered) catch end + # process_backtrace returns a Vector{Tuple{Frame, Int}} + show_full_backtrace(io, filtered; print_linebreaks = stacktrace_linebreaks()) end - - try invokelatest(update_stackframes_callback[], filtered) catch end - # process_backtrace returns a Vector{Tuple{Frame, Int}} - show_full_backtrace(io, filtered; print_linebreaks = stacktrace_linebreaks()) - return + if limitflag[] + print(io, "\nSome type information was truncated. Use `show(err)` to see complete types.") + end + nothing end diff --git a/base/show.jl b/base/show.jl index 36f7df54d0008..fa8c411588102 100644 --- a/base/show.jl +++ b/base/show.jl @@ -2470,15 +2470,16 @@ function print_within_stacktrace(io, s...; color=:normal, bold=false) end end -function show_tuple_as_call(io::IO, name::Symbol, sig::Type; +function show_tuple_as_call(out::IO, name::Symbol, sig::Type; demangle=false, kwargs=nothing, argnames=nothing, qualified=false, hasfirst=true) # print a method signature tuple for a lambda definition if sig === Tuple - print(io, demangle ? 
demangle_function_name(name) : name, "(...)") + print(out, demangle ? demangle_function_name(name) : name, "(...)") return end tv = Any[] + io = IOContext(IOBuffer(), out) env_io = io while isa(sig, UnionAll) push!(tv, sig.var) @@ -2516,9 +2517,115 @@ function show_tuple_as_call(io::IO, name::Symbol, sig::Type; end print_within_stacktrace(io, ")", bold=true) show_method_params(io, tv) + str = String(take!(unwrapcontext(io)[1])) + if get(out, :limit, false)::Bool + sz = get(out, :displaysize, (typemax(Int), typemax(Int)))::Tuple{Int, Int} + str_lim = type_depth_limit(str, max(sz[2], 120)) + if sizeof(str_lim) < sizeof(str) + typelimitflag = get(out, :stacktrace_types_limited, nothing) + if typelimitflag !== nothing + typelimitflag[] = true + end + end + str = str_lim + end + print(out, str) nothing end +# limit nesting depth of `{ }` until string textwidth is less than `n` +function type_depth_limit(str::String, n::Int; maxdepth = nothing) + depth = 0 + width_at = Int[] # total textwidth at each nesting depth + depths = zeros(Int16, lastindex(str)) # depth at each character index + levelcount = Int[] # number of nodes at each level + strwid = 0 + st_0, st_backslash, st_squote, st_dquote = 0,1,2,4 + state::Int = st_0 + stateis(s) = (state & s) != 0 + quoted() = stateis(st_squote) || stateis(st_dquote) + enter(s) = (state |= s) + leave(s) = (state &= ~s) + for (i, c) in ANSIIterator(str) + if c isa ANSIDelimiter + depths[i] = depth + continue + end + + if c == '\\' && quoted() + enter(st_backslash) + elseif c == '\'' + if stateis(st_backslash) || stateis(st_dquote) + elseif stateis(st_squote) + leave(st_squote) + else + enter(st_squote) + end + elseif c == '"' + if stateis(st_backslash) || stateis(st_squote) + elseif stateis(st_dquote) + leave(st_dquote) + else + enter(st_dquote) + end + end + if c == '}' && !quoted() + depth -= 1 + end + + wid = textwidth(c) + strwid += wid + if depth > 0 + width_at[depth] += wid + end + depths[i] = depth + + if c == '{' && !quoted() 
+ depth += 1 + if depth > length(width_at) + push!(width_at, 0) + push!(levelcount, 0) + end + levelcount[depth] += 1 + end + if c != '\\' && stateis(st_backslash) + leave(st_backslash) + end + end + if maxdepth === nothing + limit_at = length(width_at) + 1 + while strwid > n + limit_at -= 1 + limit_at <= 1 && break + # add levelcount[] to include space taken by `…` + strwid = strwid - width_at[limit_at] + levelcount[limit_at] + if limit_at < length(width_at) + # take away the `…` from the previous considered level + strwid -= levelcount[limit_at+1] + end + end + else + limit_at = maxdepth + end + output = IOBuffer() + prev = 0 + for (i, c) in ANSIIterator(str) + di = depths[i] + if di < limit_at + if c isa ANSIDelimiter + write(output, c.del) + else + write(output, c) + end + end + if di > prev && di == limit_at + write(output, "…") + end + prev = di + end + return String(take!(output)) +end + function print_type_bicolor(io, type; kwargs...) str = sprint(show, type, context=io) print_type_bicolor(io, str; kwargs...) 
diff --git a/base/stacktraces.jl b/base/stacktraces.jl index ee6a2762d7818..23dadca8c8fa5 100644 --- a/base/stacktraces.jl +++ b/base/stacktraces.jl @@ -326,6 +326,11 @@ function show_spec_linfo(io::IO, frame::StackFrame) linfo, linfo.sig end if def isa Method + if get(io, :limit, :false)::Bool + if !haskey(io, :displaysize) + io = IOContext(io, :displaysize => displaysize(io)) + end + end argnames = Base.method_argnames(def) argnames = replace(argnames, :var"#unused#" => :var"") if def.nkw > 0 diff --git a/test/stacktraces.jl b/test/stacktraces.jl index 96393b124f70e..590abb90c590f 100644 --- a/test/stacktraces.jl +++ b/test/stacktraces.jl @@ -192,3 +192,57 @@ let bt end @test any(s->startswith(string(s), "f33065(x::Float32, y::Float32; b::Float64, a::String, c::"), bt) end + +struct F49231{a,b,c,d,e,f,g} end +(::F49231)(a,b,c) = error("oops") + +@testset "type_depth_limit" begin + tdl = Base.type_depth_limit + + str = repr(typeof(view([1, 2, 3], 1:2))) + @test tdl(str, 0, maxdepth = 1) == "SubArray{…}" + @test tdl(str, 0, maxdepth = 2) == "SubArray{$Int, 1, Vector{…}, Tuple{…}, true}" + @test tdl(str, 0, maxdepth = 3) == "SubArray{$Int, 1, Vector{$Int}, Tuple{UnitRange{…}}, true}" + @test tdl(str, 0, maxdepth = 4) == "SubArray{$Int, 1, Vector{$Int}, Tuple{UnitRange{$Int}}, true}" + @test tdl(str, 3) == "SubArray{…}" + @test tdl(str, 44) == "SubArray{…}" + @test tdl(str, 45) == "SubArray{$Int, 1, Vector{…}, Tuple{…}, true}" + @test tdl(str, 59) == "SubArray{$Int, 1, Vector{…}, Tuple{…}, true}" + @test tdl(str, 60) == "SubArray{$Int, 1, Vector{$Int}, Tuple{UnitRange{…}}, true}" + @test tdl(str, 100) == "SubArray{$Int, 1, Vector{$Int}, Tuple{UnitRange{$Int}}, true}" + + str = repr(Vector{V} where V<:AbstractVector{T} where T<:Real) + @test tdl(str, 0, maxdepth = 1) == "Vector{…} where {…}" + @test tdl(str, 0, maxdepth = 2) == "Vector{V} where {T<:Real, V<:AbstractVector{…}}" + @test tdl(str, 0, maxdepth = 3) == "Vector{V} where {T<:Real, V<:AbstractVector{T}}" + 
@test tdl(str, 20) == "Vector{…} where {…}" + @test tdl(str, 46) == "Vector{…} where {…}" + @test tdl(str, 47) == "Vector{V} where {T<:Real, V<:AbstractVector{T}}" + + str = "F49231{Vector,Val{('}','}')},Vector{Vector{Vector{Vector}}},Tuple{Int,Int,Int,Int,Int,Int,Int},Int,Int,Int}" + @test tdl(str, 105) == "F49231{Vector,Val{('}','}')},Vector{Vector{Vector{…}}},Tuple{Int,Int,Int,Int,Int,Int,Int},Int,Int,Int}" + @test tdl(str, 85) == "F49231{Vector,Val{…},Vector{…},Tuple{…},Int,Int,Int}" + + # Stacktrace + a = UInt8(81):UInt8(160) + b = view(a, 1:64) + c = reshape(b, (8, 8)) + d = reinterpret(reshape, Float64, c) + sqrteach(a) = [sqrt(x) for x in a] + st = try + sqrteach(d) + catch e + stacktrace(catch_backtrace()) + end + str = sprint(Base.show_backtrace, st, context = (:limit=>true, :color=>true, :displaysize=>(50,105))) + @test endswith(str, "to see complete types.") + @test contains(str, "[5] \e[0m\e[1mcollect_to!\e[22m\e[0m\e[1m(\e[22m\e[90mdest\e[39m::\e[0mVector\e[90m{…}\e[39m, \e[90mitr\e[39m::\e[0mBase.Generator\e[90m{…}\e[39m, \e[90moffs\e[39m::\e[0m$Int, \e[90mst\e[39m::\e[0mTuple\e[90m{…}\e[39m\e[0m\e[1m)\e[22m\n\e[90m") + + st = try + F49231{Vector,Val{'}'},Vector{Vector{Vector{Vector}}},Tuple{Int,Int,Int,Int,Int,Int,Int},Int,Int,Int}()(1,2,3) + catch e + stacktrace(catch_backtrace()) + end + str = sprint(Base.show_backtrace, st, context = (:limit=>true, :color=>true, :displaysize=>(50,132))) + @test contains(str, "[2] \e[0m\e[1m(::$F49231{Vector, Val{…}, Vector{…}, NTuple{…}, $Int, $Int, $Int})\e[22m\e[0m\e[1m(\e[22m\e[90ma\e[39m::\e[0m$Int, \e[90mb\e[39m::\e[0m$Int, \e[90mc\e[39m::\e[0m$Int\e[0m\e[1m)\e[22m\n\e[90m") +end From 3500ba46f2223284b7f4b83d53aa78293ec7450e Mon Sep 17 00:00:00 2001 From: Prem Chintalapudi Date: Fri, 19 May 2023 17:30:43 -0400 Subject: [PATCH 038/290] Move t0 init to jl_timing_block_start --- src/timing.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/timing.h b/src/timing.h index 
d9d471ed270d2..e2497b430ffb0 100644 --- a/src/timing.h +++ b/src/timing.h @@ -233,14 +233,14 @@ enum jl_timing_counter_types { #ifdef USE_TIMING_COUNTS #define _COUNTS_CTX_MEMBER jl_timing_counts_t counts_ctx; -#define _COUNTS_CTOR(block, t) _jl_timing_counts_ctor(block, t) +#define _COUNTS_CTOR(block) _jl_timing_counts_ctor(block) #define _COUNTS_DESTROY(block, event, t) _jl_timing_counts_destroy(block, event, t) #define _COUNTS_START(block, t) _jl_timing_counts_start(block, t) #define _COUNTS_STOP(block, t) _jl_timing_counts_stop(block, t) #else #define _COUNTS_CTX_MEMBER #define _COUNTS_CTOR(block, t) -#define _COUNTS_DESTROY(block, event, t) +#define _COUNTS_DESTROY(block, event) #define _COUNTS_START(block, t) #define _COUNTS_STOP(block, t) #endif @@ -313,9 +313,8 @@ STATIC_INLINE void _jl_timing_counts_start(jl_timing_counts_t *block, uint64_t t block->start = t; } -STATIC_INLINE void _jl_timing_counts_ctor(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { +STATIC_INLINE void _jl_timing_counts_ctor(jl_timing_counts_t *block) JL_NOTSAFEPOINT { block->total = 0; - block->t0 = t; #ifdef JL_DEBUG_BUILD block->running = 0; #endif @@ -360,6 +359,7 @@ STATIC_INLINE void jl_timing_block_start(jl_timing_block_t *block) { jl_timing_block_t **prevp = &jl_current_task->ptls->timing_stack; block->prev = *prevp; block->is_running = 1; + block->t0 = t; if (block->prev) { _COUNTS_STOP(&block->prev->counts_ctx, t); } @@ -370,7 +370,7 @@ STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int subsystem block->subsystem = subsystem; block->event = event; block->is_running = 0; - _COUNTS_CTOR(&block->counts_ctx, cycleclock()); + _COUNTS_CTOR(&block->counts_ctx); } STATIC_INLINE void _jl_timing_block_destroy(jl_timing_block_t *block) JL_NOTSAFEPOINT { From 6b2ba1dc700c8f0a7611f3a2bdf30fedc1dc4424 Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Fri, 19 May 2023 21:49:18 +0000 Subject: [PATCH 039/290] Update 
src/timing.h Co-authored-by: Cody Tapscott <84105208+topolarity@users.noreply.github.com> --- src/timing.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/timing.h b/src/timing.h index e2497b430ffb0..603f9c0a061ed 100644 --- a/src/timing.h +++ b/src/timing.h @@ -239,8 +239,8 @@ enum jl_timing_counter_types { #define _COUNTS_STOP(block, t) _jl_timing_counts_stop(block, t) #else #define _COUNTS_CTX_MEMBER -#define _COUNTS_CTOR(block, t) -#define _COUNTS_DESTROY(block, event) +#define _COUNTS_CTOR(block) +#define _COUNTS_DESTROY(block, event, t) #define _COUNTS_START(block, t) #define _COUNTS_STOP(block, t) #endif From 6d70d2afcde7f5ed33b0f2f4dac3458c5c94494d Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 19 May 2023 15:49:47 -0600 Subject: [PATCH 040/290] Attempting to add debug logs for ENQUEUING an invalid object (#49741) * Attempting to add debug logs for ENQUEUING an invalid object Check for the object's validity _before enqueuing_ so that we can hopefully give a more useful error message (which object's pointer was corrupted). 
--------- Co-authored-by: Diogo Netto --- src/gc.c | 56 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/src/gc.c b/src/gc.c index 586653f7b0b91..846ade33b271a 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1777,14 +1777,37 @@ STATIC_INLINE uintptr_t gc_read_stack(void *_addr, uintptr_t offset, return *(uintptr_t*)real_addr; } -JL_NORETURN NOINLINE void gc_assert_datatype_fail(jl_ptls_t ptls, jl_datatype_t *vt, - jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT -{ - jl_safe_printf("GC error (probable corruption) :\n"); - jl_gc_debug_print_status(); - jl_(vt); - jl_gc_debug_critical_error(); - abort(); +STATIC_INLINE void gc_assert_parent_validity(jl_value_t *parent, jl_value_t *child) JL_NOTSAFEPOINT +{ +#ifdef GC_ASSERT_PARENT_VALIDITY + jl_taggedvalue_t *child_astagged = jl_astaggedvalue(child); + jl_taggedvalue_t *child_vtag = (jl_taggedvalue_t *)(child_astagged->header & ~(uintptr_t)0xf); + uintptr_t child_vt = (uintptr_t)child_vtag; + if (child_vt == (jl_datatype_tag << 4) || + child_vt == (jl_unionall_tag << 4) || + child_vt == (jl_uniontype_tag << 4) || + child_vt == (jl_tvar_tag << 4) || + child_vt == (jl_vararg_tag << 4)) { + // Skip, since these wouldn't hit the object assert anyway + return; + } + else if (child_vt < jl_max_tags << 4) { + // Skip, since these wouldn't hit the object assert anyway + return; + } + if (__unlikely(!jl_is_datatype((jl_datatype_t *)child_vt) || ((jl_datatype_t *)child_vt)->smalltag)) { + jl_safe_printf("GC error (probable corruption)\n"); + jl_gc_debug_print_status(); + jl_safe_printf("Parent %p\n", (void *)parent); + jl_safe_printf("of type:\n"); + jl_(jl_typeof(parent)); + jl_safe_printf("While marking child at %p\n", (void *)child); + jl_safe_printf("of type:\n"); + jl_(child_vtag); + jl_gc_debug_critical_error(); + abort(); + } +#endif } // Check if `nptr` is tagged for `old + refyoung`, @@ -1884,6 +1907,7 @@ STATIC_INLINE jl_value_t *gc_mark_obj8(jl_ptls_t ptls, char 
*obj8_parent, uint8_ if (new_obj != NULL) { verify_parent2("object", obj8_parent, slot, "field(%d)", gc_slot_to_fieldidx(obj8_parent, slot, (jl_datatype_t*)jl_typeof(obj8_parent))); + gc_assert_parent_validity((jl_value_t *)obj8_parent, new_obj); if (obj8_begin + 1 != obj8_end) { gc_try_claim_and_push(mq, new_obj, &nptr); } @@ -1915,6 +1939,7 @@ STATIC_INLINE jl_value_t *gc_mark_obj16(jl_ptls_t ptls, char *obj16_parent, uint if (new_obj != NULL) { verify_parent2("object", obj16_parent, slot, "field(%d)", gc_slot_to_fieldidx(obj16_parent, slot, (jl_datatype_t*)jl_typeof(obj16_parent))); + gc_assert_parent_validity((jl_value_t *)obj16_parent, new_obj); if (obj16_begin + 1 != obj16_end) { gc_try_claim_and_push(mq, new_obj, &nptr); } @@ -1946,6 +1971,7 @@ STATIC_INLINE jl_value_t *gc_mark_obj32(jl_ptls_t ptls, char *obj32_parent, uint if (new_obj != NULL) { verify_parent2("object", obj32_parent, slot, "field(%d)", gc_slot_to_fieldidx(obj32_parent, slot, (jl_datatype_t*)jl_typeof(obj32_parent))); + gc_assert_parent_validity((jl_value_t *)obj32_parent, new_obj); if (obj32_begin + 1 != obj32_end) { gc_try_claim_and_push(mq, new_obj, &nptr); } @@ -2011,6 +2037,7 @@ STATIC_INLINE void gc_mark_objarray(jl_ptls_t ptls, jl_value_t *obj_parent, jl_v if (new_obj != NULL) { verify_parent2("obj array", obj_parent, obj_begin, "elem(%d)", gc_slot_to_arrayidx(obj_parent, obj_begin)); + gc_assert_parent_validity(obj_parent, new_obj); gc_try_claim_and_push(mq, new_obj, &nptr); gc_heap_snapshot_record_array_edge(obj_parent, &new_obj); } @@ -2084,6 +2111,7 @@ STATIC_INLINE void gc_mark_array8(jl_ptls_t ptls, jl_value_t *ary8_parent, jl_va if (new_obj != NULL) { verify_parent2("array", ary8_parent, &new_obj, "elem(%d)", gc_slot_to_arrayidx(ary8_parent, ary8_begin)); + gc_assert_parent_validity(ary8_parent, new_obj); gc_try_claim_and_push(mq, new_obj, &nptr); gc_heap_snapshot_record_array_edge(ary8_parent, &new_obj); } @@ -2158,6 +2186,7 @@ STATIC_INLINE void gc_mark_array16(jl_ptls_t 
ptls, jl_value_t *ary16_parent, jl_ if (new_obj != NULL) { verify_parent2("array", ary16_parent, &new_obj, "elem(%d)", gc_slot_to_arrayidx(ary16_parent, ary16_begin)); + gc_assert_parent_validity(ary16_parent, new_obj); gc_try_claim_and_push(mq, new_obj, &nptr); gc_heap_snapshot_record_array_edge(ary16_parent, &new_obj); } @@ -2311,12 +2340,16 @@ STATIC_INLINE void gc_mark_module_binding(jl_ptls_t ptls, jl_module_t *mb_parent if (b == (jl_binding_t *)jl_nothing) continue; verify_parent1("module", mb_parent, mb_begin, "binding_buff"); + gc_assert_parent_validity((jl_value_t *)mb_parent, (jl_value_t *)b); gc_try_claim_and_push(mq, b, &nptr); } jl_value_t *bindings = (jl_value_t *)jl_atomic_load_relaxed(&mb_parent->bindings); + gc_assert_parent_validity((jl_value_t *)mb_parent, bindings); gc_try_claim_and_push(mq, bindings, &nptr); jl_value_t *bindingkeyset = (jl_value_t *)jl_atomic_load_relaxed(&mb_parent->bindingkeyset); + gc_assert_parent_validity((jl_value_t *)mb_parent, bindingkeyset); gc_try_claim_and_push(mq, bindingkeyset, &nptr); + gc_assert_parent_validity((jl_value_t *)mb_parent, (jl_value_t *)mb_parent->parent); gc_try_claim_and_push(mq, (jl_value_t *)mb_parent->parent, &nptr); size_t nusings = mb_parent->usings.len; if (nusings > 0) { @@ -2346,7 +2379,7 @@ void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t * } for (; fl_begin < fl_end; fl_begin++) { new_obj = *fl_begin; - if (__unlikely(!new_obj)) + if (__unlikely(new_obj == NULL)) continue; if (gc_ptr_tag(new_obj, 1)) { new_obj = (jl_value_t *)gc_ptr_clear_tag(new_obj, 1); @@ -2535,11 +2568,6 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_ } return; } - else { - jl_datatype_t *vt = (jl_datatype_t *)vtag; - if (__unlikely(!jl_is_datatype(vt) || vt->smalltag)) - gc_assert_datatype_fail(ptls, vt, mq); - } jl_datatype_t *vt = (jl_datatype_t *)vtag; if (vt->name == jl_array_typename) { jl_array_t *a = (jl_array_t *)new_obj; From 
5dafc844be4ca0fc79f03b45daa8e4b141b9e6f0 Mon Sep 17 00:00:00 2001 From: N5N3 <2642243996@qq.com> Date: Sat, 20 May 2023 09:14:15 +0800 Subject: [PATCH 041/290] subtype: add a fast-path for Union parameters (#49878) For #49857 performance The union explosion is caused by the following MWE: `Type{Vector{Union{....}}} <: Type{Array{T}} where {T}` 280f9993608956f76eac30fc85e1c6ebbca4f5e6 only fixes the case of `Union{......}` without free `TypeVar`s. This fast-path makes sure the remaining cases get fixed. --- src/subtype.c | 3 +++ test/subtype.jl | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/subtype.c b/src/subtype.c index fd9bd3e8be00f..2c11bd733ec9e 100644 --- a/src/subtype.c +++ b/src/subtype.c @@ -1510,6 +1510,9 @@ static int local_forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t { int16_t oldRmore = e->Runions.more; int sub; + // fast-path for #49857 + if (obviously_in_union(y, x)) + return 1; int kindx = !jl_has_free_typevars(x); int kindy = !jl_has_free_typevars(y); if (kindx && kindy) diff --git a/test/subtype.jl b/test/subtype.jl index 4a3e55c039e94..c637fccb6552f 100644 --- a/test/subtype.jl +++ b/test/subtype.jl @@ -2547,3 +2547,6 @@ let T = Tuple{Union{Type{T}, Type{S}}, Union{Val{T}, Val{S}}, Union{Val{T}, S}} @test typeintersect(T, S) == Tuple{Type{A}, Union{Val{A}, Val{S} where S<:Union{Val, A}, Val{x} where x<:Val, Val{x} where x<:Union{Val, A}}, Val{A}} where A<:(Val{S} where S<:Val) @test typeintersect(S, T) == Tuple{Type{T}, Union{Val{T}, Val{S}}, Val{T}} where {T<:Val, S<:(Union{Val{A}, Val} where A)} end + +#issue #49857 +@test !<:(Type{Vector{Union{Base.BitInteger, Base.IEEEFloat, StridedArray, Missing, Nothing, Val{T}}}} where {T}, Type{Array{T}} where {T}) From 1ef9f3784f2a8c983946f1575af29167a9cff9e7 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Sat, 20 May 2023 13:49:20 +0900 Subject: [PATCH 042/290] [NFC] cosmetic refactor of `abstract_call_method_with_const_args` (#49889) This
commit is a collection of minor NFC (No Functional Change) modifications. Essentially, it is a cosmetic refactor, so there should be no changes in terms of compiler functionality. The specific changes include: - Making `concrete_eval_eligible` always return a value of `Symbol`, either of `:concrete_eval`, `:semi_concrete_eval` and `:none`, clarifying its return value's meaning - Splitting `abstract_call_method_with_const_args` into more granular subroutines - Rearranged the subroutines in `abstract_call_method_with_const_args` to ensure that the processing flow of the code can be followed when read from top to bottom --- base/compiler/abstractinterpretation.jl | 430 ++++++++++++------------ 1 file changed, 223 insertions(+), 207 deletions(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 7de4577c1f42a..b2dff2199c0f0 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -758,32 +758,100 @@ struct MethodCallResult end end -# - true: eligible for concrete evaluation -# - false: eligible for semi-concrete evaluation -# - nothing: not eligible for either of it +struct InvokeCall + types # ::Type + lookupsig # ::Type + InvokeCall(@nospecialize(types), @nospecialize(lookupsig)) = new(types, lookupsig) +end + +struct ConstCallResults + rt::Any + const_result::ConstResult + effects::Effects + edge::MethodInstance + ConstCallResults(@nospecialize(rt), + const_result::ConstResult, + effects::Effects, + edge::MethodInstance) = + new(rt, const_result, effects, edge) +end + +function abstract_call_method_with_const_args(interp::AbstractInterpreter, + result::MethodCallResult, @nospecialize(f), arginfo::ArgInfo, si::StmtInfo, + match::MethodMatch, sv::AbsIntState, invokecall::Union{Nothing,InvokeCall}=nothing) + if !const_prop_enabled(interp, sv, match) + return nothing + end + if bail_out_const_call(interp, result, si) + add_remark!(interp, sv, "[constprop] No more information 
to be gained") + return nothing + end + eligibility = concrete_eval_eligible(interp, f, result, arginfo, sv) + if eligibility === :concrete_eval + return concrete_eval_call(interp, f, result, arginfo, invokecall) + end + mi = maybe_get_const_prop_profitable(interp, result, f, arginfo, si, match, sv) + mi === nothing && return nothing + if is_constprop_recursed(result, mi, sv) + add_remark!(interp, sv, "[constprop] Edge cycle encountered") + return nothing + end + # try semi-concrete evaluation + if eligibility === :semi_concrete_eval + res = semi_concrete_eval_call(interp, mi, result, arginfo, sv) + if res !== nothing + return res + end + end + # try constant prop' + return const_prop_call(interp, mi, result, arginfo, sv) +end + +function const_prop_enabled(interp::AbstractInterpreter, sv::AbsIntState, match::MethodMatch) + if !InferenceParams(interp).ipo_constant_propagation + add_remark!(interp, sv, "[constprop] Disabled by parameter") + return false + end + if is_no_constprop(match.method) + add_remark!(interp, sv, "[constprop] Disabled by method parameter") + return false + end + return true +end + +function bail_out_const_call(interp::AbstractInterpreter, result::MethodCallResult, si::StmtInfo) + if is_removable_if_unused(result.effects) + if isa(result.rt, Const) || call_result_unused(si) + return true + end + end + return false +end + function concrete_eval_eligible(interp::AbstractInterpreter, @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState) - # disable all concrete-evaluation if this function call is tainted by some overlayed - # method since currently there is no direct way to execute overlayed methods if inbounds_option() === :off # Disable concrete evaluation in `--check-bounds=no` mode, since we cannot be sure # that inferred effects are accurate. 
- return nothing + return :none elseif !result.effects.noinbounds && stmt_taints_inbounds_consistency(sv) # If the current statement is @inbounds or we propagate inbounds, the call's consistency # is tainted and not consteval eligible. add_remark!(interp, sv, "[constprop] Concrete evel disabled for inbounds") - return nothing + return :none + elseif isoverlayed(method_table(interp)) && !is_nonoverlayed(result.effects) + # disable all concrete-evaluation if this function call is tainted by some overlayed + # method since currently there is no direct way to execute overlayed methods + return :none end - isoverlayed(method_table(interp)) && !is_nonoverlayed(result.effects) && return nothing if result.edge !== nothing && is_foldable(result.effects) if f !== nothing && is_all_const_arg(arginfo, #=start=#2) - return true - else - return false + return :concrete_eval + elseif !any_conditional(arginfo) + return :semi_concrete_eval end end - return nothing + return :none end is_all_const_arg(arginfo::ArgInfo, start::Int) = is_all_const_arg(arginfo.argtypes, start::Int) @@ -795,6 +863,9 @@ function is_all_const_arg(argtypes::Vector{Any}, start::Int) return true end +any_conditional(argtypes::Vector{Any}) = any(@nospecialize(x)->isa(x, Conditional), argtypes) +any_conditional(arginfo::ArgInfo) = any_conditional(arginfo.argtypes) + collect_const_args(arginfo::ArgInfo, start::Int) = collect_const_args(arginfo.argtypes, start) function collect_const_args(argtypes::Vector{Any}, start::Int) return Any[ let a = widenslotwrapper(argtypes[i]) @@ -804,128 +875,23 @@ function collect_const_args(argtypes::Vector{Any}, start::Int) end for i = start:length(argtypes) ] end -struct InvokeCall - types # ::Type - lookupsig # ::Type - InvokeCall(@nospecialize(types), @nospecialize(lookupsig)) = new(types, lookupsig) -end - function concrete_eval_call(interp::AbstractInterpreter, - @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, si::StmtInfo, - sv::AbsIntState, 
invokecall::Union{Nothing,InvokeCall}=nothing) - eligible = concrete_eval_eligible(interp, f, result, arginfo, sv) - eligible === nothing && return false - if eligible - args = collect_const_args(arginfo, #=start=#2) - if invokecall !== nothing - # this call should be `invoke`d, rewrite `args` back now - pushfirst!(args, f, invokecall.types) - f = invoke - end - world = get_world_counter(interp) - edge = result.edge::MethodInstance - value = try - Core._call_in_world_total(world, f, args...) - catch - # The evaluation threw. By :consistent-cy, we're guaranteed this would have happened at runtime - return ConstCallResults(Union{}, ConcreteResult(edge, result.effects), result.effects, edge) - end - return ConstCallResults(Const(value), ConcreteResult(edge, EFFECTS_TOTAL, value), EFFECTS_TOTAL, edge) - else # eligible for semi-concrete evaluation - return true + @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, invokecall::Union{InvokeCall,Nothing}) + args = collect_const_args(arginfo, #=start=#2) + if invokecall !== nothing + # this call should be `invoke`d, rewrite `args` back now + pushfirst!(args, f, invokecall.types) + f = invoke end -end - -any_conditional(argtypes::Vector{Any}) = any(@nospecialize(x)->isa(x, Conditional), argtypes) -any_conditional(arginfo::ArgInfo) = any_conditional(arginfo.argtypes) - -function const_prop_enabled(interp::AbstractInterpreter, sv::AbsIntState, match::MethodMatch) - if !InferenceParams(interp).ipo_constant_propagation - add_remark!(interp, sv, "[constprop] Disabled by parameter") - return false - end - if is_no_constprop(match.method) - add_remark!(interp, sv, "[constprop] Disabled by method parameter") - return false - end - return true -end - -struct ConstCallResults - rt::Any - const_result::ConstResult - effects::Effects - edge::MethodInstance - ConstCallResults(@nospecialize(rt), - const_result::ConstResult, - effects::Effects, - edge::MethodInstance) = - new(rt, const_result, effects, edge) -end - -# TODO 
implement MustAlias forwarding - -struct ConditionalArgtypes <: ForwardableArgtypes - arginfo::ArgInfo - sv::InferenceState -end - -""" - matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, argtypes::ConditionalArgtypes) - -The implementation is able to forward `Conditional` of `argtypes`, -as well as the other general extended lattice inforamtion. -""" -function matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, argtypes::ConditionalArgtypes) - (; arginfo, sv) = argtypes - (; fargs, argtypes) = arginfo - given_argtypes = Vector{Any}(undef, length(argtypes)) - def = linfo.def::Method - nargs = Int(def.nargs) - cache_argtypes, overridden_by_const = matching_cache_argtypes(𝕃, linfo) - local condargs = nothing - for i in 1:length(argtypes) - argtype = argtypes[i] - # forward `Conditional` if it conveys a constraint on any other argument - if isa(argtype, Conditional) && fargs !== nothing - cnd = argtype - slotid = find_constrained_arg(cnd, fargs, sv) - if slotid !== nothing - # using union-split signature, we may be able to narrow down `Conditional` - sigt = widenconst(slotid > nargs ? argtypes[slotid] : cache_argtypes[slotid]) - thentype = tmeet(cnd.thentype, sigt) - elsetype = tmeet(cnd.elsetype, sigt) - if thentype === Bottom && elsetype === Bottom - # we accidentally proved this method match is impossible - # TODO bail out here immediately rather than just propagating Bottom ? 
- given_argtypes[i] = Bottom - else - if condargs === nothing - condargs = Tuple{Int,Int}[] - end - push!(condargs, (slotid, i)) - given_argtypes[i] = Conditional(slotid, thentype, elsetype) - end - continue - end - end - given_argtypes[i] = widenslotwrapper(argtype) - end - if condargs !== nothing - given_argtypes = let condargs=condargs - va_process_argtypes(𝕃, given_argtypes, linfo) do isva_given_argtypes::Vector{Any}, last::Int - # invalidate `Conditional` imposed on varargs - for (slotid, i) in condargs - if slotid ≥ last && (1 ≤ i ≤ length(isva_given_argtypes)) # `Conditional` is already widened to vararg-tuple otherwise - isva_given_argtypes[i] = widenconditional(isva_given_argtypes[i]) - end - end - end - end - else - given_argtypes = va_process_argtypes(𝕃, given_argtypes, linfo) + world = get_world_counter(interp) + edge = result.edge::MethodInstance + value = try + Core._call_in_world_total(world, f, args...) + catch + # The evaluation threw. By :consistent-cy, we're guaranteed this would have happened at runtime + return ConstCallResults(Union{}, ConcreteResult(edge, result.effects), result.effects, edge) end - return pick_const_args!(𝕃, cache_argtypes, overridden_by_const, given_argtypes) + return ConstCallResults(Const(value), ConcreteResult(edge, EFFECTS_TOTAL, value), EFFECTS_TOTAL, edge) end # check if there is a cycle and duplicated inference of `mi` @@ -942,82 +908,6 @@ function is_constprop_recursed(result::MethodCallResult, mi::MethodInstance, sv: end end -function abstract_call_method_with_const_args(interp::AbstractInterpreter, - result::MethodCallResult, @nospecialize(f), arginfo::ArgInfo, si::StmtInfo, match::MethodMatch, - sv::AbsIntState, invokecall::Union{Nothing,InvokeCall}=nothing) - if !const_prop_enabled(interp, sv, match) - return nothing - end - if is_removable_if_unused(result.effects) - if isa(result.rt, Const) || call_result_unused(si) - add_remark!(interp, sv, "[constprop] No more information to be gained") - return nothing - 
end - end - res = concrete_eval_call(interp, f, result, arginfo, si, sv, invokecall) - isa(res, ConstCallResults) && return res - mi = maybe_get_const_prop_profitable(interp, result, f, arginfo, si, match, sv) - mi === nothing && return nothing - if is_constprop_recursed(result, mi, sv) - add_remark!(interp, sv, "[constprop] Edge cycle encountered") - return nothing - end - # try semi-concrete evaluation - if res::Bool && !any_conditional(arginfo) - world = frame_world(sv) - mi_cache = WorldView(code_cache(interp), world) - code = get(mi_cache, mi, nothing) - if code !== nothing - irsv = IRInterpretationState(interp, code, mi, arginfo.argtypes, world) - if irsv !== nothing - irsv.parent = sv - rt, nothrow = ir_abstract_constant_propagation(interp, irsv) - @assert !(rt isa Conditional || rt isa MustAlias) "invalid lattice element returned from irinterp" - if !(isa(rt, Type) && hasintersect(rt, Bool)) - ir = irsv.ir - # TODO (#48913) enable double inlining pass when there are any calls - # that are newly resovled by irinterp - # state = InliningState(interp) - # ir = ssa_inlining_pass!(irsv.ir, state, propagate_inbounds(irsv)) - new_effects = Effects(result.effects; nothrow) - return ConstCallResults(rt, SemiConcreteResult(mi, ir, new_effects), new_effects, mi) - end - end - end - end - # try constant prop' - inf_cache = get_inference_cache(interp) - 𝕃ᵢ = typeinf_lattice(interp) - inf_result = cache_lookup(𝕃ᵢ, mi, arginfo.argtypes, inf_cache) - if inf_result === nothing - # fresh constant prop' - argtypes = has_conditional(𝕃ᵢ, sv) ? 
ConditionalArgtypes(arginfo, sv) : SimpleArgtypes(arginfo.argtypes) - inf_result = InferenceResult(mi, argtypes, typeinf_lattice(interp)) - if !any(inf_result.overridden_by_const) - add_remark!(interp, sv, "[constprop] Could not handle constant info in matching_cache_argtypes") - return nothing - end - frame = InferenceState(inf_result, #=cache=#:local, interp) - if frame === nothing - add_remark!(interp, sv, "[constprop] Could not retrieve the source") - return nothing # this is probably a bad generated function (unsound), but just ignore it - end - frame.parent = sv - if !typeinf(interp, frame) - add_remark!(interp, sv, "[constprop] Fresh constant inference hit a cycle") - return nothing - end - @assert inf_result.result !== nothing - else - # found the cache for this constant prop' - if inf_result.result === nothing - add_remark!(interp, sv, "[constprop] Found cached constant inference in a cycle") - return nothing - end - end - return ConstCallResults(inf_result.result, ConstPropResult(inf_result), inf_result.ipo_effects, mi) -end - # if there's a possibility we could get a better result with these constant arguments # (hopefully without doing too much work), returns `MethodInstance`, or nothing otherwise function maybe_get_const_prop_profitable(interp::AbstractInterpreter, @@ -1240,6 +1130,132 @@ function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, return false # the cache isn't inlineable, so this constant-prop' will most likely be unfruitful end +function semi_concrete_eval_call(interp::AbstractInterpreter, + mi::MethodInstance, result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState) + world = frame_world(sv) + mi_cache = WorldView(code_cache(interp), world) + code = get(mi_cache, mi, nothing) + if code !== nothing + irsv = IRInterpretationState(interp, code, mi, arginfo.argtypes, world) + if irsv !== nothing + irsv.parent = sv + rt, nothrow = ir_abstract_constant_propagation(interp, irsv) + @assert !(rt isa Conditional || rt isa 
MustAlias) "invalid lattice element returned from irinterp" + if !(isa(rt, Type) && hasintersect(rt, Bool)) + ir = irsv.ir + # TODO (#48913) enable double inlining pass when there are any calls + # that are newly resovled by irinterp + # state = InliningState(interp) + # ir = ssa_inlining_pass!(irsv.ir, state, propagate_inbounds(irsv)) + new_effects = Effects(result.effects; nothrow) + return ConstCallResults(rt, SemiConcreteResult(mi, ir, new_effects), new_effects, mi) + end + end + end + return nothing +end + +function const_prop_call(interp::AbstractInterpreter, + mi::MethodInstance, result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState) + inf_cache = get_inference_cache(interp) + 𝕃ᵢ = typeinf_lattice(interp) + inf_result = cache_lookup(𝕃ᵢ, mi, arginfo.argtypes, inf_cache) + if inf_result === nothing + # fresh constant prop' + argtypes = has_conditional(𝕃ᵢ, sv) ? ConditionalArgtypes(arginfo, sv) : SimpleArgtypes(arginfo.argtypes) + inf_result = InferenceResult(mi, argtypes, typeinf_lattice(interp)) + if !any(inf_result.overridden_by_const) + add_remark!(interp, sv, "[constprop] Could not handle constant info in matching_cache_argtypes") + return nothing + end + frame = InferenceState(inf_result, #=cache=#:local, interp) + if frame === nothing + add_remark!(interp, sv, "[constprop] Could not retrieve the source") + return nothing # this is probably a bad generated function (unsound), but just ignore it + end + frame.parent = sv + if !typeinf(interp, frame) + add_remark!(interp, sv, "[constprop] Fresh constant inference hit a cycle") + return nothing + end + @assert inf_result.result !== nothing + else + # found the cache for this constant prop' + if inf_result.result === nothing + add_remark!(interp, sv, "[constprop] Found cached constant inference in a cycle") + return nothing + end + end + return ConstCallResults(inf_result.result, ConstPropResult(inf_result), inf_result.ipo_effects, mi) +end + +# TODO implement MustAlias forwarding + +struct 
ConditionalArgtypes <: ForwardableArgtypes + arginfo::ArgInfo + sv::InferenceState +end + +""" + matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, + conditional_argtypes::ConditionalArgtypes) + +The implementation is able to forward `Conditional` of `conditional_argtypes`, +as well as the other general extended lattice inforamtion. +""" +function matching_cache_argtypes(𝕃::AbstractLattice, linfo::MethodInstance, + conditional_argtypes::ConditionalArgtypes) + (; arginfo, sv) = conditional_argtypes + (; fargs, argtypes) = arginfo + given_argtypes = Vector{Any}(undef, length(argtypes)) + def = linfo.def::Method + nargs = Int(def.nargs) + cache_argtypes, overridden_by_const = matching_cache_argtypes(𝕃, linfo) + local condargs = nothing + for i in 1:length(argtypes) + argtype = argtypes[i] + # forward `Conditional` if it conveys a constraint on any other argument + if isa(argtype, Conditional) && fargs !== nothing + cnd = argtype + slotid = find_constrained_arg(cnd, fargs, sv) + if slotid !== nothing + # using union-split signature, we may be able to narrow down `Conditional` + sigt = widenconst(slotid > nargs ? argtypes[slotid] : cache_argtypes[slotid]) + thentype = tmeet(cnd.thentype, sigt) + elsetype = tmeet(cnd.elsetype, sigt) + if thentype === Bottom && elsetype === Bottom + # we accidentally proved this method match is impossible + # TODO bail out here immediately rather than just propagating Bottom ? 
+ given_argtypes[i] = Bottom + else + if condargs === nothing + condargs = Tuple{Int,Int}[] + end + push!(condargs, (slotid, i)) + given_argtypes[i] = Conditional(slotid, thentype, elsetype) + end + continue + end + end + given_argtypes[i] = widenslotwrapper(argtype) + end + if condargs !== nothing + given_argtypes = let condargs=condargs + va_process_argtypes(𝕃, given_argtypes, linfo) do isva_given_argtypes::Vector{Any}, last::Int + # invalidate `Conditional` imposed on varargs + for (slotid, i) in condargs + if slotid ≥ last && (1 ≤ i ≤ length(isva_given_argtypes)) # `Conditional` is already widened to vararg-tuple otherwise + isva_given_argtypes[i] = widenconditional(isva_given_argtypes[i]) + end + end + end + end + else + given_argtypes = va_process_argtypes(𝕃, given_argtypes, linfo) + end + return pick_const_args!(𝕃, cache_argtypes, overridden_by_const, given_argtypes) +end + # This is only for use with `Conditional`. # In general, usage of this is wrong. function ssa_def_slot(@nospecialize(arg), sv::InferenceState) From 4d3000bdacb112d657fd89715893846fc2dfb1af Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Sat, 20 May 2023 16:08:45 +0900 Subject: [PATCH 043/290] follow up #49889, pass `sv::AbsIntState` to `concrete_eval_call` (#49904) `sv` is not used by `NativeInterpreter`, but is used by external `AbstractInterpreter` like JET.jl. 
--- base/compiler/abstractinterpretation.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index b2dff2199c0f0..692a7c8c5336f 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -788,7 +788,7 @@ function abstract_call_method_with_const_args(interp::AbstractInterpreter, end eligibility = concrete_eval_eligible(interp, f, result, arginfo, sv) if eligibility === :concrete_eval - return concrete_eval_call(interp, f, result, arginfo, invokecall) + return concrete_eval_call(interp, f, result, arginfo, sv, invokecall) end mi = maybe_get_const_prop_profitable(interp, result, f, arginfo, si, match, sv) mi === nothing && return nothing @@ -876,7 +876,8 @@ function collect_const_args(argtypes::Vector{Any}, start::Int) end function concrete_eval_call(interp::AbstractInterpreter, - @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, invokecall::Union{InvokeCall,Nothing}) + @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, + sv::AbsIntState, invokecall::Union{InvokeCall,Nothing}) args = collect_const_args(arginfo, #=start=#2) if invokecall !== nothing # this call should be `invoke`d, rewrite `args` back now From d2f5bbd7cfbac902db952b465b83d242efcf6f08 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Sat, 20 May 2023 19:45:15 +0900 Subject: [PATCH 044/290] REPLCompletions: use a fixed world age for `REPLInterpreter` inference (#49880) This commit uses a fixed world age for `REPLInterpreter` inference, making `REPLInterpreter` robust against potential invalidations of `Core.Compiler` methods. It also generates code cache for `REPLInterpreter` at the fixed world age so that the time to first completion stays (almost) the same.
--- stdlib/REPL/src/REPLCompletions.jl | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl index e09e3b2aa9e6b..6ec7074f105fd 100644 --- a/stdlib/REPL/src/REPLCompletions.jl +++ b/stdlib/REPL/src/REPLCompletions.jl @@ -409,7 +409,7 @@ const REPL_INTERPRETER_CACHE = REPLInterpreterCache() function get_code_cache() # XXX Avoid storing analysis results into the cache that persists across precompilation, # as [sys|pkg]image currently doesn't support serializing externally created `CodeInstance`. - # Otherwise, `CodeInstance`s created by `REPLInterpreter``, that are much less optimized + # Otherwise, `CodeInstance`s created by `REPLInterpreter`, that are much less optimized # that those produced by `NativeInterpreter`, will leak into the native code cache, # potentially causing runtime slowdown. # (see https://github.com/JuliaLang/julia/issues/48453). @@ -524,9 +524,9 @@ function CC.concrete_eval_eligible(interp::REPLInterpreter, @nospecialize(f), result = CC.MethodCallResult(result.rt, result.edgecycle, result.edgelimited, result.edge, neweffects) end -return @invoke CC.concrete_eval_eligible(interp::CC.AbstractInterpreter, f::Any, - result::CC.MethodCallResult, arginfo::CC.ArgInfo, - sv::CC.InferenceState) + return @invoke CC.concrete_eval_eligible(interp::CC.AbstractInterpreter, f::Any, + result::CC.MethodCallResult, arginfo::CC.ArgInfo, + sv::CC.InferenceState) end function resolve_toplevel_symbols!(mod::Module, src::Core.CodeInfo) @@ -565,13 +565,28 @@ function repl_eval_ex(@nospecialize(ex), context_module::Module) interp = REPLInterpreter(result) frame = CC.InferenceState(result, src, #=cache=#:no, interp)::CC.InferenceState - CC.typeinf(interp, frame) + # NOTE Use the fixed world here to make `REPLInterpreter` robust against + # potential invalidations of `Core.Compiler` methods. 
+ Base.invoke_in_world(COMPLETION_WORLD[], CC.typeinf, interp, frame) result = frame.result.result result === Union{} && return nothing # for whatever reason, callers expect this as the Bottom and/or Top type instead return result end +# `COMPLETION_WORLD[]` will be initialized within `__init__` +# (to allow us to potentially remove REPL from the sysimage in the future). +# Note that inference from the `code_typed` call below will use the current world age +# rather than `typemax(UInt)`, since `Base.invoke_in_world` uses the current world age +# when the given world age is higher than the current one. +const COMPLETION_WORLD = Ref{UInt}(typemax(UInt)) + +# Generate code cache for `REPLInterpreter` now: +# This code cache will be available at the world of `COMPLETION_WORLD`, +# assuming no invalidation will happen before initializing REPL. +# Once REPL is loaded, `REPLInterpreter` will be resilient against future invalidations. +code_typed(CC.typeinf, (REPLInterpreter, CC.InferenceState)) + # Method completion on function call expression that look like :(max(1)) MAX_METHOD_COMPLETIONS::Int = 40 function _complete_methods(ex_org::Expr, context_module::Module, shift::Bool) @@ -1175,6 +1190,7 @@ end function __init__() Base.Experimental.register_error_hint(UndefVarError_hint, UndefVarError) + COMPLETION_WORLD[] = Base.get_world_counter() nothing end From 8e03be1eb2a1c56afaa93734ab85a73ddeeabc33 Mon Sep 17 00:00:00 2001 From: Zachary P Christensen Date: Sat, 20 May 2023 16:52:22 -0400 Subject: [PATCH 045/290] Improve `isassigned` implementation (#49827) Unless `isassigned` is called on `Array` with `Int`s, it uses a try catch, which is notoriously slow. This PR changes the default implementation of `isassigned` to coerce the indices provided to `Int`s and convert them to linear or cartesian indices, depending on the array's `IndexStyle`. This also overloads `isassigned` for many of the array types defined in Base.
Fixes: https://github.com/JuliaLang/julia/issues/44720 --- base/abstractarray.jl | 14 ------------ base/indices.jl | 1 + base/multidimensional.jl | 30 ++++++++++++++++++++++++- base/permuteddimsarray.jl | 6 +++++ base/range.jl | 2 ++ base/reshapedarray.jl | 13 +++++++++++ base/subarray.jl | 31 ++++++++++++++++++++++++++ stdlib/LinearAlgebra/src/adjtrans.jl | 2 ++ stdlib/LinearAlgebra/src/bidiag.jl | 13 +++++++++++ stdlib/LinearAlgebra/src/diagonal.jl | 10 +++++++++ stdlib/LinearAlgebra/src/hessenberg.jl | 3 +++ stdlib/LinearAlgebra/src/symmetric.jl | 9 ++++++++ stdlib/LinearAlgebra/src/triangular.jl | 9 ++++++++ stdlib/LinearAlgebra/src/tridiag.jl | 26 +++++++++++++++++++++ 14 files changed, 154 insertions(+), 15 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index cb3956eb7c6d4..76047f055d41e 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -604,20 +604,6 @@ end size_to_strides(s, d) = (s,) size_to_strides(s) = () - -function isassigned(a::AbstractArray, i::Integer...) - try - a[i...] - true - catch e - if isa(e, BoundsError) || isa(e, UndefRefError) - return false - else - rethrow() - end - end -end - function isstored(A::AbstractArray{<:Any,N}, I::Vararg{Integer,N}) where {N} @boundscheck checkbounds(A, I...) 
return true diff --git a/base/indices.jl b/base/indices.jl index a9189865048cd..15a2a2f3c0ac7 100644 --- a/base/indices.jl +++ b/base/indices.jl @@ -504,6 +504,7 @@ promote_rule(a::Type{IdentityUnitRange{T1}}, b::Type{IdentityUnitRange{T2}}) whe IndexStyle(::Type{<:LinearIndices}) = IndexLinear() axes(iter::LinearIndices) = map(axes1, iter.indices) size(iter::LinearIndices) = map(length, iter.indices) +isassigned(iter::LinearIndices, i::Int) = checkbounds(Bool, iter, i) function getindex(iter::LinearIndices, i::Int) @inline @boundscheck checkbounds(iter, i) diff --git a/base/multidimensional.jl b/base/multidimensional.jl index ce1b6c39adb43..b76c2637d44f0 100644 --- a/base/multidimensional.jl +++ b/base/multidimensional.jl @@ -344,6 +344,13 @@ module IteratorsMD Base.axes(iter::CartesianIndices{N,R}) where {N,R} = map(Base.axes1, iter.indices) Base.IndexStyle(::Type{CartesianIndices{N,R}}) where {N,R} = IndexCartesian() Base.has_offset_axes(iter::CartesianIndices) = Base.has_offset_axes(iter.indices...) + @propagate_inbounds function isassigned(iter::CartesianIndices{N,R}, I::Vararg{Int, N}) where {N,R} + for i in 1:N + isassigned(iter.indices[i], I[i]) || return false + end + return true + end + # getindex for a 0D CartesianIndices is necessary for disambiguation @propagate_inbounds function Base.getindex(iter::CartesianIndices{0,R}) where {R} CartesianIndex() @@ -1565,7 +1572,28 @@ end end isassigned(a::AbstractArray, i::CartesianIndex) = isassigned(a, Tuple(i)...) -isassigned(a::AbstractArray, i::Union{Integer, CartesianIndex}...) = isassigned(a, CartesianIndex(i)) +function isassigned(A::AbstractArray, i::Union{Integer, CartesianIndex}...) + isa(i, Tuple{Vararg{Int}}) || return isassigned(A, CartesianIndex(i...)) + @boundscheck checkbounds(Bool, A, i...) 
|| return false + S = IndexStyle(A) + ninds = length(i) + if (isa(S, IndexLinear) && ninds != 1) + return @inbounds isassigned(A, _to_linear_index(A, i...)) + elseif (!isa(S, IndexLinear) && ninds != ndims(A)) + return @inbounds isassigned(A, _to_subscript_indices(A, i...)...) + else + try + A[i...] + true + catch e + if isa(e, BoundsError) || isa(e, UndefRefError) + return false + else + rethrow() + end + end + end +end ## permutedims diff --git a/base/permuteddimsarray.jl b/base/permuteddimsarray.jl index 80685332a85dc..41c3636b40216 100644 --- a/base/permuteddimsarray.jl +++ b/base/permuteddimsarray.jl @@ -78,6 +78,12 @@ end val end +function Base.isassigned(A::PermutedDimsArray{T,N,perm,iperm}, I::Vararg{Int,N}) where {T,N,perm,iperm} + @boundscheck checkbounds(Bool, A, I...) || return false + @inbounds x = isassigned(A.parent, genperm(I, iperm)...) + x +end + @inline genperm(I::NTuple{N,Any}, perm::Dims{N}) where {N} = ntuple(d -> I[perm[d]], Val(N)) @inline genperm(I, perm::AbstractVector{Int}) = genperm(I, (perm...,)) diff --git a/base/range.jl b/base/range.jl index f7dc35703a196..f0bcc0dd20ae8 100644 --- a/base/range.jl +++ b/base/range.jl @@ -901,6 +901,8 @@ end ## indexing +isassigned(r::AbstractRange, i::Int) = firstindex(r) <= i <= lastindex(r) + _in_unit_range(v::UnitRange, val, i::Integer) = i > 0 && val <= v.stop && val >= v.start function getindex(v::UnitRange{T}, i::Integer) where T diff --git a/base/reshapedarray.jl b/base/reshapedarray.jl index 4037aff246a81..bcb47a9359392 100644 --- a/base/reshapedarray.jl +++ b/base/reshapedarray.jl @@ -226,6 +226,19 @@ end offset_if_vec(i::Integer, axs::Tuple{<:AbstractUnitRange}) = i + first(axs[1]) - 1 offset_if_vec(i::Integer, axs::Tuple) = i +@inline function isassigned(A::ReshapedArrayLF, index::Int) + @boundscheck checkbounds(Bool, A, index) || return false + @inbounds ret = isassigned(parent(A), index) + ret +end +@inline function isassigned(A::ReshapedArray{T,N}, indices::Vararg{Int, N}) where {T,N} + 
@boundscheck checkbounds(Bool, A, indices...) || return false + axp = axes(A.parent) + i = offset_if_vec(_sub2ind(size(A), indices...), axp) + I = ind2sub_rs(axp, A.mi, i) + @inbounds isassigned(A.parent, I...) +end + @inline function getindex(A::ReshapedArrayLF, index::Int) @boundscheck checkbounds(A, index) @inbounds ret = parent(A)[index] diff --git a/base/subarray.jl b/base/subarray.jl index 214a2f98afe31..f33edd82ad510 100644 --- a/base/subarray.jl +++ b/base/subarray.jl @@ -352,6 +352,37 @@ function setindex!(V::FastContiguousSubArray{<:Any, 1}, x, i::Int) V end +function isassigned(V::SubArray{T,N}, I::Vararg{Int,N}) where {T,N} + @inline + @boundscheck checkbounds(Bool, V, I...) || return false + @inbounds r = isassigned(V.parent, reindex(V.indices, I)...) + r +end +function isassigned(V::FastSubArray, i::Int) + @inline + @boundscheck checkbounds(Bool, V, i) || return false + @inbounds r = isassigned(V.parent, V.offset1 + V.stride1*i) + r +end +function isassigned(V::FastContiguousSubArray, i::Int) + @inline + @boundscheck checkbounds(Bool, V, i) || return false + @inbounds r = isassigned(V.parent, V.offset1 + i) + r +end +function isassigned(V::FastSubArray{<:Any, 1}, i::Int) + @inline + @boundscheck checkbounds(Bool, V, i) || return false + @inbounds r = isassigned(V.parent, V.offset1 + V.stride1*i) + r +end +function isassigned(V::FastContiguousSubArray{<:Any, 1}, i::Int) + @inline + @boundscheck checkbounds(Bool, V, i) || return false + @inbounds r = isassigned(V.parent, V.offset1 + i) + r +end + IndexStyle(::Type{<:FastSubArray}) = IndexLinear() IndexStyle(::Type{<:SubArray}) = IndexCartesian() diff --git a/stdlib/LinearAlgebra/src/adjtrans.jl b/stdlib/LinearAlgebra/src/adjtrans.jl index 2f5c5508e0ee3..e12cb9f9f98a9 100644 --- a/stdlib/LinearAlgebra/src/adjtrans.jl +++ b/stdlib/LinearAlgebra/src/adjtrans.jl @@ -335,6 +335,8 @@ axes(v::AdjOrTransAbsVec) = (Base.OneTo(1), axes(v.parent)...) 
axes(A::AdjOrTransAbsMat) = reverse(axes(A.parent)) IndexStyle(::Type{<:AdjOrTransAbsVec}) = IndexLinear() IndexStyle(::Type{<:AdjOrTransAbsMat}) = IndexCartesian() +@propagate_inbounds Base.isassigned(v::AdjOrTransAbsVec, i::Int) = isassigned(v.parent, i-1+first(axes(v.parent)[1])) +@propagate_inbounds Base.isassigned(v::AdjOrTransAbsMat, i::Int, j::Int) = isassigned(v.parent, j, i) @propagate_inbounds getindex(v::AdjOrTransAbsVec{T}, i::Int) where {T} = wrapperop(v)(v.parent[i-1+first(axes(v.parent)[1])])::T @propagate_inbounds getindex(A::AdjOrTransAbsMat{T}, i::Int, j::Int) where {T} = wrapperop(A)(A.parent[j, i])::T @propagate_inbounds setindex!(v::AdjOrTransAbsVec, x, i::Int) = (setindex!(v.parent, wrapperop(v)(x), i-1+first(axes(v.parent)[1])); v) diff --git a/stdlib/LinearAlgebra/src/bidiag.jl b/stdlib/LinearAlgebra/src/bidiag.jl index dd3783d67b0cf..855d05c3f94e8 100644 --- a/stdlib/LinearAlgebra/src/bidiag.jl +++ b/stdlib/LinearAlgebra/src/bidiag.jl @@ -130,6 +130,19 @@ function bidiagzero(A::Bidiagonal{<:AbstractMatrix}, i, j) end end +@inline function Base.isassigned(A::Bidiagonal, i::Int, j::Int) + @boundscheck checkbounds(Bool, A, i, j) || return false + if i == j + return @inbounds isassigned(A.dv, i) + elseif A.uplo == 'U' && (i == j - 1) + return @inbounds isassigned(A.ev, i) + elseif A.uplo == 'L' && (i == j + 1) + return @inbounds isassigned(A.ev, j) + else + return true + end +end + @inline function getindex(A::Bidiagonal{T}, i::Integer, j::Integer) where T @boundscheck checkbounds(A, i, j) if i == j diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl index b9fa98a9b12b3..1813c04c46351 100644 --- a/stdlib/LinearAlgebra/src/diagonal.jl +++ b/stdlib/LinearAlgebra/src/diagonal.jl @@ -139,6 +139,16 @@ function size(D::Diagonal,d::Integer) return d<=2 ? 
length(D.diag) : 1 end +@inline function Base.isassigned(D::Diagonal, i::Int, j::Int) + @boundscheck checkbounds(Bool, D, i, j) || return false + if i == j + @inbounds r = isassigned(D.diag, i) + else + r = true + end + r +end + @inline function getindex(D::Diagonal, i::Int, j::Int) @boundscheck checkbounds(D, i, j) if i == j diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index 75b3e121f9086..b5071b178de10 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -80,6 +80,9 @@ function Matrix{T}(H::UpperHessenberg) where T return triu!(copyto!(Matrix{T}(undef, m, n), H.data), -1) end +Base.isassigned(H::UpperHessenberg, i::Int, j::Int) = + i <= j+1 ? isassigned(H.data, i, j) : true + getindex(H::UpperHessenberg{T}, i::Integer, j::Integer) where {T} = i <= j+1 ? convert(T, H.data[i,j]) : zero(T) diff --git a/stdlib/LinearAlgebra/src/symmetric.jl b/stdlib/LinearAlgebra/src/symmetric.jl index f96ca812ea0ec..ee25a4b31db3a 100644 --- a/stdlib/LinearAlgebra/src/symmetric.jl +++ b/stdlib/LinearAlgebra/src/symmetric.jl @@ -221,6 +221,15 @@ const RealHermSymComplexSym{T<:Real,S} = Union{Hermitian{T,S}, Symmetric{T,S}, S size(A::HermOrSym, d) = size(A.data, d) size(A::HermOrSym) = size(A.data) +@inline function Base.isassigned(A::HermOrSym, i::Int, j::Int) + @boundscheck checkbounds(Bool, A, i, j) || return false + @inbounds if i == j || ((A.uplo == 'U') == (i < j)) + return isassigned(A.data, i, j) + else + return isassigned(A.data, j, i) + end +end + @inline function getindex(A::Symmetric, i::Integer, j::Integer) @boundscheck checkbounds(A, i, j) @inbounds if i == j diff --git a/stdlib/LinearAlgebra/src/triangular.jl b/stdlib/LinearAlgebra/src/triangular.jl index 1e4ba4119393d..295a46f1522a5 100644 --- a/stdlib/LinearAlgebra/src/triangular.jl +++ b/stdlib/LinearAlgebra/src/triangular.jl @@ -223,6 +223,15 @@ function full!(A::UnitUpperTriangular) B end 
+Base.isassigned(A::UnitLowerTriangular, i::Int, j::Int) = + i > j ? isassigned(A.data, i, j) : true +Base.isassigned(A::LowerTriangular, i::Int, j::Int) = + i >= j ? isassigned(A.data, i, j) : true +Base.isassigned(A::UnitUpperTriangular, i::Int, j::Int) = + i < j ? isassigned(A.data, i, j) : true +Base.isassigned(A::UpperTriangular, i::Int, j::Int) = + i <= j ? isassigned(A.data, i, j) : true + getindex(A::UnitLowerTriangular{T}, i::Integer, j::Integer) where {T} = i > j ? A.data[i,j] : ifelse(i == j, oneunit(T), zero(T)) getindex(A::LowerTriangular, i::Integer, j::Integer) = diff --git a/stdlib/LinearAlgebra/src/tridiag.jl b/stdlib/LinearAlgebra/src/tridiag.jl index 2739400bb393c..07a47d917d914 100644 --- a/stdlib/LinearAlgebra/src/tridiag.jl +++ b/stdlib/LinearAlgebra/src/tridiag.jl @@ -413,6 +413,19 @@ end det(A::SymTridiagonal; shift::Number=false) = det_usmani(A.ev, A.dv, A.ev, shift) logabsdet(A::SymTridiagonal; shift::Number=false) = logabsdet(ldlt(A; shift=shift)) +@inline function Base.isassigned(A::SymTridiagonal, i::Int, j::Int) + @boundscheck checkbounds(Bool, A, i, j) || return false + if i == j + return @inbounds isassigned(A.dv, i) + elseif i == j + 1 + return @inbounds isassigned(A.ev, j) + elseif i + 1 == j + return @inbounds isassigned(A.ev, i) + else + return true + end +end + @inline function getindex(A::SymTridiagonal{T}, i::Integer, j::Integer) where T @boundscheck checkbounds(A, i, j) if i == j @@ -604,6 +617,19 @@ function diag(M::Tridiagonal{T}, n::Integer=0) where T end end +@inline function Base.isassigned(A::Tridiagonal, i::Int, j::Int) + @boundscheck checkbounds(A, i, j) + if i == j + return @inbounds isassigned(A.d, i) + elseif i == j + 1 + return @inbounds isassigned(A.dl, j) + elseif i + 1 == j + return @inbounds isassigned(A.du, i) + else + return true + end +end + @inline function getindex(A::Tridiagonal{T}, i::Integer, j::Integer) where T @boundscheck checkbounds(A, i, j) if i == j From 046f610493df5f2eb2f14c0014ff73dccfb1080a 
Mon Sep 17 00:00:00 2001 From: Prem Chintalapudi Date: Sat, 20 May 2023 21:00:18 -0400 Subject: [PATCH 046/290] t0 is in counts_ctx --- src/timing.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/timing.h b/src/timing.h index 603f9c0a061ed..cbf32cf643526 100644 --- a/src/timing.h +++ b/src/timing.h @@ -353,13 +353,15 @@ STATIC_INLINE void jl_timing_block_start(jl_timing_block_t *block) { uint64_t t = cycleclock(); (void)t; _COUNTS_START(&block->counts_ctx, t); +#ifdef USE_TIMING_COUNTS + block->counts_ctx.t0 = t; +#endif _ITTAPI_START(block); _TRACY_START(block); jl_timing_block_t **prevp = &jl_current_task->ptls->timing_stack; block->prev = *prevp; block->is_running = 1; - block->t0 = t; if (block->prev) { _COUNTS_STOP(&block->prev->counts_ctx, t); } From 9cbee21ea5602735745fd488254545bcd76cbc93 Mon Sep 17 00:00:00 2001 From: Alexander Plavin Date: Sun, 21 May 2023 10:58:19 +0300 Subject: [PATCH 047/290] parentindices and parent of substring (#49511) * parentindices and parent of substring * upd docs * Update base/abstractarray.jl Co-authored-by: Jakob Nybo Nissen * Update test/strings/basic.jl Co-authored-by: Jakob Nybo Nissen --------- Co-authored-by: Jakob Nybo Nissen --- NEWS.md | 1 + base/abstractarray.jl | 4 +++- base/strings/substring.jl | 3 +++ base/subarray.jl | 4 +++- test/strings/basic.jl | 7 +++++-- 5 files changed, 15 insertions(+), 4 deletions(-) diff --git a/NEWS.md b/NEWS.md index ad39de5de7d61..5a518104d3770 100644 --- a/NEWS.md +++ b/NEWS.md @@ -52,6 +52,7 @@ New library features * `binomial(x, k)` now supports non-integer `x` ([#48124]). * A `CartesianIndex` is now treated as a "scalar" for broadcasting ([#47044]). * `printstyled` now supports italic output ([#45164]). 
+* `parent` and `parentindices` support `SubString`s Standard library changes ------------------------ diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 76047f055d41e..e8d2e956b00f8 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1428,7 +1428,7 @@ end """ parent(A) -Return the underlying "parent array”. This parent array of objects of types `SubArray`, `ReshapedArray` +Return the underlying parent object of the view. This parent of objects of types `SubArray`, `SubString`, `ReshapedArray` or `LinearAlgebra.Transpose` is what was passed as an argument to `view`, `reshape`, `transpose`, etc. during object creation. If the input is not a wrapped object, return the input itself. If the input is wrapped multiple times, only the outermost wrapper will be removed. @@ -1451,6 +1451,8 @@ julia> parent(V) 3 4 ``` """ +function parent end + parent(a::AbstractArray) = a ## rudimentary aliasing detection ## diff --git a/base/strings/substring.jl b/base/strings/substring.jl index 5ba08ac2f7fff..6c169624c72f5 100644 --- a/base/strings/substring.jl +++ b/base/strings/substring.jl @@ -103,6 +103,9 @@ end thisind(s::SubString{String}, i::Int) = _thisind_str(s, i) nextind(s::SubString{String}, i::Int) = _nextind_str(s, i) +parent(s::SubString) = s.string +parentindices(s::SubString) = (s.offset + 1 : thisind(s.string, s.offset + s.ncodeunits),) + function ==(a::Union{String, SubString{String}}, b::Union{String, SubString{String}}) sizeof(a) == sizeof(b) && _memcmp(a, b) == 0 end diff --git a/base/subarray.jl b/base/subarray.jl index f33edd82ad510..901410e908d1e 100644 --- a/base/subarray.jl +++ b/base/subarray.jl @@ -81,7 +81,7 @@ parentindices(V::SubArray) = V.indices """ parentindices(A) -Return the indices in the [`parent`](@ref) which correspond to the array view `A`. +Return the indices in the [`parent`](@ref) which correspond to the view `A`. 
# Examples ```jldoctest @@ -96,6 +96,8 @@ julia> parentindices(V) (1, Base.Slice(Base.OneTo(2))) ``` """ +function parentindices end + parentindices(a::AbstractArray) = map(oneto, size(a)) ## Aliasing detection diff --git a/test/strings/basic.jl b/test/strings/basic.jl index 602c38551f6d8..7151a4d4fd60a 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -176,8 +176,11 @@ end x = "abcdefg" @testset "basic unit range" begin @test SubString(x, 2:4) == "bcd" - @test view(x, 2:4) == "bcd" - @test view(x, 2:4) isa SubString + sx = view(x, 2:4) + @test sx == "bcd" + @test sx isa SubString + @test parent(sx) === x + @test parentindices(sx) == (2:4,) @test (@view x[4:end]) == "defg" @test (@view x[4:end]) isa SubString end From 049527837ed38235f64d5bcd051da3e04e1e2d8a Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Sun, 21 May 2023 04:00:10 -0400 Subject: [PATCH 048/290] lowering: avoid copying the entire module back to flisp just to get its name (#49898) --- src/ast.c | 10 +++++----- src/jlfrontend.scm | 11 +++-------- src/julia_internal.h | 3 ++- src/toplevel.c | 15 +++++++++++++-- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/ast.c b/src/ast.c index b6d88ab62dcfe..cf675efe95de3 100644 --- a/src/ast.c +++ b/src/ast.c @@ -745,7 +745,7 @@ static value_t julia_to_scm_(fl_context_t *fl_ctx, jl_value_t *v, int check_vali // GC Note: jl_fieldref(v, 0) allocates for GotoNode // but we don't need a GC root here because julia_to_list2_noalloc // shouldn't allocate in this case. 
- if (jl_typetagis(v, jl_linenumbernode_type)) { + if (jl_is_linenode(v)) { jl_value_t *file = jl_fieldref_noalloc(v,1); jl_value_t *line = jl_fieldref(v,0); value_t args = julia_to_list2_noalloc(fl_ctx, line, file, check_valid); @@ -834,7 +834,7 @@ JL_DLLEXPORT jl_value_t *jl_fl_parse(const char *text, size_t text_len, } // returns either an expression or a thunk -jl_value_t *jl_call_scm_on_ast(const char *funcname, jl_value_t *expr, jl_module_t *inmodule) +static jl_value_t *jl_call_scm_on_ast(const char *funcname, jl_value_t *expr, jl_module_t *inmodule) { jl_ast_context_t *ctx = jl_ast_ctx_enter(inmodule); fl_context_t *fl_ctx = &ctx->fl; @@ -847,8 +847,8 @@ jl_value_t *jl_call_scm_on_ast(const char *funcname, jl_value_t *expr, jl_module return result; } -static jl_value_t *jl_call_scm_on_ast_and_loc(const char *funcname, jl_value_t *expr, - jl_module_t *inmodule, const char *file, int line) +jl_value_t *jl_call_scm_on_ast_and_loc(const char *funcname, jl_value_t *expr, + jl_module_t *inmodule, const char *file, int line) { jl_ast_context_t *ctx = jl_ast_ctx_enter(inmodule); fl_context_t *fl_ctx = &ctx->fl; @@ -1011,7 +1011,7 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule // __source__ argument jl_value_t *lno = jl_array_ptr_ref(args, 1); margs[1] = lno; - if (!jl_typetagis(lno, jl_linenumbernode_type)) { + if (!jl_is_linenode(lno)) { margs[1] = jl_new_struct(jl_linenumbernode_type, jl_box_long(0), jl_nothing); } margs[2] = (jl_value_t*)inmodule; diff --git a/src/jlfrontend.scm b/src/jlfrontend.scm index f72c79f281480..aefac6d102aea 100644 --- a/src/jlfrontend.scm +++ b/src/jlfrontend.scm @@ -179,14 +179,9 @@ ;; construct default definitions of `eval` for non-bare modules ;; called by jl_eval_module_expr -(define (module-default-defs e) +(define (module-default-defs name file line) (jl-expand-to-thunk - (let* ((name (caddr e)) - (body (cadddr e)) - (loc (if (null? (cdr body)) () (cadr body))) - (loc (if (and (pair? loc) (eq? 
(car loc) 'line)) - (list loc) - '())) + (let* ((loc (if (and (eq? file 'none) (eq? line 0)) '() `((line ,line ,file)))) (x (if (eq? name 'x) 'y 'x)) (mex (if (eq? name 'mapexpr) 'map_expr 'mapexpr))) `(block @@ -202,7 +197,7 @@ (block ,@loc (call (core _call_latest) (top include) ,mex ,name ,x))))) - 'none 0)) + file line)) ; run whole frontend on a string. useful for testing. (define (fe str) diff --git a/src/julia_internal.h b/src/julia_internal.h index 49f0b19ec4209..3b9da93fcf802 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -742,7 +742,8 @@ jl_value_t *jl_interpret_toplevel_expr_in(jl_module_t *m, jl_value_t *e, jl_code_info_t *src, jl_svec_t *sparam_vals); JL_DLLEXPORT int jl_is_toplevel_only_expr(jl_value_t *e) JL_NOTSAFEPOINT; -jl_value_t *jl_call_scm_on_ast(const char *funcname, jl_value_t *expr, jl_module_t *inmodule); +jl_value_t *jl_call_scm_on_ast_and_loc(const char *funcname, jl_value_t *expr, + jl_module_t *inmodule, const char *file, int line); jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t world); diff --git a/src/toplevel.c b/src/toplevel.c index 8f148727e0249..5daf27043e938 100644 --- a/src/toplevel.c +++ b/src/toplevel.c @@ -185,17 +185,28 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex size_t last_age = ct->world_age; // add standard imports unless baremodule + jl_array_t *exprs = ((jl_expr_t*)jl_exprarg(ex, 2))->args; + int lineno = 0; + const char *filename = "none"; + if (jl_array_len(exprs) > 0) { + jl_value_t *lineex = jl_array_ptr_ref(exprs, 0); + if (jl_is_linenode(lineex)) { + lineno = jl_linenode_line(lineex); + jl_value_t *file = jl_linenode_file(lineex); + if (jl_is_symbol(file)) + filename = jl_symbol_name((jl_sym_t*)file); + } + } if (std_imports) { if (jl_base_module != NULL) { jl_add_standard_imports(newm); } // add `eval` function - form = jl_call_scm_on_ast("module-default-defs", (jl_value_t*)ex, newm); + form = 
jl_call_scm_on_ast_and_loc("module-default-defs", (jl_value_t*)name, newm, filename, lineno); jl_toplevel_eval_flex(newm, form, 0, 1); form = NULL; } - jl_array_t *exprs = ((jl_expr_t*)jl_exprarg(ex, 2))->args; for (int i = 0; i < jl_array_len(exprs); i++) { // process toplevel form ct->world_age = jl_atomic_load_acquire(&jl_world_counter); From a6ad9ea099fb9557e53186e5974a0d68395602a2 Mon Sep 17 00:00:00 2001 From: Cody Tapscott <84105208+topolarity@users.noreply.github.com> Date: Sun, 21 May 2023 04:01:27 -0400 Subject: [PATCH 049/290] Fix jl_timing_show_method_instance for top-level thunks (#49862) This was causing invalid pointer dereferences when the method instance had no backing method. --- src/timing.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/timing.c b/src/timing.c index eca29d5191c07..a1e599b2efaa4 100644 --- a/src/timing.c +++ b/src/timing.c @@ -228,8 +228,13 @@ JL_DLLEXPORT void jl_timing_show_location(const char *file, int line, jl_module_ JL_DLLEXPORT void jl_timing_show_method_instance(jl_method_instance_t *mi, jl_timing_block_t *cur_block) { jl_timing_show_func_sig(mi->specTypes, cur_block); - jl_method_t *def = mi->def.method; - jl_timing_show_location(jl_symbol_name(def->file), def->line, def->module, cur_block); + if (jl_is_method(mi->def.value)) { + jl_method_t *def = mi->def.method; + jl_timing_show_location(jl_symbol_name(def->file), def->line, def->module, cur_block); + } else { + jl_timing_printf(cur_block, " in %s", + jl_symbol_name(mi->def.module->name)); + } } JL_DLLEXPORT void jl_timing_show_method(jl_method_t *method, jl_timing_block_t *cur_block) From e4a563a3d56f633a2b2b8704bad0e2ad8d09cc40 Mon Sep 17 00:00:00 2001 From: Tim Stahlhut Date: Sat, 25 Feb 2023 11:34:49 -0500 Subject: [PATCH 050/290] deps/libgit2: Add LIBSSH2 paths --- deps/libgit2.mk | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/deps/libgit2.mk b/deps/libgit2.mk index 9bd7bd555d89d..014fdc0108f7c 100644 --- a/deps/libgit2.mk 
+++ b/deps/libgit2.mk @@ -16,6 +16,11 @@ endif LIBGIT2_OPTS := $(CMAKE_COMMON) -DCMAKE_BUILD_TYPE=Release -DUSE_THREADS=ON -DUSE_BUNDLED_ZLIB=ON -DUSE_SSH=ON -DBUILD_CLI=OFF ifeq ($(OS),WINNT) LIBGIT2_OPTS += -DWIN32=ON -DMINGW=ON +ifeq ($(USE_SYSTEM_LIBSSH2), 0) +LIBGIT2_OPTS += -DLIBSSH2_LIBRARIES=libssh2.dll +LIBGIT2_OPTS += -DLIBSSH2_LIBRARY_DIRS=$(build_prefix)/lib +LIBGIT2_OPTS += -DLIBSSH2_INCLUDE_DIRS=$(build_prefix)/include +endif # USE_SYSTEM_LIBSSH2=0 ifneq ($(ARCH),x86_64) ifneq ($(USECLANG),1) LIBGIT2_OPTS += -DCMAKE_C_FLAGS="-mincoming-stack-boundary=2" From 3b84b08eb40013750464235f54b5f209933f185f Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Mon, 22 May 2023 06:50:56 -0400 Subject: [PATCH 051/290] xref and doctest for regexmatch and occursin (#49911) --- base/regex.jl | 2 +- base/strings/search.jl | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/base/regex.jl b/base/regex.jl index 400784e1b27d7..c8d66265e0784 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -167,7 +167,7 @@ abstract type AbstractMatch end """ RegexMatch <: AbstractMatch -A type representing a single match to a `Regex` found in a string. +A type representing a single match to a [`Regex`](@ref) found in a string. Typically created from the [`match`](@ref) function. The `match` field stores the substring of the entire matched string. diff --git a/base/strings/search.jl b/base/strings/search.jl index 1bb4936661c51..1a3085e084ccd 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -709,6 +709,17 @@ The returned function is of type `Base.Fix2{typeof(occursin)}`. !!! compat "Julia 1.6" This method requires Julia 1.6 or later. 
+ +# Examples +```jldoctest +julia> search_f = occursin("JuliaLang is a programming language"); + +julia> search_f("JuliaLang") +true + +julia> search_f("Python") +false +``` """ occursin(haystack) = Base.Fix2(occursin, haystack) From 672f609f97c8fb51ad7e8c7178b57c23972e2374 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Mon, 22 May 2023 07:06:39 -0400 Subject: [PATCH 052/290] Two simple examples for Threads (#49704) --- base/threadingconstructs.jl | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/base/threadingconstructs.jl b/base/threadingconstructs.jl index d150fd3ea1af4..714a7054313d0 100644 --- a/base/threadingconstructs.jl +++ b/base/threadingconstructs.jl @@ -8,6 +8,20 @@ export threadid, nthreads, @threads, @spawn, Get the ID number of the current thread of execution. The master thread has ID `1`. + +# Examples +```julia-repl +julia> Threads.threadid() +1 + +julia> Threads.@threads for i in 1:4 + println(Threads.threadid()) + end +4 +2 +5 +4 +``` """ threadid() = Int(ccall(:jl_threadid, Int16, ())+1) @@ -352,6 +366,17 @@ the variable's value in the current task. !!! compat "Julia 1.9" A threadpool may be specified as of Julia 1.9. + +# Examples +```julia-repl +julia> t() = println("Hello from ", Threads.threadid()); + +julia> tasks = fetch.([Threads.@spawn t() for i in 1:4]); +Hello from 1 +Hello from 1 +Hello from 3 +Hello from 4 +``` """ macro spawn(args...) tp = :default From e02d3ba719a1102e7f481d763a94887dc473b144 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 22 May 2023 11:21:03 -0400 Subject: [PATCH 053/290] jltypes: always run parameter normalization (#49845) This simplifies the types, which may help subtyping or other similar lookup code any time this is later used as a parameter, so it is probably worthwhile to do. This is a followup to #49820, where we reorganized the code to make this more straightforward.
--- src/builtins.c | 6 ++-- src/gf.c | 4 +-- src/jltypes.c | 65 ++++++++++++++++++++++++-------------------- src/julia_internal.h | 2 +- src/subtype.c | 6 ++-- 5 files changed, 45 insertions(+), 38 deletions(-) diff --git a/src/builtins.c b/src/builtins.c index a6c904c851c95..b664b8d73710f 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -1363,11 +1363,11 @@ JL_CALLABLE(jl_f_apply_type) jl_vararg_t *vm = (jl_vararg_t*)args[0]; if (!vm->T) { JL_NARGS(apply_type, 2, 3); - return (jl_value_t*)jl_wrap_vararg(args[1], nargs == 3 ? args[2] : NULL); + return (jl_value_t*)jl_wrap_vararg(args[1], nargs == 3 ? args[2] : NULL, 1); } else if (!vm->N) { JL_NARGS(apply_type, 2, 2); - return (jl_value_t*)jl_wrap_vararg(vm->T, args[1]); + return (jl_value_t*)jl_wrap_vararg(vm->T, args[1], 1); } } else if (jl_is_unionall(args[0])) { @@ -2060,7 +2060,7 @@ void jl_init_primitives(void) JL_GC_DISABLED add_builtin("Tuple", (jl_value_t*)jl_anytuple_type); add_builtin("TypeofVararg", (jl_value_t*)jl_vararg_type); add_builtin("SimpleVector", (jl_value_t*)jl_simplevector_type); - add_builtin("Vararg", (jl_value_t*)jl_wrap_vararg(NULL, NULL)); + add_builtin("Vararg", (jl_value_t*)jl_wrap_vararg(NULL, NULL, 0)); add_builtin("Module", (jl_value_t*)jl_module_type); add_builtin("MethodTable", (jl_value_t*)jl_methtable_type); diff --git a/src/gf.c b/src/gf.c index 8bfbad4b0f7ca..b49fc32bf4e0b 100644 --- a/src/gf.c +++ b/src/gf.c @@ -735,7 +735,7 @@ static jl_value_t *inst_varargp_in_env(jl_value_t *decl, jl_svec_t *sparams) vm = T_has_tv ? 
jl_type_unionall(v, T) : T; if (N_has_tv) N = NULL; - vm = (jl_value_t*)jl_wrap_vararg(vm, N); // this cannot throw for these inputs + vm = (jl_value_t*)jl_wrap_vararg(vm, N, 1); // this cannot throw for these inputs } sp++; decl = ((jl_unionall_t*)decl)->body; @@ -984,7 +984,7 @@ static void jl_compilation_sig( // avoid Vararg{Type{Type{...}}} if (jl_is_type_type(type_i) && jl_is_type_type(jl_tparam0(type_i))) type_i = (jl_value_t*)jl_type_type; - type_i = (jl_value_t*)jl_wrap_vararg(type_i, (jl_value_t*)NULL); // this cannot throw for these inputs + type_i = (jl_value_t*)jl_wrap_vararg(type_i, (jl_value_t*)NULL, 1); // this cannot throw for these inputs } else { type_i = inst_varargp_in_env(decl, sparams); diff --git a/src/jltypes.c b/src/jltypes.c index 5fc98194775b5..59889c9b5a740 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -847,14 +847,14 @@ JL_DLLEXPORT jl_value_t *jl_type_unionall(jl_tvar_t *v, jl_value_t *body) if (T_has_tv) { jl_value_t *wrapped = jl_type_unionall(v, vm->T); JL_GC_PUSH1(&wrapped); - wrapped = (jl_value_t*)jl_wrap_vararg(wrapped, vm->N); + wrapped = (jl_value_t*)jl_wrap_vararg(wrapped, vm->N, 1); JL_GC_POP(); return wrapped; } else { assert(N_has_tv); assert(vm->N == (jl_value_t*)v); - return (jl_value_t*)jl_wrap_vararg(vm->T, NULL); + return (jl_value_t*)jl_wrap_vararg(vm->T, NULL, 1); } } if (!jl_is_type(body) && !jl_is_typevar(body)) @@ -1889,7 +1889,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value } // if some normalization might be needed, do that now // it is probably okay to mutate iparams, and we only store globally rooted objects here - if (check && cacheable) { + if (check) { size_t i; for (i = 0; i < ntp; i++) { jl_value_t *pi = iparams[i]; @@ -1898,8 +1898,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value if (jl_is_datatype(pi)) continue; if (jl_is_vararg(pi)) - // This would require some special handling, but is not needed - // at the moment (and 
might be better handled in jl_wrap_vararg instead). + // This is already handled in jl_wrap_vararg instead continue; if (!cacheable && jl_has_free_typevars(pi)) continue; @@ -2327,7 +2326,7 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t N = inst_type_w_(v->N, env, stack, check); } if (T != v->T || N != v->N) { - t = (jl_value_t*)jl_wrap_vararg(T, N); + t = (jl_value_t*)jl_wrap_vararg(T, N, check); } JL_GC_POP(); return t; @@ -2400,36 +2399,44 @@ jl_datatype_t *jl_wrap_Type(jl_value_t *t) return (jl_datatype_t*)jl_instantiate_unionall(jl_type_type, t); } -jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n) +jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n, int check) { - if (n) { - if (jl_is_typevar(n) || jl_is_uniontype(jl_unwrap_unionall(n))) { - // TODO: this is disabled due to #39698; it is also inconsistent - // with other similar checks, where we usually only check substituted - // values and not the bounds of variables. - /* - jl_tvar_t *N = (jl_tvar_t*)n; - if (!(N->lb == jl_bottom_type && N->ub == (jl_value_t*)jl_any_type)) - jl_error("TypeVar in Vararg length must have bounds Union{} and Any"); - */ - } - else if (!jl_is_long(n)) { - jl_type_error_rt("Vararg", "count", (jl_value_t*)jl_long_type, n); - } - else if (jl_unbox_long(n) < 0) { - jl_errorf("Vararg length is negative: %zd", jl_unbox_long(n)); + jl_task_t *ct = jl_current_task; + JL_GC_PUSH1(&t); + if (check) { + if (n) { + if (jl_is_typevar(n) || jl_is_uniontype(jl_unwrap_unionall(n))) { + // TODO: this is disabled due to #39698; it is also inconsistent + // with other similar checks, where we usually only check substituted + // values and not the bounds of variables. 
+ /* + jl_tvar_t *N = (jl_tvar_t*)n; + if (!(N->lb == jl_bottom_type && N->ub == (jl_value_t*)jl_any_type)) + jl_error("TypeVar in Vararg length must have bounds Union{} and Any"); + */ + } + else if (!jl_is_long(n)) { + jl_type_error_rt("Vararg", "count", (jl_value_t*)jl_long_type, n); + } + else if (jl_unbox_long(n) < 0) { + jl_errorf("Vararg length is negative: %zd", jl_unbox_long(n)); + } } - } - if (t) { - if (!jl_valid_type_param(t)) { - jl_type_error_rt("Vararg", "type", (jl_value_t*)jl_type_type, t); + if (t) { + if (!jl_valid_type_param(t)) { + jl_type_error_rt("Vararg", "type", (jl_value_t*)jl_type_type, t); + } + t = normalize_unionalls(t); + jl_value_t *tw = extract_wrapper(t); + if (tw && t != tw && jl_types_equal(t, tw)) + t = tw; } } - jl_task_t *ct = jl_current_task; jl_vararg_t *vm = (jl_vararg_t *)jl_gc_alloc(ct->ptls, sizeof(jl_vararg_t), jl_vararg_type); jl_set_typetagof(vm, jl_vararg_tag, 0); vm->T = t; vm->N = n; + JL_GC_POP(); return vm; } @@ -2712,7 +2719,7 @@ void jl_init_types(void) JL_GC_DISABLED // It seems like we probably usually end up needing the box for kinds (often used in an Any context), so force it to exist jl_vararg_type->name->mayinlinealloc = 0; - jl_svec_t *anytuple_params = jl_svec(1, jl_wrap_vararg((jl_value_t*)jl_any_type, (jl_value_t*)NULL)); + jl_svec_t *anytuple_params = jl_svec(1, jl_wrap_vararg((jl_value_t*)jl_any_type, (jl_value_t*)NULL, 0)); jl_anytuple_type = jl_new_datatype(jl_symbol("Tuple"), core, jl_any_type, anytuple_params, jl_emptysvec, anytuple_params, jl_emptysvec, 0, 0, 0); jl_tuple_typename = jl_anytuple_type->name; diff --git a/src/julia_internal.h b/src/julia_internal.h index 3b9da93fcf802..a43458f4c5a6e 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -712,7 +712,7 @@ jl_datatype_t *jl_new_abstracttype(jl_value_t *name, jl_module_t *module, jl_datatype_t *jl_new_uninitialized_datatype(void); void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable); JL_DLLEXPORT jl_datatype_t 
*jl_wrap_Type(jl_value_t *t); // x -> Type{x} -jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n); +jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n, int check); void jl_reinstantiate_inner_types(jl_datatype_t *t); jl_datatype_t *jl_lookup_cache_type_(jl_datatype_t *type); void jl_cache_type_(jl_datatype_t *type); diff --git a/src/subtype.c b/src/subtype.c index 2c11bd733ec9e..078bcf99e574c 100644 --- a/src/subtype.c +++ b/src/subtype.c @@ -953,7 +953,7 @@ static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8 if (R && ans && e->envidx < e->envsz) { jl_value_t *val; if (vb.intvalued && vb.lb == (jl_value_t*)jl_any_type) - val = (jl_value_t*)jl_wrap_vararg(NULL, NULL); // special token result that represents N::Int in the envout + val = (jl_value_t*)jl_wrap_vararg(NULL, NULL, 0); // special token result that represents N::Int in the envout else if (!vb.occurs_inv && vb.lb != jl_bottom_type) val = is_leaf_bound(vb.lb) ? vb.lb : (jl_value_t*)jl_new_typevar(u->var->name, jl_bottom_type, vb.lb); else if (vb.lb == vb.ub) @@ -3092,7 +3092,7 @@ static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, ssize_t ii = (jl_value_t*)vmy; else { JL_GC_PUSH1(&ii); - ii = (jl_value_t*)jl_wrap_vararg(ii, NULL); + ii = (jl_value_t*)jl_wrap_vararg(ii, NULL, 1); JL_GC_POP(); } return ii; @@ -3133,7 +3133,7 @@ static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, ssize_t else if (yp2 && obviously_egal(yp1, ii) && obviously_egal(yp2, i2)) ii = (jl_value_t*)vmy; else - ii = (jl_value_t*)jl_wrap_vararg(ii, i2); + ii = (jl_value_t*)jl_wrap_vararg(ii, i2, 1); JL_GC_POP(); return ii; } From 944b28c9ec1f1629d0d9116b1dfc5cbc29002249 Mon Sep 17 00:00:00 2001 From: Diogo Netto <61364108+d-netto@users.noreply.github.com> Date: Mon, 22 May 2023 16:29:39 -0300 Subject: [PATCH 054/290] show mark-queue on GC critical error (#49902) Re-adds the capability of showing the mark-queue on a GC critical error. 
--- src/gc.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/gc.c b/src/gc.c index 846ade33b271a..fbfb5f6f2573b 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1871,6 +1871,28 @@ STATIC_INLINE jl_gc_chunk_t gc_chunkqueue_pop(jl_gc_markqueue_t *mq) JL_NOTSAFEP return c; } +// Dump mark queue on critical error +JL_NORETURN NOINLINE void gc_dump_queue_and_abort(jl_ptls_t ptls, jl_datatype_t *vt) JL_NOTSAFEPOINT +{ + jl_safe_printf("GC error (probable corruption)\n"); + jl_gc_debug_print_status(); + jl_(vt); + jl_gc_debug_critical_error(); + if (jl_n_gcthreads == 0) { + jl_safe_printf("\n"); + jl_value_t *new_obj; + jl_gc_markqueue_t *mq = &ptls->mark_queue; + jl_safe_printf("thread %d ptr queue:\n", ptls->tid); + jl_safe_printf("~~~~~~~~~~ ptr queue top ~~~~~~~~~~\n"); + while ((new_obj = gc_ptr_queue_steal_from(mq)) != NULL) { + jl_(new_obj); + jl_safe_printf("==========\n"); + } + jl_safe_printf("~~~~~~~~~~ ptr queue bottom ~~~~~~~~~~\n"); + } + abort(); +} + // Steal chunk from `mq2` STATIC_INLINE jl_gc_chunk_t gc_chunkqueue_steal_from(jl_gc_markqueue_t *mq2) JL_NOTSAFEPOINT { @@ -2568,6 +2590,11 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_ } return; } + else { + jl_datatype_t *vt = (jl_datatype_t *)vtag; + if (__unlikely(!jl_is_datatype(vt) || vt->smalltag)) + gc_dump_queue_and_abort(ptls, vt); + } jl_datatype_t *vt = (jl_datatype_t *)vtag; if (vt->name == jl_array_typename) { jl_array_t *a = (jl_array_t *)new_obj; From 22551a2fba93c6289be1764d7bd640739a7dd582 Mon Sep 17 00:00:00 2001 From: Stephan Hilb Date: Tue, 23 May 2023 13:15:31 +0200 Subject: [PATCH 055/290] allow specializing `Base.hash` for enum types without overwriting method (#49777) Previously `@enum` defined `Base.hash(::MyEnumType, ::UInt)` on the user-defined enum type `MyEnumType`. 
When the user wants to specialize the hash function for his own enum type he will define exactly that method signature again which overwrites it and leads to the warning WARNING: Method definition hash(TestPackage.MyEnumType, UInt64) in module TestPackage at Enums.jl:210 overwritten at [...] ** incremental compilation may be fatally broken for this module ** This commit changes `@enum` so that an internal method is used instead which is called through a fallback `Base.hash(::Enum, ::UInt)`. --- base/Enums.jl | 16 ++++++++++++++-- test/enums.jl | 5 +++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/base/Enums.jl b/base/Enums.jl index 027677b432f37..2c18dbca72fcd 100644 --- a/base/Enums.jl +++ b/base/Enums.jl @@ -21,6 +21,14 @@ Base.cconvert(::Type{T}, x::Enum{T2}) where {T<:Integer,T2<:Integer} = T(x)::T Base.write(io::IO, x::Enum{T}) where {T<:Integer} = write(io, T(x)) Base.read(io::IO, ::Type{T}) where {T<:Enum} = T(read(io, basetype(T))) +""" + _enum_hash(x::Enum, h::UInt) + +Compute hash for an enum value `x`. This internal method will be specialized +for every enum type created through [`@enum`](@ref). +""" +_enum_hash(x::Enum, h::UInt) = hash(x, h) +Base.hash(x::Enum, h::UInt) = _enum_hash(x, h) Base.isless(x::T, y::T) where {T<:Enum} = isless(basetype(T)(x), basetype(T)(y)) Base.Symbol(x::Enum) = namemap(typeof(x))[Integer(x)]::Symbol @@ -206,8 +214,12 @@ macro enum(T::Union{Symbol,Expr}, syms...) Enums.namemap(::Type{$(esc(typename))}) = $(esc(namemap)) Base.typemin(x::Type{$(esc(typename))}) = $(esc(typename))($lo) Base.typemax(x::Type{$(esc(typename))}) = $(esc(typename))($hi) - let enum_hash = hash($(esc(typename))) - Base.hash(x::$(esc(typename)), h::UInt) = hash(enum_hash, hash(Integer(x), h)) + let type_hash = hash($(esc(typename))) + # Use internal `_enum_hash` to allow users to specialize + # `Base.hash` for their own enum types without overwriting the + # method we would define here. This avoids a warning for + # precompilation. 
+ Enums._enum_hash(x::$(esc(typename)), h::UInt) = hash(type_hash, hash(Integer(x), h)) end let insts = (Any[ $(esc(typename))(v) for v in $values ]...,) Base.instances(::Type{$(esc(typename))}) = insts diff --git a/test/enums.jl b/test/enums.jl index c7e3e3bf2abdb..757aa26a061be 100644 --- a/test/enums.jl +++ b/test/enums.jl @@ -179,6 +179,11 @@ end @enum HashEnum2 Enum2_a=1 @test hash(Enum1_a) != hash(Enum2_a) +# PR #49777: Check that `Base.hash` can be specialized by the user without +# overwriting a method definition. +@enum HashEnum3 Enum3_a=1 +@test which(hash, (HashEnum3, UInt)).sig != Tuple{typeof(hash), HashEnum3, UInt64} + @test (Vector{Fruit}(undef, 3) .= apple) == [apple, apple, apple] # long, discongruous From 1143b8fb57f1418f51c398794742226a4380d53a Mon Sep 17 00:00:00 2001 From: Oscar Smith Date: Tue, 23 May 2023 09:04:40 -0400 Subject: [PATCH 056/290] improve error message when calling argextype on invalid ir (#49923) --- base/compiler/optimize.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index 71eeb15d53eb0..32386691554ff 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -372,7 +372,9 @@ function argextype( elseif x.head === :copyast return argextype(x.args[1], src, sptypes, slottypes) end - @assert false "argextype only works on argument-position values" + Core.println("argextype called on Expr with head ", x.head, + " which is not valid for IR in argument-position.") + @assert false elseif isa(x, SlotNumber) return slottypes[x.id] elseif isa(x, TypedSlot) From c470dc369865cf2f90ad34e25e110b72dd7fbd87 Mon Sep 17 00:00:00 2001 From: Kiran Date: Tue, 23 May 2023 09:26:41 -0400 Subject: [PATCH 057/290] Lock finalizers lists at exit (#49931) --- src/gc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/gc.c b/src/gc.c index fbfb5f6f2573b..f3a57fffe09a8 100644 --- a/src/gc.c +++ b/src/gc.c @@ -526,12 +526,17 @@ void 
jl_gc_run_all_finalizers(jl_task_t *ct) jl_ptls_t* gc_all_tls_states; gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + // this is called from `jl_atexit_hook`; threads could still be running + // so we have to guard the finalizers' lists + JL_LOCK_NOGC(&finalizers_lock); schedule_all_finalizers(&finalizer_list_marked); for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; if (ptls2 != NULL) schedule_all_finalizers(&ptls2->finalizers); } + // unlock here because `run_finalizers` locks this + JL_UNLOCK_NOGC(&finalizers_lock); gc_n_threads = 0; gc_all_tls_states = NULL; run_finalizers(ct); From bee2c30a7e135b9c9391ab728c18f9fbbd1cc2ab Mon Sep 17 00:00:00 2001 From: Vasily Ilin Date: Wed, 24 May 2023 00:01:37 -0700 Subject: [PATCH 058/290] doc: add rand! and randn! to docstrings of rand and randn (#49808) --- stdlib/Random/src/Random.jl | 2 ++ stdlib/Random/src/normal.jl | 2 ++ 2 files changed, 4 insertions(+) diff --git a/stdlib/Random/src/Random.jl b/stdlib/Random/src/Random.jl index 8da2dd6f3e9c7..30bed9433de5a 100644 --- a/stdlib/Random/src/Random.jl +++ b/stdlib/Random/src/Random.jl @@ -326,6 +326,8 @@ When only one argument is passed besides the optional `rng` and is a `Tuple`, it as a collection of values (`S`) and not as `dims`. +See also [`randn`](@ref) for normally distributed numbers, and [`rand!`](@ref) and [`randn!`](@ref) for the in-place equivalents. + !!! compat "Julia 1.1" Support for `S` as a tuple requires at least Julia 1.1. diff --git a/stdlib/Random/src/normal.jl b/stdlib/Random/src/normal.jl index 9d0f1595f052f..c2738653a0438 100644 --- a/stdlib/Random/src/normal.jl +++ b/stdlib/Random/src/normal.jl @@ -20,6 +20,8 @@ The `Base` module currently provides an implementation for the types [`Complex`](@ref) counterparts. 
When the type argument is complex, the values are drawn from the circularly symmetric complex normal distribution of variance 1 (corresponding to real and imaginary part having independent normal distribution with mean zero and variance `1/2`). +See also [`randn!`](@ref) to act in-place. + # Examples ```jldoctest julia> using Random From 310f59019856749fb85bc56a1e3c2e0592a134ad Mon Sep 17 00:00:00 2001 From: Daniel Karrasch Date: Wed, 24 May 2023 14:30:14 +0200 Subject: [PATCH 059/290] Include `HermOrSym` in character-based `mul!` dispatch (#49865) --- stdlib/LinearAlgebra/src/LinearAlgebra.jl | 28 +++ stdlib/LinearAlgebra/src/adjtrans.jl | 7 +- stdlib/LinearAlgebra/src/matmul.jl | 201 ++++++++++++++++------ stdlib/LinearAlgebra/src/symmetric.jl | 87 +--------- 4 files changed, 183 insertions(+), 140 deletions(-) diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl index 5cda4af366814..50d82c497282d 100644 --- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl +++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl @@ -457,6 +457,34 @@ const ⋅ = dot const × = cross export ⋅, × +wrapper_char(::AbstractArray) = 'N' +wrapper_char(::Adjoint) = 'C' +wrapper_char(::Adjoint{<:Real}) = 'T' +wrapper_char(::Transpose) = 'T' +wrapper_char(A::Hermitian) = A.uplo == 'U' ? 'H' : 'h' +wrapper_char(A::Hermitian{<:Real}) = A.uplo == 'U' ? 'S' : 's' +wrapper_char(A::Symmetric) = A.uplo == 'U' ? 'S' : 's' + +function wrap(A::AbstractVecOrMat, tA::AbstractChar) + if tA == 'N' + return A + elseif tA == 'T' + return transpose(A) + elseif tA == 'C' + return adjoint(A) + elseif tA == 'H' + return Hermitian(A, :U) + elseif tA == 'h' + return Hermitian(A, :L) + elseif tA == 'S' + return Symmetric(A, :U) + else # tA == 's' + return Symmetric(A, :L) + end +end + +_unwrap(A::AbstractVecOrMat) = A + ## convenience methods ## return only the solution of a least squares problem while avoiding promoting ## vectors to matrices. 
diff --git a/stdlib/LinearAlgebra/src/adjtrans.jl b/stdlib/LinearAlgebra/src/adjtrans.jl index e12cb9f9f98a9..7e650664906b9 100644 --- a/stdlib/LinearAlgebra/src/adjtrans.jl +++ b/stdlib/LinearAlgebra/src/adjtrans.jl @@ -94,11 +94,8 @@ inplace_adj_or_trans(::Type{<:AbstractArray}) = copyto! inplace_adj_or_trans(::Type{<:Adjoint}) = adjoint! inplace_adj_or_trans(::Type{<:Transpose}) = transpose! -adj_or_trans_char(::T) where {T<:AbstractArray} = adj_or_trans_char(T) -adj_or_trans_char(::Type{<:AbstractArray}) = 'N' -adj_or_trans_char(::Type{<:Adjoint}) = 'C' -adj_or_trans_char(::Type{<:Adjoint{<:Real}}) = 'T' -adj_or_trans_char(::Type{<:Transpose}) = 'T' +_unwrap(A::Adjoint) = parent(A) +_unwrap(A::Transpose) = parent(A) Base.dataids(A::Union{Adjoint, Transpose}) = Base.dataids(A.parent) Base.unaliascopy(A::Union{Adjoint,Transpose}) = typeof(A)(Base.unaliascopy(A.parent)) diff --git a/stdlib/LinearAlgebra/src/matmul.jl b/stdlib/LinearAlgebra/src/matmul.jl index 170aacee6682f..e9839857f93e6 100644 --- a/stdlib/LinearAlgebra/src/matmul.jl +++ b/stdlib/LinearAlgebra/src/matmul.jl @@ -68,24 +68,24 @@ end @inline mul!(y::AbstractVector, A::AbstractVecOrMat, x::AbstractVector, alpha::Number, beta::Number) = - generic_matvecmul!(y, adj_or_trans_char(A), _parent(A), x, MulAddMul(alpha, beta)) + generic_matvecmul!(y, wrapper_char(A), _unwrap(A), x, MulAddMul(alpha, beta)) # BLAS cases # equal eltypes @inline generic_matvecmul!(y::StridedVector{T}, tA, A::StridedVecOrMat{T}, x::StridedVector{T}, _add::MulAddMul=MulAddMul()) where {T<:BlasFloat} = - gemv!(y, tA, _parent(A), x, _add.alpha, _add.beta) + gemv!(y, tA, A, x, _add.alpha, _add.beta) # Real (possibly transposed) matrix times complex vector. 
# Multiply the matrix with the real and imaginary parts separately @inline generic_matvecmul!(y::StridedVector{Complex{T}}, tA, A::StridedVecOrMat{T}, x::StridedVector{Complex{T}}, _add::MulAddMul=MulAddMul()) where {T<:BlasReal} = - gemv!(y, tA, _parent(A), x, _add.alpha, _add.beta) + gemv!(y, tA, A, x, _add.alpha, _add.beta) # Complex matrix times real vector. # Reinterpret the matrix as a real matrix and do real matvec computation. # works only in cooperation with BLAS when A is untransposed (tA == 'N') # but that check is included in gemv! anyway @inline generic_matvecmul!(y::StridedVector{Complex{T}}, tA, A::StridedVecOrMat{Complex{T}}, x::StridedVector{T}, _add::MulAddMul=MulAddMul()) where {T<:BlasReal} = - gemv!(y, tA, _parent(A), x, _add.alpha, _add.beta) + gemv!(y, tA, A, x, _add.alpha, _add.beta) # Vector-Matrix multiplication (*)(x::AdjointAbsVec, A::AbstractMatrix) = (A'*x')' @@ -267,10 +267,10 @@ julia> C @inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, B::AbstractVecOrMat, α::Number, β::Number) = generic_matmatmul!( C, - adj_or_trans_char(A), - adj_or_trans_char(B), - _parent(A), - _parent(B), + wrapper_char(A), + wrapper_char(B), + _unwrap(A), + _unwrap(B), MulAddMul(α, β) ) @@ -340,25 +340,45 @@ julia> lmul!(F.Q, B) """ lmul!(A, B) +# THE one big BLAS dispatch @inline function generic_matmatmul!(C::StridedMatrix{T}, tA, tB, A::StridedVecOrMat{T}, B::StridedVecOrMat{T}, - _add::MulAddMul=MulAddMul()) where {T<:BlasFloat} - if tA == 'T' && tB == 'N' && A === B - return syrk_wrapper!(C, 'T', A, _add) - elseif tA == 'N' && tB == 'T' && A === B - return syrk_wrapper!(C, 'N', A, _add) - elseif tA == 'C' && tB == 'N' && A === B - return herk_wrapper!(C, 'C', A, _add) - elseif tA == 'N' && tB == 'C' && A === B - return herk_wrapper!(C, 'N', A, _add) - else - return gemm_wrapper!(C, tA, tB, A, B, _add) + _add::MulAddMul=MulAddMul()) where {T<:BlasFloat} + if all(in(('N', 'T', 'C')), (tA, tB)) + if tA == 'T' && tB == 'N' && A === B + return 
syrk_wrapper!(C, 'T', A, _add) + elseif tA == 'N' && tB == 'T' && A === B + return syrk_wrapper!(C, 'N', A, _add) + elseif tA == 'C' && tB == 'N' && A === B + return herk_wrapper!(C, 'C', A, _add) + elseif tA == 'N' && tB == 'C' && A === B + return herk_wrapper!(C, 'N', A, _add) + else + return gemm_wrapper!(C, tA, tB, A, B, _add) + end + end + alpha, beta = promote(_add.alpha, _add.beta, zero(T)) + if alpha isa Union{Bool,T} && beta isa Union{Bool,T} + if (tA == 'S' || tA == 's') && tB == 'N' + return BLAS.symm!('L', tA == 'S' ? 'U' : 'L', alpha, A, B, beta, C) + elseif (tB == 'S' || tB == 's') && tA == 'N' + return BLAS.symm!('R', tB == 'S' ? 'U' : 'L', alpha, B, A, beta, C) + elseif (tA == 'H' || tA == 'h') && tB == 'N' + return BLAS.hemm!('L', tA == 'H' ? 'U' : 'L', alpha, A, B, beta, C) + elseif (tB == 'H' || tB == 'h') && tA == 'N' + return BLAS.hemm!('R', tB == 'H' ? 'U' : 'L', alpha, B, A, beta, C) + end end + return _generic_matmatmul!(C, 'N', 'N', wrap(A, tA), wrap(B, tB), _add) end # Complex matrix times (transposed) real matrix. Reinterpret the first matrix to real for efficiency. @inline function generic_matmatmul!(C::StridedVecOrMat{Complex{T}}, tA, tB, A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T}, _add::MulAddMul=MulAddMul()) where {T<:BlasReal} - gemm_wrapper!(C, tA, tB, A, B, _add) + if all(in(('N', 'T', 'C')), (tA, tB)) + gemm_wrapper!(C, tA, tB, A, B, _add) + else + _generic_matmatmul!(C, 'N', 'N', wrap(A, tA), wrap(B, tB), _add) + end end @@ -394,8 +414,19 @@ function gemv!(y::StridedVector{T}, tA::AbstractChar, A::StridedVecOrMat{T}, x:: alpha, beta = promote(α, β, zero(T)) if alpha isa Union{Bool,T} && beta isa Union{Bool,T} && stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) && - !iszero(stride(x, 1)) # We only check input's stride here. - return BLAS.gemv!(tA, alpha, A, x, beta, y) + !iszero(stride(x, 1)) && # We only check input's stride here. 
+ if tA in ('N', 'T', 'C') + return BLAS.gemv!(tA, alpha, A, x, beta, y) + elseif tA in ('S', 's') + return BLAS.symv!(tA == 'S' ? 'U' : 'L', alpha, A, x, beta, y) + elseif tA in ('H', 'h') + return BLAS.hemv!(tA == 'H' ? 'U' : 'L', alpha, A, x, beta, y) + end + end + if tA in ('S', 's', 'H', 'h') + # re-wrap again and use plain ('N') matvec mul algorithm, + # because _generic_matvecmul! can't handle the HermOrSym cases specifically + return _generic_matvecmul!(y, 'N', wrap(A, tA), x, MulAddMul(α, β)) else return _generic_matvecmul!(y, tA, A, x, MulAddMul(α, β)) end @@ -418,7 +449,8 @@ function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, A::StridedVecOrMa BLAS.gemv!(tA, alpha, reinterpret(T, A), x, beta, reinterpret(T, y)) return y else - return _generic_matvecmul!(y, tA, A, x, MulAddMul(α, β)) + Anew, ta = tA in ('S', 's', 'H', 'h') ? (wrap(A, tA), 'N') : (A, tA) + return _generic_matvecmul!(y, ta, Anew, x, MulAddMul(α, β)) end end @@ -434,12 +466,16 @@ function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, A::StridedVecOrMa alpha, beta = promote(α, β, zero(T)) @views if alpha isa Union{Bool,T} && beta isa Union{Bool,T} && stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) && - !iszero(stride(x, 1)) + !iszero(stride(x, 1)) && tA in ('N', 'T', 'C') xfl = reinterpret(reshape, T, x) # Use reshape here. yfl = reinterpret(reshape, T, y) BLAS.gemv!(tA, alpha, A, xfl[1, :], beta, yfl[1, :]) BLAS.gemv!(tA, alpha, A, xfl[2, :], beta, yfl[2, :]) return y + elseif tA in ('S', 's', 'H', 'h') + # re-wrap again and use plain ('N') matvec mul algorithm, + # because _generic_matvecmul! 
can't handle the HermOrSym cases specifically + return _generic_matvecmul!(y, 'N', wrap(A, tA), x, MulAddMul(α, β)) else return _generic_matvecmul!(y, tA, A, x, MulAddMul(α, β)) end @@ -528,7 +564,11 @@ function gemm_wrapper(tA::AbstractChar, tB::AbstractChar, mA, nA = lapack_size(tA, A) mB, nB = lapack_size(tB, B) C = similar(B, T, mA, nB) - gemm_wrapper!(C, tA, tB, A, B) + if all(in(('N', 'T', 'C')), (tA, tB)) + gemm_wrapper!(C, tA, tB, A, B) + else + _generic_matmatmul!(C, 'N', 'N', wrap(A, tA), wrap(B, tB), _add) + end end function gemm_wrapper!(C::StridedVecOrMat{T}, tA::AbstractChar, tB::AbstractChar, @@ -607,7 +647,7 @@ function gemm_wrapper!(C::StridedVecOrMat{Complex{T}}, tA::AbstractChar, tB::Abs stride(A, 1) == stride(B, 1) == stride(C, 1) == 1 && stride(A, 2) >= size(A, 1) && stride(B, 2) >= size(B, 1) && - stride(C, 2) >= size(C, 1)) && tA == 'N' + stride(C, 2) >= size(C, 1) && tA == 'N') BLAS.gemm!(tA, tB, alpha, reinterpret(T, A), B, beta, reinterpret(T, C)) return C end @@ -645,13 +685,16 @@ end # NOTE: the generic version is also called as fallback for # strides != 1 cases -generic_matvecmul!(C::AbstractVector, tA, A::AbstractVecOrMat, B::AbstractVector, - _add::MulAddMul = MulAddMul()) = - _generic_matvecmul!(C, tA, A, B, _add) +@inline function generic_matvecmul!(C::AbstractVector, tA, A::AbstractVecOrMat, B::AbstractVector, + _add::MulAddMul = MulAddMul()) + Anew, ta = tA in ('S', 's', 'H', 'h') ? 
(wrap(A, tA), 'N') : (A, tA) + return _generic_matvecmul!(C, ta, Anew, B, _add) +end function _generic_matvecmul!(C::AbstractVector, tA, A::AbstractVecOrMat, B::AbstractVector, _add::MulAddMul = MulAddMul()) require_one_based_indexing(C, A, B) + @assert tA in ('N', 'T', 'C') mB = length(B) mA, nA = lapack_size(tA, A) if mB != nA @@ -725,8 +768,7 @@ end const tilebufsize = 10800 # Approximately 32k/3 -function generic_matmatmul!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMatrix, - _add::MulAddMul=MulAddMul()) +function generic_matmatmul!(C::AbstractVecOrMat, tA, tB, A::AbstractVecOrMat, B::AbstractVecOrMat, _add::MulAddMul) mA, nA = lapack_size(tA, A) mB, nB = lapack_size(tB, B) mC, nC = size(C) @@ -740,15 +782,16 @@ function generic_matmatmul!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::Abs if mA == nA == mB == nB == mC == nC == 3 return matmul3x3!(C, tA, tB, A, B, _add) end + A, tA = tA in ('H', 'h', 'S', 's') ? (wrap(A, tA), 'N') : (A, tA) + B, tB = tB in ('H', 'h', 'S', 's') ? 
(wrap(B, tB), 'N') : (B, tB) _generic_matmatmul!(C, tA, tB, A, B, _add) end -generic_matmatmul!(C::AbstractVecOrMat, tA, tB, A::AbstractVecOrMat, B::AbstractVecOrMat, _add::MulAddMul) = - _generic_matmatmul!(C, tA, tB, A, B, _add) - function _generic_matmatmul!(C::AbstractVecOrMat{R}, tA, tB, A::AbstractVecOrMat{T}, B::AbstractVecOrMat{S}, _add::MulAddMul) where {T,S,R} + @assert tA in ('N', 'T', 'C') && tB in ('N', 'T', 'C') require_one_based_indexing(C, A, B) + mA, nA = lapack_size(tA, A) mB, nB = lapack_size(tB, B) if mB != nA @@ -931,7 +974,9 @@ function matmul2x2!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))")) end @inbounds begin - if tA == 'T' + if tA == 'N' + A11 = A[1,1]; A12 = A[1,2]; A21 = A[2,1]; A22 = A[2,2] + elseif tA == 'T' # TODO making these lazy could improve perf A11 = copy(transpose(A[1,1])); A12 = copy(transpose(A[2,1])) A21 = copy(transpose(A[1,2])); A22 = copy(transpose(A[2,2])) @@ -939,10 +984,23 @@ function matmul2x2!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat # TODO making these lazy could improve perf A11 = copy(A[1,1]'); A12 = copy(A[2,1]') A21 = copy(A[1,2]'); A22 = copy(A[2,2]') - else - A11 = A[1,1]; A12 = A[1,2]; A21 = A[2,1]; A22 = A[2,2] + elseif tA == 'S' + A11 = symmetric(A[1,1], :U); A12 = A[1,2] + A21 = copy(transpose(A[1,2])); A22 = symmetric(A[2,2], :U) + elseif tA == 's' + A11 = symmetric(A[1,1], :L); A12 = copy(transpose(A[2,1])) + A21 = A[2,1]; A22 = symmetric(A[2,2], :L) + elseif tA == 'H' + A11 = hermitian(A[1,1], :U); A12 = A[1,2] + A21 = copy(adjoint(A[1,2])); A22 = hermitian(A[2,2], :U) + else # if tA == 'h' + A11 = hermitian(A[1,1], :L); A12 = copy(adjoint(A[2,1])) + A21 = A[2,1]; A22 = hermitian(A[2,2], :L) end - if tB == 'T' + if tB == 'N' + B11 = B[1,1]; B12 = B[1,2]; + B21 = B[2,1]; B22 = B[2,2] + elseif tB == 'T' # TODO making these lazy could improve perf B11 = 
copy(transpose(B[1,1])); B12 = copy(transpose(B[2,1])) B21 = copy(transpose(B[1,2])); B22 = copy(transpose(B[2,2])) @@ -950,9 +1008,18 @@ function matmul2x2!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat # TODO making these lazy could improve perf B11 = copy(B[1,1]'); B12 = copy(B[2,1]') B21 = copy(B[1,2]'); B22 = copy(B[2,2]') - else - B11 = B[1,1]; B12 = B[1,2]; - B21 = B[2,1]; B22 = B[2,2] + elseif tA == 'S' + B11 = symmetric(A[1,1], :U); B12 = A[1,2] + B21 = copy(transpose(A[1,2])); B22 = symmetric(A[2,2], :U) + elseif tA == 's' + B11 = symmetric(A[1,1], :L); B12 = copy(transpose(A[2,1])) + B21 = A[2,1]; B22 = symmetric(A[2,2], :L) + elseif tA == 'H' + B11 = hermitian(A[1,1], :U); B12 = A[1,2] + B21 = copy(adjoint(A[1,2])); B22 = hermitian(A[2,2], :U) + else # if tA == 'h' + B11 = hermitian(A[1,1], :L); B12 = copy(adjoint(A[2,1])) + B21 = A[2,1]; B22 = hermitian(A[2,2], :L) end _modify!(_add, A11*B11 + A12*B21, C, (1,1)) _modify!(_add, A11*B12 + A12*B22, C, (1,2)) @@ -974,7 +1041,11 @@ function matmul3x3!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))")) end @inbounds begin - if tA == 'T' + if tA == 'N' + A11 = A[1,1]; A12 = A[1,2]; A13 = A[1,3] + A21 = A[2,1]; A22 = A[2,2]; A23 = A[2,3] + A31 = A[3,1]; A32 = A[3,2]; A33 = A[3,3] + elseif tA == 'T' # TODO making these lazy could improve perf A11 = copy(transpose(A[1,1])); A12 = copy(transpose(A[2,1])); A13 = copy(transpose(A[3,1])) A21 = copy(transpose(A[1,2])); A22 = copy(transpose(A[2,2])); A23 = copy(transpose(A[3,2])) @@ -984,13 +1055,29 @@ function matmul3x3!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat A11 = copy(A[1,1]'); A12 = copy(A[2,1]'); A13 = copy(A[3,1]') A21 = copy(A[1,2]'); A22 = copy(A[2,2]'); A23 = copy(A[3,2]') A31 = copy(A[1,3]'); A32 = copy(A[2,3]'); A33 = copy(A[3,3]') - else - A11 = A[1,1]; A12 = A[1,2]; A13 = A[1,3] - A21 = A[2,1]; A22 = A[2,2]; 
A23 = A[2,3] - A31 = A[3,1]; A32 = A[3,2]; A33 = A[3,3] + elseif tA == 'S' + A11 = symmetric(A[1,1], :U); A12 = A[1,2]; A13 = A[1,3] + A21 = copy(transpose(A[1,2])); A22 = symmetric(A[2,2], :U); A23 = A[2,3] + A31 = copy(transpose(A[1,3])); A32 = copy(transpose(A[2,3])); A33 = symmetric(A[3,3], :U) + elseif tA == 's' + A11 = symmetric(A[1,1], :L); A12 = copy(transpose(A[2,1])); A13 = copy(transpose(A[3,1])) + A21 = A[2,1]; A22 = symmetric(A[2,2], :L); A23 = copy(transpose(A[3,2])) + A31 = A[3,1]; A32 = A[3,2]; A33 = symmetric(A[3,3], :L) + elseif tA == 'H' + A11 = hermitian(A[1,1], :U); A12 = A[1,2]; A13 = A[1,3] + A21 = copy(adjoint(A[1,2])); A22 = hermitian(A[2,2], :U); A23 = A[2,3] + A31 = copy(adjoint(A[1,3])); A32 = copy(adjoint(A[2,3])); A33 = hermitian(A[3,3], :U) + else # if tA == 'h' + A11 = hermitian(A[1,1], :L); A12 = copy(adjoint(A[2,1])); A13 = copy(adjoint(A[3,1])) + A21 = A[2,1]; A22 = hermitian(A[2,2], :L); A23 = copy(adjoint(A[3,2])) + A31 = A[3,1]; A32 = A[3,2]; A33 = hermitian(A[3,3], :L) end - if tB == 'T' + if tB == 'N' + B11 = B[1,1]; B12 = B[1,2]; B13 = B[1,3] + B21 = B[2,1]; B22 = B[2,2]; B23 = B[2,3] + B31 = B[3,1]; B32 = B[3,2]; B33 = B[3,3] + elseif tB == 'T' # TODO making these lazy could improve perf B11 = copy(transpose(B[1,1])); B12 = copy(transpose(B[2,1])); B13 = copy(transpose(B[3,1])) B21 = copy(transpose(B[1,2])); B22 = copy(transpose(B[2,2])); B23 = copy(transpose(B[3,2])) @@ -1000,10 +1087,22 @@ function matmul3x3!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat B11 = copy(B[1,1]'); B12 = copy(B[2,1]'); B13 = copy(B[3,1]') B21 = copy(B[1,2]'); B22 = copy(B[2,2]'); B23 = copy(B[3,2]') B31 = copy(B[1,3]'); B32 = copy(B[2,3]'); B33 = copy(B[3,3]') - else - B11 = B[1,1]; B12 = B[1,2]; B13 = B[1,3] - B21 = B[2,1]; B22 = B[2,2]; B23 = B[2,3] - B31 = B[3,1]; B32 = B[3,2]; B33 = B[3,3] + elseif tB == 'S' + B11 = symmetric(B[1,1], :U); B12 = B[1,2]; B13 = B[1,3] + B21 = copy(transpose(B[1,2])); B22 = symmetric(B[2,2], :U); 
B23 = B[2,3] + B31 = copy(transpose(B[1,3])); B32 = copy(transpose(B[2,3])); B33 = symmetric(B[3,3], :U) + elseif tB == 's' + B11 = symmetric(B[1,1], :L); B12 = copy(transpose(B[2,1])); B13 = copy(transpose(B[3,1])) + B21 = B[2,1]; B22 = symmetric(B[2,2], :L); B23 = copy(transpose(B[3,2])) + B31 = B[3,1]; B32 = B[3,2]; B33 = symmetric(B[3,3], :L) + elseif tB == 'H' + B11 = hermitian(B[1,1], :U); B12 = B[1,2]; B13 = B[1,3] + B21 = copy(adjoint(B[1,2])); B22 = hermitian(B[2,2], :U); B23 = B[2,3] + B31 = copy(adjoint(B[1,3])); B32 = copy(adjoint(B[2,3])); B33 = hermitian(B[3,3], :U) + else # if tB == 'h' + B11 = hermitian(B[1,1], :L); B12 = copy(adjoint(B[2,1])); B13 = copy(adjoint(B[3,1])) + B21 = B[2,1]; B22 = hermitian(B[2,2], :L); B23 = copy(adjoint(B[3,2])) + B31 = B[3,1]; B32 = B[3,2]; B33 = hermitian(B[3,3], :L) end _modify!(_add, A11*B11 + A12*B21 + A13*B31, C, (1,1)) diff --git a/stdlib/LinearAlgebra/src/symmetric.jl b/stdlib/LinearAlgebra/src/symmetric.jl index ee25a4b31db3a..dc553a7492778 100644 --- a/stdlib/LinearAlgebra/src/symmetric.jl +++ b/stdlib/LinearAlgebra/src/symmetric.jl @@ -185,6 +185,9 @@ function hermitian_type(::Type{T}) where {S<:AbstractMatrix, T<:AbstractMatrix{S end hermitian_type(::Type{T}) where {T<:Number} = T +_unwrap(A::Hermitian) = parent(A) +_unwrap(A::Symmetric) = parent(A) + for (S, H) in ((:Symmetric, :Hermitian), (:Hermitian, :Symmetric)) @eval begin $S(A::$S) = A @@ -521,90 +524,6 @@ for f in (:+, :-) end end -## Matvec -@inline function mul!(y::StridedVector{T}, A::Symmetric{T,<:StridedMatrix}, x::StridedVector{T}, - α::Number, β::Number) where {T<:BlasFloat} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.symv!(A.uplo, alpha, A.data, x, beta, y) - else - return generic_matvecmul!(y, 'N', A, x, MulAddMul(α, β)) - end -end -@inline function mul!(y::StridedVector{T}, A::Hermitian{T,<:StridedMatrix}, x::StridedVector{T}, - α::Number, β::Number) where {T<:BlasReal} - 
alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.symv!(A.uplo, alpha, A.data, x, beta, y) - else - return generic_matvecmul!(y, 'N', A, x, MulAddMul(α, β)) - end -end -@inline function mul!(y::StridedVector{T}, A::Hermitian{T,<:StridedMatrix}, x::StridedVector{T}, - α::Number, β::Number) where {T<:BlasComplex} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.hemv!(A.uplo, alpha, A.data, x, beta, y) - else - return generic_matvecmul!(y, 'N', A, x, MulAddMul(α, β)) - end -end -## Matmat -@inline function mul!(C::StridedMatrix{T}, A::Symmetric{T,<:StridedMatrix}, B::StridedMatrix{T}, - α::Number, β::Number) where {T<:BlasFloat} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.symm!('L', A.uplo, alpha, A.data, B, beta, C) - else - return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) - end -end -@inline function mul!(C::StridedMatrix{T}, A::StridedMatrix{T}, B::Symmetric{T,<:StridedMatrix}, - α::Number, β::Number) where {T<:BlasFloat} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.symm!('R', B.uplo, alpha, B.data, A, beta, C) - else - return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) - end -end -@inline function mul!(C::StridedMatrix{T}, A::Hermitian{T,<:StridedMatrix}, B::StridedMatrix{T}, - α::Number, β::Number) where {T<:BlasReal} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.symm!('L', A.uplo, alpha, A.data, B, beta, C) - else - return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) - end -end -@inline function mul!(C::StridedMatrix{T}, A::StridedMatrix{T}, B::Hermitian{T,<:StridedMatrix}, - α::Number, β::Number) where {T<:BlasReal} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} 
- return BLAS.symm!('R', B.uplo, alpha, B.data, A, beta, C) - else - return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) - end -end -@inline function mul!(C::StridedMatrix{T}, A::Hermitian{T,<:StridedMatrix}, B::StridedMatrix{T}, - α::Number, β::Number) where {T<:BlasComplex} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.hemm!('L', A.uplo, alpha, A.data, B, beta, C) - else - return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) - end -end -@inline function mul!(C::StridedMatrix{T}, A::StridedMatrix{T}, B::Hermitian{T,<:StridedMatrix}, - α::Number, β::Number) where {T<:BlasComplex} - alpha, beta = promote(α, β, zero(T)) - if alpha isa Union{Bool,T} && beta isa Union{Bool,T} - return BLAS.hemm!('R', B.uplo, alpha, B.data, A, beta, C) - else - return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta)) - end -end - *(A::HermOrSym, B::HermOrSym) = A * copyto!(similar(parent(B)), B) function dot(x::AbstractVector, A::RealHermSymComplexHerm, y::AbstractVector) From aea56a9d9547cff43c3bcfb3dac0fff91bd53793 Mon Sep 17 00:00:00 2001 From: Kiran Date: Wed, 24 May 2023 11:01:20 -0400 Subject: [PATCH 060/290] Ensure Distributed workers inherit threads spec properly (#49942) --- stdlib/Distributed/src/cluster.jl | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/stdlib/Distributed/src/cluster.jl b/stdlib/Distributed/src/cluster.jl index 3fd3d63108297..6dc6bd086df16 100644 --- a/stdlib/Distributed/src/cluster.jl +++ b/stdlib/Distributed/src/cluster.jl @@ -1317,6 +1317,20 @@ end write_cookie(io::IO) = print(io.in, string(cluster_cookie(), "\n")) +function get_threads_spec(opts) + if opts.nthreads > 0 + @assert opts.nthreadpools >= 1 + @assert opts.nthreads_per_pool != C_NULL + thr = "$(unsafe_load(opts.nthreads_per_pool))" + if opts.nthreadpools == 2 + thr = "$(thr),$(unsafe_load(opts.nthreads_per_pool, 2))" + end + `--threads=$(thr)` + else + `` 
+ end +end + # Starts workers specified by (-n|--procs) and --machine-file command line options function process_opts(opts) # startup worker. @@ -1331,7 +1345,7 @@ function process_opts(opts) end # Propagate --threads to workers - threads = opts.nthreads > 0 ? `--threads=$(opts.nthreads)` : `` + threads = get_threads_spec(opts) gcthreads = opts.ngcthreads > 0 ? `--gcthreads=$(opts.ngcthreads)` : `` exeflags = `$threads $gcthreads` From e370e4ea4743e8858ded169f78eaa2adc7517c07 Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Wed, 24 May 2023 14:26:14 -0400 Subject: [PATCH 061/290] Slightly re-factor timing counts back-end This should make it clearer what the responsibilities for each function are, and fix a bug where conditionally-triggered events could end up with an invalid total time. --- src/timing.c | 4 ++-- src/timing.h | 41 +++++++++++++++++++++-------------------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/src/timing.c b/src/timing.c index d120432f2f56f..878e4e0aac76c 100644 --- a/src/timing.c +++ b/src/timing.c @@ -156,7 +156,7 @@ void jl_timing_block_enter_task(jl_task_t *ct, jl_ptls_t ptls, jl_timing_block_t ptls->timing_stack = prev_blk; if (prev_blk != NULL) { - _COUNTS_START(&prev_blk->counts_ctx, cycleclock()); + _COUNTS_RESUME(&prev_blk->counts_ctx, cycleclock()); } } @@ -189,7 +189,7 @@ jl_timing_block_t *jl_timing_block_exit_task(jl_task_t *ct, jl_ptls_t ptls) ptls->timing_stack = NULL; if (blk != NULL) { - _COUNTS_STOP(&blk->counts_ctx, cycleclock()); + _COUNTS_PAUSE(&blk->counts_ctx, cycleclock()); } return blk; } diff --git a/src/timing.h b/src/timing.h index cbf32cf643526..a3ae283e98b3d 100644 --- a/src/timing.h +++ b/src/timing.h @@ -233,16 +233,16 @@ enum jl_timing_counter_types { #ifdef USE_TIMING_COUNTS #define _COUNTS_CTX_MEMBER jl_timing_counts_t counts_ctx; -#define _COUNTS_CTOR(block) _jl_timing_counts_ctor(block) -#define _COUNTS_DESTROY(block, event, t) _jl_timing_counts_destroy(block, event, t) #define 
_COUNTS_START(block, t) _jl_timing_counts_start(block, t) -#define _COUNTS_STOP(block, t) _jl_timing_counts_stop(block, t) +#define _COUNTS_STOP(block, event, t) _jl_timing_counts_stop(block, event, t) +#define _COUNTS_PAUSE(block, t) _jl_timing_counts_pause(block, t) +#define _COUNTS_RESUME(block, t) _jl_timing_counts_resume(block, t) #else #define _COUNTS_CTX_MEMBER -#define _COUNTS_CTOR(block) -#define _COUNTS_DESTROY(block, event, t) #define _COUNTS_START(block, t) -#define _COUNTS_STOP(block, t) +#define _COUNTS_STOP(block, event, t) +#define _COUNTS_PAUSE(block, t) +#define _COUNTS_RESUME(block, t) #endif /** @@ -297,7 +297,7 @@ typedef struct _jl_timing_counts_t { #endif } jl_timing_counts_t; -STATIC_INLINE void _jl_timing_counts_stop(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { +STATIC_INLINE void _jl_timing_counts_pause(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { #ifdef JL_DEBUG_BUILD assert(block->running); block->running = 0; @@ -305,7 +305,7 @@ STATIC_INLINE void _jl_timing_counts_stop(jl_timing_counts_t *block, uint64_t t) block->total += t - block->start; } -STATIC_INLINE void _jl_timing_counts_start(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { +STATIC_INLINE void _jl_timing_counts_resume(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { #ifdef JL_DEBUG_BUILD assert(!block->running); block->running = 1; @@ -313,14 +313,21 @@ STATIC_INLINE void _jl_timing_counts_start(jl_timing_counts_t *block, uint64_t t block->start = t; } -STATIC_INLINE void _jl_timing_counts_ctor(jl_timing_counts_t *block) JL_NOTSAFEPOINT { +STATIC_INLINE void _jl_timing_counts_start(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { block->total = 0; + block->start = t; + block->t0 = t; #ifdef JL_DEBUG_BUILD - block->running = 0; + block->running = 1; #endif } -STATIC_INLINE void _jl_timing_counts_destroy(jl_timing_counts_t *block, int event, uint64_t t) JL_NOTSAFEPOINT { +STATIC_INLINE void 
_jl_timing_counts_stop(jl_timing_counts_t *block, int event, uint64_t t) JL_NOTSAFEPOINT { +#ifdef JL_DEBUG_BUILD + assert(block->running); + block->running = 0; +#endif + block->total += t - block->start; jl_atomic_fetch_add_relaxed(jl_timing_self_counts + event, block->total); jl_atomic_fetch_add_relaxed(jl_timing_full_counts + event, t - block->t0); } @@ -353,9 +360,6 @@ STATIC_INLINE void jl_timing_block_start(jl_timing_block_t *block) { uint64_t t = cycleclock(); (void)t; _COUNTS_START(&block->counts_ctx, t); -#ifdef USE_TIMING_COUNTS - block->counts_ctx.t0 = t; -#endif _ITTAPI_START(block); _TRACY_START(block); @@ -363,7 +367,7 @@ STATIC_INLINE void jl_timing_block_start(jl_timing_block_t *block) { block->prev = *prevp; block->is_running = 1; if (block->prev) { - _COUNTS_STOP(&block->prev->counts_ctx, t); + _COUNTS_PAUSE(&block->prev->counts_ctx, t); } *prevp = block; } @@ -372,26 +376,23 @@ STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int subsystem block->subsystem = subsystem; block->event = event; block->is_running = 0; - _COUNTS_CTOR(&block->counts_ctx); } STATIC_INLINE void _jl_timing_block_destroy(jl_timing_block_t *block) JL_NOTSAFEPOINT { if (block->is_running) { uint64_t t = cycleclock(); (void)t; _ITTAPI_STOP(block); - _COUNTS_STOP(&block->counts_ctx, t); _TRACY_STOP(block->tracy_ctx); + _COUNTS_STOP(&block->counts_ctx, block->event, cycleclock()); jl_task_t *ct = jl_current_task; jl_timing_block_t **pcur = &ct->ptls->timing_stack; assert(*pcur == block); *pcur = block->prev; if (block->prev) { - _COUNTS_START(&block->prev->counts_ctx, t); + _COUNTS_RESUME(&block->prev->counts_ctx, t); } } - - _COUNTS_DESTROY(&block->counts_ctx, block->event, cycleclock()); } typedef struct _jl_timing_suspend_t { From 01ddf80f18fc618e20df307945a9c19e74005270 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Thu, 25 May 2023 09:52:51 -0400 Subject: [PATCH 062/290] lowering: preserve line numbers over julia-expand-macroscope pass (#44995) This is 
to preserve the line number of the macro caller in the output, in case we don't have context from eval on where it occurred. But we make slightly more changes than strictly necessary to prepare for future improvements in this area. --- base/boot.jl | 3 +- base/docs/Docs.jl | 32 ++++++++++---- base/osutils.jl | 2 +- base/threadcall.jl | 2 +- base/util.jl | 2 +- src/ast.c | 87 +++++++++++++++++++++++++++++++++----- src/ast.scm | 9 ++-- src/jlfrontend.scm | 28 ++++++++++-- src/julia-syntax.scm | 6 +-- src/macroexpand.scm | 11 +++-- stdlib/Test/src/Test.jl | 39 ++++++++--------- test/compiler/inference.jl | 2 +- test/deprecation_exec.jl | 2 - test/docs.jl | 2 +- test/errorshow.jl | 2 +- test/goto.jl | 2 +- test/syntax.jl | 49 ++++++++++++--------- 17 files changed, 195 insertions(+), 85 deletions(-) diff --git a/base/boot.jl b/base/boot.jl index 43ced22c043d5..ec25fa2bc0b6d 100644 --- a/base/boot.jl +++ b/base/boot.jl @@ -533,11 +533,10 @@ import Core: CodeInfo, MethodInstance, CodeInstance, GotoNode, GotoIfNot, Return end # module IR # docsystem basics -const unescape = Symbol("hygienic-scope") macro doc(x...) docex = atdoc(__source__, __module__, x...) isa(docex, Expr) && docex.head === :escape && return docex - return Expr(:escape, Expr(unescape, docex, typeof(atdoc).name.module)) + return Expr(:escape, Expr(:var"hygienic-scope", docex, typeof(atdoc).name.module, __source__)) end macro __doc__(x) return Expr(:escape, Expr(:block, Expr(:meta, :doc), x)) diff --git a/base/docs/Docs.jl b/base/docs/Docs.jl index e0d21715c2147..e0733280e7c7d 100644 --- a/base/docs/Docs.jl +++ b/base/docs/Docs.jl @@ -286,12 +286,26 @@ catdoc(xs...) = vcat(xs...)
const keywords = Dict{Symbol, DocStr}() function unblock(@nospecialize ex) + while isexpr(ex, :var"hygienic-scope") + isexpr(ex.args[1], :escape) || break + ex = ex.args[1].args[1] + end isexpr(ex, :block) || return ex exs = filter(ex -> !(isa(ex, LineNumberNode) || isexpr(ex, :line)), ex.args) length(exs) == 1 || return ex return unblock(exs[1]) end +# peek through ex to figure out what kind of expression it may eventually act like +# but ignoring scopes and line numbers +function unescape(@nospecialize ex) + ex = unblock(ex) + while isexpr(ex, :escape) || isexpr(ex, :var"hygienic-scope") + ex = unblock(ex.args[1]) + end + return ex +end + uncurly(@nospecialize ex) = isexpr(ex, :curly) ? ex.args[1] : ex namify(@nospecialize x) = astname(x, isexpr(x, :macro))::Union{Symbol,Expr,GlobalRef} @@ -351,18 +365,19 @@ function metadata(__source__, __module__, expr, ismodule) fields = P[] last_docstr = nothing for each in (expr.args[3]::Expr).args - if isa(each, Symbol) || isexpr(each, :(::)) + eachex = unescape(each) + if isa(eachex, Symbol) || isexpr(eachex, :(::)) # a field declaration if last_docstr !== nothing - push!(fields, P(namify(each::Union{Symbol,Expr}), last_docstr)) + push!(fields, P(namify(eachex::Union{Symbol,Expr}), last_docstr)) last_docstr = nothing end - elseif isexpr(each, :function) || isexpr(each, :(=)) + elseif isexpr(eachex, :function) || isexpr(eachex, :(=)) break - elseif isa(each, String) || isexpr(each, :string) || isexpr(each, :call) || - (isexpr(each, :macrocall) && each.args[1] === Symbol("@doc_str")) + elseif isa(eachex, String) || isexpr(eachex, :string) || isexpr(eachex, :call) || + (isexpr(eachex, :macrocall) && eachex.args[1] === Symbol("@doc_str")) # forms that might be doc strings - last_docstr = each::Union{String,Expr} + last_docstr = each end end dict = :($(Dict{Symbol,Any})($([(:($(P)($(quot(f)), $d)))::Expr for (f, d) in fields]...))) @@ -627,8 +642,9 @@ function loaddocs(docs::Vector{Core.SimpleVector}) for (mod, ex, str, file, 
line) in docs data = Dict{Symbol,Any}(:path => string(file), :linenumber => line) doc = docstr(str, data) - docstring = docm(LineNumberNode(line, file), mod, doc, ex, false) # expand the real @doc macro now - Core.eval(mod, Expr(Core.unescape, docstring, Docs)) + lno = LineNumberNode(line, file) + docstring = docm(lno, mod, doc, ex, false) # expand the real @doc macro now + Core.eval(mod, Expr(:var"hygienic-scope", docstring, Docs, lno)) end empty!(docs) nothing diff --git a/base/osutils.jl b/base/osutils.jl index 1f5a708d30c7a..95d0562540e5a 100644 --- a/base/osutils.jl +++ b/base/osutils.jl @@ -16,7 +16,7 @@ macro static(ex) @label loop hd = ex.head if hd ∈ (:if, :elseif, :&&, :||) - cond = Core.eval(__module__, ex.args[1]) + cond = Core.eval(__module__, ex.args[1])::Bool if xor(cond, hd === :||) return esc(ex.args[2]) elseif length(ex.args) == 3 diff --git a/base/threadcall.jl b/base/threadcall.jl index 45965fdbc6c65..7548c5063671f 100644 --- a/base/threadcall.jl +++ b/base/threadcall.jl @@ -47,7 +47,7 @@ macro threadcall(f, rettype, argtypes, argvals...) 
push!(body, :(return Int(Core.sizeof($rettype)))) # return code to generate wrapper function and send work request thread queue - wrapper = Expr(Symbol("hygienic-scope"), wrapper, @__MODULE__) + wrapper = Expr(:var"hygienic-scope", wrapper, @__MODULE__, __source__) return :(let fun_ptr = @cfunction($wrapper, Int, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid})) # use cglobal to look up the function on the calling thread do_threadcall(fun_ptr, cglobal($f), $rettype, Any[$(argtypes...)], Any[$(argvals...)]) diff --git a/base/util.jl b/base/util.jl index 6f424f80d13b6..ec99bc6f40c4f 100644 --- a/base/util.jl +++ b/base/util.jl @@ -604,7 +604,7 @@ macro kwdef(expr) kwdefs = nothing end return quote - Base.@__doc__ $(esc(expr)) + $(esc(:($Base.@__doc__ $expr))) $kwdefs end end diff --git a/src/ast.c b/src/ast.c index b1c69db2f0bc9..bd1ffee5b76b1 100644 --- a/src/ast.c +++ b/src/ast.c @@ -436,6 +436,8 @@ static jl_value_t *scm_to_julia(fl_context_t *fl_ctx, value_t e, jl_module_t *mo } JL_CATCH { // if expression cannot be converted, replace with error expr + //jl_(jl_current_exception()); + //jlbacktrace(); jl_expr_t *ex = jl_exprn(jl_error_sym, 1); v = (jl_value_t*)ex; jl_array_ptr_set(ex->args, 0, jl_cstr_to_string("invalid AST")); @@ -1000,7 +1002,59 @@ int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT return 0; } -static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule, jl_module_t **ctx, size_t world, int throw_load_error) +// Utility function to return whether `e` is any of the special AST types or +// will always evaluate to itself exactly unchanged. This corresponds to +// `is_self_quoting` in Core.Compiler utilities. 
+int jl_is_ast_node(jl_value_t *e) JL_NOTSAFEPOINT +{ + return jl_is_newvarnode(e) + || jl_is_code_info(e) + || jl_is_linenode(e) + || jl_is_gotonode(e) + || jl_is_gotoifnot(e) + || jl_is_returnnode(e) + || jl_is_ssavalue(e) + || jl_is_slotnumber(e) + || jl_is_argument(e) + || jl_is_quotenode(e) + || jl_is_globalref(e) + || jl_is_symbol(e) + || jl_is_pinode(e) + || jl_is_phinode(e) + || jl_is_phicnode(e) + || jl_is_upsilonnode(e) + || jl_is_expr(e); +} + +static int is_self_quoting_expr(jl_expr_t *e) JL_NOTSAFEPOINT +{ + return (e->head == jl_inert_sym || + e->head == jl_core_sym || + e->head == jl_line_sym || + e->head == jl_lineinfo_sym || + e->head == jl_meta_sym || + e->head == jl_boundscheck_sym || + e->head == jl_inline_sym || + e->head == jl_noinline_sym); +} + +// any AST, except those that cannot contain symbols +// and have no side effects +int need_esc_node(jl_value_t *e) JL_NOTSAFEPOINT +{ + if (jl_is_linenode(e) + || jl_is_ssavalue(e) + || jl_is_slotnumber(e) + || jl_is_argument(e) + || jl_is_quotenode(e)) + return 0; + if (jl_is_expr(e)) + return !is_self_quoting_expr((jl_expr_t*)e); + // note: jl_is_globalref(e) is not included here, since we care a little about having a line number for it + return jl_is_ast_node(e); +} + +static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule, jl_module_t **ctx, jl_value_t **lineinfo, size_t world, int throw_load_error) { jl_task_t *ct = jl_current_task; JL_TIMING(MACRO_INVOCATION, MACRO_INVOCATION); @@ -1012,10 +1066,9 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule margs[0] = jl_array_ptr_ref(args, 0); // __source__ argument jl_value_t *lno = jl_array_ptr_ref(args, 1); + if (!jl_is_linenode(lno)) + lno = jl_new_struct(jl_linenumbernode_type, jl_box_long(0), jl_nothing); margs[1] = lno; - if (!jl_is_linenode(lno)) { - margs[1] = jl_new_struct(jl_linenumbernode_type, jl_box_long(0), jl_nothing); - } margs[2] = (jl_value_t*)inmodule; for (i = 3; i < 
nargs; i++) margs[i] = jl_array_ptr_ref(args, i - 1); @@ -1054,6 +1107,7 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule } } ct->world_age = last_age; + *lineinfo = margs[1]; JL_GC_POP(); return result; } @@ -1076,14 +1130,18 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str JL_GC_POP(); return expr; } - if (e->head == jl_hygienicscope_sym && jl_expr_nargs(e) == 2) { + if (e->head == jl_hygienicscope_sym && jl_expr_nargs(e) >= 2) { struct macroctx_stack newctx; newctx.m = (jl_module_t*)jl_exprarg(e, 1); JL_TYPECHK(hygienic-scope, module, (jl_value_t*)newctx.m); newctx.parent = macroctx; jl_value_t *a = jl_exprarg(e, 0); jl_value_t *a2 = jl_expand_macros(a, inmodule, &newctx, onelevel, world, throw_load_error); - if (a != a2) + if (jl_is_expr(a2) && ((jl_expr_t*)a2)->head == jl_escape_sym && !need_esc_node(jl_exprarg(a2, 0))) + expr = jl_exprarg(a2, 0); + else if (!need_esc_node(a2)) + expr = a2; + else if (a != a2) jl_array_ptr_set(e->args, 0, a2); return expr; } @@ -1091,21 +1149,28 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str struct macroctx_stack newctx; newctx.m = macroctx ? 
macroctx->m : inmodule; newctx.parent = macroctx; - jl_value_t *result = jl_invoke_julia_macro(e->args, inmodule, &newctx.m, world, throw_load_error); + jl_value_t *lineinfo = NULL; + jl_value_t *result = jl_invoke_julia_macro(e->args, inmodule, &newctx.m, &lineinfo, world, throw_load_error); + if (!need_esc_node(result)) + return result; jl_value_t *wrap = NULL; - JL_GC_PUSH3(&result, &wrap, &newctx.m); + JL_GC_PUSH4(&result, &wrap, &newctx.m, &lineinfo); // copy and wrap the result in `(hygienic-scope ,result ,newctx) if (jl_is_expr(result) && ((jl_expr_t*)result)->head == jl_escape_sym) result = jl_exprarg(result, 0); else - wrap = (jl_value_t*)jl_exprn(jl_hygienicscope_sym, 2); + wrap = (jl_value_t*)jl_exprn(jl_hygienicscope_sym, 3); result = jl_copy_ast(result); if (!onelevel) result = jl_expand_macros(result, inmodule, wrap ? &newctx : macroctx, onelevel, world, throw_load_error); - if (wrap) { + if (wrap && need_esc_node(result)) { jl_exprargset(wrap, 0, result); jl_exprargset(wrap, 1, newctx.m); - result = wrap; + jl_exprargset(wrap, 2, lineinfo); + if (jl_is_expr(result) && ((jl_expr_t*)result)->head == jl_escape_sym) + result = jl_exprarg(result, 0); + else + result = wrap; } JL_GC_POP(); return result; diff --git a/src/ast.scm b/src/ast.scm index 88220c03a7aa6..87db8449b3992 100644 --- a/src/ast.scm +++ b/src/ast.scm @@ -479,12 +479,13 @@ (define (eq-sym? a b) (or (eq? a b) (and (ssavalue? a) (ssavalue? b) (eqv? (cdr a) (cdr b))))) -(define (blockify e) +(define (blockify e (lno #f)) + (set! lno (if lno (list lno) '())) (if (and (pair? e) (eq? (car e) 'block)) (if (null? (cdr e)) - `(block (null)) - e) - `(block ,e))) + `(block ,@lno (null)) + (if (null? 
lno) e `(block ,@lno ,@(cdr e)))) + `(block ,@lno ,e))) (define (make-var-info name) (list name '(core Any) 0)) (define vinfo:name car) diff --git a/src/jlfrontend.scm b/src/jlfrontend.scm index aefac6d102aea..d376bc27085ab 100644 --- a/src/jlfrontend.scm +++ b/src/jlfrontend.scm @@ -93,18 +93,38 @@ ;; lowering entry points +; find the first line number in this expression, before we might eliminate them +(define (first-lineno blk) + (cond ((not (pair? blk)) #f) + ((eq? (car blk) 'line) blk) + ((and (eq? (car blk) 'hygienic-scope) (pair? (cdddr blk)) (pair? (cadddr blk)) (eq? (car (cadddr blk)) 'line)) + (cadddr blk)) + ((memq (car blk) '(escape hygienic-scope)) + (first-lineno (cadr blk))) + ((memq (car blk) '(toplevel block)) + (let loop ((xs (cdr blk))) + (and (pair? xs) + (let ((elt (first-lineno (car xs)))) + (or elt (loop (cdr xs))))))) + (else #f))) + ;; return a lambda expression representing a thunk for a top-level expression ;; note: expansion of stuff inside module is delayed, so the contents obey ;; toplevel expansion order (don't expand until stuff before is evaluated). (define (expand-toplevel-expr-- e file line) - (let ((ex0 (julia-expand-macroscope e))) + (let ((lno (first-lineno e)) + (ex0 (julia-expand-macroscope e))) + (if (and lno (or (not (length= lno 3)) (not (atom? (caddr lno))))) (set! lno #f)) (if (toplevel-only-expr? ex0) - ex0 - (let* ((ex (julia-expand0 ex0 file line)) + (if (and (pair? e) (memq (car ex0) '(error incomplete))) + ex0 + (if lno `(toplevel ,lno ,ex0) ex0)) + (let* ((linenode (if (and lno (or (= line 0) (eq? file 'none))) lno `(line ,line ,file))) + (ex (julia-expand0 ex0 linenode)) (th (julia-expand1 `(lambda () () (scope-block - ,(blockify ex))) + ,(blockify ex lno))) file line))) (if (and (null? 
(cdadr (caddr th))) (and (length= (lam:body th) 2) diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index cac8c7b5228b9..df4e791e1fa10 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -5090,8 +5090,8 @@ f(x) = yt(x) (define *current-desugar-loc* #f) -(define (julia-expand0 ex file line) - (with-bindings ((*current-desugar-loc* `(line ,line ,file))) +(define (julia-expand0 ex lno) + (with-bindings ((*current-desugar-loc* lno)) (trycatch (expand-forms ex) (lambda (e) (if (and (pair? e) (eq? (car e) 'error)) @@ -5106,4 +5106,4 @@ f(x) = yt(x) (define (julia-expand ex (file 'none) (line 0)) (julia-expand1 (julia-expand0 - (julia-expand-macroscope ex) file line) file line)) + (julia-expand-macroscope ex) `(line ,line ,file)) file line)) diff --git a/src/macroexpand.scm b/src/macroexpand.scm index 2933ca4888c4e..14d1fe1c5ab94 100644 --- a/src/macroexpand.scm +++ b/src/macroexpand.scm @@ -448,7 +448,8 @@ ((hygienic-scope) ; TODO: move this lowering to resolve-scopes, instead of reimplementing it here badly (let ((parent-scope (cons (list env m) parent-scope)) (body (cadr e)) - (m (caddr e))) + (m (caddr e)) + (lno (cdddr e))) (resolve-expansion-vars-with-new-env body env m parent-scope inarg #t))) ((tuple) (cons (car e) @@ -574,7 +575,8 @@ ((eq? (car e) 'module) e) ((eq? (car e) 'hygienic-scope) (let ((form (cadr e)) ;; form is the expression returned from expand-macros - (modu (caddr e))) ;; m is the macro's def module + (modu (caddr e)) ;; m is the macro's def module + (lno (cdddr e))) ;; lno is (optionally) the line number node (resolve-expansion-vars form modu))) (else (map julia-expand-macroscopes- e)))) @@ -585,8 +587,9 @@ ((eq? 
(car e) 'hygienic-scope) (let ((parent-scope (list relabels parent-scope)) (body (cadr e)) - (m (caddr e))) - `(hygienic-scope ,(rename-symbolic-labels- (cadr e) (table) parent-scope) ,m))) + (m (caddr e)) + (lno (cdddr e))) + `(hygienic-scope ,(rename-symbolic-labels- (cadr e) (table) parent-scope) ,m ,@lno))) ((and (eq? (car e) 'escape) (not (null? parent-scope))) `(escape ,(apply rename-symbolic-labels- (cadr e) parent-scope))) ((or (eq? (car e) 'symbolicgoto) (eq? (car e) 'symboliclabel)) diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl index 392b736c09837..11bb6229ec0a1 100644 --- a/stdlib/Test/src/Test.jl +++ b/stdlib/Test/src/Test.jl @@ -27,7 +27,7 @@ export TestLogger, LogRecord using Random using Random: AbstractRNG, default_rng using InteractiveUtils: gen_call_with_extracted_types -using Base: typesplit +using Base: typesplit, remove_linenums! using Serialization: Serialization const DISPLAY_FAILED = ( @@ -500,19 +500,20 @@ macro test(ex, kws...) # Build the test expression test_expr!("@test", ex, kws...) - orig_ex = Expr(:inert, ex) result = get_test_result(ex, __source__) - return quote + ex = Expr(:inert, ex) + result = quote if $(length(skip) > 0 && esc(skip[1])) - record(get_testset(), Broken(:skipped, $orig_ex)) + record(get_testset(), Broken(:skipped, $ex)) else let _do = $(length(broken) > 0 && esc(broken[1])) ? do_broken_test : do_test - _do($result, $orig_ex) + _do($result, $ex) end end end + return result end """ @@ -540,10 +541,10 @@ Test Broken """ macro test_broken(ex, kws...) test_expr!("@test_broken", ex, kws...) - orig_ex = Expr(:inert, ex) result = get_test_result(ex, __source__) # code to call do_test with execution result and original expr - :(do_broken_test($result, $orig_ex)) + ex = Expr(:inert, ex) + return :(do_broken_test($result, $ex)) end """ @@ -570,9 +571,9 @@ Test Broken """ macro test_skip(ex, kws...) test_expr!("@test_skip", ex, kws...) 
- orig_ex = Expr(:inert, ex) - testres = :(Broken(:skipped, $orig_ex)) - :(record(get_testset(), $testres)) + ex = Expr(:inert, ex) + testres = :(Broken(:skipped, $ex)) + return :(record(get_testset(), $testres)) end # An internal function, called by the code generated by the @test @@ -660,7 +661,8 @@ function get_test_result(ex, source) $negate, )) else - testret = :(Returned($(esc(orig_ex)), nothing, $(QuoteNode(source)))) + ex = Expr(:block, source, esc(orig_ex)) + testret = :(Returned($ex, nothing, $(QuoteNode(source)))) end result = quote try @@ -670,7 +672,6 @@ function get_test_result(ex, source) Threw(_e, Base.current_exceptions(), $(QuoteNode(source))) end end - Base.remove_linenums!(result) result end @@ -759,9 +760,10 @@ In the final example, instead of matching a single string it could alternatively """ macro test_throws(extype, ex) orig_ex = Expr(:inert, ex) + ex = Expr(:block, __source__, esc(ex)) result = quote try - Returned($(esc(ex)), nothing, $(QuoteNode(__source__))) + Returned($ex, nothing, $(QuoteNode(__source__))) catch _e if $(esc(extype)) != InterruptException && _e isa InterruptException rethrow() @@ -769,8 +771,7 @@ macro test_throws(extype, ex) Threw(_e, nothing, $(QuoteNode(__source__))) end end - Base.remove_linenums!(result) - :(do_test_throws($result, $orig_ex, $(esc(extype)))) + return :(do_test_throws($result, $orig_ex, $(esc(extype)))) end const MACROEXPAND_LIKE = Symbol.(("@macroexpand", "@macroexpand1", "macroexpand")) @@ -1828,10 +1829,9 @@ function _inferred(ex, mod, allow = :(Union{})) ex = Expr(:call, GlobalRef(Test, :_materialize_broadcasted), farg, ex.args[2:end]...) 
end - Base.remove_linenums!(let ex = ex; + result = let ex = ex quote - let - allow = $(esc(allow)) + let allow = $(esc(allow)) allow isa Type || throw(ArgumentError("@inferred requires a type as second argument")) $(if any(a->(Meta.isexpr(a, :kw) || Meta.isexpr(a, :parameters)), ex.args) # Has keywords @@ -1855,7 +1855,8 @@ function _inferred(ex, mod, allow = :(Union{})) result end end - end) + end + return remove_linenums!(result) end function is_in_mods(m::Module, recursive::Bool, mods) diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl index 21b5b66142fbd..da5772744607d 100644 --- a/test/compiler/inference.jl +++ b/test/compiler/inference.jl @@ -3804,7 +3804,7 @@ end end end end - @test occursin("thunk from $(@__MODULE__) starting at $(@__FILE__):$((@__LINE__) - 5)", string(timingmod.children)) + @test occursin("thunk from $(@__MODULE__) starting at $(@__FILE__):$((@__LINE__) - 6)", string(timingmod.children)) # END LINE NUMBER SENSITIVITY # Recursive function diff --git a/test/deprecation_exec.jl b/test/deprecation_exec.jl index 5b465e05f0a12..61ffcc2a59ac6 100644 --- a/test/deprecation_exec.jl +++ b/test/deprecation_exec.jl @@ -8,8 +8,6 @@ using Test using Logging -using Base: remove_linenums! - module DeprecationTests # to test @deprecate f() = true diff --git a/test/docs.jl b/test/docs.jl index 6707278c53847..7f6ece4e76ab4 100644 --- a/test/docs.jl +++ b/test/docs.jl @@ -642,7 +642,7 @@ macro m1_11993() end macro m2_11993() - Symbol("@m1_11993") + esc(Symbol("@m1_11993")) end @doc "This should document @m1... 
since its the result of expansion" @m2_11993 diff --git a/test/errorshow.jl b/test/errorshow.jl index 94722b803865f..5c6d8e3bea08c 100644 --- a/test/errorshow.jl +++ b/test/errorshow.jl @@ -531,7 +531,7 @@ end ex = :(@nest2b 42) @test _macroexpand1(ex) != macroexpand(M,ex) @test _macroexpand1(_macroexpand1(ex)) == macroexpand(M, ex) - @test (@macroexpand1 @nest2b 42) == _macroexpand1(ex) + @test (@macroexpand1 @nest2b 42) == _macroexpand1(:(@nest2b 42)) end foo_9965(x::Float64; w=false) = x diff --git a/test/goto.jl b/test/goto.jl index 011ec32a851bd..e069058f38d52 100644 --- a/test/goto.jl +++ b/test/goto.jl @@ -87,7 +87,7 @@ end @test goto_test5_3() -@test Expr(:error, "goto from a try/finally block is not permitted") == +@test Expr(:error, "goto from a try/finally block is not permitted around $(@__FILE__):$(3 + @__LINE__)") == Meta.lower(@__MODULE__, quote function goto_test6() try diff --git a/test/syntax.jl b/test/syntax.jl index 8bba5f9205613..aa854bfa0d19b 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -3,6 +3,7 @@ # tests for parser and syntax lowering using Random +using Base: remove_linenums! 
import Base.Meta.ParseError @@ -38,13 +39,8 @@ end # issue #9704 let a = :a - @test :(try - catch $a - end) == :(try - catch a - end) - @test :(module $a end) == :(module a - end) + @test :(try catch $a end) == :(try catch a end) + @test :(module $a end) == :(module a end) end # string literals @@ -706,7 +702,7 @@ m1_exprs = get_expr_list(Meta.lower(@__MODULE__, quote @m1 end)) let low3 = Meta.lower(@__MODULE__, quote @m3 end) m3_exprs = get_expr_list(low3) ci = low3.args[1]::Core.CodeInfo - @test ci.codelocs == [3, 1] + @test ci.codelocs == [4, 2] @test is_return_ssavalue(m3_exprs[end]) end @@ -1186,10 +1182,13 @@ end @test Meta.parse("@Mdl.foo [1] + [2]") == Meta.parse("@Mdl.foo([1] + [2])") # issue #24289 +module M24289 macro m24289() :(global $(esc(:x24289)) = 1) end -@test (@macroexpand @m24289) == :(global x24289 = 1) +end +M24289.@m24289 +@test x24289 === 1 # parsing numbers with _ and . @test Meta.parse("1_2.3_4") == 12.34 @@ -1664,10 +1663,12 @@ end macro foo28244(sym) x = :(bar()) push!(x.args, Expr(sym)) - x + esc(x) +end +@test @macroexpand(@foo28244(kw)) == Expr(:call, :bar, Expr(:kw)) +let x = @macroexpand @foo28244(var"let") + @test Meta.lower(@__MODULE__, x) == Expr(:error, "malformed expression") end -@test (@macroexpand @foo28244(kw)) == Expr(:call, GlobalRef(@__MODULE__,:bar), Expr(:kw)) -@test eval(:(@macroexpand @foo28244($(Symbol("let"))))) == Expr(:error, "malformed expression") # #16356 @test_throws ParseError Meta.parse("0xapi") @@ -1932,8 +1933,8 @@ macro id28992(x) x end @test Meta.@lower(.+(a,b) = 0) == Expr(:error, "invalid function name \".+\"") @test Meta.@lower((.+)(a,b) = 0) == Expr(:error, "invalid function name \"(.+)\"") let m = @__MODULE__ - @test Meta.lower(m, :($m.@id28992(.+(a,b) = 0))) == Expr(:error, "invalid function name \"$(nameof(m)).:.+\"") - @test Meta.lower(m, :($m.@id28992((.+)(a,b) = 0))) == Expr(:error, "invalid function name \"(.$(nameof(m)).+)\"") + @test Meta.lower(m, :($m.@id28992(.+(a,b) = 0))) == 
Expr(:error, "invalid function name \"$(nameof(m)).:.+\" around $(@__FILE__):$(@__LINE__)") + @test Meta.lower(m, :($m.@id28992((.+)(a,b) = 0))) == Expr(:error, "invalid function name \"(.$(nameof(m)).+)\" around $(@__FILE__):$(@__LINE__)") end @test @id28992([1] .< [2] .< [3]) == [true] @test @id28992(2 ^ -2) == 0.25 @@ -2639,10 +2640,10 @@ import .TestImportAs.Mod2 as M2 end @testset "issue #37393" begin - @test :(for outer i = 1:3; end) == Expr(:for, Expr(:(=), Expr(:outer, :i), :(1:3)), :(;;)) + @test remove_linenums!(:(for outer i = 1:3; end)) == Expr(:for, Expr(:(=), Expr(:outer, :i), :(1:3)), :(;;)) i = :i - @test :(for outer $i = 1:3; end) == Expr(:for, Expr(:(=), Expr(:outer, :i), :(1:3)), :(;;)) - @test :(for outer = 1:3; end) == Expr(:for, Expr(:(=), :outer, :(1:3)), :(;;)) + @test remove_linenums!(:(for outer $i = 1:3; end)) == Expr(:for, Expr(:(=), Expr(:outer, :i), :(1:3)), :(;;)) + @test remove_linenums!(:(for outer = 1:3; end)) == Expr(:for, Expr(:(=), :outer, :(1:3)), :(;;)) # TIL that this is possible for outer $ i = 1:3 @test 1 $ 2 in 1:3 @@ -2900,13 +2901,13 @@ macro m_underscore_hygiene() return :(_ = 1) end -@test @macroexpand(@m_underscore_hygiene()) == :(_ = 1) +@test Meta.@lower(@m_underscore_hygiene()) === 1 macro m_begin_hygiene(a) return :($(esc(a))[begin]) end -@test @m_begin_hygiene([1, 2, 3]) == 1 +@test @m_begin_hygiene([1, 2, 3]) === 1 # issue 40258 @test "a $("b $("c")")" == "a b c" @@ -3226,8 +3227,14 @@ end @test Meta.parseatom("@foo", 1; filename="foo", lineno=7) == (Expr(:macrocall, :var"@foo", LineNumberNode(7, :foo)), 5) @test Meta.parseall("@foo"; filename="foo", lineno=3) == Expr(:toplevel, LineNumberNode(3, :foo), Expr(:macrocall, :var"@foo", LineNumberNode(3, :foo))) -let ex = :(const $(esc(:x)) = 1; (::typeof(2))() = $(esc(:x))) - @test macroexpand(Main, Expr(:var"hygienic-scope", ex, Main)).args[3].args[1] == :((::$(GlobalRef(Main, :typeof))(2))()) +module M43993 +function foo43993 end +const typeof = error +end +let ex 
= :(const $(esc(:x)) = 1; (::typeof($(esc(:foo43993))))() = $(esc(:x))) + Core.eval(M43993, Expr(:var"hygienic-scope", ex, Core)) + @test M43993.x === 1 + @test invokelatest(M43993.foo43993) === 1 end struct Foo44013 From 229269badb899cfcb52ad3e2103058560cec5573 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Thu, 25 May 2023 14:37:20 -0300 Subject: [PATCH 063/290] Don't permalloc the pkgimgs, but with an option (#49940) --- base/options.jl | 1 + src/jloptions.c | 12 ++++++++++++ src/jloptions.h | 1 + src/staticdata.c | 29 +++++++++++++++++++---------- test/precompile.jl | 25 ++++++++++++++----------- 5 files changed, 47 insertions(+), 21 deletions(-) diff --git a/base/options.jl b/base/options.jl index 23a3dbc802b5f..fb043672dc19a 100644 --- a/base/options.jl +++ b/base/options.jl @@ -54,6 +54,7 @@ struct JLOptions rr_detach::Int8 strip_metadata::Int8 strip_ir::Int8 + permalloc_pkgimg::Int8 heap_size_hint::UInt64 end diff --git a/src/jloptions.c b/src/jloptions.c index 4c0b59f811643..7a622f117f1b1 100644 --- a/src/jloptions.c +++ b/src/jloptions.c @@ -87,6 +87,7 @@ JL_DLLEXPORT void jl_init_options(void) 0, // rr-detach 0, // strip-metadata 0, // strip-ir + 0, // permalloc_pkgimg 0, // heap-size-hint }; jl_options_initialized = 1; @@ -209,6 +210,7 @@ static const char opts_hidden[] = " --trace-compile={stderr,name}\n" " Print precompile statements for methods compiled during execution or save to a path\n" " --image-codegen Force generate code in imaging mode\n" + " --permalloc-pkgimg={yes|no*} Copy the data section of package images into memory\n" ; JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) @@ -254,6 +256,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) opt_strip_ir, opt_heap_size_hint, opt_gc_threads, + opt_permalloc_pkgimg }; static const char* const shortopts = "+vhqH:e:E:L:J:C:it:p:O:g:"; static const struct option longopts[] = { @@ -313,6 +316,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) { "rr-detach", 
no_argument, 0, opt_rr_detach }, { "strip-metadata", no_argument, 0, opt_strip_metadata }, { "strip-ir", no_argument, 0, opt_strip_ir }, + { "permalloc-pkgimg",required_argument, 0, opt_permalloc_pkgimg }, { "heap-size-hint", required_argument, 0, opt_heap_size_hint }, { 0, 0, 0, 0 } }; @@ -827,6 +831,14 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) jl_errorf("julia: --gcthreads=; n must be an integer >= 1"); jl_options.ngcthreads = (int16_t)ngcthreads; break; + case opt_permalloc_pkgimg: + if (!strcmp(optarg,"yes")) + jl_options.permalloc_pkgimg = 1; + else if (!strcmp(optarg,"no")) + jl_options.permalloc_pkgimg = 0; + else + jl_errorf("julia: invalid argument to --permalloc-pkgimg={yes|no} (%s)", optarg); + break; default: jl_errorf("julia: unhandled option -- %c\n" "This is a bug, please report it.", c); diff --git a/src/jloptions.h b/src/jloptions.h index c44a8cfe05770..93f6d321f38d6 100644 --- a/src/jloptions.h +++ b/src/jloptions.h @@ -58,6 +58,7 @@ typedef struct { int8_t rr_detach; int8_t strip_metadata; int8_t strip_ir; + int8_t permalloc_pkgimg; uint64_t heap_size_hint; } jl_options_t; diff --git a/src/staticdata.c b/src/staticdata.c index 435148581f4fc..df080bc68c88f 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -71,6 +71,7 @@ External links: */ #include #include +#include #include // printf #include // PRIxPTR @@ -3364,7 +3365,7 @@ static jl_value_t *jl_validate_cache_file(ios_t *f, jl_array_t *depmods, uint64_ } // TODO?: refactor to make it easier to create the "package inspector" -static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *image, jl_array_t *depmods, int completeinfo, const char *pkgname) +static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *image, jl_array_t *depmods, int completeinfo, const char *pkgname, bool needs_permalloc) { JL_TIMING(LOAD_IMAGE, LOAD_Pkgimg); jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, pkgname); @@ -3377,7 +3378,7 @@ static jl_value_t 
*jl_restore_package_image_from_stream(ios_t *f, jl_image_t *im return verify_fail; assert(datastartpos > 0 && datastartpos < dataendpos); - + needs_permalloc = jl_options.permalloc_pkgimg || needs_permalloc; jl_value_t *restored = NULL; jl_array_t *init_order = NULL, *extext_methods = NULL, *new_specializations = NULL, *method_roots_list = NULL, *ext_targets = NULL, *edges = NULL; jl_svec_t *cachesizes_sv = NULL; @@ -3389,14 +3390,22 @@ static jl_value_t *jl_restore_package_image_from_stream(ios_t *f, jl_image_t *im ios_bufmode(f, bm_none); JL_SIGATOMIC_BEGIN(); size_t len = dataendpos - datastartpos; - char *sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0); + char *sysimg; + bool success = !needs_permalloc; ios_seek(f, datastartpos); - if (ios_readall(f, sysimg, len) != len || jl_crc32c(0, sysimg, len) != (uint32_t)checksum) { - restored = jl_get_exceptionf(jl_errorexception_type, "Error reading system image file."); + if (needs_permalloc) + sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0); + else + sysimg = &f->buf[f->bpos]; + if (needs_permalloc) + success = ios_readall(f, sysimg, len) == len; + if (!success || jl_crc32c(0, sysimg, len) != (uint32_t)checksum) { + restored = jl_get_exceptionf(jl_errorexception_type, "Error reading package image file."); JL_SIGATOMIC_END(); } else { - ios_close(f); + if (needs_permalloc) + ios_close(f); ios_static_buffer(f, sysimg, len); pkgcachesizes cachesizes; jl_restore_system_image_from_stream_(f, image, depmods, checksum, (jl_array_t**)&restored, &init_order, &extext_methods, &new_specializations, &method_roots_list, &ext_targets, &edges, &base, &ccallable_list, &cachesizes); @@ -3442,11 +3451,11 @@ static void jl_restore_system_image_from_stream(ios_t *f, jl_image_t *image, uin jl_restore_system_image_from_stream_(f, image, NULL, checksum | ((uint64_t)0xfdfcfbfa << 32), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); } -JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(const char *buf, jl_image_t *image, 
size_t sz, jl_array_t *depmods, int completeinfo, const char *pkgname) +JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(const char *buf, jl_image_t *image, size_t sz, jl_array_t *depmods, int completeinfo, const char *pkgname, bool needs_permalloc) { ios_t f; ios_static_buffer(&f, (char*)buf, sz); - jl_value_t *ret = jl_restore_package_image_from_stream(&f, image, depmods, completeinfo, pkgname); + jl_value_t *ret = jl_restore_package_image_from_stream(&f, image, depmods, completeinfo, pkgname, needs_permalloc); ios_close(&f); return ret; } @@ -3459,7 +3468,7 @@ JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *d "Cache file \"%s\" not found.\n", fname); } jl_image_t pkgimage = {}; - jl_value_t *ret = jl_restore_package_image_from_stream(&f, &pkgimage, depmods, completeinfo, pkgname); + jl_value_t *ret = jl_restore_package_image_from_stream(&f, &pkgimage, depmods, completeinfo, pkgname, true); ios_close(&f); return ret; } @@ -3530,7 +3539,7 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle); - jl_value_t* mod = jl_restore_incremental_from_buf(pkgimg_data, &pkgimage, *plen, depmods, completeinfo, pkgname); + jl_value_t* mod = jl_restore_incremental_from_buf(pkgimg_data, &pkgimage, *plen, depmods, completeinfo, pkgname, false); return mod; } diff --git a/test/precompile.jl b/test/precompile.jl index 9ed3654437675..de3510d49118d 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -339,17 +339,20 @@ precompile_test_harness(false) do dir cachedir = joinpath(dir, "compiled", "v$(VERSION.major).$(VERSION.minor)") cachedir2 = joinpath(dir2, "compiled", "v$(VERSION.major).$(VERSION.minor)") cachefile = joinpath(cachedir, "$Foo_module.ji") - if Base.JLOptions().use_pkgimages == 1 - ocachefile = Base.ocachefile_from_cachefile(cachefile) - else - ocachefile = nothing - end - # use _require_from_serialized to ensure that the test fails if 
- # the module doesn't reload from the image: - @test_warn "@ccallable was already defined for this method name" begin - @test_logs (:warn, "Replacing module `$Foo_module`") begin - m = Base._require_from_serialized(Base.PkgId(Foo), cachefile, ocachefile) - @test isa(m, Module) + do_pkgimg = Base.JLOptions().use_pkgimages == 1 && Base.JLOptions().permalloc_pkgimg == 1 + if do_pkgimg || Base.JLOptions().use_pkgimages == 0 + if do_pkgimg + ocachefile = Base.ocachefile_from_cachefile(cachefile) + else + ocachefile = nothing + end + # use _require_from_serialized to ensure that the test fails if + # the module doesn't reload from the image: + @test_warn "@ccallable was already defined for this method name" begin + @test_logs (:warn, "Replacing module `$Foo_module`") begin + m = Base._require_from_serialized(Base.PkgId(Foo), cachefile, ocachefile) + @test isa(m, Module) + end end end From 91cd5213280ee6a699aaff74eb84d234e868f9d5 Mon Sep 17 00:00:00 2001 From: Cody Tapscott <84105208+topolarity@users.noreply.github.com> Date: Thu, 25 May 2023 21:55:35 -0400 Subject: [PATCH 064/290] Extend comparison lifting to `Core.ifelse` (#49882) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change extends our existing transformation for: φ(a,b) === Const(c) => φ(a === c, b === c) to perform the analogous transformation for `Core.ifelse`: Core.ifelse(cond, a, b) === Const(c) => Core.ifelse(cond, a === c, b === c) --- base/compiler/ssair/passes.jl | 204 ++++++++++++++++++++++------------ test/compiler/irpasses.jl | 31 +++++- 2 files changed, 162 insertions(+), 73 deletions(-) diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 4bfb5f3fcde56..44409cfbcd486 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -176,11 +176,12 @@ function find_def_for_use( return def, useblock, curblock end -function collect_leaves(compact::IncrementalCompact, @nospecialize(val), 
@nospecialize(typeconstraint), 𝕃ₒ::AbstractLattice) +function collect_leaves(compact::IncrementalCompact, @nospecialize(val), @nospecialize(typeconstraint), 𝕃ₒ::AbstractLattice, + predecessors = ((@nospecialize(def), compact::IncrementalCompact) -> isa(def, PhiNode) ? def.values : nothing)) if isa(val, Union{OldSSAValue, SSAValue}) val, typeconstraint = simple_walk_constraint(compact, val, typeconstraint) end - return walk_to_defs(compact, val, typeconstraint, 𝕃ₒ) + return walk_to_defs(compact, val, typeconstraint, predecessors, 𝕃ₒ) end function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#), @@ -235,16 +236,21 @@ function simple_walk_constraint(compact::IncrementalCompact, @nospecialize(defss end """ - walk_to_defs(compact, val, typeconstraint) + walk_to_defs(compact, val, typeconstraint, predecessors) Starting at `val` walk use-def chains to get all the leaves feeding into this `val` -(pruning those leaves rules out by path conditions). +(pruning those leaves ruled out by path conditions). + +`predecessors(def, compact)` is a callback which should return the set of possible +predecessors for a "phi-like" node (PhiNode or Core.ifelse) or `nothing` otherwise. 
""" -function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospecialize(typeconstraint), 𝕃ₒ::AbstractLattice) - visited_phinodes = AnySSAValue[] - isa(defssa, AnySSAValue) || return Any[defssa], visited_phinodes +function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospecialize(typeconstraint), predecessors, 𝕃ₒ::AbstractLattice) + visited_philikes = AnySSAValue[] + isa(defssa, AnySSAValue) || return Any[defssa], visited_philikes def = compact[defssa][:inst] - isa(def, PhiNode) || return Any[defssa], visited_phinodes + if predecessors(def, compact) === nothing + return Any[defssa], visited_philikes + end visited_constraints = IdDict{AnySSAValue, Any}() worklist_defs = AnySSAValue[] worklist_constraints = Any[] @@ -256,12 +262,14 @@ function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospe typeconstraint = pop!(worklist_constraints) visited_constraints[defssa] = typeconstraint def = compact[defssa][:inst] - if isa(def, PhiNode) - push!(visited_phinodes, defssa) + values = predecessors(def, compact) + if values !== nothing + push!(visited_philikes, defssa) possible_predecessors = Int[] - for n in 1:length(def.edges) - isassigned(def.values, n) || continue - val = def.values[n] + + for n in 1:length(values) + isassigned(values, n) || continue + val = values[n] if is_old(compact, defssa) && isa(val, SSAValue) val = OldSSAValue(val.id) end @@ -270,8 +278,7 @@ function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospe push!(possible_predecessors, n) end for n in possible_predecessors - pred = def.edges[n] - val = def.values[n] + val = values[n] if is_old(compact, defssa) && isa(val, SSAValue) val = OldSSAValue(val.id) end @@ -306,7 +313,7 @@ function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospe push!(leaves, defssa) end end - return leaves, visited_phinodes + return leaves, visited_philikes end function record_immutable_preserve!(new_preserves::Vector{Any}, 
def::Expr, compact::IncrementalCompact) @@ -566,7 +573,13 @@ function lift_comparison_leaves!(@specialize(tfunc), val, typeconstraint = simple_walk_constraint(compact, val, typeconstraint) end isa(typeconstraint, Union) || return # bail out if there won't be a good chance for lifting - leaves, visited_phinodes = collect_leaves(compact, val, typeconstraint, 𝕃ₒ) + + predecessors = function (@nospecialize(def), compact::IncrementalCompact) + isa(def, PhiNode) && return def.values + is_known_call(def, Core.ifelse, compact) && return def.args[3:4] + return nothing + end + leaves, visited_philikes = collect_leaves(compact, val, typeconstraint, 𝕃ₒ, predecessors) length(leaves) ≤ 1 && return # bail out if we don't have multiple leaves # check if we can evaluate the comparison for each one of the leaves @@ -586,18 +599,51 @@ function lift_comparison_leaves!(@specialize(tfunc), # perform lifting lifted_val = perform_lifting!(compact, - visited_phinodes, cmp, lifting_cache, Bool, + visited_philikes, cmp, lifting_cache, Bool, lifted_leaves::LiftedLeaves, val, nothing)::LiftedValue compact[idx] = lifted_val.val end -struct LiftedPhi +struct IfElseCall + call::Expr +end + +# An intermediate data structure used for lifting expressions through a +# "phi-like" instruction (either a PhiNode or a call to Core.ifelse) +struct LiftedPhilike ssa::AnySSAValue - node::PhiNode + node::Union{PhiNode,IfElseCall} need_argupdate::Bool end +struct SkipToken end; const SKIP_TOKEN = SkipToken() + +function lifted_value(compact::IncrementalCompact, @nospecialize(old_node_ssa#=::AnySSAValue=#), @nospecialize(old_value), + lifted_philikes::Vector{LiftedPhilike}, lifted_leaves::LiftedLeaves, reverse_mapping::IdDict{AnySSAValue, Int}) + val = old_value + if is_old(compact, old_node_ssa) && isa(val, SSAValue) + val = OldSSAValue(val.id) + end + if isa(val, AnySSAValue) + val = simple_walk(compact, val) + end + if val in keys(lifted_leaves) + lifted_val = lifted_leaves[val] + lifted_val === nothing && 
return UNDEF_TOKEN + val = lifted_val.val + if isa(val, AnySSAValue) + callback = (@nospecialize(pi), @nospecialize(idx)) -> true + val = simple_walk(compact, val, callback) + end + return val + elseif isa(val, AnySSAValue) && val in keys(reverse_mapping) + return lifted_philikes[reverse_mapping[val]].ssa + else + return SKIP_TOKEN # Probably ignored by path condition, skip this + end +end + function is_old(compact, @nospecialize(old_node_ssa)) isa(old_node_ssa, OldSSAValue) && !is_pending(compact, old_node_ssa) && @@ -605,13 +651,13 @@ function is_old(compact, @nospecialize(old_node_ssa)) end function perform_lifting!(compact::IncrementalCompact, - visited_phinodes::Vector{AnySSAValue}, @nospecialize(cache_key), + visited_philikes::Vector{AnySSAValue}, @nospecialize(cache_key), lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}, @nospecialize(result_t), lifted_leaves::LiftedLeaves, @nospecialize(stmt_val), lazydomtree::Union{LazyDomtree,Nothing}) reverse_mapping = IdDict{AnySSAValue, Int}() - for id in 1:length(visited_phinodes) - reverse_mapping[visited_phinodes[id]] = id + for id in 1:length(visited_philikes) + reverse_mapping[visited_philikes[id]] = id end # Check if all the lifted leaves are the same @@ -636,7 +682,7 @@ function perform_lifting!(compact::IncrementalCompact, dominates_all = true if lazydomtree !== nothing domtree = get!(lazydomtree) - for item in visited_phinodes + for item in visited_philikes if !dominates_ssa(compact, domtree, the_leaf_val, item) dominates_all = false break @@ -649,64 +695,82 @@ function perform_lifting!(compact::IncrementalCompact, end # Insert PhiNodes - nphis = length(visited_phinodes) - lifted_phis = Vector{LiftedPhi}(undef, nphis) - for i = 1:nphis - item = visited_phinodes[i] + nphilikes = length(visited_philikes) + lifted_philikes = Vector{LiftedPhilike}(undef, nphilikes) + for i = 1:nphilikes + old_ssa = visited_philikes[i] + old_inst = compact[old_ssa] + old_node = old_inst[:inst]::Union{PhiNode,Expr} # FIXME 
this cache is broken somehow - # ckey = Pair{AnySSAValue, Any}(item, cache_key) + # ckey = Pair{AnySSAValue, Any}(old_ssa, cache_key) # cached = ckey in keys(lifting_cache) cached = false if cached ssa = lifting_cache[ckey] - lifted_phis[i] = LiftedPhi(ssa, compact[ssa][:inst]::PhiNode, false) + if isa(old_node, PhiNode) + lifted_philikes[i] = LiftedPhilike(ssa, old_node, false) + else + lifted_philikes[i] = LiftedPhilike(ssa, IfElseCall(old_node), false) + end continue end - n = PhiNode() - ssa = insert_node!(compact, item, effect_free(NewInstruction(n, result_t))) + if isa(old_node, PhiNode) + new_node = PhiNode() + ssa = insert_node!(compact, old_ssa, effect_free(NewInstruction(new_node, result_t))) + lifted_philikes[i] = LiftedPhilike(ssa, new_node, true) + else + @assert is_known_call(old_node, Core.ifelse, compact) + ifelse_func, condition, then_result, else_result = old_node.args + if is_old(compact, old_ssa) && isa(condition, SSAValue) + condition = OldSSAValue(condition.id) + end + + new_node = Expr(:call, ifelse_func, condition, then_result, else_result) + new_inst = NewInstruction(new_node, result_t, NoCallInfo(), old_inst[:line], old_inst[:flag]) + + ssa = insert_node!(compact, old_ssa, new_inst) + lifted_philikes[i] = LiftedPhilike(ssa, IfElseCall(new_node), true) + end # lifting_cache[ckey] = ssa - lifted_phis[i] = LiftedPhi(ssa, n, true) end # Fix up arguments - for i = 1:nphis - (old_node_ssa, lf) = visited_phinodes[i], lifted_phis[i] - old_node = compact[old_node_ssa][:inst]::PhiNode - new_node = lf.node - should_count = !isa(lf.ssa, OldSSAValue) || already_inserted(compact, lf.ssa) + for i = 1:nphilikes + (old_node_ssa, lf) = visited_philikes[i], lifted_philikes[i] lf.need_argupdate || continue - for i = 1:length(old_node.edges) - edge = old_node.edges[i] - isassigned(old_node.values, i) || continue - val = old_node.values[i] - if is_old(compact, old_node_ssa) && isa(val, SSAValue) - val = OldSSAValue(val.id) - end - if isa(val, AnySSAValue) - val 
= simple_walk(compact, val) - end - if val in keys(lifted_leaves) - push!(new_node.edges, edge) - lifted_val = lifted_leaves[val] - if lifted_val === nothing + should_count = !isa(lf.ssa, OldSSAValue) || already_inserted(compact, lf.ssa) + + lfnode = lf.node + if isa(lfnode, PhiNode) + old_node = compact[old_node_ssa][:inst]::PhiNode + new_node = lfnode + for i = 1:length(old_node.values) + isassigned(old_node.values, i) || continue + val = lifted_value(compact, old_node_ssa, old_node.values[i], + lifted_philikes, lifted_leaves, reverse_mapping) + val !== SKIP_TOKEN && push!(new_node.edges, old_node.edges[i]) + if val === UNDEF_TOKEN resize!(new_node.values, length(new_node.values)+1) - continue - end - val = lifted_val.val - if isa(val, AnySSAValue) - callback = (@nospecialize(pi), @nospecialize(idx)) -> true - val = simple_walk(compact, val, callback) + elseif val !== SKIP_TOKEN + should_count && _count_added_node!(compact, val) + push!(new_node.values, val) end - should_count && _count_added_node!(compact, val) - push!(new_node.values, val) - elseif isa(val, AnySSAValue) && val in keys(reverse_mapping) - push!(new_node.edges, edge) - newval = lifted_phis[reverse_mapping[val]].ssa - should_count && _count_added_node!(compact, newval) - push!(new_node.values, newval) - else - # Probably ignored by path condition, skip this end + elseif isa(lfnode, IfElseCall) + then_result, else_result = lfnode.call.args[3], lfnode.call.args[4] + + then_result = lifted_value(compact, old_node_ssa, then_result, + lifted_philikes, lifted_leaves, reverse_mapping) + else_result = lifted_value(compact, old_node_ssa, else_result, + lifted_philikes, lifted_leaves, reverse_mapping) + + should_count && _count_added_node!(compact, then_result) + should_count && _count_added_node!(compact, else_result) + + @assert then_result !== SKIP_TOKEN && then_result !== UNDEF_TOKEN + @assert else_result !== SKIP_TOKEN && else_result !== UNDEF_TOKEN + + lfnode.call.args[3], lfnode.call.args[4] = 
then_result, else_result end end @@ -718,7 +782,7 @@ function perform_lifting!(compact::IncrementalCompact, if stmt_val in keys(lifted_leaves) return lifted_leaves[stmt_val] elseif isa(stmt_val, AnySSAValue) && stmt_val in keys(reverse_mapping) - return LiftedValue(lifted_phis[reverse_mapping[stmt_val]].ssa) + return LiftedValue(lifted_philikes[reverse_mapping[stmt_val]].ssa) end return stmt_val # N.B. should never happen @@ -1006,7 +1070,7 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) field = try_compute_fieldidx_stmt(compact, stmt, struct_typ) field === nothing && continue - leaves, visited_phinodes = collect_leaves(compact, val, struct_typ, 𝕃ₒ) + leaves, visited_philikes = collect_leaves(compact, val, struct_typ, 𝕃ₒ) isempty(leaves) && continue result_t = argextype(SSAValue(idx), compact) @@ -1019,7 +1083,7 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) end val = perform_lifting!(compact, - visited_phinodes, field, lifting_cache, result_t, lifted_leaves, val, lazydomtree) + visited_philikes, field, lifting_cache, result_t, lifted_leaves, val, lazydomtree) # Insert the undef check if necessary if any_undef && val === nothing diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl index c704a8cf1c434..f3c74df884cad 100644 --- a/test/compiler/irpasses.jl +++ b/test/compiler/irpasses.jl @@ -537,7 +537,7 @@ end # comparison lifting # ================== -let # lifting `===` +let # lifting `===` through PhiNode src = code_typed1((Bool,Int,)) do c, x y = c ? x : nothing y === nothing # => ϕ(false, true) @@ -557,7 +557,15 @@ let # lifting `===` end end -let # lifting `isa` +let # lifting `===` through Core.ifelse + src = code_typed1((Bool,Int,)) do c, x + y = Core.ifelse(c, x, nothing) + y === nothing # => Core.ifelse(c, false, true) + end + @test count(iscall((src, ===)), src.code) == 0 +end + +let # lifting `isa` through PhiNode src = code_typed1((Bool,Int,)) do c, x y = c ? 
x : nothing isa(y, Int) # => ϕ(true, false) @@ -580,7 +588,16 @@ let # lifting `isa` end end -let # lifting `isdefined` +let # lifting `isa` through Core.ifelse + src = code_typed1((Bool,Int,)) do c, x + y = Core.ifelse(c, x, nothing) + isa(y, Int) # => Core.ifelse(c, true, false) + end + @test count(iscall((src, isa)), src.code) == 0 +end + + +let # lifting `isdefined` through PhiNode src = code_typed1((Bool,Some{Int},)) do c, x y = c ? x : nothing isdefined(y, 1) # => ϕ(true, false) @@ -603,6 +620,14 @@ let # lifting `isdefined` end end +let # lifting `isdefined` through Core.ifelse + src = code_typed1((Bool,Some{Int},)) do c, x + y = Core.ifelse(c, x, nothing) + isdefined(y, 1) # => Core.ifelse(c, true, false) + end + @test count(iscall((src, isdefined)), src.code) == 0 +end + mutable struct Foo30594; x::Float64; end Base.copy(x::Foo30594) = Foo30594(x.x) function add!(p::Foo30594, off::Foo30594) From 4ef9fb193dce16cc185551db32a423448fd1b364 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Fri, 26 May 2023 14:25:37 +0200 Subject: [PATCH 065/290] When adopting a thread, spin until GC isn't running. 
(#49934) Co-authored-by: Valentin Churavy --- src/gc.c | 11 +++++------ src/julia_internal.h | 1 + src/safepoint.c | 8 ++++++++ src/threading.c | 18 ++++++++++++++---- 4 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/gc.c b/src/gc.c index f3a57fffe09a8..a7ff7c6c06201 100644 --- a/src/gc.c +++ b/src/gc.c @@ -3100,7 +3100,7 @@ static void sweep_finalizer_list(arraylist_t *list) } // collector entry point and control -static _Atomic(uint32_t) jl_gc_disable_counter = 1; +_Atomic(uint32_t) jl_gc_disable_counter = 1; JL_DLLEXPORT int jl_gc_enable(int on) { @@ -3497,7 +3497,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; - if (jl_atomic_load_relaxed(&jl_gc_disable_counter)) { + if (jl_atomic_load_acquire(&jl_gc_disable_counter)) { size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval; jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); @@ -3508,11 +3508,10 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) int8_t old_state = jl_atomic_load_relaxed(&ptls->gc_state); jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING); - // `jl_safepoint_start_gc()` makes sure only one thread can - // run the GC. + // `jl_safepoint_start_gc()` makes sure only one thread can run the GC. uint64_t t0 = jl_hrtime(); if (!jl_safepoint_start_gc()) { - // Multithread only. See assertion in `safepoint.c` + // either another thread is running GC, or the GC got disabled just now. 
jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING); return; } @@ -3549,7 +3548,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) gc_invoke_callbacks(jl_gc_cb_pre_gc_t, gc_cblist_pre_gc, (collection)); - if (!jl_atomic_load_relaxed(&jl_gc_disable_counter)) { + if (!jl_atomic_load_acquire(&jl_gc_disable_counter)) { JL_LOCK_NOGC(&finalizers_lock); // all the other threads are stopped, so this does not make sense, right? otherwise, failing that, this seems like plausibly a deadlock #ifndef __clang_gcanalyzer__ if (_jl_gc_collect(ptls, collection)) { diff --git a/src/julia_internal.h b/src/julia_internal.h index c518a348cb5fd..e649819f4a3e0 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -884,6 +884,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr) return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 3; } extern _Atomic(uint32_t) jl_gc_running; +extern _Atomic(uint32_t) jl_gc_disable_counter; // All the functions are safe to be called from within a signal handler // provided that the thread will not be interrupted by another asynchronous // signal. diff --git a/src/safepoint.c b/src/safepoint.c index d64df084b0349..c6f9a42059d1a 100644 --- a/src/safepoint.c +++ b/src/safepoint.c @@ -124,6 +124,14 @@ int jl_safepoint_start_gc(void) jl_safepoint_wait_gc(); return 0; } + // Foreign thread adoption disables the GC and waits for it to finish, however, that may + // introduce a race between it and this thread checking if the GC is enabled and only + // then setting jl_gc_running. To avoid that, check again now that we won that race. 
+ if (jl_atomic_load_acquire(&jl_gc_disable_counter)) { + jl_atomic_store_release(&jl_gc_running, 0); + uv_mutex_unlock(&safepoint_lock); + return 0; + } jl_safepoint_enable(1); jl_safepoint_enable(2); uv_mutex_unlock(&safepoint_lock); diff --git a/src/threading.c b/src/threading.c index 83d2e942e960f..691fa931f1a3f 100644 --- a/src/threading.c +++ b/src/threading.c @@ -406,18 +406,28 @@ jl_ptls_t jl_init_threadtls(int16_t tid) return ptls; } -JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void) JL_NOTSAFEPOINT_LEAVE -{ +JL_DLLEXPORT jl_gcframe_t **jl_adopt_thread(void) +{ + // `jl_init_threadtls` puts us in a GC unsafe region, so ensure GC isn't running. + // we can't use a normal safepoint because we don't have signal handlers yet. + // we also can't use jl_safepoint_wait_gc because that assumes we're in a task. + jl_atomic_fetch_add(&jl_gc_disable_counter, 1); + while (jl_atomic_load_acquire(&jl_gc_running)) { + jl_cpu_pause(); + } + // this check is coupled with the one in `jl_safepoint_wait_gc`, where we observe if a + // foreign thread has asked to disable the GC, guaranteeing the order of events. 
+ // initialize this thread (assign tid, create heap, set up root task) jl_ptls_t ptls = jl_init_threadtls(-1); void *stack_lo, *stack_hi; jl_init_stack_limits(0, &stack_lo, &stack_hi); - (void)jl_gc_unsafe_enter(ptls); // warning: this changes `jl_current_task`, so be careful not to call that from this function - jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi); + jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi); // assumes the GC is disabled JL_GC_PROMISE_ROOTED(ct); uv_random(NULL, NULL, &ct->rngState, sizeof(ct->rngState), 0, NULL); + jl_atomic_fetch_add(&jl_gc_disable_counter, -1); return &ct->gcstack; } From 666d325999118c7840cc681319a62998b345069b Mon Sep 17 00:00:00 2001 From: Thomas Graham <132324050+tjgrhm@users.noreply.github.com> Date: Fri, 26 May 2023 12:53:34 +0000 Subject: [PATCH 066/290] Fix conversion of infinities to integers (#49624) Fixes #49422 --- base/float.jl | 4 ++-- test/numbers.jl | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/base/float.jl b/base/float.jl index fad7146655ade..eb30c087c0f8c 100644 --- a/base/float.jl +++ b/base/float.jl @@ -875,7 +875,7 @@ for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UIn end end function (::Type{$Ti})(x::$Tf) - if ($(Tf(typemin(Ti))) <= x <= $(Tf(typemax(Ti)))) && (round(x, RoundToZero) == x) + if ($(Tf(typemin(Ti))) <= x <= $(Tf(typemax(Ti)))) && isinteger(x) return unsafe_trunc($Ti,x) else throw(InexactError($(Expr(:quote,Ti.name.name)), $Ti, x)) @@ -896,7 +896,7 @@ for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UIn end end function (::Type{$Ti})(x::$Tf) - if ($(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti)))) && (round(x, RoundToZero) == x) + if ($(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti)))) && isinteger(x) return unsafe_trunc($Ti,x) else throw(InexactError($(Expr(:quote,Ti.name.name)), $Ti, x)) diff --git a/test/numbers.jl b/test/numbers.jl index efb2702aff1c2..b1523d690de99 100644 --- 
a/test/numbers.jl +++ b/test/numbers.jl @@ -2264,6 +2264,17 @@ end @test_throws InexactError convert(Int16, big(2)^100) @test_throws InexactError convert(Int, typemax(UInt)) +@testset "infinity to integer conversion" begin + for T in ( + UInt8, UInt16, UInt32, UInt64, UInt128, Int8, Int16, Int32, Int64, Int128, BigInt + ) + for S in (Float16, Float32, Float64, BigFloat) + @test_throws InexactError convert(T, typemin(S)) + @test_throws InexactError convert(T, typemax(S)) + end + end +end + @testset "issue #9789" begin @test_throws InexactError convert(Int8, typemax(UInt64)) @test_throws InexactError convert(Int16, typemax(UInt64)) From 23e0b2d319af2472b12d9325107458776ce637aa Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Fri, 26 May 2023 19:09:12 +0200 Subject: [PATCH 067/290] Hide safepoint prologue and CFI instructions from reflection. (#49948) --- src/codegen-stubs.c | 6 +- src/disasm.cpp | 11 ++-- src/jitlayers.cpp | 8 +-- src/julia_internal.h | 6 +- stdlib/InteractiveUtils/src/codeview.jl | 81 +++++++++++++++---------- test/compiler/codegen.jl | 2 +- test/reflection.jl | 5 +- 7 files changed, 69 insertions(+), 50 deletions(-) diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c index 1c52f969a11f7..365ddec47df42 100644 --- a/src/codegen-stubs.c +++ b/src/codegen-stubs.c @@ -18,7 +18,7 @@ JL_DLLEXPORT void jl_get_llvm_external_fns_fallback(void *native_code, arraylist JL_DLLEXPORT void jl_extern_c_fallback(jl_function_t *f, jl_value_t *rt, jl_value_t *argt, char *name) UNAVAILABLE JL_DLLEXPORT jl_value_t *jl_dump_method_asm_fallback(jl_method_instance_t *linfo, size_t world, - char raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE + char emit_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE JL_DLLEXPORT jl_value_t *jl_dump_function_ir_fallback(jl_llvmf_dump_t *dump, char strip_ir_metadata, char dump_module, const char *debuginfo) UNAVAILABLE JL_DLLEXPORT void 
jl_get_llvmf_defn_fallback(jl_llvmf_dump_t *dump, jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params) UNAVAILABLE @@ -83,9 +83,9 @@ JL_DLLEXPORT void jl_dump_llvm_opt_fallback(void *s) { } -JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm_fallback(uint64_t fptr, char raw_mc, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE +JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm_fallback(uint64_t fptr, char emit_mc, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE -JL_DLLEXPORT jl_value_t *jl_dump_function_asm_fallback(jl_llvmf_dump_t* dump, char raw_mc, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE +JL_DLLEXPORT jl_value_t *jl_dump_function_asm_fallback(jl_llvmf_dump_t* dump, char emit_mc, const char* asm_variant, const char *debuginfo, char binary, char raw) UNAVAILABLE JL_DLLEXPORT void jl_get_function_id_fallback(void *native_code, jl_code_instance_t *ncode, int32_t *func_idx, int32_t *specfunc_idx) UNAVAILABLE diff --git a/src/disasm.cpp b/src/disasm.cpp index 96595d4381987..9414c0a2a065d 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -575,7 +575,7 @@ static uint64_t compute_obj_symsize(object::SectionRef Section, uint64_t offset) // print a native disassembly for the function starting at fptr extern "C" JL_DLLEXPORT_CODEGEN -jl_value_t *jl_dump_fptr_asm_impl(uint64_t fptr, char raw_mc, const char* asm_variant, const char *debuginfo, char binary) +jl_value_t *jl_dump_fptr_asm_impl(uint64_t fptr, char emit_mc, const char* asm_variant, const char *debuginfo, char binary) { assert(fptr != 0); std::string code; @@ -600,7 +600,7 @@ jl_value_t *jl_dump_fptr_asm_impl(uint64_t fptr, char raw_mc, const char* asm_va return jl_pchar_to_string("", 0); } - if (raw_mc) { + if (emit_mc) { return (jl_value_t*)jl_pchar_to_array((char*)fptr, symsize); } @@ -1203,7 +1203,7 @@ class LineNumberPrinterHandler : public AsmPrinterHandler { // get a native assembly for 
llvm::Function extern "C" JL_DLLEXPORT_CODEGEN -jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char raw_mc, const char* asm_variant, const char *debuginfo, char binary) +jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char emit_mc, const char* asm_variant, const char *debuginfo, char binary, char raw) { // precise printing via IR assembler SmallVector ObjBufferSV; @@ -1217,12 +1217,15 @@ jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char raw_mc, const if (f != &f2 && !f->isDeclaration()) f2.deleteBody(); } + // add a nounwind attribute to get rid of cfi instructions + if (!raw) + f->addFnAttr(Attribute::NoUnwind); }); auto TMBase = jl_ExecutionEngine->cloneTargetMachine(); LLVMTargetMachine *TM = static_cast(TMBase.get()); legacy::PassManager PM; addTargetPasses(&PM, TM->getTargetTriple(), TM->getTargetIRAnalysis()); - if (raw_mc) { + if (emit_mc) { raw_svector_ostream obj_OS(ObjBufferSV); if (TM->addPassesToEmitFile(PM, obj_OS, nullptr, CGFT_ObjectFile, false, nullptr)) return jl_an_empty_string; diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index fde7ba9e30130..780f5d91847e0 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -580,14 +580,14 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec) // get a native disassembly for a compiled method extern "C" JL_DLLEXPORT_CODEGEN jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world, - char raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary) + char emit_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary) { // printing via disassembly jl_code_instance_t *codeinst = jl_generate_fptr(mi, world); if (codeinst) { uintptr_t fptr = (uintptr_t)jl_atomic_load_acquire(&codeinst->invoke); if (getwrapper) - return jl_dump_fptr_asm(fptr, raw_mc, asm_variant, debuginfo, binary); + return jl_dump_fptr_asm(fptr, emit_mc, asm_variant, debuginfo, binary); uintptr_t specfptr = 
(uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr); if (fptr == (uintptr_t)jl_fptr_const_return_addr && specfptr == 0) { // normally we prevent native code from being generated for these functions, @@ -635,7 +635,7 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world, } } if (specfptr != 0) - return jl_dump_fptr_asm(specfptr, raw_mc, asm_variant, debuginfo, binary); + return jl_dump_fptr_asm(specfptr, emit_mc, asm_variant, debuginfo, binary); } // whatever, that didn't work - use the assembler output instead @@ -643,7 +643,7 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world, jl_get_llvmf_defn(&llvmf_dump, mi, world, getwrapper, true, jl_default_cgparams); if (!llvmf_dump.F) return jl_an_empty_string; - return jl_dump_function_asm(&llvmf_dump, raw_mc, asm_variant, debuginfo, binary); + return jl_dump_function_asm(&llvmf_dump, emit_mc, asm_variant, debuginfo, binary, false); } CodeGenOpt::Level CodeGenOptLevelFor(int optlevel) diff --git a/src/julia_internal.h b/src/julia_internal.h index e649819f4a3e0..2a8c2f54fe116 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -1659,11 +1659,11 @@ typedef struct { } jl_llvmf_dump_t; JL_DLLIMPORT jl_value_t *jl_dump_method_asm(jl_method_instance_t *linfo, size_t world, - char raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary); + char emit_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary); JL_DLLIMPORT void jl_get_llvmf_defn(jl_llvmf_dump_t* dump, jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params); -JL_DLLIMPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, char raw_mc, const char* asm_variant, const char *debuginfo, char binary); +JL_DLLIMPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, char emit_mc, const char* asm_variant, const char *debuginfo, char binary); JL_DLLIMPORT jl_value_t *jl_dump_function_ir(jl_llvmf_dump_t *dump, char 
strip_ir_metadata, char dump_module, const char *debuginfo); -JL_DLLIMPORT jl_value_t *jl_dump_function_asm(jl_llvmf_dump_t *dump, char raw_mc, const char* asm_variant, const char *debuginfo, char binary); +JL_DLLIMPORT jl_value_t *jl_dump_function_asm(jl_llvmf_dump_t *dump, char emit_mc, const char* asm_variant, const char *debuginfo, char binary, char raw); JL_DLLIMPORT void *jl_create_native(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int policy, int imaging_mode, int cache, size_t world); JL_DLLIMPORT void jl_dump_native(void *native_code, diff --git a/stdlib/InteractiveUtils/src/codeview.jl b/stdlib/InteractiveUtils/src/codeview.jl index 29a64343b8370..9ce5be9706bac 100644 --- a/stdlib/InteractiveUtils/src/codeview.jl +++ b/stdlib/InteractiveUtils/src/codeview.jl @@ -167,10 +167,18 @@ const OC_MISMATCH_WARNING = """ # Printing code representations in IR and assembly + +function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrapper::Bool, + raw::Bool, dump_module::Bool, syntax::Symbol, + optimize::Bool, debuginfo::Symbol, binary::Bool) + params = CodegenParams(debug_info_kind=Cint(0), + safepoint_on_entry=raw) + _dump_function(f, t, native, wrapper, raw, dump_module, syntax, + optimize, debuginfo, binary, params) +end function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrapper::Bool, - strip_ir_metadata::Bool, dump_module::Bool, syntax::Symbol, - optimize::Bool, debuginfo::Symbol, binary::Bool, - params::CodegenParams=CodegenParams(debug_info_kind=Cint(0))) + raw::Bool, dump_module::Bool, syntax::Symbol, + optimize::Bool, debuginfo::Symbol, binary::Bool, params::CodegenParams) ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions") if isa(f, Core.Builtin) throw(ArgumentError("argument is not a generic function")) @@ -180,21 +188,21 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe if !isa(f, 
Core.OpaqueClosure) world = Base.get_world_counter() match = Base._which(signature_type(f, t); world) - linfo = Core.Compiler.specialize_method(match) + mi = Core.Compiler.specialize_method(match) # TODO: use jl_is_cacheable_sig instead of isdispatchtuple - isdispatchtuple(linfo.specTypes) || (warning = GENERIC_SIG_WARNING) + isdispatchtuple(mi.specTypes) || (warning = GENERIC_SIG_WARNING) else world = UInt64(f.world) if Core.Compiler.is_source_inferred(f.source.source) # OC was constructed from inferred source. There's only one # specialization and we can't infer anything more precise either. world = f.source.primary_world - linfo = f.source.specializations::Core.MethodInstance + mi = f.source.specializations::Core.MethodInstance Core.Compiler.hasintersect(typeof(f).parameters[1], t) || (warning = OC_MISMATCH_WARNING) else - linfo = Core.Compiler.specialize_method(f.source, Tuple{typeof(f.captures), t.parameters...}, Core.svec()) - actual = isdispatchtuple(linfo.specTypes) - isdispatchtuple(linfo.specTypes) || (warning = GENERIC_SIG_WARNING) + mi = Core.Compiler.specialize_method(f.source, Tuple{typeof(f.captures), t.parameters...}, Core.svec()) + actual = isdispatchtuple(mi.specTypes) + isdispatchtuple(mi.specTypes) || (warning = GENERIC_SIG_WARNING) end end # get the code for it @@ -208,21 +216,25 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe throw(ArgumentError("'syntax' must be either :intel or :att")) end if dump_module - str = _dump_function_linfo_native(linfo, world, wrapper, syntax, debuginfo, binary, params) + # we want module metadata, so use LLVM to generate assembly output + str = _dump_function_native_assembly(mi, world, wrapper, syntax, debuginfo, binary, raw, params) else - str = _dump_function_linfo_native(linfo, world, wrapper, syntax, debuginfo, binary) + # if we don't want the module metadata, just disassemble what our JIT has + str = _dump_function_native_disassembly(mi, world, wrapper, syntax, debuginfo, 
binary) end else - str = _dump_function_linfo_llvm(linfo, world, wrapper, strip_ir_metadata, dump_module, optimize, debuginfo, params) + str = _dump_function_llvm(mi, world, wrapper, !raw, dump_module, optimize, debuginfo, params) end str = warning * str return str end -function _dump_function_linfo_native(linfo::Core.MethodInstance, world::UInt, wrapper::Bool, syntax::Symbol, debuginfo::Symbol, binary::Bool) - str = ccall(:jl_dump_method_asm, Ref{String}, - (Any, UInt, Bool, Bool, Ptr{UInt8}, Ptr{UInt8}, Bool), - linfo, world, false, wrapper, syntax, debuginfo, binary) +function _dump_function_native_disassembly(mi::Core.MethodInstance, world::UInt, + wrapper::Bool, syntax::Symbol, + debuginfo::Symbol, binary::Bool) + str = @ccall jl_dump_method_asm(mi::Any, world::UInt, false::Bool, wrapper::Bool, + syntax::Ptr{UInt8}, debuginfo::Ptr{UInt8}, + binary::Bool)::Ref{String} return str end @@ -231,27 +243,30 @@ struct LLVMFDump f::Ptr{Cvoid} # opaque end -function _dump_function_linfo_native(linfo::Core.MethodInstance, world::UInt, wrapper::Bool, syntax::Symbol, debuginfo::Symbol, binary::Bool, params::CodegenParams) +function _dump_function_native_assembly(mi::Core.MethodInstance, world::UInt, + wrapper::Bool, syntax::Symbol, debuginfo::Symbol, + binary::Bool, raw::Bool, params::CodegenParams) llvmf_dump = Ref{LLVMFDump}() - ccall(:jl_get_llvmf_defn, Cvoid, (Ptr{LLVMFDump}, Any, UInt, Bool, Bool, CodegenParams), llvmf_dump, linfo, world, wrapper, true, params) + @ccall jl_get_llvmf_defn(llvmf_dump::Ptr{LLVMFDump},mi::Any, world::UInt, wrapper::Bool, + true::Bool, params::CodegenParams)::Cvoid llvmf_dump[].f == C_NULL && error("could not compile the specified method") - str = ccall(:jl_dump_function_asm, Ref{String}, - (Ptr{LLVMFDump}, Bool, Ptr{UInt8}, Ptr{UInt8}, Bool), - llvmf_dump, false, syntax, debuginfo, binary) + str = @ccall jl_dump_function_asm(llvmf_dump::Ptr{LLVMFDump}, false::Bool, + syntax::Ptr{UInt8}, debuginfo::Ptr{UInt8}, + binary::Bool, 
raw::Bool)::Ref{String} return str end -function _dump_function_linfo_llvm( - linfo::Core.MethodInstance, world::UInt, wrapper::Bool, +function _dump_function_llvm( + mi::Core.MethodInstance, world::UInt, wrapper::Bool, strip_ir_metadata::Bool, dump_module::Bool, optimize::Bool, debuginfo::Symbol, params::CodegenParams) llvmf_dump = Ref{LLVMFDump}() - ccall(:jl_get_llvmf_defn, Cvoid, (Ptr{LLVMFDump}, Any, UInt, Bool, Bool, CodegenParams), llvmf_dump, linfo, world, wrapper, optimize, params) + @ccall jl_get_llvmf_defn(llvmf_dump::Ptr{LLVMFDump}, mi::Any, world::UInt, + wrapper::Bool, optimize::Bool, params::CodegenParams)::Cvoid llvmf_dump[].f == C_NULL && error("could not compile the specified method") - str = ccall(:jl_dump_function_ir, Ref{String}, - (Ptr{LLVMFDump}, Bool, Bool, Ptr{UInt8}), - llvmf_dump, strip_ir_metadata, dump_module, debuginfo) + str = @ccall jl_dump_function_ir(llvmf_dump::Ptr{LLVMFDump}, strip_ir_metadata::Bool, + dump_module::Bool, debuginfo::Ptr{UInt8})::Ref{String} return str end @@ -268,7 +283,7 @@ Keyword argument `debuginfo` may be one of source (default) or none, to specify """ function code_llvm(io::IO, @nospecialize(f), @nospecialize(types), raw::Bool, dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default) - d = _dump_function(f, types, false, false, !raw, dump_module, :intel, optimize, debuginfo, false) + d = _dump_function(f, types, false, false, raw, dump_module, :intel, optimize, debuginfo, false) if highlighting[:llvm] && get(io, :color, false)::Bool print_llvm(io, d) else @@ -290,20 +305,22 @@ generic function and type signature to `io`. * Specify verbosity of code comments by setting `debuginfo` to `:source` (default) or `:none`. * If `binary` is `true`, also print the binary machine code for each instruction precedented by an abbreviated address. * If `dump_module` is `false`, do not print metadata such as rodata or directives. 
+* If `raw` is `false`, uninteresting instructions (like the safepoint function prologue) are elided. See also: [`@code_native`](@ref), [`code_llvm`](@ref), [`code_typed`](@ref) and [`code_lowered`](@ref) """ function code_native(io::IO, @nospecialize(f), @nospecialize(types=Base.default_tt(f)); - dump_module::Bool=true, syntax::Symbol=:intel, debuginfo::Symbol=:default, binary::Bool=false) - d = _dump_function(f, types, true, false, false, dump_module, syntax, true, debuginfo, binary) + dump_module::Bool=true, syntax::Symbol=:intel, raw::Bool=false, + debuginfo::Symbol=:default, binary::Bool=false) + d = _dump_function(f, types, true, false, raw, dump_module, syntax, true, debuginfo, binary) if highlighting[:native] && get(io, :color, false)::Bool print_native(io, d) else print(io, d) end end -code_native(@nospecialize(f), @nospecialize(types=Base.default_tt(f)); dump_module::Bool=true, syntax::Symbol=:intel, debuginfo::Symbol=:default, binary::Bool=false) = - code_native(stdout, f, types; dump_module, syntax, debuginfo, binary) +code_native(@nospecialize(f), @nospecialize(types=Base.default_tt(f)); dump_module::Bool=true, syntax::Symbol=:intel, raw::Bool=false, debuginfo::Symbol=:default, binary::Bool=false) = + code_native(stdout, f, types; dump_module, syntax, raw, debuginfo, binary) code_native(::IO, ::Any, ::Symbol) = error("invalid code_native call") # resolve ambiguous call ## colorized IR and assembly printing diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl index 8a3949212ea16..c29f82bfd6008 100644 --- a/test/compiler/codegen.jl +++ b/test/compiler/codegen.jl @@ -18,7 +18,7 @@ end # The tests below assume a certain format and safepoint_on_entry=true breaks that. 
function get_llvm(@nospecialize(f), @nospecialize(t), raw=true, dump_module=false, optimize=true) params = Base.CodegenParams(safepoint_on_entry=false) - d = InteractiveUtils._dump_function(f, t, false, false, !raw, dump_module, :att, optimize, :none, false, params) + d = InteractiveUtils._dump_function(f, t, false, false, raw, dump_module, :att, optimize, :none, false, params) sprint(print, d) end diff --git a/test/reflection.jl b/test/reflection.jl index 0ae8cb3f9d393..c13e7d88d8cfd 100644 --- a/test/reflection.jl +++ b/test/reflection.jl @@ -909,10 +909,9 @@ _test_at_locals2(1,1,0.5f0) f31687_parent() = f31687_child(0) params = Base.CodegenParams() _dump_function(f31687_parent, Tuple{}, - #=native=#false, #=wrapper=#false, #=strip=#false, + #=native=#false, #=wrapper=#false, #=raw=#true, #=dump_module=#true, #=syntax=#:att, #=optimize=#false, :none, - #=binary=#false, - params) + #=binary=#false) end @test nameof(Any) === :Any From ba1391aa7c0daf6b3dde75018915611992495a75 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Fri, 26 May 2023 14:51:09 -0400 Subject: [PATCH 068/290] some minor hygiene fixes (#49897) This code does a bad job handling `(escape symbol)` in a lot of places. This attempts to fix some of them by peeking through it more. --- src/julia-syntax.scm | 2 +- src/macroexpand.scm | 125 ++++++++++++++++++++++++------------------- 2 files changed, 70 insertions(+), 57 deletions(-) diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index df4e791e1fa10..c764577a6c89a 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -187,7 +187,7 @@ ;; a bound is #f if not specified (define (analyze-typevar e) (define (check-sym s) - (if (symbol? s) + (if (symbol? (unescape s)) ; unescape for macroexpand.scm use s (error (string "invalid type parameter name \"" (deparse s) "\"")))) (cond ((atom? 
e) (list (check-sym e) #f #f)) diff --git a/src/macroexpand.scm b/src/macroexpand.scm index 14d1fe1c5ab94..89c9564e2e24a 100644 --- a/src/macroexpand.scm +++ b/src/macroexpand.scm @@ -99,31 +99,32 @@ (vars '())) (if (null? binds) (cons 'varlist vars) - (cond - ((or (symbol? (car binds)) (decl? (car binds))) - ;; just symbol -> add local - (loop (cdr binds) - (cons (decl-var (car binds)) vars))) - ((and (length= (car binds) 3) - (eq? (caar binds) '=)) - ;; some kind of assignment - (cond - ((or (symbol? (cadar binds)) - (decl? (cadar binds))) - ;; a=b -> add argument - (loop (cdr binds) - (cons (decl-var (cadar binds)) vars))) - ((eventually-call? (cadar binds)) - ;; f()=c - (let ((asgn (cadr (julia-expand0 (car binds) 'none 0)))) - (loop (cdr binds) - (cons (cadr asgn) vars)))) - ((and (pair? (cadar binds)) - (eq? (caadar binds) 'tuple)) - (loop (cdr binds) - (append (map decl-var (lhs-vars (cadar binds))) vars))) - (else '()))) - (else '()))))) + (let ((ux (unescape (car binds)))) + (cond + ((or (symbol? ux) (decl? ux)) + ;; just symbol -> add local + (loop (cdr binds) + (cons (let-decl-var ux) vars))) + ((and (length= (car binds) 3) + (eq? (caar binds) '=)) + (set! ux (unescape (cadar binds))) + ;; some kind of assignment + (cond + ((or (symbol? ux) (decl? ux)) + ;; a=b -> add argument + (loop (cdr binds) + (cons (let-decl-var ux) vars))) + ((eventually-call? (cadar binds)) + ;; f()=c + (let ((name (assigned-name (cadar binds)))) + (loop (cdr binds) + (cons name vars)))) + ((and (pair? (cadar binds)) + (eq? (caadar binds) 'tuple)) + (loop (cdr binds) + (append (map let-decl-var (lhs-vars (cadar binds))) vars))) + (else '()))) + (else '())))))) ;; macro definition (pattern-lambda (macro (call name . argl) body) @@ -180,12 +181,12 @@ (define (unescape e) (if (and (pair? e) (eq? (car e) 'escape)) - (cadr e) + (unescape (cadr e)) e)) (define (unescape-global-lhs e env m parent-scope inarg) (cond ((not (pair? e)) e) - ((eq? (car e) 'escape) (cadr e)) + ((eq? 
(car e) 'escape) (unescape-global-lhs (cadr e) env m parent-scope inarg)) ((memq (car e) '(parameters tuple)) (list* (car e) (map (lambda (e) (unescape-global-lhs e env m parent-scope inarg)) @@ -207,7 +208,7 @@ ((eq? (car e) 'curly) (cddr e)) (else '()))) -(define (typevar-expr-name e) (car (analyze-typevar e))) +(define (typevar-expr-name e) (unescape (car (analyze-typevar e)))) ;; get the list of names from a list of `where` variable expressions (define (typevar-names lst) @@ -276,13 +277,13 @@ (list (cadr name)) '())) -;; resolve-expansion-vars-with-new-env, but turn on `inarg` once we get inside -;; the formal argument list. `e` in general might be e.g. `(f{T}(x)::T) where T`, +;; resolve-expansion-vars-with-new-env, but turn on `inarg` if we get inside +;; a formal argument list. `e` in general might be e.g. `(f{T}(x)::T) where T`, ;; and we want `inarg` to be true for the `(x)` part. -(define (resolve-in-function-lhs e env m parent-scope inarg) - (define (recur x) (resolve-in-function-lhs x env m parent-scope inarg)) +(define (resolve-in-lhs e env m parent-scope inarg) + (define (recur x) (resolve-in-lhs x env m parent-scope inarg)) (define (other x) (resolve-expansion-vars-with-new-env x env m parent-scope inarg)) - (case (car e) + (case (and (pair? e) (car e)) ((where) `(where ,(recur (cadr e)) ,@(map other (cddr e)))) ((|::|) `(|::| ,(recur (cadr e)) ,(other (caddr e)))) ((call) `(call ,(other (cadr e)) @@ -337,6 +338,11 @@ (new-expansion-env-for x env outermost)) m parent-scope inarg)) +(define (reescape ux x) + (if (and (pair? x) (eq? (car x) 'escape)) + (reescape '(escape ,ux) (cadr x))) + ux) + (define (resolve-expansion-vars- e env m parent-scope inarg) (cond ((or (eq? e 'begin) (eq? e 'end) (eq? e 'ccall) (eq? e 'cglobal) (underscore-symbol? e)) e) @@ -374,36 +380,35 @@ ;; type has special behavior: identifiers inside are ;; field names, not expressions. ,(map (lambda (x) - (cond ((atom? x) x) - ((and (pair? x) (eq? 
(car x) '|::|)) - `(|::| ,(cadr x) - ,(resolve-expansion-vars- (caddr x) env m parent-scope inarg))) - (else - (resolve-expansion-vars-with-new-env x env m parent-scope inarg)))) + (let ((ux (unescape x))) + (cond ((atom? ux) ux) + ((and (pair? ux) (eq? (car ux) '|::|)) + `(|::| ,(unescape (cadr ux)) + ,(resolve-expansion-vars- (reescape (caddr ux) x) env m parent-scope inarg))) + (else + (resolve-expansion-vars-with-new-env x env m parent-scope inarg))))) (cadddr e)))) ((parameters) (cons 'parameters (map (lambda (x) ;; `x` by itself after ; means `x=x` - (let ((x (if (and (not inarg) (symbol? x)) - `(kw ,x ,x) - x))) + (let* ((ux (unescape x)) + (x (if (and (not inarg) (symbol? ux)) + `(kw ,ux ,x) + x))) (resolve-expansion-vars- x env m parent-scope #f))) (cdr e)))) ((->) - `(-> ,(resolve-in-function-lhs (tuple-wrap-arrow-sig (cadr e)) env m parent-scope inarg) + `(-> ,(resolve-in-lhs (tuple-wrap-arrow-sig (cadr e)) env m parent-scope inarg) ,(resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg))) ((= function) - (if (and (pair? (cadr e)) (function-def? 
e) (length> e 2)) - ;; in (kw x 1) inside an arglist, the x isn't actually a kwarg - `(,(car e) ,(resolve-in-function-lhs (cadr e) env m parent-scope inarg) - ,(resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg)) - `(,(car e) ,@(map (lambda (x) - (resolve-expansion-vars-with-new-env x env m parent-scope inarg)) - (cdr e))))) + `(,(car e) ,(resolve-in-lhs (cadr e) env m parent-scope inarg) + ,@(map (lambda (x) + (resolve-expansion-vars-with-new-env x env m parent-scope inarg)) + (cddr e)))) ((kw) (cond @@ -442,7 +447,7 @@ newenv m parent-scope inarg)) ;; expand initial values in old env (resolve-expansion-vars- (caddr bind) env m parent-scope inarg)) - bind)) + (resolve-expansion-vars- bind env m parent-scope inarg))) binds)) ,body))) ((hygienic-scope) ; TODO: move this lowering to resolve-scopes, instead of reimplementing it here badly @@ -471,13 +476,14 @@ (define (decl-var* e) (if (pair? e) (case (car e) + ((hygienic-scope) '()) ((escape) '()) ((call) (decl-var* (cadr e))) ((=) (decl-var* (cadr e))) ((curly) (decl-var* (cadr e))) ((|::|) (if (length= e 2) '() (decl-var* (cadr e)))) ((where) (decl-var* (cadr e))) - (else (decl-var e))) + (else e)) e)) (define (decl-vars* e) @@ -485,6 +491,17 @@ (apply append (map decl-vars* (cdr e))) (list (decl-var* e)))) +;; decl-var that can sort of handle scope hygiene, but very badly +(define (let-decl-var e) + (if (pair? e) + (case (car e) + ((hygienic-scope) (let-decl-var (cadr e))) + ((escape) (let-decl-var (cadr e))) + ((|::|) (if (length= e 2) '() (let-decl-var (cadr e)))) + (else e)) + e)) + + ;; count hygienic / escape pairs ;; and fold together a list resulting from applying the function to ;; any block at the same hygienic scope @@ -614,9 +631,5 @@ (rename-symbolic-labels (julia-expand-quotes e)))) -(define (contains-macrocall e) - (and (pair? e) - (contains (lambda (e) (and (pair? e) (eq? 
(car e) 'macrocall))) e))) - (define (julia-bq-macro x) (julia-bq-expand x 0)) From f8dd16e5c1b2f372bda0f787a4fefe4911d7f787 Mon Sep 17 00:00:00 2001 From: Zachary P Christensen Date: Fri, 26 May 2023 14:53:31 -0400 Subject: [PATCH 069/290] Add Libc methods for memmove/memcpy/memset/memcmp (#49550) These are used in many places (and are actually LLVM compiler intrinsics), so it probably makes more sense to define them once and export them to users. The Libc module contains some code that we might not care to have as part of bootstrapping. However, the C-memory methods are directly called throughout bootstrapping so these are now defined in a separate "cmem.jl" file that is defined in Base then imported into `Libc` for the public interface. Co-authored-by: Jameson Nash --- NEWS.md | 1 + base/Base.jl | 3 +- base/array.jl | 57 ++++++++++++++----- base/bitset.jl | 2 +- base/cmem.jl | 53 +++++++++++++++++ base/compiler/compiler.jl | 1 + base/iddict.jl | 5 +- base/libc.jl | 13 +++-- base/mpfr.jl | 4 +- base/parse.jl | 8 ++- base/refpointer.jl | 1 + base/reinterpretarray.jl | 10 ++-- base/ryu/Ryu.jl | 1 + base/ryu/shortest.jl | 20 +++---- base/ryu/utils.jl | 3 - base/strings/string.jl | 8 ++- base/strings/substring.jl | 2 +- base/util.jl | 2 +- doc/src/base/libc.md | 4 ++ .../InteractiveUtils/src/InteractiveUtils.jl | 2 +- stdlib/InteractiveUtils/src/clipboard.jl | 2 +- stdlib/Random/src/DSFMT.jl | 7 ++- stdlib/Random/src/Random.jl | 1 - stdlib/Random/src/XoshiroSimd.jl | 5 +- 24 files changed, 158 insertions(+), 57 deletions(-) create mode 100644 base/cmem.jl diff --git a/NEWS.md b/NEWS.md index 5a518104d3770..404b2b11687af 100644 --- a/NEWS.md +++ b/NEWS.md @@ -44,6 +44,7 @@ New library functions * `tanpi` is now defined. It computes tan(πx) more accurately than `tan(pi*x)` ([#48575]). * `fourthroot(x)` is now defined in `Base.Math` and can be used to compute the fourth root of `x`. 
It can also be accessed using the unicode character `∜`, which can be typed by `\fourthroot` ([#48899]). +* `Libc.memmove`, `Libc.memset`, and `Libc.memcpy` are now defined, whose functionality matches that of their respective C calls. New library features -------------------- diff --git a/base/Base.jl b/base/Base.jl index 06df2edb276fd..65abe47f33d2d 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -163,6 +163,7 @@ include("int.jl") include("operators.jl") include("pointer.jl") include("refvalue.jl") +include("cmem.jl") include("refpointer.jl") # now replace the Pair constructor (relevant for NamedTuples) with one that calls our Base.convert @@ -316,7 +317,7 @@ include("version.jl") # system & environment include("sysinfo.jl") include("libc.jl") -using .Libc: getpid, gethostname, time +using .Libc: getpid, gethostname, time, memcpy, memset, memmove, memcmp # These used to be in build_h.jl and are retained for backwards compatibility. # NOTE: keep in sync with `libblastrampoline_jll.libblastrampoline`. diff --git a/base/array.jl b/base/array.jl index 68e3e38992731..ce400a7fa7154 100644 --- a/base/array.jl +++ b/base/array.jl @@ -280,8 +280,7 @@ segfault your program, in the same manner as C. function unsafe_copyto!(dest::Ptr{T}, src::Ptr{T}, n) where T # Do not use this to copy data between pointer arrays. # It can't be made safe no matter how carefully you checked. 
- ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), - dest, src, n * aligned_sizeof(T)) + memmove(dest, src, n * aligned_sizeof(T)) return dest end @@ -328,13 +327,11 @@ function unsafe_copyto!(dest::Array{T}, doffs, src::Array{T}, soffs, n) where T ccall(:jl_array_ptr_copy, Cvoid, (Any, Ptr{Cvoid}, Any, Ptr{Cvoid}, Int), dest, destp, src, srcp, n) elseif isbitstype(T) - ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), - destp, srcp, n * aligned_sizeof(T)) + memmove(destp, srcp, n * aligned_sizeof(T)) elseif isbitsunion(T) - ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), - destp, srcp, n * aligned_sizeof(T)) + memmove(destp, srcp, n * aligned_sizeof(T)) # copy selector bytes - ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), + memmove( ccall(:jl_array_typetagdata, Ptr{UInt8}, (Any,), dest) + doffs - 1, ccall(:jl_array_typetagdata, Ptr{UInt8}, (Any,), src) + soffs - 1, n) @@ -467,7 +464,10 @@ end getindex(::Type{Any}) = Vector{Any}() function fill!(a::Union{Array{UInt8}, Array{Int8}}, x::Integer) - ccall(:memset, Ptr{Cvoid}, (Ptr{Cvoid}, Cint, Csize_t), a, x isa eltype(a) ? x : convert(eltype(a), x), length(a)) + t = @_gc_preserve_begin a + p = unsafe_convert(Ptr{Cvoid}, a) + memset(p, x isa eltype(a) ? x : convert(eltype(a), x), length(a)) + @_gc_preserve_end t return a end @@ -1834,23 +1834,50 @@ function empty!(a::Vector) return a end -_memcmp(a, b, len) = ccall(:memcmp, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), a, b, len % Csize_t) % Int - # use memcmp for cmp on byte arrays function cmp(a::Array{UInt8,1}, b::Array{UInt8,1}) - c = _memcmp(a, b, min(length(a),length(b))) + ta = @_gc_preserve_begin a + tb = @_gc_preserve_begin b + pa = unsafe_convert(Ptr{Cvoid}, a) + pb = unsafe_convert(Ptr{Cvoid}, b) + c = memcmp(pa, pb, min(length(a),length(b))) + @_gc_preserve_end ta + @_gc_preserve_end tb return c < 0 ? -1 : c > 0 ? 
+1 : cmp(length(a),length(b)) end const BitIntegerArray{N} = Union{map(T->Array{T,N}, BitInteger_types)...} where N # use memcmp for == on bit integer types -==(a::Arr, b::Arr) where {Arr <: BitIntegerArray} = - size(a) == size(b) && 0 == _memcmp(a, b, sizeof(eltype(Arr)) * length(a)) +function ==(a::Arr, b::Arr) where {Arr <: BitIntegerArray} + if size(a) == size(b) + ta = @_gc_preserve_begin a + tb = @_gc_preserve_begin b + pa = unsafe_convert(Ptr{Cvoid}, a) + pb = unsafe_convert(Ptr{Cvoid}, b) + c = memcmp(pa, pb, sizeof(eltype(Arr)) * length(a)) + @_gc_preserve_end ta + @_gc_preserve_end tb + return c == 0 + else + return false + end +end -# this is ~20% faster than the generic implementation above for very small arrays function ==(a::Arr, b::Arr) where Arr <: BitIntegerArray{1} len = length(a) - len == length(b) && 0 == _memcmp(a, b, sizeof(eltype(Arr)) * len) + if len == length(b) + ta = @_gc_preserve_begin a + tb = @_gc_preserve_begin b + T = eltype(Arr) + pa = unsafe_convert(Ptr{T}, a) + pb = unsafe_convert(Ptr{T}, b) + c = memcmp(pa, pb, sizeof(T) * len) + @_gc_preserve_end ta + @_gc_preserve_end tb + return c == 0 + else + return false + end end """ diff --git a/base/bitset.jl b/base/bitset.jl index 5ce07389c771e..240be822fa263 100644 --- a/base/bitset.jl +++ b/base/bitset.jl @@ -391,7 +391,7 @@ function ==(s1::BitSet, s2::BitSet) if overlap > 0 t1 = @_gc_preserve_begin a1 t2 = @_gc_preserve_begin a2 - _memcmp(pointer(a1, b2-b1+1), pointer(a2), overlap<<3) == 0 || return false + memcmp(pointer(a1, b2-b1+1), pointer(a2), overlap<<3) == 0 || return false @_gc_preserve_end t2 @_gc_preserve_end t1 end diff --git a/base/cmem.jl b/base/cmem.jl new file mode 100644 index 0000000000000..8b0b99b3a6ebd --- /dev/null +++ b/base/cmem.jl @@ -0,0 +1,53 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +""" + memcpy(dst::Ptr, src::Ptr, n::Integer) -> Ptr{Cvoid} + +Call `memcpy` from the C standard library. + +!!! 
compat "Julia 1.10" + Support for `memcpy` requires at least Julia 1.10. + +""" +function memcpy(dst::Ptr, src::Ptr, n::Integer) + ccall(:memcpy, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), dst, src, n) +end + +""" + memmove(dst::Ptr, src::Ptr, n::Integer) -> Ptr{Cvoid} + +Call `memmove` from the C standard library. + +!!! compat "Julia 1.10" + Support for `memmove` requires at least Julia 1.10. + +""" +function memmove(dst::Ptr, src::Ptr, n::Integer) + ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), dst, src, n) +end + +""" + memset(dst::Ptr, val, n::Integer) -> Ptr{Cvoid} + +Call `memset` from the C standard library. + +!!! compat "Julia 1.10" + Support for `memset` requires at least Julia 1.10. + +""" +function memset(p::Ptr, val, n::Integer) + ccall(:memset, Ptr{Cvoid}, (Ptr{Cvoid}, Cint, Csize_t), p, val, n) +end + +""" + memcmp(a::Ptr, b::Ptr, n::Integer) -> Int + +Call `memcmp` from the C standard library. + +!!! compat "Julia 1.10" + Support for `memcmp` requires at least Julia 1.10. 
+ +""" +function memcmp(a::Ptr, b::Ptr, n::Integer) + ccall(:memcmp, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), a, b, n % Csize_t) % Int +end diff --git a/base/compiler/compiler.jl b/base/compiler/compiler.jl index 74814733f088d..58f77078ddb5e 100644 --- a/base/compiler/compiler.jl +++ b/base/compiler/compiler.jl @@ -100,6 +100,7 @@ add_with_overflow(x::T, y::T) where {T<:SignedInt} = checked_sadd_int(x, y) add_with_overflow(x::T, y::T) where {T<:UnsignedInt} = checked_uadd_int(x, y) add_with_overflow(x::Bool, y::Bool) = (x+y, false) +include("cmem.jl") include("strings/lazy.jl") # core array operations diff --git a/base/iddict.jl b/base/iddict.jl index 99710fbb3491e..01ff213305d7b 100644 --- a/base/iddict.jl +++ b/base/iddict.jl @@ -134,7 +134,10 @@ end function empty!(d::IdDict) resize!(d.ht, 32) - ccall(:memset, Ptr{Cvoid}, (Ptr{Cvoid}, Cint, Csize_t), d.ht, 0, sizeof(d.ht)) + ht = d.ht + t = @_gc_preserve_begin ht + memset(unsafe_convert(Ptr{Cvoid}, ht), 0, sizeof(ht)) + @_gc_preserve_end t d.ndel = 0 d.count = 0 return d diff --git a/base/libc.jl b/base/libc.jl index 82286fbf01af6..99e8dce6b87e5 100644 --- a/base/libc.jl +++ b/base/libc.jl @@ -6,10 +6,13 @@ Interface to libc, the C standard library. 
""" Libc import Base: transcode, windowserror, show +# these need to be defined seperately for bootstrapping but belong to Libc +import Base: memcpy, memmove, memset, memcmp import Core.Intrinsics: bitcast -export FILE, TmStruct, strftime, strptime, getpid, gethostname, free, malloc, calloc, realloc, - errno, strerror, flush_cstdio, systemsleep, time, transcode +export FILE, TmStruct, strftime, strptime, getpid, gethostname, free, malloc, memcpy, + memmove, memset, calloc, realloc, errno, strerror, flush_cstdio, systemsleep, time, + transcode if Sys.iswindows() export GetLastError, FormatMessage end @@ -336,7 +339,6 @@ if Sys.iswindows() end ## Memory related ## - """ free(addr::Ptr) @@ -346,6 +348,8 @@ be freed by the free functions defined in that library, to avoid assertion failu multiple `libc` libraries exist on the system. """ free(p::Ptr) = ccall(:free, Cvoid, (Ptr{Cvoid},), p) +free(p::Cstring) = free(convert(Ptr{UInt8}, p)) +free(p::Cwstring) = free(convert(Ptr{Cwchar_t}, p)) """ malloc(size::Integer) -> Ptr{Cvoid} @@ -371,8 +375,7 @@ Call `calloc` from the C standard library. 
""" calloc(num::Integer, size::Integer) = ccall(:calloc, Ptr{Cvoid}, (Csize_t, Csize_t), num, size) -free(p::Cstring) = free(convert(Ptr{UInt8}, p)) -free(p::Cwstring) = free(convert(Ptr{Cwchar_t}, p)) + ## Random numbers ## diff --git a/base/mpfr.jl b/base/mpfr.jl index ff85fc6155df4..2e03018f7669f 100644 --- a/base/mpfr.jl +++ b/base/mpfr.jl @@ -19,6 +19,8 @@ import isone, big, _string_n, decompose, minmax, sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand + +using .Base.Libc import ..Rounding: rounding_raw, setrounding_raw import ..GMP: ClongMax, CulongMax, CdoubleMax, Limb, libgmp @@ -1140,7 +1142,7 @@ function decompose(x::BigFloat)::Tuple{BigInt, Int, Int} s.size = cld(x.prec, 8*sizeof(Limb)) # limbs b = s.size * sizeof(Limb) # bytes ccall((:__gmpz_realloc2, libgmp), Cvoid, (Ref{BigInt}, Culong), s, 8b) # bits - ccall(:memcpy, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), s.d, x.d, b) # bytes + memcpy(s.d, x.d, b) s, x.exp - 8b, x.sign end diff --git a/base/parse.jl b/base/parse.jl index d800e54258b0d..f6a93e56369b7 100644 --- a/base/parse.jl +++ b/base/parse.jl @@ -210,9 +210,11 @@ function tryparse_internal(::Type{Bool}, sbuff::AbstractString, len = endpos - startpos + 1 if sbuff isa Union{String, SubString{String}} p = pointer(sbuff) + startpos - 1 - GC.@preserve sbuff begin - (len == 4) && (0 == _memcmp(p, "true", 4)) && (return true) - (len == 5) && (0 == _memcmp(p, "false", 5)) && (return false) + truestr = "true" + falsestr = "false" + GC.@preserve sbuff truestr falsestr begin + (len == 4) && (0 == memcmp(p, unsafe_convert(Ptr{UInt8}, truestr), 4)) && (return true) + (len == 5) && (0 == memcmp(p, unsafe_convert(Ptr{UInt8}, falsestr), 5)) && (return false) end else (len == 4) && (SubString(sbuff, startpos:startpos+3) == "true") && (return true) diff --git a/base/refpointer.jl b/base/refpointer.jl index 0cb2df6d24bce..ad74763ff8286 100644 --- a/base/refpointer.jl +++ b/base/refpointer.jl @@ -83,6 +83,7 @@ else primitive type Cwstring 
32 end end + ### General Methods for Ref{T} type eltype(x::Type{<:Ref{T}}) where {T} = @isdefined(T) ? T : Any diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl index 2fc246f86fa96..830bac90d86e9 100644 --- a/base/reinterpretarray.jl +++ b/base/reinterpretarray.jl @@ -387,8 +387,6 @@ end end end -@inline _memcpy!(dst, src, n) = ccall(:memcpy, Cvoid, (Ptr{UInt8}, Ptr{UInt8}, Csize_t), dst, src, n) - @inline @propagate_inbounds function _getindex_ra(a::NonReshapedReinterpretArray{T,N,S}, i1::Int, tailinds::TT) where {T,N,S,TT} # Make sure to match the scalar reinterpret if that is applicable if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0 @@ -434,7 +432,7 @@ end while nbytes_copied < sizeof(T) s[] = a.parent[ind_start + i, tailinds...] nb = min(sizeof(S) - sidx, sizeof(T)-nbytes_copied) - _memcpy!(tptr + nbytes_copied, sptr + sidx, nb) + memcpy(tptr + nbytes_copied, sptr + sidx, nb) nbytes_copied += nb sidx = 0 i += 1 @@ -574,7 +572,7 @@ end if sidx != 0 s[] = a.parent[ind_start + i, tailinds...] nb = min((sizeof(S) - sidx) % UInt, sizeof(T) % UInt) - _memcpy!(sptr + sidx, tptr, nb) + memcpy(sptr + sidx, tptr, nb) nbytes_copied += nb a.parent[ind_start + i, tailinds...] = s[] i += 1 @@ -583,7 +581,7 @@ end # Deal with the main body of elements while nbytes_copied < sizeof(T) && (sizeof(T) - nbytes_copied) > sizeof(S) nb = min(sizeof(S), sizeof(T) - nbytes_copied) - _memcpy!(sptr, tptr + nbytes_copied, nb) + memcpy(sptr, tptr + nbytes_copied, nb) nbytes_copied += nb a.parent[ind_start + i, tailinds...] = s[] i += 1 @@ -592,7 +590,7 @@ end if nbytes_copied < sizeof(T) s[] = a.parent[ind_start + i, tailinds...] nb = min(sizeof(S), sizeof(T) - nbytes_copied) - _memcpy!(sptr, tptr + nbytes_copied, nb) + memcpy(sptr, tptr + nbytes_copied, nb) a.parent[ind_start + i, tailinds...] 
= s[] end end diff --git a/base/ryu/Ryu.jl b/base/ryu/Ryu.jl index 81d1c41f4c19f..9b236caeb6ff1 100644 --- a/base/ryu/Ryu.jl +++ b/base/ryu/Ryu.jl @@ -1,5 +1,6 @@ module Ryu +using .Base.Libc import .Base: significand_bits, significand_mask, exponent_bits, exponent_mask, exponent_bias, exponent_max, uinttype include("utils.jl") diff --git a/base/ryu/shortest.jl b/base/ryu/shortest.jl index f95c09d235e6d..aaa62ba33c703 100644 --- a/base/ryu/shortest.jl +++ b/base/ryu/shortest.jl @@ -363,10 +363,10 @@ function writeshortest(buf::Vector{UInt8}, pos, x::T, c1 = (c ÷ 100) << 1 d0 = (d % 100) << 1 d1 = (d ÷ 100) << 1 - memcpy(ptr, pos + olength - 2, ptr2, c0 + 1, 2) - memcpy(ptr, pos + olength - 4, ptr2, c1 + 1, 2) - memcpy(ptr, pos + olength - 6, ptr2, d0 + 1, 2) - memcpy(ptr, pos + olength - 8, ptr2, d1 + 1, 2) + memcpy(ptr + pos + olength - 3, ptr2 + c0, 2) + memcpy(ptr + pos + olength - 5, ptr2 + c1, 2) + memcpy(ptr + pos + olength - 7, ptr2 + d0, 2) + memcpy(ptr + pos + olength - 9, ptr2 + d1, 2) i += 8 end output2 = output % UInt32 @@ -375,14 +375,14 @@ function writeshortest(buf::Vector{UInt8}, pos, x::T, output2 = div(output2, UInt32(10000)) c0 = (c % 100) << 1 c1 = (c ÷ 100) << 1 - memcpy(ptr, pos + olength - i - 2, ptr2, c0 + 1, 2) - memcpy(ptr, pos + olength - i - 4, ptr2, c1 + 1, 2) + memcpy(ptr + pos + olength - i - 3, ptr2 + c0, 2) + memcpy(ptr + pos + olength - i - 5, ptr2 + c1, 2) i += 4 end if output2 >= 100 c = (output2 % UInt32(100)) << 1 output2 = div(output2, UInt32(100)) - memcpy(ptr, pos + olength - i - 2, ptr2, c + 1, 2) + memcpy(ptr + pos + olength - i - 3, ptr2 + c, 2) i += 2 end if output2 >= 10 @@ -425,7 +425,7 @@ function writeshortest(buf::Vector{UInt8}, pos, x::T, end else pointoff = olength - abs(nexp) - memmove(ptr, pos + pointoff + 1, ptr, pos + pointoff, olength - pointoff + 1) + memmove(ptr + pos + pointoff, ptr + pos + pointoff - 1, olength - pointoff + 1) buf[pos + pointoff] = decchar pos += olength + 1 precision -= olength @@ 
-470,11 +470,11 @@ function writeshortest(buf::Vector{UInt8}, pos, x::T, if exp2 >= 100 c = exp2 % 10 - memcpy(ptr, pos, ptr2, 2 * div(exp2, 10) + 1, 2) + memcpy(ptr + pos - 1, ptr2 + 2 * div(exp2, 10), 2) buf[pos + 2] = UInt8('0') + (c % UInt8) pos += 3 elseif exp2 >= 10 - memcpy(ptr, pos, ptr2, 2 * exp2 + 1, 2) + memcpy(ptr + pos - 1, ptr2 + 2 * exp2, 2) pos += 2 else if padexp diff --git a/base/ryu/utils.jl b/base/ryu/utils.jl index 4fe0b7d397d07..f5a88c057e2b3 100644 --- a/base/ryu/utils.jl +++ b/base/ryu/utils.jl @@ -1,9 +1,6 @@ const MANTISSA_MASK = Base.significand_mask(Float64) const EXP_MASK = Base.exponent_mask(Float64) >> Base.significand_bits(Float64) -memcpy(d, doff, s, soff, n) = (ccall(:memcpy, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), d + doff - 1, s + soff - 1, n); nothing) -memmove(d, doff, s, soff, n) = (ccall(:memmove, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), d + doff - 1, s + soff - 1, n); nothing) - # Note: these are smaller than the values given in Figure 4 from the paper # see https://github.com/ulfjack/ryu/issues/119 pow5_bitcount(::Type{Float16}) = 30 diff --git a/base/strings/string.jl b/base/strings/string.jl index 9716d06deefdf..a26791958cd50 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -127,7 +127,11 @@ end _memcmp(a::Union{Ptr{UInt8},AbstractString}, b::Union{Ptr{UInt8},AbstractString}) = _memcmp(a, b, min(sizeof(a), sizeof(b))) function _memcmp(a::Union{Ptr{UInt8},AbstractString}, b::Union{Ptr{UInt8},AbstractString}, len::Int) - ccall(:memcmp, Cint, (Ptr{UInt8}, Ptr{UInt8}, Csize_t), a, b, len % Csize_t) % Int + GC.@preserve a b begin + pa = unsafe_convert(Ptr{UInt8}, a) + pb = unsafe_convert(Ptr{UInt8}, b) + memcmp(pa, pb, len % Csize_t) % Int + end end function cmp(a::String, b::String) @@ -542,7 +546,7 @@ function repeat(c::AbstractChar, r::Integer) s = _string_n(n*r) p = pointer(s) GC.@preserve s if n == 1 - ccall(:memset, Ptr{Cvoid}, (Ptr{UInt8}, Cint, Csize_t), p, u % UInt8, r) + memset(p, u % 
UInt8, r) elseif n == 2 p16 = reinterpret(Ptr{UInt16}, p) for i = 1:r diff --git a/base/strings/substring.jl b/base/strings/substring.jl index 6c169624c72f5..792925f24b12b 100644 --- a/base/strings/substring.jl +++ b/base/strings/substring.jl @@ -267,7 +267,7 @@ function repeat(s::Union{String, SubString{String}}, r::Integer) out = _string_n(n*r) if n == 1 # common case: repeating a single-byte string @inbounds b = codeunit(s, 1) - ccall(:memset, Ptr{Cvoid}, (Ptr{UInt8}, Cint, Csize_t), out, b, r) + memset(unsafe_convert(Ptr{UInt8}, out), b, r) else for i = 0:r-1 GC.@preserve s out unsafe_copyto!(pointer(out, i*n+1), pointer(s), n) diff --git a/base/util.jl b/base/util.jl index ec99bc6f40c4f..4dcb819292ff8 100644 --- a/base/util.jl +++ b/base/util.jl @@ -268,7 +268,7 @@ will always be called. function securezero! end @noinline securezero!(a::AbstractArray{<:Number}) = fill!(a, 0) @noinline unsafe_securezero!(p::Ptr{T}, len::Integer=1) where {T} = - ccall(:memset, Ptr{T}, (Ptr{T}, Cint, Csize_t), p, 0, len*sizeof(T)) + memset(p, 0, len*sizeof(T)) unsafe_securezero!(p::Ptr{Cvoid}, len::Integer=1) = Ptr{Cvoid}(unsafe_securezero!(Ptr{UInt8}(p), len)) """ diff --git a/doc/src/base/libc.md b/doc/src/base/libc.md index 0af1b74a79a71..08d2670123234 100644 --- a/doc/src/base/libc.md +++ b/doc/src/base/libc.md @@ -4,6 +4,10 @@ Base.Libc.malloc Base.Libc.calloc Base.Libc.realloc +Base.Libc.memcpy +Base.Libc.memmove +Base.Libc.memset +Base.Libc.memcmp Base.Libc.free Base.Libc.errno Base.Libc.strerror diff --git a/stdlib/InteractiveUtils/src/InteractiveUtils.jl b/stdlib/InteractiveUtils/src/InteractiveUtils.jl index 48fc2b7dafe8f..5fbe19ac4efce 100644 --- a/stdlib/InteractiveUtils/src/InteractiveUtils.jl +++ b/stdlib/InteractiveUtils/src/InteractiveUtils.jl @@ -12,7 +12,7 @@ import Base.Docs.apropos using Base: unwrap_unionall, rewrap_unionall, isdeprecated, Bottom, show_unquoted, summarysize, signature_type, format_bytes - +using Base.Libc using Markdown include("editless.jl") 
diff --git a/stdlib/InteractiveUtils/src/clipboard.jl b/stdlib/InteractiveUtils/src/clipboard.jl index a4a5118acf8d7..c2abda9a60cc3 100644 --- a/stdlib/InteractiveUtils/src/clipboard.jl +++ b/stdlib/InteractiveUtils/src/clipboard.jl @@ -100,7 +100,7 @@ elseif Sys.iswindows() pdata == C_NULL && return cleanup(:GlobalAlloc) plock = ccall((:GlobalLock, "kernel32"), stdcall, Ptr{UInt16}, (Ptr{UInt16},), pdata) plock == C_NULL && return cleanup(:GlobalLock) - ccall(:memcpy, Ptr{UInt16}, (Ptr{UInt16}, Ptr{UInt16}, Csize_t), plock, x_u16, sizeof(x_u16)) + GC.@preserve x_u16 memcpy(plock, Base.unsafe_convert(Ptr{UInt16}, x_u16), sizeof(x_u16)) unlock = ccall((:GlobalUnlock, "kernel32"), stdcall, Cint, (Ptr{UInt16},), pdata) (unlock == 0 && Libc.GetLastError() == 0) || return cleanup(:GlobalUnlock) # this should never fail pset = ccall((:SetClipboardData, "user32"), stdcall, Ptr{UInt16}, (Cuint, Ptr{UInt16}), 13, pdata) # CF_UNICODETEXT diff --git a/stdlib/Random/src/DSFMT.jl b/stdlib/Random/src/DSFMT.jl index f72a9dd5e9a0a..4c5cb8c522667 100644 --- a/stdlib/Random/src/DSFMT.jl +++ b/stdlib/Random/src/DSFMT.jl @@ -194,8 +194,11 @@ function dsfmt_jump(s::DSFMT_state, jp::GF2X) work = zeros(Int32, JN32) rwork = reinterpret(UInt64, work) dsfmt = Vector{UInt64}(undef, nval >> 1) - ccall(:memcpy, Ptr{Cvoid}, (Ptr{UInt64}, Ptr{Int32}, Csize_t), - dsfmt, val, (nval - 1) * sizeof(Int32)) + GC.@preserve dsfmt val begin + pdsfmt = Base.unsafe_convert(Ptr{Cvoid}, dsfmt) + pval = Base.unsafe_convert(Ptr{Cvoid}, val) + Base.Libc.memcpy(pdsfmt, pval, (nval - 1) * sizeof(Int32)) + end dsfmt[end] = UInt64(N*2) for i in 0:degree(jp) diff --git a/stdlib/Random/src/Random.jl b/stdlib/Random/src/Random.jl index 30bed9433de5a..78d4f15e2beac 100644 --- a/stdlib/Random/src/Random.jl +++ b/stdlib/Random/src/Random.jl @@ -16,7 +16,6 @@ using Base.GMP: Limb import SHA using Base: BitInteger, BitInteger_types, BitUnsigned, require_one_based_indexing - import Base: copymutable, copy, copy!, ==, hash, 
convert, rand, randn, show diff --git a/stdlib/Random/src/XoshiroSimd.jl b/stdlib/Random/src/XoshiroSimd.jl index 9fb03f9572688..1a16baa4bce28 100644 --- a/stdlib/Random/src/XoshiroSimd.jl +++ b/stdlib/Random/src/XoshiroSimd.jl @@ -5,6 +5,7 @@ module XoshiroSimd import ..Random: TaskLocalRNG, rand, rand!, Xoshiro, CloseOpen01, UnsafeView, SamplerType, SamplerTrivial using Base: BitInteger_types +using Base.Libc: memcpy using Core.Intrinsics: llvmcall # Vector-width. Influences random stream. @@ -180,7 +181,7 @@ end s3 = _rotl45(s3) ref = Ref(f(res, T)) # TODO: This may make the random-stream dependent on system endianness - ccall(:memcpy, Ptr{Cvoid}, (Ptr{UInt8}, Ptr{UInt64}, Csize_t), dst+i, ref, len-i) + GC.@preserve ref memcpy(dst+i, Base.unsafe_convert(Ptr{Cvoid}, ref), len-i) end if rng isa TaskLocalRNG task.rngState0, task.rngState1, task.rngState2, task.rngState3 = s0, s1, s2, s3 @@ -222,7 +223,7 @@ end res = _plus(_rotl23(_plus(s0,s3)),s0) resLoc = _and(res, 0x0101010101010101) ref = Ref(resLoc) - ccall(:memcpy, Ptr{Cvoid}, (Ptr{UInt8}, Ptr{UInt64}, Csize_t), dst+i, ref, len-i) + GC.@preserve ref memcpy(dst+i, Base.unsafe_convert(Ptr{Cvoid}, ref), len-i) t = _shl17(s1) s2 = _xor(s2, s0) s3 = _xor(s3, s1) From ba2aa30ec96ff7850a06166767344ca9fc6b0b84 Mon Sep 17 00:00:00 2001 From: Stephan Hilb Date: Fri, 26 May 2023 23:44:37 +0200 Subject: [PATCH 070/290] Enum: fix stackoverflow in `hash` for custom enum subtypes introduced in #49777 (#49964) --- base/Enums.jl | 2 +- test/enums.jl | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/base/Enums.jl b/base/Enums.jl index 2c18dbca72fcd..45a1b66753484 100644 --- a/base/Enums.jl +++ b/base/Enums.jl @@ -27,7 +27,7 @@ Base.read(io::IO, ::Type{T}) where {T<:Enum} = T(read(io, basetype(T))) Compute hash for an enum value `x`. This internal method will be specialized for every enum type created through [`@enum`](@ref). 
""" -_enum_hash(x::Enum, h::UInt) = hash(x, h) +_enum_hash(x::Enum, h::UInt) = invoke(hash, Tuple{Any, UInt}, x, h) Base.hash(x::Enum, h::UInt) = _enum_hash(x, h) Base.isless(x::T, y::T) where {T<:Enum} = isless(basetype(T)(x), basetype(T)(y)) diff --git a/test/enums.jl b/test/enums.jl index 757aa26a061be..6eb9360e08a23 100644 --- a/test/enums.jl +++ b/test/enums.jl @@ -184,6 +184,10 @@ end @enum HashEnum3 Enum3_a=1 @test which(hash, (HashEnum3, UInt)).sig != Tuple{typeof(hash), HashEnum3, UInt64} +# Check that generic `hash` on custom enum subtypes works. +struct HashEnum4 <: Enum{Int} end +@test hash(HashEnum4(), zero(UInt)) == invoke(hash, Tuple{Any, UInt}, HashEnum4(), zero(UInt)) + @test (Vector{Fruit}(undef, 3) .= apple) == [apple, apple, apple] # long, discongruous From fb3df69d2372921aa19137d24bbca0212560ce99 Mon Sep 17 00:00:00 2001 From: romaindegivry Date: Fri, 26 May 2023 23:56:56 +0100 Subject: [PATCH 071/290] Improve error when indexing is interpreted as a typed comprehension (#49939) * Improve errors for typed_hcat by adding a special error for indexing that gets resolved as a typed comprehension. * Add a test for issue #49676 --- base/abstractarray.jl | 8 ++++++++ test/abstractarray.jl | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index e8d2e956b00f8..83c97f4932df5 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1632,6 +1632,14 @@ end typed_hcat(::Type{T}, A::AbstractVecOrMat...) where {T} = _typed_hcat(T, A) +# Catch indexing errors like v[i +1] (instead of v[i+1] or v[i + 1]), where indexing is +# interpreted as a typed concatenation. (issue #49676) +typed_hcat(::AbstractArray, other...) = throw(ArgumentError("It is unclear whether you \ + intend to perform an indexing operation or typed concatenation. If you intend to \ + perform indexing (v[1 + 2]), adjust spacing or insert missing operator to clarify. 
\ + If you intend to perform typed concatenation (T[1 2]), ensure that T is a type.")) + + hcat(A::AbstractVecOrMat...) = typed_hcat(promote_eltype(A...), A...) hcat(A::AbstractVecOrMat{T}...) where {T} = typed_hcat(T, A...) diff --git a/test/abstractarray.jl b/test/abstractarray.jl index c5ff97deb6777..7fc0652ae6488 100644 --- a/test/abstractarray.jl +++ b/test/abstractarray.jl @@ -682,6 +682,14 @@ function test_cat(::Type{TestAbstractArray}) @test Base.typed_hcat(Float64, B, B) == TSlow(b2hcat) @test Base.typed_hcat(Float64, B, B, B) == TSlow(b3hcat) + @testset "issue #49676, bad error message on v[1 +1]" begin + # This is here because all these expressions are handled by Base.typed_hcat + v = [1 2 3] + @test_throws ArgumentError v[1 +1] + @test_throws ArgumentError v[1 1] + @test_throws ArgumentError v[[1 2] [2 3]] + end + @test vcat(B1, B2) == TSlow(vcat([1:24...], [1:25...])) @test hcat(C1, C2) == TSlow([1 2 1 2 3; 3 4 4 5 6]) @test hcat(C1, C2, C1) == TSlow([1 2 1 2 3 1 2; 3 4 4 5 6 3 4]) From e727afb70d2a6937aa49814be6158f74f13fd7db Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Sat, 27 May 2023 06:36:10 -0500 Subject: [PATCH 072/290] Move reflection documentation from devdocs to base (#47800) --- doc/make.jl | 2 +- doc/src/{devdocs => base}/reflection.md | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename doc/src/{devdocs => base}/reflection.md (100%) diff --git a/doc/make.jl b/doc/make.jl index 3c69f4e6c47b5..a472c85e969f9 100644 --- a/doc/make.jl +++ b/doc/make.jl @@ -118,6 +118,7 @@ BaseDocs = [ "base/punctuation.md", "base/sort.md", "base/iterators.md", + "base/reflection.md", "base/c.md", "base/libc.md", "base/stacktraces.md", @@ -127,7 +128,6 @@ BaseDocs = [ StdlibDocs = [stdlib.targetfile for stdlib in STDLIB_DOCS] DevDocs = [ - "devdocs/reflection.md", "Documentation of Julia's Internals" => [ "devdocs/init.md", "devdocs/ast.md", diff --git a/doc/src/devdocs/reflection.md b/doc/src/base/reflection.md similarity index 100% rename from 
doc/src/devdocs/reflection.md rename to doc/src/base/reflection.md From 248ceda3c49bc82805b56a4f440cb48460e3eb74 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Sat, 27 May 2023 08:39:33 -0500 Subject: [PATCH 073/290] Abbreviate varinfo signature and re-order for consistency (#48860) --- stdlib/InteractiveUtils/src/InteractiveUtils.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stdlib/InteractiveUtils/src/InteractiveUtils.jl b/stdlib/InteractiveUtils/src/InteractiveUtils.jl index 5fbe19ac4efce..25f06250c3f8f 100644 --- a/stdlib/InteractiveUtils/src/InteractiveUtils.jl +++ b/stdlib/InteractiveUtils/src/InteractiveUtils.jl @@ -21,7 +21,7 @@ include("macros.jl") include("clipboard.jl") """ - varinfo(m::Module=Main, pattern::Regex=r""; all::Bool = false, imported::Bool = false, recursive::Bool = false, sortby::Symbol = :name, minsize::Int = 0) + varinfo(m::Module=Main, pattern::Regex=r""; all=false, imported=false, recursive=false, sortby::Symbol=:name, minsize::Int=0) Return a markdown table giving information about exported global variables in a module, optionally restricted to those matching `pattern`. @@ -37,7 +37,7 @@ The memory consumption estimate is an approximate lower bound on the size of the The output of `varinfo` is intended for display purposes only. See also [`names`](@ref) to get an array of symbols defined in a module, which is suitable for more general manipulations. """ -function varinfo(m::Module=Base.active_module(), pattern::Regex=r""; all::Bool = false, imported::Bool = false, sortby::Symbol = :name, recursive::Bool = false, minsize::Int=0) +function varinfo(m::Module=Base.active_module(), pattern::Regex=r""; all::Bool = false, imported::Bool = false, recursive::Bool = false, sortby::Symbol = :name, minsize::Int=0) sortby in (:name, :size, :summary) || throw(ArgumentError("Unrecognized `sortby` value `:$sortby`. 
Possible options are `:name`, `:size`, and `:summary`")) rows = Vector{Any}[] workqueue = [(m, ""),] From 0d5bd2f5baf47bd76f41453fda789d215bb3a0d8 Mon Sep 17 00:00:00 2001 From: "Navid C. Constantinou" Date: Mon, 29 May 2023 19:16:17 +1000 Subject: [PATCH 074/290] Add missing code formatting (#49983) --- doc/src/devdocs/boundscheck.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/src/devdocs/boundscheck.md b/doc/src/devdocs/boundscheck.md index 0935257526885..7acd32f04dc75 100644 --- a/doc/src/devdocs/boundscheck.md +++ b/doc/src/devdocs/boundscheck.md @@ -28,10 +28,10 @@ end With a custom array-like type `MyArray` having: ```julia -@inline getindex(A::MyArray, i::Real) = (@boundscheck checkbounds(A,i); A.data[to_index(i)]) +@inline getindex(A::MyArray, i::Real) = (@boundscheck checkbounds(A, i); A.data[to_index(i)]) ``` -Then when `getindex` is inlined into `sum`, the call to `checkbounds(A,i)` will be elided. If +Then when `getindex` is inlined into `sum`, the call to `checkbounds(A, i)` will be elided. If your function contains multiple layers of inlining, only `@boundscheck` blocks at most one level of inlining deeper are eliminated. The rule prevents unintended changes in program behavior from code further up the stack. @@ -57,7 +57,7 @@ with [`OffsetArrays`](@ref man-custom-indices): ```julia-repl julia> using OffsetArrays -julia> sum(OffsetArray([1,2,3], -10)) +julia> sum(OffsetArray([1, 2, 3], -10)) 9164911648 # inconsistent results or segfault ``` @@ -123,4 +123,4 @@ the last argument). ## Emit bounds checks -Julia can be launched with `--check-bounds={yes|no|auto}` to emit bounds checks always, never, or respect @inbounds declarations. +Julia can be launched with `--check-bounds={yes|no|auto}` to emit bounds checks always, never, or respect `@inbounds` declarations. 
From 1cc10a604045b3d665b496e761de7966aa49f435 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Mon, 29 May 2023 18:20:39 +0900 Subject: [PATCH 075/290] refactor `Base.StackTraces.show_spec_linfo` (#49951) This commit separates the `MethodInstance` handling of `Base.StackTraces.show_spec_linfo` into `Base.StackTraces.show_spec_sig`, which can be generally useful for external code inspection tools or debuggers (like JET.jl). --- base/stacktraces.jl | 67 +++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/base/stacktraces.jl b/base/stacktraces.jl index 23dadca8c8fa5..523ca827897d5 100644 --- a/base/stacktraces.jl +++ b/base/stacktraces.jl @@ -319,42 +319,45 @@ function show_spec_linfo(io::IO, frame::StackFrame) print(io, "top-level scope") elseif linfo isa Module Base.print_within_stacktrace(io, Base.demangle_function_name(string(frame.func)), bold=true) - else - def, sig = if linfo isa MethodInstance - linfo.def, linfo.specTypes + elseif linfo isa MethodInstance + def = linfo.def + if def isa Module + Base.show_mi(io, linfo, #=from_stackframe=#true) else - linfo, linfo.sig + show_spec_sig(io, def, linfo.specTypes) end - if def isa Method - if get(io, :limit, :false)::Bool - if !haskey(io, :displaysize) - io = IOContext(io, :displaysize => displaysize(io)) - end - end - argnames = Base.method_argnames(def) - argnames = replace(argnames, :var"#unused#" => :var"") - if def.nkw > 0 - # rearrange call kw_impl(kw_args..., func, pos_args...) to func(pos_args...) 
- kwarg_types = Any[ fieldtype(sig, i) for i = 2:(1+def.nkw) ] - uw = Base.unwrap_unionall(sig)::DataType - pos_sig = Base.rewrap_unionall(Tuple{uw.parameters[(def.nkw+2):end]...}, sig) - kwnames = argnames[2:(def.nkw+1)] - for i = 1:length(kwnames) - str = string(kwnames[i])::String - if endswith(str, "...") - kwnames[i] = Symbol(str[1:end-3]) - end - end - Base.show_tuple_as_call(io, def.name, pos_sig; - demangle=true, - kwargs=zip(kwnames, kwarg_types), - argnames=argnames[def.nkw+2:end]) - else - Base.show_tuple_as_call(io, def.name, sig; demangle=true, argnames) + else + m = linfo::Method + show_spec_sig(io, m, m.sig) + end +end + +function show_spec_sig(io::IO, m::Method, @nospecialize(sig::Type)) + if get(io, :limit, :false)::Bool + if !haskey(io, :displaysize) + io = IOContext(io, :displaysize => displaysize(io)) + end + end + argnames = Base.method_argnames(m) + argnames = replace(argnames, :var"#unused#" => :var"") + if m.nkw > 0 + # rearrange call kw_impl(kw_args..., func, pos_args...) 
to func(pos_args...; kw_args) + kwarg_types = Any[ fieldtype(sig, i) for i = 2:(1+m.nkw) ] + uw = Base.unwrap_unionall(sig)::DataType + pos_sig = Base.rewrap_unionall(Tuple{uw.parameters[(m.nkw+2):end]...}, sig) + kwnames = argnames[2:(m.nkw+1)] + for i = 1:length(kwnames) + str = string(kwnames[i])::String + if endswith(str, "...") + kwnames[i] = Symbol(str[1:end-3]) end - else - Base.show_mi(io, linfo, true) end + Base.show_tuple_as_call(io, m.name, pos_sig; + demangle=true, + kwargs=zip(kwnames, kwarg_types), + argnames=argnames[m.nkw+2:end]) + else + Base.show_tuple_as_call(io, m.name, sig; demangle=true, argnames) end end From 957972e5e74d4b302a229bea8299a8fa1059f1d0 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Mon, 29 May 2023 16:21:26 -0300 Subject: [PATCH 076/290] Expose the Julia JIT with a C API (#49858) --- src/Makefile | 2 +- src/codegen-stubs.c | 23 +++++++ src/jitlayers.cpp | 44 ++++++++++++- src/jitlayers.h | 14 +++- src/jl_exported_funcs.inc | 12 ++++ src/julia.expmap | 1 + src/llvm_api.cpp | 133 ++++++++++++++++++++++++++++++++++++++ test/llvmcall2.jl | 13 ++++ 8 files changed, 237 insertions(+), 5 deletions(-) create mode 100644 src/llvm_api.cpp diff --git a/src/Makefile b/src/Makefile index 382e904818838..f8cf55d35c667 100644 --- a/src/Makefile +++ b/src/Makefile @@ -56,7 +56,7 @@ CODEGEN_SRCS := codegen jitlayers aotcompile debuginfo disasm llvm-simdloop llvm llvm-final-gc-lowering llvm-pass-helpers llvm-late-gc-lowering llvm-ptls \ llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces \ llvm-multiversioning llvm-alloc-opt llvm-alloc-helpers cgmemmgr llvm-remove-addrspaces \ - llvm-remove-ni llvm-julia-licm llvm-demote-float16 llvm-cpufeatures pipeline + llvm-remove-ni llvm-julia-licm llvm-demote-float16 llvm-cpufeatures pipeline llvm_api FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir) CG_LLVM_LIBS := all ifeq ($(USE_POLLY),1) diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c index 
365ddec47df42..9de831fa30a9c 100644 --- a/src/codegen-stubs.c +++ b/src/codegen-stubs.c @@ -138,3 +138,26 @@ JL_DLLEXPORT void LLVMExtraAddGCInvariantVerifierPass_fallback(void *PM, bool_t JL_DLLEXPORT void LLVMExtraAddDemoteFloat16Pass_fallback(void *PM) UNAVAILABLE JL_DLLEXPORT void LLVMExtraAddCPUFeaturesPass_fallback(void *PM) UNAVAILABLE + +//LLVM C api to the julia JIT +JL_DLLEXPORT void* JLJITGetLLVMOrcExecutionSession_fallback(void* JIT) UNAVAILABLE + +JL_DLLEXPORT void* JLJITGetJuliaOJIT_fallback(void) UNAVAILABLE + +JL_DLLEXPORT void* JLJITGetExternalJITDylib_fallback(void* JIT) UNAVAILABLE + +JL_DLLEXPORT void* JLJITAddObjectFile_fallback(void* JIT, void* JD, void* ObjBuffer) UNAVAILABLE + +JL_DLLEXPORT void* JLJITAddLLVMIRModule_fallback(void* JIT, void* JD, void* TSM) UNAVAILABLE + +JL_DLLEXPORT void* JLJITLookup_fallback(void* JIT, void* Result, const char *Name) UNAVAILABLE + +JL_DLLEXPORT void* JLJITMangleAndIntern_fallback(void* JIT, const char *Name) UNAVAILABLE + +JL_DLLEXPORT const char *JLJITGetTripleString_fallback(void* JIT) UNAVAILABLE + +JL_DLLEXPORT const char JLJITGetGlobalPrefix_fallback(void* JIT) UNAVAILABLE + +JL_DLLEXPORT const char *JLJITGetDataLayoutString_fallback(void* JIT) UNAVAILABLE + +JL_DLLEXPORT void* JLJITGetIRCompileLayer_fallback(void* JIT) UNAVAILABLE diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 780f5d91847e0..c613180522ecc 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -1299,6 +1299,7 @@ JuliaOJIT::JuliaOJIT() ES(cantFail(orc::SelfExecutorProcessControl::Create())), GlobalJD(ES.createBareJITDylib("JuliaGlobals")), JD(ES.createBareJITDylib("JuliaOJIT")), + ExternalJD(ES.createBareJITDylib("JuliaExternal")), ContextPool([](){ auto ctx = std::make_unique(); return orc::ThreadSafeContext(std::move(ctx)); @@ -1323,7 +1324,9 @@ JuliaOJIT::JuliaOJIT() std::make_unique(LockLayer, *TM, 2, PrintLLVMTimers), std::make_unique(LockLayer, *TM, 3, PrintLLVMTimers), }, - OptSelLayer(Pipelines) + 
OptSelLayer(Pipelines), + ExternalCompileLayer(ES, LockLayer, + std::make_unique(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM, 2)) { #ifdef JL_USE_JITLINK # if defined(LLVM_SHLIB) @@ -1395,6 +1398,9 @@ JuliaOJIT::JuliaOJIT() } JD.addToLinkOrder(GlobalJD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly); + JD.addToLinkOrder(ExternalJD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly); + ExternalJD.addToLinkOrder(GlobalJD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly); + ExternalJD.addToLinkOrder(JD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly); #if JULIA_FLOAT16_ABI == 1 orc::SymbolAliasMap jl_crt = { @@ -1494,10 +1500,34 @@ void JuliaOJIT::addModule(orc::ThreadSafeModule TSM) } } +Error JuliaOJIT::addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, bool ShouldOptimize) +{ + if (auto Err = TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT -> Error + { + if (M.getDataLayout().isDefault()) + M.setDataLayout(DL); + if (M.getDataLayout() != DL) + return make_error( + "Added modules have incompatible data layouts: " + + M.getDataLayout().getStringRepresentation() + " (module) vs " + + DL.getStringRepresentation() + " (jit)", + inconvertibleErrorCode()); + + return Error::success(); + })) + return Err; + return ExternalCompileLayer.add(JD.getDefaultResourceTracker(), std::move(TSM)); +} + +Error JuliaOJIT::addObjectFile(orc::JITDylib &JD, std::unique_ptr Obj) { + assert(Obj && "Can not add null object"); + return LockLayer.add(JD.getDefaultResourceTracker(), std::move(Obj)); +} + JL_JITSymbol JuliaOJIT::findSymbol(StringRef Name, bool ExportedSymbolsOnly) { - orc::JITDylib* SearchOrders[2] = {&JD, &GlobalJD}; - ArrayRef SearchOrder = makeArrayRef(&SearchOrders[0], ExportedSymbolsOnly ? 2 : 1); + orc::JITDylib* SearchOrders[3] = {&JD, &GlobalJD, &ExternalJD}; + ArrayRef SearchOrder = makeArrayRef(&SearchOrders[0], ExportedSymbolsOnly ? 
3 : 1); auto Sym = ES.lookup(SearchOrder, Name); if (Sym) return *Sym; @@ -1509,6 +1539,14 @@ JL_JITSymbol JuliaOJIT::findUnmangledSymbol(StringRef Name) return findSymbol(getMangledName(Name), true); } +Expected JuliaOJIT::findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) +{ + orc::JITDylib* SearchOrders[3] = {&ExternalJD, &GlobalJD, &JD}; + ArrayRef SearchOrder = makeArrayRef(&SearchOrders[0], ExternalJDOnly ? 1 : 3); + auto Sym = ES.lookup(SearchOrder, getMangledName(Name)); + return Sym; +} + uint64_t JuliaOJIT::getGlobalValueAddress(StringRef Name) { auto addr = findSymbol(getMangledName(Name), false); diff --git a/src/jitlayers.h b/src/jitlayers.h index c056a6b3418a3..6d864640a2e24 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -475,6 +475,16 @@ class JuliaOJIT { void addGlobalMapping(StringRef Name, uint64_t Addr) JL_NOTSAFEPOINT; void addModule(orc::ThreadSafeModule M) JL_NOTSAFEPOINT; + //Methods for the C API + Error addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, + bool ShouldOptimize = false) JL_NOTSAFEPOINT; + Error addObjectFile(orc::JITDylib &JD, + std::unique_ptr Obj) JL_NOTSAFEPOINT; + Expected findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) JL_NOTSAFEPOINT; + orc::IRCompileLayer &getIRCompileLayer() JL_NOTSAFEPOINT { return ExternalCompileLayer; }; + orc::ExecutionSession &getExecutionSession() JL_NOTSAFEPOINT { return ES; } + orc::JITDylib &getExternalJITDylib() JL_NOTSAFEPOINT { return ExternalJD; } + JL_JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) JL_NOTSAFEPOINT; JL_JITSymbol findUnmangledSymbol(StringRef Name) JL_NOTSAFEPOINT; uint64_t getGlobalValueAddress(StringRef Name) JL_NOTSAFEPOINT; @@ -523,7 +533,7 @@ class JuliaOJIT { orc::ExecutionSession ES; orc::JITDylib &GlobalJD; orc::JITDylib &JD; - + orc::JITDylib &ExternalJD; //Map and inc are guarded by RLST_mutex std::mutex RLST_mutex{}; int RLST_inc = 0; @@ -548,6 +558,8 @@ class JuliaOJIT { LockLayerT LockLayer; const std::array, 
4> Pipelines; OptSelLayerT OptSelLayer; + CompileLayerT ExternalCompileLayer; + }; extern JuliaOJIT *jl_ExecutionEngine; std::unique_ptr jl_create_llvm_module(StringRef name, LLVMContext &ctx, bool imaging_mode, const DataLayout &DL = jl_ExecutionEngine->getDataLayout(), const Triple &triple = jl_ExecutionEngine->getTargetTriple()) JL_NOTSAFEPOINT; diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index f79537d419b90..b2216d10fb27d 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -573,5 +573,17 @@ YY(LLVMExtraAddGCInvariantVerifierPass) \ YY(LLVMExtraAddDemoteFloat16Pass) \ YY(LLVMExtraAddCPUFeaturesPass) \ + YY(JLJITGetLLVMOrcExecutionSession) \ + YY(JLJITGetJuliaOJIT) \ + YY(JLJITGetExternalJITDylib) \ + YY(JLJITAddObjectFile) \ + YY(JLJITAddLLVMIRModule) \ + YY(JLJITLookup) \ + YY(JLJITMangleAndIntern) \ + YY(JLJITGetTripleString) \ + YY(JLJITGetGlobalPrefix) \ + YY(JLJITGetDataLayoutString) \ + YY(JLJITGetIRCompileLayer) \ + // end of file diff --git a/src/julia.expmap b/src/julia.expmap index 94b955e95981f..447c3c4d8a5f5 100644 --- a/src/julia.expmap +++ b/src/julia.expmap @@ -31,6 +31,7 @@ _Z22jl_coverage_alloc_lineN4llvm9StringRefEi; _Z22jl_malloc_data_pointerN4llvm9StringRefEi; LLVMExtra*; + JLJIT*; llvmGetPassPluginInfo; /* Make visible so that linker will merge duplicate definitions across DSO boundaries */ diff --git a/src/llvm_api.cpp b/src/llvm_api.cpp new file mode 100644 index 0000000000000..6d7b9b143ff0a --- /dev/null +++ b/src/llvm_api.cpp @@ -0,0 +1,133 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +#undef DEBUG +#include "llvm-version.h" +#include "platform.h" + +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS +#define __STDC_CONSTANT_MACROS +#endif + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace llvm { +namespace orc { +class OrcV2CAPIHelper { +public: + using PoolEntry = orc::SymbolStringPtr::PoolEntry; + using PoolEntryPtr = orc::SymbolStringPtr::PoolEntryPtr; + + // Move from SymbolStringPtr to PoolEntryPtr (no change in ref count). + static PoolEntryPtr moveFromSymbolStringPtr(SymbolStringPtr S) + { + PoolEntryPtr Result = nullptr; + std::swap(Result, S.S); + return Result; + } +}; +} // namespace orc +} // namespace llvm + + +typedef struct JLOpaqueJuliaOJIT *JuliaOJITRef; +typedef struct LLVMOrcOpaqueIRCompileLayer *LLVMOrcIRCompileLayerRef; + +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(JuliaOJIT, JuliaOJITRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::JITDylib, LLVMOrcJITDylibRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ExecutionSession, LLVMOrcExecutionSessionRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::OrcV2CAPIHelper::PoolEntry, + LLVMOrcSymbolStringPoolEntryRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::IRCompileLayer, LLVMOrcIRCompileLayerRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::MaterializationResponsibility, + LLVMOrcMaterializationResponsibilityRef) +extern "C" { + +JL_DLLEXPORT_CODEGEN JuliaOJITRef JLJITGetJuliaOJIT_impl(void) +{ + return wrap(jl_ExecutionEngine); +} + +JL_DLLEXPORT_CODEGEN LLVMOrcExecutionSessionRef +JLJITGetLLVMOrcExecutionSession_impl(JuliaOJITRef JIT) +{ + return wrap(&unwrap(JIT)->getExecutionSession()); +} + +JL_DLLEXPORT_CODEGEN LLVMOrcJITDylibRef +JLJITGetExternalJITDylib_impl(JuliaOJITRef JIT) +{ + return wrap(&unwrap(JIT)->getExternalJITDylib()); +} + +JL_DLLEXPORT_CODEGEN LLVMErrorRef JLJITAddObjectFile_impl( + JuliaOJITRef JIT, LLVMOrcJITDylibRef JD, LLVMMemoryBufferRef ObjBuffer) +{ + return 
wrap(unwrap(JIT)->addObjectFile( + *unwrap(JD), std::unique_ptr(unwrap(ObjBuffer)))); +} + +JL_DLLEXPORT_CODEGEN LLVMErrorRef JLJITAddLLVMIRModule_impl( + JuliaOJITRef JIT, LLVMOrcJITDylibRef JD, LLVMOrcThreadSafeModuleRef TSM) +{ + std::unique_ptr TmpTSM(unwrap(TSM)); + return wrap(unwrap(JIT)->addExternalModule(*unwrap(JD), std::move(*TmpTSM))); +} + +JL_DLLEXPORT_CODEGEN LLVMErrorRef +JLJITLookup_impl(JuliaOJITRef JIT, LLVMOrcExecutorAddress *Result, + const char *Name, int ExternalJDOnly) +{ + auto Sym = unwrap(JIT)->findExternalJDSymbol(Name, ExternalJDOnly); + if (Sym) { + auto addr = Sym->getAddress(); + *Result = orc::ExecutorAddr(addr).getValue(); + return LLVMErrorSuccess; + } + else { + *Result = 0; + return wrap(Sym.takeError()); + } +} + +JL_DLLEXPORT_CODEGEN LLVMOrcSymbolStringPoolEntryRef +JLJITMangleAndIntern_impl(JuliaOJITRef JIT, + const char *Name) +{ + return wrap(orc::OrcV2CAPIHelper::moveFromSymbolStringPtr(unwrap(JIT)->mangle(Name))); +} + +JL_DLLEXPORT_CODEGEN const char * +JLJITGetTripleString_impl(JuliaOJITRef JIT) +{ + return unwrap(JIT)->getTargetTriple().str().c_str(); +} + +JL_DLLEXPORT_CODEGEN const char +JLJITGetGlobalPrefix_impl(JuliaOJITRef JIT) +{ + return unwrap(JIT)->getDataLayout().getGlobalPrefix(); +} + +JL_DLLEXPORT_CODEGEN const char * +JLJITGetDataLayoutString_impl(JuliaOJITRef JIT) +{ + return unwrap(JIT)->getDataLayout().getStringRepresentation().c_str(); +} + +JL_DLLEXPORT_CODEGEN LLVMOrcIRCompileLayerRef +JLJITGetIRCompileLayer_impl(JuliaOJITRef JIT) +{ + return wrap(&unwrap(JIT)->getIRCompileLayer()); +} + +} // extern "C" diff --git a/test/llvmcall2.jl b/test/llvmcall2.jl index 8926b962a35c6..07b27fc407433 100644 --- a/test/llvmcall2.jl +++ b/test/llvmcall2.jl @@ -60,3 +60,16 @@ let err = ErrorException("llvmcall only supports intrinsic calls") @test_throws err (@eval ccall("llvm.floor.f64", llvmcall, Float64, (Float64, Float64...,), 0.0)) === 0.0 @test_throws err (@eval ccall("llvm.floor", llvmcall, Float64, 
(Float64, Float64...,), 0.0)) === 0.0 end + +@testset "JLJIT API" begin + function JLJITGetJuliaOJIT() + ccall(:JLJITGetJuliaOJIT, Ptr{Cvoid}, ()) + end + function JLJITGetTripleString(JIT) + ccall(:JLJITGetTripleString, Cstring, (Ptr{Cvoid},), JIT) + end + jit = JLJITGetJuliaOJIT() + str = JLJITGetTripleString(jit) + jl_str = unsafe_string(str) + @test length(jl_str) > 4 +end From 18d02c249a90a7c2d25ac5a99a14cf7a414a3295 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Mon, 29 May 2023 18:23:15 -0500 Subject: [PATCH 077/290] Use `top_set_bit` to optimize `hash(Real)` (#49986) --- base/float.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/float.jl b/base/float.jl index eb30c087c0f8c..3ea45fd140176 100644 --- a/base/float.jl +++ b/base/float.jl @@ -687,7 +687,7 @@ function hash(x::Real, h::UInt) # handle values representable as Int64, UInt64, Float64 if den == 1 - left = ndigits0z(num,2) + pow + left = top_set_bit(num) + pow right = trailing_zeros(num) + pow if -1074 <= right if 0 <= right && left <= 64 From ed5bd4c9553b757155b559744c8ce7a6ab513bf7 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Tue, 30 May 2023 15:09:08 +0900 Subject: [PATCH 078/290] Revert "Use `top_set_bit` to optimize `hash(Real)` (#49986)" (#49993) This reverts commit 18d02c249a90a7c2d25ac5a99a14cf7a414a3295. 
--- base/float.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/float.jl b/base/float.jl index 3ea45fd140176..eb30c087c0f8c 100644 --- a/base/float.jl +++ b/base/float.jl @@ -687,7 +687,7 @@ function hash(x::Real, h::UInt) # handle values representable as Int64, UInt64, Float64 if den == 1 - left = top_set_bit(num) + pow + left = ndigits0z(num,2) + pow right = trailing_zeros(num) + pow if -1074 <= right if 0 <= right && left <= 64 From 20752db7c1e78c58206c1480533c25b3ae4a7fc8 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Tue, 30 May 2023 10:23:39 -0600 Subject: [PATCH 079/290] thread safety: lock around atexit_hooks global (#49868) Ensure the lock is precise, so that we are allowed to register new atexit hooks from inside an atexit hook. But then disable `atexit()` when shutting down after it finishes running. Add tests that cover all the cases: 1. registering a hook from inside a hook 2. registering a hook from another thread while hooks are running 3. attempting to register a hook after all hooks have finished (disallowed) Fixes #49841 Co-authored-by: Jameson Nash --- base/initdefs.jl | 35 +++++++++++++++-- test/atexit.jl | 99 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 129 insertions(+), 5 deletions(-) diff --git a/base/initdefs.jl b/base/initdefs.jl index 002984b83dd97..8a5f9e440a089 100644 --- a/base/initdefs.jl +++ b/base/initdefs.jl @@ -354,6 +354,7 @@ const atexit_hooks = Callable[ () -> Filesystem.temp_cleanup_purge(force=true) ] const _atexit_hooks_lock = ReentrantLock() +global _atexit_hooks_finished::Bool = false """ atexit(f) @@ -374,12 +375,40 @@ exit code `n` (instead of the original exit code). If more than one exit hook calls `exit(n)`, then Julia will exit with the exit code corresponding to the last called exit hook that calls `exit(n)`. (Because exit hooks are called in LIFO order, "last called" is equivalent to "first registered".) 
+ +Note: Once all exit hooks have been called, no more exit hooks can be registered, +and any call to `atexit(f)` after all hooks have completed will throw an exception. +This situation may occur if you are registering exit hooks from background Tasks that +may still be executing concurrently during shutdown. """ -atexit(f::Function) = Base.@lock _atexit_hooks_lock (pushfirst!(atexit_hooks, f); nothing) +function atexit(f::Function) + Base.@lock _atexit_hooks_lock begin + _atexit_hooks_finished && error("cannot register new atexit hook; already exiting.") + pushfirst!(atexit_hooks, f) + return nothing + end +end function _atexit(exitcode::Cint) - while !isempty(atexit_hooks) - f = popfirst!(atexit_hooks) + # Don't hold the lock around the iteration, just in case any other thread executing in + # parallel tries to register a new atexit hook while this is running. We don't want to + # block that thread from proceeding, and we can allow it to register its hook which we + # will immediately run here. + while true + local f + Base.@lock _atexit_hooks_lock begin + # If this is the last iteration, atomically disable atexit hooks to prevent + # someone from registering a hook that will never be run. + # (We do this inside the loop, so that it is atomic: no one can have registered + # a hook that never gets run, and we run all the hooks we know about until + # the vector is empty.) + if isempty(atexit_hooks) + global _atexit_hooks_finished = true + break + end + + f = popfirst!(atexit_hooks) + end try if hasmethod(f, (Cint,)) f(exitcode) diff --git a/test/atexit.jl b/test/atexit.jl index bf46edae6eaad..5b4fbc0b44a40 100644 --- a/test/atexit.jl +++ b/test/atexit.jl @@ -4,8 +4,9 @@ using Test @testset "atexit.jl" begin function _atexit_tests_gen_cmd_eval(expr::String) + # We run the atexit tests with 2 threads, for the parallelism tests at the end. 
cmd_eval = ``` - $(Base.julia_cmd()) -e $(expr) + $(Base.julia_cmd()) -t2 -e $(expr) ``` return cmd_eval end @@ -13,8 +14,9 @@ using Test script, io = mktemp(temp_dir) println(io, expr) close(io) + # We run the atexit tests with 2 threads, for the parallelism tests at the end. cmd_script = ``` - $(Base.julia_cmd()) $(script) + $(Base.julia_cmd()) -t2 $(script) ``` return cmd_script end @@ -172,5 +174,98 @@ using Test @test p_script.exitcode == expected_exit_code end end + @testset "test calling atexit() in parallel with running atexit hooks." begin + # These tests cover 3 parallelism cases, as described by the following comments. + julia_expr_list = Dict( + # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + # 1. registering a hook from inside a hook + """ + atexit() do + atexit() do + exit(11) + end + end + # This will attempt to exit 0, but the execution of the atexit hook will + # register another hook, which will exit 11. + exit(0) + """ => 11, + # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + # 2. registering a hook from another thread while hooks are running + """ + c = Channel() + # This hook must execute _last_. (Execution is LIFO.) + atexit() do + put!(c, nothing) + put!(c, nothing) + end + atexit() do + # This will run in a concurrent task, testing that we can register atexit + # hooks from another task while running atexit hooks. + Threads.@spawn begin + Core.println("INSIDE") + take!(c) # block on c + Core.println("go") + atexit() do + Core.println("exit11") + exit(11) + end + take!(c) # keep the _atexit() loop alive until we've added another item. + Core.println("done") + end + end + exit(0) + """ => 11, + # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + # 3. attempting to register a hook after all hooks have finished (disallowed) + """ + const atexit_has_finished = Threads.Atomic{Bool}(false) + atexit() do + Threads.@spawn begin + # Block until the atexit hooks have all finished. 
We use a manual "spin + # lock" because task switch is disallowed inside the finalizer, below. + while !atexit_has_finished[] end + Core.println("done") + try + # By the time this runs, all the atexit hooks will be done. + # So this will throw. + atexit() do + exit(11) + end + catch + # Meaning we _actually_ exit 22. + exit(22) + end + end + end + # Finalizers run after the atexit hooks, so this blocks exit until the spawned + # task above gets a chance to run. + x = [] + finalizer(x) do x + Core.println("FINALIZER") + # Allow the spawned task to finish + atexit_has_finished[] = true + Core.println("ready") + # Then spin forever to prevent exit. + while atexit_has_finished[] end + Core.println("exiting") + end + exit(0) + """ => 22, + # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + ) + for julia_expr in keys(julia_expr_list) + cmd_eval = _atexit_tests_gen_cmd_eval(julia_expr) + cmd_script = _atexit_tests_gen_cmd_script(atexit_temp_dir, julia_expr) + expected_exit_code = julia_expr_list[julia_expr] + @test_throws(ProcessFailedException, run(cmd_eval)) + @test_throws(ProcessFailedException, run(cmd_script)) + p_eval = run(cmd_eval; wait = false) + p_script = run(cmd_script; wait = false) + wait(p_eval) + wait(p_script) + @test p_eval.exitcode == expected_exit_code + @test p_script.exitcode == expected_exit_code + end + end rm(atexit_temp_dir; force = true, recursive = true) end From f6f637adf6fa6d2c499c04020c06ae457d2e1d05 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 30 May 2023 15:09:40 -0400 Subject: [PATCH 080/290] cleanup: some fixes from effort to break up monolithic sysimg build (#49953) From #38119 --- base/Base.jl | 5 ++- base/initdefs.jl | 6 +-- base/sysimg.jl | 15 +++---- base/timing.jl | 9 ++-- contrib/generate_precompile.jl | 76 +++++++++++++++++++--------------- pkgimage.mk | 9 ++++ sysimage.mk | 8 +++- test/Makefile | 5 +++ test/cmdlineargs.jl | 7 ++-- test/loading.jl | 4 +- test/precompile.jl | 1 + 11 files changed, 88 
insertions(+), 57 deletions(-) diff --git a/base/Base.jl b/base/Base.jl index 65abe47f33d2d..8c5b8e13d3fb5 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -6,7 +6,7 @@ using Core.Intrinsics, Core.IR # to start, we're going to use a very simple definition of `include` # that doesn't require any function (except what we can get from the `Core` top-module) -const _included_files = Array{Tuple{Module,String},1}() +const _included_files = Array{Tuple{Module,String},1}(Core.undef, 1) function include(mod::Module, path::String) ccall(:jl_array_grow_end, Cvoid, (Any, UInt), _included_files, UInt(1)) Core.arrayset(true, _included_files, (mod, ccall(:jl_prepend_cwd, Any, (Any,), path)), arraylen(_included_files)) @@ -607,5 +607,8 @@ end end +# Ensure this file is also tracked +@assert !isassigned(_included_files, 1) +_included_files[1] = (parentmodule(Base), abspath(@__FILE__)) end # baremodule Base diff --git a/base/initdefs.jl b/base/initdefs.jl index 8a5f9e440a089..d1d37839a7c13 100644 --- a/base/initdefs.jl +++ b/base/initdefs.jl @@ -93,6 +93,7 @@ function append_default_depot_path!(DEPOT_PATH) path in DEPOT_PATH || push!(DEPOT_PATH, path) path = abspath(Sys.BINDIR, "..", "share", "julia") path in DEPOT_PATH || push!(DEPOT_PATH, path) + return DEPOT_PATH end function init_depot_path() @@ -111,6 +112,7 @@ function init_depot_path() else append_default_depot_path!(DEPOT_PATH) end + nothing end ## LOAD_PATH & ACTIVE_PROJECT ## @@ -220,9 +222,7 @@ function parse_load_path(str::String) end function init_load_path() - if Base.creating_sysimg - paths = ["@stdlib"] - elseif haskey(ENV, "JULIA_LOAD_PATH") + if haskey(ENV, "JULIA_LOAD_PATH") paths = parse_load_path(ENV["JULIA_LOAD_PATH"]) else paths = filter!(env -> env !== nothing, diff --git a/base/sysimg.jl b/base/sysimg.jl index b0eeffa5757ba..09ea015b0f903 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -9,11 +9,9 @@ using Base.MainInclude # ans, err, and sometimes Out import Base.MainInclude: eval, include # Ensure this 
file is also tracked -pushfirst!(Base._included_files, (@__MODULE__, joinpath(@__DIR__, "Base.jl"))) -pushfirst!(Base._included_files, (@__MODULE__, joinpath(@__DIR__, "sysimg.jl"))) +pushfirst!(Base._included_files, (@__MODULE__, abspath(@__FILE__))) # set up depot & load paths to be able to find stdlib packages -@eval Base creating_sysimg = true Base.init_depot_path() Base.init_load_path() @@ -82,7 +80,7 @@ let m = Module() GC.@preserve m begin print_time = @eval m (mod, t) -> (print(rpad(string(mod) * " ", $maxlen + 3, "─")); - Base.time_print(t * 10^9); println()) + Base.time_print(stdout, t * 10^9); println()) print_time(Base, (Base.end_base_include - Base.start_base_include) * 10^(-9)) Base._track_dependencies[] = true @@ -104,7 +102,6 @@ let empty!(Core.ARGS) empty!(Base.ARGS) empty!(LOAD_PATH) - @eval Base creating_sysimg = false Base.init_load_path() # want to be able to find external packages in userimg.jl ccall(:jl_clear_implicit_imports, Cvoid, (Any,), Main) @@ -114,12 +111,12 @@ let tot_time = tot_time_base + tot_time_stdlib + tot_time_userimg println("Sysimage built. 
Summary:") - print("Base ──────── "); Base.time_print(tot_time_base * 10^9); print(" "); show(IOContext(stdout, :compact=>true), (tot_time_base / tot_time) * 100); println("%") - print("Stdlibs ───── "); Base.time_print(tot_time_stdlib * 10^9); print(" "); show(IOContext(stdout, :compact=>true), (tot_time_stdlib / tot_time) * 100); println("%") + print("Base ──────── "); Base.time_print(stdout, tot_time_base * 10^9); print(" "); show(IOContext(stdout, :compact=>true), (tot_time_base / tot_time) * 100); println("%") + print("Stdlibs ───── "); Base.time_print(stdout, tot_time_stdlib * 10^9); print(" "); show(IOContext(stdout, :compact=>true), (tot_time_stdlib / tot_time) * 100); println("%") if isfile("userimg.jl") - print("Userimg ───── "); Base.time_print(tot_time_userimg * 10^9); print(" "); show(IOContext(stdout, :compact=>true), (tot_time_userimg / tot_time) * 100); println("%") + print("Userimg ───── "); Base.time_print(stdout, tot_time_userimg * 10^9); print(" "); show(IOContext(stdout, :compact=>true), (tot_time_userimg / tot_time) * 100); println("%") end - print("Total ─────── "); Base.time_print(tot_time * 10^9); println(); + print("Total ─────── "); Base.time_print(stdout, tot_time * 10^9); println(); empty!(LOAD_PATH) empty!(DEPOT_PATH) diff --git a/base/timing.jl b/base/timing.jl index 3e1f3a3451149..7428fd36c6253 100644 --- a/base/timing.jl +++ b/base/timing.jl @@ -135,7 +135,7 @@ function format_bytes(bytes) # also used by InteractiveUtils end end -function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, recompile_time=0, newline=false, _lpad=true) +function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, recompile_time=0, newline=false, _lpad=true) timestr = Ryu.writefixed(Float64(elapsedtime/1e9), 6) str = sprint() do io _lpad && print(io, length(timestr) < 10 ? 
(" "^(10 - length(timestr))) : "") @@ -169,8 +169,9 @@ function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, re print(io, ": ", perc < 1 ? "<1" : Ryu.writefixed(perc, 0), "% of which was recompilation") end parens && print(io, ")") + newline && print(io, "\n") end - newline ? println(str) : print(str) + print(io, str) nothing end @@ -178,7 +179,7 @@ function timev_print(elapsedtime, diff::GC_Diff, compile_times, _lpad) allocs = gc_alloc_count(diff) compile_time = first(compile_times) recompile_time = last(compile_times) - time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, recompile_time, true, _lpad) + time_print(stdout, elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, recompile_time, true, _lpad) padded_nonzero_print(elapsedtime, "elapsed time (ns)") padded_nonzero_print(diff.total_time, "gc time (ns)") padded_nonzero_print(diff.allocd, "bytes allocated") @@ -279,7 +280,7 @@ macro time(msg, ex) local _msg = $(esc(msg)) local has_msg = !isnothing(_msg) has_msg && print(_msg, ": ") - time_print(elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), first(compile_elapsedtimes), last(compile_elapsedtimes), true, !has_msg) + time_print(stdout, elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), first(compile_elapsedtimes), last(compile_elapsedtimes), true, !has_msg) val end end diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl index e8901a7b462ea..8fa40e4920eea 100644 --- a/contrib/generate_precompile.jl +++ b/contrib/generate_precompile.jl @@ -1,13 +1,14 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license +# Prevent this from putting anyting into the Main namespace +@eval Module() begin + if Threads.maxthreadid() != 1 @warn "Running this file with multiple Julia threads may lead to a build error" Threads.maxthreadid() end if Base.isempty(Base.ARGS) || Base.ARGS[1] !== "0" Sys.__init_build() -# Prevent this from being put into the Main namespace -@eval Module() begin if !isdefined(Base, :uv_eventloop) Base.reinit_stdio() end @@ -234,6 +235,13 @@ end ansi_enablecursor = "\e[?25h" ansi_disablecursor = "\e[?25l" +blackhole = Sys.isunix() ? "/dev/null" : "nul" +procenv = Dict{String,Any}( + "JULIA_HISTORY" => blackhole, + "JULIA_PROJECT" => nothing, # remove from environment + "JULIA_LOAD_PATH" => "@stdlib", + "JULIA_DEPOT_PATH" => Sys.iswindows() ? ";" : ":", + "TERM" => "") generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printed start_time = time_ns() @@ -285,7 +293,9 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe Base.compilecache(Base.PkgId($(repr(pkgname))), $(repr(path))) $precompile_script """ - run(`$(julia_exepath()) -O0 --sysimage $sysimg --trace-compile=$tmp_proc --startup-file=no -Cnative -e $s`) + p = run(pipeline(addenv(`$(julia_exepath()) -O0 --trace-compile=$tmp_proc --sysimage $sysimg + --cpu-target=native --startup-file=no --color=yes`, procenv), + stdin=IOBuffer(s), stdout=debug_output)) n_step1 = 0 for f in (tmp_prec, tmp_proc) isfile(f) || continue @@ -305,23 +315,15 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe # Collect statements from running a REPL process and replaying our REPL script touch(precompile_file) pts, ptm = open_fake_pty() - blackhole = Sys.isunix() ? 
"/dev/null" : "nul" if have_repl - cmdargs = ```--color=yes - -e 'import REPL; REPL.Terminals.is_precompiling[] = true' - ``` + cmdargs = `-e 'import REPL; REPL.Terminals.is_precompiling[] = true'` else cmdargs = `-e nothing` end - p = withenv("JULIA_HISTORY" => blackhole, - "JULIA_PROJECT" => nothing, # remove from environment - "JULIA_LOAD_PATH" => Sys.iswindows() ? "@;@stdlib" : "@:@stdlib", - "JULIA_PKG_PRECOMPILE_AUTO" => "0", - "TERM" => "") do - run(```$(julia_exepath()) -O0 --trace-compile=$precompile_file --sysimage $sysimg - --cpu-target=native --startup-file=no -i $cmdargs```, - pts, pts, pts; wait=false) - end + p = run(addenv(addenv(```$(julia_exepath()) -O0 --trace-compile=$precompile_file --sysimage $sysimg + --cpu-target=native --startup-file=no --color=yes -i $cmdargs```, procenv), + "JULIA_PKG_PRECOMPILE_AUTO" => "0"), + pts, pts, pts; wait=false) Base.close_stdio(pts) # Prepare a background process to copy output from process until `pts` is closed output_copy = Base.BufferStream() @@ -452,18 +454,16 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe failed = length(statements) - n_succeeded print_state("step3" => string("F$n_succeeded", failed > 0 ? " ($failed failed)" : "")) println() - if have_repl - # Seems like a reasonable number right now, adjust as needed - # comment out if debugging script - n_succeeded > 1500 || @warn "Only $n_succeeded precompile statements" - end + # Seems like a reasonable number right now, adjust as needed + # comment out if debugging script + n_succeeded > (have_repl ? 900 : 90) || @warn "Only $n_succeeded precompile statements" fetch(step1) == :ok || throw("Step 1 of collecting precompiles failed.") fetch(step2) == :ok || throw("Step 2 of collecting precompiles failed.") tot_time = time_ns() - start_time println("Precompilation complete. 
Summary:") - print("Total ─────── "); Base.time_print(tot_time); println() + print("Total ─────── "); Base.time_print(stdout, tot_time); println() finally fancyprint && print(ansi_enablecursor) return @@ -474,22 +474,30 @@ generate_precompile_statements() # As a last step in system image generation, # remove some references to build time environment for a more reproducible build. Base.Filesystem.temp_cleanup_purge(force=true) -@eval Base PROGRAM_FILE = "" -@eval Sys begin - BINDIR = "" - STDLIB = "" -end -empty!(Base.ARGS) -empty!(Core.ARGS) -end # @eval -end # if +let stdout = Ref{IO}(stdout) + Base.PROGRAM_FILE = "" + Sys.BINDIR = "" + Sys.STDLIB = "" + empty!(Base.ARGS) + empty!(Core.ARGS) + empty!(Base.TOML_CACHE.d) + Base.TOML.reinit!(Base.TOML_CACHE.p, "") + + println("Outputting sysimage file...") + Base.stdout = Core.stdout + Base.stderr = Core.stderr -println("Outputting sysimage file...") -let pre_output_time = time_ns() # Print report after sysimage has been saved so all time spent can be captured + pre_output_time = time_ns() Base.postoutput() do output_time = time_ns() - pre_output_time - print("Output ────── "); Base.time_print(output_time); println() + let stdout = stdout[] + print(stdout, "Output ────── "); Base.time_print(stdout, output_time); println(stdout) + end + stdout[] = Core.stdout end end + +end # if +end # @eval diff --git a/pkgimage.mk b/pkgimage.mk index dcf9dd1303d47..0803a188851bb 100644 --- a/pkgimage.mk +++ b/pkgimage.mk @@ -5,7 +5,16 @@ include $(JULIAHOME)/Make.inc VERSDIR := v$(shell cut -d. 
-f1-2 < $(JULIAHOME)/VERSION) +# set some influential environment variables export JULIA_DEPOT_PATH := $(build_prefix)/share/julia +export JULIA_LOAD_PATH := @stdlib +unexport JULIA_PROJECT := +unexport JULIA_BINDIR := + +default: release +release: all-release +debug: all-debug +all: release debug $(JULIA_DEPOT_PATH): mkdir -p $@ diff --git a/sysimage.mk b/sysimage.mk index 7ed61d471a153..e5bbfad119131 100644 --- a/sysimage.mk +++ b/sysimage.mk @@ -76,8 +76,12 @@ $(build_private_libdir)/sys.ji: $(build_private_libdir)/corecompiler.ji $(JULIAH define sysimg_builder $$(build_private_libdir)/sys$1-o.a $$(build_private_libdir)/sys$1-bc.a : $$(build_private_libdir)/sys$1-%.a : $$(build_private_libdir)/sys.ji $$(JULIAHOME)/contrib/generate_precompile.jl @$$(call PRINT_JULIA, cd $$(JULIAHOME)/base && \ - if ! JULIA_BINDIR=$$(call cygpath_w,$(build_bindir)) WINEPATH="$$(call cygpath_w,$$(build_bindir));$$$$WINEPATH" \ - JULIA_NUM_THREADS=1 \ + if ! JULIA_BINDIR=$$(call cygpath_w,$(build_bindir)) \ + WINEPATH="$$(call cygpath_w,$$(build_bindir));$$$$WINEPATH" \ + JULIA_LOAD_PATH='@stdlib' \ + JULIA_PROJECT= \ + JULIA_DEPOT_PATH=':' \ + JULIA_NUM_THREADS=1 \ $$(call spawn, $3) $2 -C "$$(JULIA_CPU_TARGET)" --output-$$* $$(call cygpath_w,$$@).tmp $$(JULIA_SYSIMG_BUILD_FLAGS) \ --startup-file=no --warn-overwrite=yes --sysimage $$(call cygpath_w,$$<) $$(call cygpath_w,$$(JULIAHOME)/contrib/generate_precompile.jl) $(JULIA_PRECOMPILE); then \ echo '*** This error is usually fixed by running `make clean`. If the error persists$$(COMMA) try `make cleanall`. ***'; \ diff --git a/test/Makefile b/test/Makefile index 24e137a5b1492..88dbe5b2b4ed6 100644 --- a/test/Makefile +++ b/test/Makefile @@ -6,6 +6,11 @@ VERSDIR := v$(shell cut -d. 
-f1-2 < $(JULIAHOME)/VERSION) STDLIBDIR := $(build_datarootdir)/julia/stdlib/$(VERSDIR) # TODO: this Makefile ignores BUILDDIR, except for computing JULIA_EXECUTABLE +export JULIA_DEPOT_PATH := $(build_prefix)/share/julia +export JULIA_LOAD_PATH := @stdlib +unexport JULIA_PROJECT := +unexport JULIA_BINDIR := + TESTGROUPS = unicode strings compiler TESTS = all default stdlib $(TESTGROUPS) \ $(patsubst $(STDLIBDIR)/%/,%,$(dir $(wildcard $(STDLIBDIR)/*/.))) \ diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl index 1d04926ef23af..7ebed56227d03 100644 --- a/test/cmdlineargs.jl +++ b/test/cmdlineargs.jl @@ -134,10 +134,11 @@ end let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` # tests for handling of ENV errors - let v = writereadpipeline("println(\"REPL: \", @which(less), @isdefined(InteractiveUtils))", - setenv(`$exename -i -E 'empty!(LOAD_PATH); @isdefined InteractiveUtils'`, + let v = writereadpipeline( + "println(\"REPL: \", @which(less), @isdefined(InteractiveUtils))", + setenv(`$exename -i -E '@assert isempty(LOAD_PATH); push!(LOAD_PATH, "@stdlib"); @isdefined InteractiveUtils'`, "JULIA_LOAD_PATH" => "", - "JULIA_DEPOT_PATH" => "", + "JULIA_DEPOT_PATH" => ";:", "HOME" => homedir())) @test v == ("false\nREPL: InteractiveUtilstrue\n", true) end diff --git a/test/loading.jl b/test/loading.jl index ea544c2635dbc..394c13c5f2962 100644 --- a/test/loading.jl +++ b/test/loading.jl @@ -692,7 +692,9 @@ mktempdir() do dir mkpath(vpath) script = "@assert startswith(Base.active_project(), $(repr(vpath)))" cmd = `$(Base.julia_cmd()) --startup-file=no -e $(script)` - cmd = addenv(cmd, "JULIA_DEPOT_PATH" => dir) + cmd = addenv(cmd, + "JULIA_DEPOT_PATH" => dir, + "JULIA_LOAD_PATH" => Sys.iswindows() ? 
";" : ":") cmd = pipeline(cmd; stdout, stderr) @test success(cmd) end diff --git a/test/precompile.jl b/test/precompile.jl index de3510d49118d..9cb5a1d52d485 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -4,6 +4,7 @@ original_depot_path = copy(Base.DEPOT_PATH) original_load_path = copy(Base.LOAD_PATH) using Test, Distributed, Random +using REPL # doc lookup function Foo_module = :Foo4b3a94a1a081a8cb Foo2_module = :F2oo4b3a94a1a081a8cb From bd5e6da50f8e9937482bc4317fd571a725b39fde Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 30 May 2023 15:11:00 -0400 Subject: [PATCH 081/290] fix atomic intrinsics implementation issues (#49967) * jltypes: add missing GC root for cmpswap_type Tuple. This is called with a fieldtype, which might not even be a DataType. * support Ptr{Union{}} and Ptr{Cvoid} better --- src/intrinsics.cpp | 6 ++++- src/jltypes.c | 12 +++++----- src/runtime_intrinsics.c | 4 ++-- test/intrinsics.jl | 48 +++++++++++++++++++++++++++++++++++----- 4 files changed, 55 insertions(+), 15 deletions(-) diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index 7bef27f477534..ee1ded5a51e44 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -926,7 +926,11 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl bool isboxed; Type *ptrty = julia_type_to_llvm(ctx, ety, &isboxed); assert(!isboxed); - Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ); + Value *thePtr; + if (!type_is_ghost(ptrty)) + thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ); + else + thePtr = nullptr; // could use any value here, since typed_store will not use it jl_cgval_t ret = typed_store(ctx, thePtr, nullptr, x, y, ety, ctx.tbaa().tbaa_data, nullptr, nullptr, isboxed, llvm_order, llvm_failorder, nb, false, issetfield, isreplacefield, isswapfield, ismodifyfield, false, modifyop, "atomic_pointermodify"); if (issetfield) diff --git a/src/jltypes.c b/src/jltypes.c index 14a08d8c71db2..444923f600569 100644 --- 
a/src/jltypes.c +++ b/src/jltypes.c @@ -1411,7 +1411,7 @@ jl_datatype_t *jl_apply_modify_type(jl_value_t *dt) return rettyp; } -jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *dt) +jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *ty) { jl_value_t *params[2]; jl_value_t *names = jl_atomic_load_relaxed(&cmpswap_names); @@ -1422,12 +1422,12 @@ jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *dt) if (jl_atomic_cmpswap(&cmpswap_names, &names, lnames)) names = jl_atomic_load_relaxed(&cmpswap_names); // == lnames } - params[0] = dt; + params[0] = ty; params[1] = (jl_value_t*)jl_bool_type; - jl_datatype_t *tuptyp = (jl_datatype_t*)jl_apply_tuple_type_v(params, 2); - JL_GC_PROMISE_ROOTED(tuptyp); // (JL_ALWAYS_LEAFTYPE) - jl_datatype_t *rettyp = (jl_datatype_t*)jl_apply_type2((jl_value_t*)jl_namedtuple_type, names, (jl_value_t*)tuptyp); - JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE) + jl_value_t *tuptyp = jl_apply_tuple_type_v(params, 2); + JL_GC_PUSH1(&tuptyp); + jl_datatype_t *rettyp = (jl_datatype_t*)jl_apply_type2((jl_value_t*)jl_namedtuple_type, names, tuptyp); + JL_GC_POP(); return rettyp; } diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index 9babdf89f098b..ed320aa9a6c35 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -429,6 +429,8 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *exp jl_atomic_error("atomic_pointerreplace: invalid atomic ordering"); // TODO: filter other invalid orderings jl_value_t *ety = jl_tparam0(jl_typeof(p)); + if (!is_valid_intrinsic_elptr(ety)) + jl_error("atomic_pointerreplace: invalid pointer"); char *pp = (char*)jl_unbox_long(p); jl_datatype_t *rettyp = jl_apply_cmpswap_type(ety); JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE) @@ -447,8 +449,6 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *exp return result; } else { - if (!is_valid_intrinsic_elptr(ety)) - jl_error("atomic_pointerreplace: invalid pointer"); if 
(jl_typeof(x) != ety) jl_type_error("atomic_pointerreplace", ety, x); size_t nb = jl_datatype_size(ety); diff --git a/test/intrinsics.jl b/test/intrinsics.jl index aa2a9649857c4..35ce05b61dc24 100644 --- a/test/intrinsics.jl +++ b/test/intrinsics.jl @@ -214,14 +214,14 @@ swap(i, j) = j for TT in (Int8, Int16, Int32, Int64, Int128, Int256, Int512, Complex{Int32}, Complex{Int512}, Any) r = Ref{TT}(10) GC.@preserve r begin - (function (::Type{TT}) where TT + (@noinline function (::Type{TT}) where TT p = Base.unsafe_convert(Ptr{TT}, r) T(x) = convert(TT, x) S = UInt32 if TT !== Any @test_throws TypeError Core.Intrinsics.atomic_pointerset(p, S(1), :sequentially_consistent) - @test_throws TypeError Core.Intrinsics.atomic_pointerswap(p, S(100), :sequentially_consistent) - @test_throws TypeError Core.Intrinsics.atomic_pointerreplace(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent) + @test_throws TypeError Core.Intrinsics.atomic_pointerswap(p, S(2), :sequentially_consistent) + @test_throws TypeError Core.Intrinsics.atomic_pointerreplace(p, T(10), S(3), :sequentially_consistent, :sequentially_consistent) end @test Core.Intrinsics.pointerref(p, 1, 1) === T(10) === r[] if sizeof(r) > 8 @@ -234,7 +234,10 @@ for TT in (Int8, Int16, Int32, Int64, Int128, Int256, Int512, Complex{Int32}, Co @test_throws ErrorException("atomic_pointerreplace: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) @test Core.Intrinsics.pointerref(p, 1, 1) === T(10) === r[] else - TT !== Any && @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent) + if TT !== Any + @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, swap, S(4), :sequentially_consistent) + @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, Returns(S(5)), T(10), :sequentially_consistent) + end @test Core.Intrinsics.atomic_pointerref(p, 
:sequentially_consistent) === T(10) @test Core.Intrinsics.atomic_pointerset(p, T(1), :sequentially_consistent) === p @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(1) @@ -248,10 +251,12 @@ for TT in (Int8, Int16, Int32, Int64, Int128, Int256, Int512, Complex{Int32}, Co @test Core.Intrinsics.atomic_pointerswap(p, T(103), :sequentially_consistent) === T(102) @test Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(103), false)) @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(103) + @test Core.Intrinsics.atomic_pointermodify(p, Returns(T(105)), nothing, :sequentially_consistent) === Pair{TT,TT}(T(103), T(105)) + @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(105) end if TT === Any - @test Core.Intrinsics.atomic_pointermodify(p, swap, S(103), :sequentially_consistent) === Pair{TT,TT}(T(103), S(103)) - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === S(103) + @test Core.Intrinsics.atomic_pointermodify(p, swap, S(105), :sequentially_consistent) === Pair{TT,TT}(T(105), S(105)) + @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === S(105) @test Core.Intrinsics.atomic_pointerset(p, S(1), :sequentially_consistent) === p @test Core.Intrinsics.atomic_pointerswap(p, S(100), :sequentially_consistent) === S(1) @test Core.Intrinsics.atomic_pointerreplace(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((S(100), false)) @@ -262,6 +267,37 @@ for TT in (Int8, Int16, Int32, Int64, Int128, Int256, Int512, Complex{Int32}, Co end end +for TT in (Ptr{Nothing}, Ptr) + r = Ref(nothing) + GC.@preserve r begin + p = Ref{TT}(Base.unsafe_convert(Ptr{Nothing}, r)) + (@noinline function (p::Ref) + p = p[] + S = UInt32 + @test_throws TypeError Core.Intrinsics.atomic_pointerset(p, S(1), :sequentially_consistent) + @test_throws TypeError 
Core.Intrinsics.atomic_pointerswap(p, S(100), :sequentially_consistent) + @test_throws TypeError Core.Intrinsics.atomic_pointerreplace(p, nothing, S(2), :sequentially_consistent, :sequentially_consistent) + @test Core.Intrinsics.pointerref(p, 1, 1) === nothing === r[] + @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent) + @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, Returns(S(1)), nothing, :sequentially_consistent) + @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing + @test Core.Intrinsics.atomic_pointerset(p, nothing, :sequentially_consistent) === p + @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing + @test Core.Intrinsics.atomic_pointerreplace(p, nothing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, true)) + @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing + @test Core.Intrinsics.atomic_pointerreplace(p, S(1), nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, false)) + @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing + @test Core.Intrinsics.atomic_pointermodify(p, Returns(nothing), nothing, :sequentially_consistent) === Pair{Nothing,Nothing}(nothing, nothing) + @test Core.Intrinsics.atomic_pointermodify(p, Returns(nothing), S(1), :sequentially_consistent) === Pair{Nothing,Nothing}(nothing, nothing) + @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing + @test Core.Intrinsics.atomic_pointerswap(p, nothing, :sequentially_consistent) === nothing + @test Core.Intrinsics.atomic_pointerreplace(p, S(100), nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, false)) + @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing + end)(p,) + end +end + + mutable struct IntWrap <: Signed x::Int 
end From c63feee3bc3eb1f8433cb03a8cf85af39c0f05d3 Mon Sep 17 00:00:00 2001 From: Fabian Zickgraf Date: Tue, 30 May 2023 19:25:24 +0000 Subject: [PATCH 082/290] show: fix printing of "function (x...) end" (#49874) Previously, the parentheses were missing. --- base/show.jl | 10 ++++++++-- test/show.jl | 3 +++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/base/show.jl b/base/show.jl index fa8c411588102..2930b4951dd0a 100644 --- a/base/show.jl +++ b/base/show.jl @@ -2132,10 +2132,16 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In # block with argument elseif head in (:for,:while,:function,:macro,:if,:elseif,:let) && nargs==2 + if head === :function && is_expr(args[1], :...) + # fix printing of "function (x...) x end" + block_args = Expr(:tuple, args[1]) + else + block_args = args[1] + end if is_expr(args[2], :block) - show_block(IOContext(io, beginsym=>false), head, args[1], args[2], indent, quote_level) + show_block(IOContext(io, beginsym=>false), head, block_args, args[2], indent, quote_level) else - show_block(IOContext(io, beginsym=>false), head, args[1], Expr(:block, args[2]), indent, quote_level) + show_block(IOContext(io, beginsym=>false), head, block_args, Expr(:block, args[2]), indent, quote_level) end print(io, "end") diff --git a/test/show.jl b/test/show.jl index 76772c649a838..6949db4bb9956 100644 --- a/test/show.jl +++ b/test/show.jl @@ -1010,6 +1010,9 @@ test_mt(show_f5, "show_f5(A::AbstractArray{T, N}, indices::Vararg{$Int, N})") @test sprint(show, :(function f end)) == ":(function f end)" @test_repr "function g end" +# Printing of :(function (x...) end) +@test startswith(replstr(Meta.parse("function (x...) 
end")), ":(function (x...,)") + # Printing of macro definitions @test sprint(show, :(macro m end)) == ":(macro m end)" @test_repr "macro m end" From 318f0ead957a37e2898d71d457d7eab2a647bb13 Mon Sep 17 00:00:00 2001 From: Fons van der Plas Date: Tue, 30 May 2023 21:29:25 +0200 Subject: [PATCH 083/290] docs: first describe simple `read` and `write` without streaming (#49834) --- doc/src/manual/networking-and-streams.md | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/doc/src/manual/networking-and-streams.md b/doc/src/manual/networking-and-streams.md index 1ee2f33de5c23..00a10177b2155 100644 --- a/doc/src/manual/networking-and-streams.md +++ b/doc/src/manual/networking-and-streams.md @@ -120,7 +120,28 @@ of common properties. ## Working with Files -Like many other environments, Julia has an [`open`](@ref) function, which takes a filename and +You can write content to a file with the `write(filename::String, content)` method: + +```julia-repl +julia> write("hello.txt", "Hello, World!") +13 +``` + +_(`13` is the number of bytes written.)_ + +You can read the contents of a file with the `read(filename::String)` method, or `read(filename::String, String)` +to read the contents as a string: + +```julia-repl +julia> read("hello.txt", String) +"Hello, World!" +``` + + +### Advanced: streaming files + +The `read` and `write` methods above allow you to read and write file contents. Like many other +environments, Julia also has an [`open`](@ref) function, which takes a filename and returns an [`IOStream`](@ref) object that you can use to read and write things from the file. 
For example, if we have a file, `hello.txt`, whose contents are `Hello, World!`: From d912d85c428f43f00c0932653a51236915116c18 Mon Sep 17 00:00:00 2001 From: Fons van der Plas Date: Tue, 30 May 2023 21:32:58 +0200 Subject: [PATCH 084/290] doc: simple docstring for `read(filename::AbstractString)` (#49836) --- base/io.jl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/base/io.jl b/base/io.jl index 9c00c57576bac..4163a5e803676 100644 --- a/base/io.jl +++ b/base/io.jl @@ -461,14 +461,18 @@ wait_close(io::AbstractPipe) = (wait_close(pipe_writer(io)::IO); wait_close(pipe write(filename::AbstractString, a1, args...) = open(io->write(io, a1, args...), convert(String, filename)::String, "w") """ - read(filename::AbstractString, args...) + read(filename::AbstractString) -Open a file and read its contents. `args` is passed to `read`: this is equivalent to -`open(io->read(io, args...), filename)`. +Read the entire contents of a file as a `Vector{UInt8}`. read(filename::AbstractString, String) Read the entire contents of a file as a string. + + read(filename::AbstractString, args...) + +Open a file and read its contents. `args` is passed to `read`: this is equivalent to +`open(io->read(io, args...), filename)`. """ read(filename::AbstractString, args...) 
= open(io->read(io, args...), convert(String, filename)::String) From 84bf67c8a25d15fd9f05f85918617794bc5c154f Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Tue, 30 May 2023 14:55:28 -0500 Subject: [PATCH 085/290] Support sorting iterators (#46104) * widen sort's type signature * throw on AbstractString * Throw on infinite iterator * make sort(::NTuple) return a tuple (use vector internally for sorting for large tuples) --- base/sort.jl | 42 ++++++++++++++++++++++++++++++++++++++++-- test/sorting.jl | 31 +++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 0e84657fc481e..b78f773ad9f72 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -5,7 +5,8 @@ module Sort using Base.Order using Base: copymutable, midpoint, require_one_based_indexing, uinttype, - sub_with_overflow, add_with_overflow, OneTo, BitSigned, BitIntegerType, top_set_bit + sub_with_overflow, add_with_overflow, OneTo, BitSigned, BitIntegerType, top_set_bit, + IteratorSize, HasShape, IsInfinite, tail import Base: sort, @@ -1383,6 +1384,11 @@ end Variant of [`sort!`](@ref) that returns a sorted copy of `v` leaving `v` itself unmodified. +Uses `Base.copymutable` to support immutable collections and iterables. + +!!! compat "Julia 1.10" + `sort` of arbitrary iterables requires at least Julia 1.10. + # Examples ```jldoctest julia> v = [3, 1, 2]; @@ -1400,7 +1406,39 @@ julia> v 2 ``` """ -sort(v::AbstractVector; kws...) = sort!(copymutable(v); kws...) +function sort(v; kws...) + size = IteratorSize(v) + size == HasShape{0}() && throw(ArgumentError("$v cannot be sorted")) + size == IsInfinite() && throw(ArgumentError("infinite iterator $v cannot be sorted")) + sort!(copymutable(v); kws...) +end +sort(v::AbstractVector; kws...) = sort!(copymutable(v); kws...) # for method disambiguation +sort(::AbstractString; kws...) = + throw(ArgumentError("sort(::AbstractString) is not supported")) +sort(::Tuple; kws...) 
= + throw(ArgumentError("sort(::Tuple) is only supported for NTuples")) + +function sort(x::NTuple{N}; lt::Function=isless, by::Function=identity, + rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward) where N + o = ord(lt,by,rev,order) + if N > 9 + v = sort!(copymutable(x), DEFAULT_STABLE, o) + tuple((v[i] for i in 1:N)...) + else + _sort(x, o) + end +end +_sort(x::Union{NTuple{0}, NTuple{1}}, o::Ordering) = x +function _sort(x::NTuple, o::Ordering) + a, b = Base.IteratorsMD.split(x, Val(length(x)>>1)) + merge(_sort(a, o), _sort(b, o), o) +end +merge(x::NTuple, y::NTuple{0}, o::Ordering) = x +merge(x::NTuple{0}, y::NTuple, o::Ordering) = y +merge(x::NTuple{0}, y::NTuple{0}, o::Ordering) = x # Method ambiguity +merge(x::NTuple, y::NTuple, o::Ordering) = + (lt(o, y[1], x[1]) ? (y[1], merge(x, tail(y), o)...) : (x[1], merge(tail(x), y, o)...)) + ## partialsortperm: the permutation to sort the first k elements of an array ## diff --git a/test/sorting.jl b/test/sorting.jl index ec1666dabb2fb..0528d9d81c296 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -88,6 +88,20 @@ end vcat(2000, (x:x+99 for x in 1900:-100:100)..., 1:99) end +function tuple_sort_test(x) + @test issorted(sort(x)) + length(x) > 9 && return # length > 9 uses a vector fallback + @test 0 == @allocated sort(x) +end +@testset "sort(::NTuple)" begin + @test sort((9,8,3,3,6,2,0,8)) == (0,2,3,3,6,8,8,9) + @test sort((9,8,3,3,6,2,0,8), by=x->x÷3) == (2,0,3,3,8,6,8,9) + for i in 1:40 + tuple_sort_test(tuple(rand(i)...)) + end + @test_throws ArgumentError sort((1,2,3.0)) +end + @testset "partialsort" begin @test partialsort([3,6,30,1,9],3) == 6 @test partialsort([3,6,30,1,9],3:4) == [6,9] @@ -530,6 +544,23 @@ end @test isequal(a, [8,6,7,NaN,5,3,0,9]) end +@testset "sort!(iterable)" begin + gen = (x % 7 + 0.1x for x in 1:50) + @test sort(gen) == sort!(collect(gen)) + gen = (x % 7 + 0.1y for x in 1:10, y in 1:5) + @test sort(gen; dims=1) == sort!(collect(gen); dims=1) + @test sort(gen; dims=2) == 
sort!(collect(gen); dims=2) + + @test_throws ArgumentError("dimension out of range") sort(gen; dims=3) + + @test_throws UndefKeywordError(:dims) sort(gen) + @test_throws UndefKeywordError(:dims) sort(collect(gen)) + @test_throws UndefKeywordError(:dims) sort!(collect(gen)) + + @test_throws ArgumentError sort("string") + @test_throws ArgumentError("1 cannot be sorted") sort(1) +end + @testset "sort!(::AbstractVector{<:Integer}) with short int range" begin a = view([9:-1:0;], :)::SubArray sort!(a) From c3a08fe749b85d4b854b67ca0123d26b4ad9c22a Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Tue, 30 May 2023 16:13:57 -0400 Subject: [PATCH 086/290] Makefile: For gcanalyze, use Clang's CFLAGS not GCC's It's supported to build the analysis plugin with GCC, but the flags we actually run it with need to match Clang. --- Make.inc | 60 ++++++++++++++++++++++++++++++++++++---------------- src/Makefile | 34 +++++++++++++++-------------- 2 files changed, 60 insertions(+), 34 deletions(-) diff --git a/Make.inc b/Make.inc index 35b0657de5aa2..301ee934320f6 100644 --- a/Make.inc +++ b/Make.inc @@ -486,37 +486,61 @@ MACOSX_VERSION_MIN := 11.0 endif endif -ifeq ($(USEGCC),1) -CC := $(CROSS_COMPILE)gcc -CXX := $(CROSS_COMPILE)g++ -JCFLAGS := -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 +JCFLAGS_COMMON := -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 +JCFLAGS_CLANG := $(JCFLAGS_COMMON) +JCFLAGS_GCC := $(JCFLAGS_COMMON) + # AArch64 needs this flag to generate the .eh_frame used by libunwind -JCPPFLAGS := -fasynchronous-unwind-tables -JCXXFLAGS := -pipe $(fPIC) -fno-rtti -std=c++14 +JCPPFLAGS_COMMON := -fasynchronous-unwind-tables +JCPPFLAGS_CLANG := $(JCPPFLAGS_COMMON) +JCPPFLAGS_GCC := $(JCPPFLAGS_COMMON) + +JCXXFLAGS_COMMON := -pipe $(fPIC) -fno-rtti -std=c++14 +JCXXFLAGS_CLANG := $(JCXXFLAGS_COMMON) -pedantic +JCXXFLAGS_GCC := $(JCXXFLAGS_COMMON) + +DEBUGFLAGS_COMMON := -O0 -DJL_DEBUG_BUILD -fstack-protector +DEBUGFLAGS_CLANG := 
$(DEBUGFLAGS_COMMON) -g +DEBUGFLAGS_GCC := $(DEBUGFLAGS_COMMON) -ggdb2 + +SHIPFLAGS_COMMON := -O3 +SHIPFLAGS_CLANG := $(SHIPFLAGS_COMMON) -g +SHIPFLAGS_GCC := $(SHIPFLAGS_COMMON) -ggdb2 -falign-functions + +ifeq ($(OS), Darwin) +JCPPFLAGS_CLANG += -D_LARGEFILE_SOURCE -D_DARWIN_USE_64_BIT_INODE=1 +endif + ifneq ($(OS), WINNT) # Do not enable on windows to avoid warnings from libuv. -JCXXFLAGS += -pedantic +JCXXFLAGS_GCC += -pedantic endif -DEBUGFLAGS := -O0 -ggdb2 -DJL_DEBUG_BUILD -fstack-protector -SHIPFLAGS := -O3 -ggdb2 -falign-functions + +ifeq ($(USEGCC),1) +CC := $(CROSS_COMPILE)gcc +CXX := $(CROSS_COMPILE)g++ +JCFLAGS := $(JCFLAGS_GCC) +JCPPFLAGS := $(JCPPFLAGS_GCC) +JCXXFLAGS := $(JCXXFLAGS_GCC) +DEBUGFLAGS := $(DEBUGFLAGS_GCC) +SHIPFLAGS := $(SHIPFLAGS_GCC) endif ifeq ($(USECLANG),1) -CC := $(CROSS_COMPILE)clang -CXX := $(CROSS_COMPILE)clang++ -JCFLAGS := -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 -# AArch64 needs this flag to generate the .eh_frame used by libunwind -JCPPFLAGS := -fasynchronous-unwind-tables -JCXXFLAGS := -pipe $(fPIC) -fno-rtti -pedantic -std=c++14 -DEBUGFLAGS := -O0 -g -DJL_DEBUG_BUILD -fstack-protector -SHIPFLAGS := -O3 -g +CC := $(CROSS_COMPILE)clang +CXX := $(CROSS_COMPILE)clang++ +JCFLAGS := $(JCFLAGS_CLANG) +JCPPFLAGS := $(JCPPFLAGS_CLANG) +JCXXFLAGS := $(JCXXFLAGS_CLANG) +DEBUGFLAGS := $(DEBUGFLAGS_CLANG) +SHIPFLAGS := $(SHIPFLAGS_CLANG) + ifeq ($(OS), Darwin) CC += -mmacosx-version-min=$(MACOSX_VERSION_MIN) CXX += -mmacosx-version-min=$(MACOSX_VERSION_MIN) FC += -mmacosx-version-min=$(MACOSX_VERSION_MIN) # export MACOSX_DEPLOYMENT_TARGET so that ld picks it up, especially for deps export MACOSX_DEPLOYMENT_TARGET=$(MACOSX_VERSION_MIN) -JCPPFLAGS += -D_LARGEFILE_SOURCE -D_DARWIN_USE_64_BIT_INODE=1 endif endif diff --git a/src/Makefile b/src/Makefile index f8cf55d35c667..e561aefcdfe04 100644 --- a/src/Makefile +++ b/src/Makefile @@ -170,16 +170,18 @@ DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj) CODEGEN_OBJS := 
$(CODEGEN_SRCS:%=$(BUILDDIR)/%.o) CODEGEN_DOBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.dbg.obj) -SHIPFLAGS += $(FLAGS) -DEBUGFLAGS += $(FLAGS) - -# if not absolute, then relative to the directory of the julia executable -SHIPFLAGS += "-DJL_SYSTEM_IMAGE_PATH=\"$(build_private_libdir_rel)/sys.$(SHLIB_EXT)\"" -DEBUGFLAGS += "-DJL_SYSTEM_IMAGE_PATH=\"$(build_private_libdir_rel)/sys-debug.$(SHLIB_EXT)\"" - # Add SONAME defines so we can embed proper `dlopen()` calls. -SHIPFLAGS += "-DJL_LIBJULIA_SONAME=\"$(LIBJULIA_PATH_REL).$(JL_MAJOR_SHLIB_EXT)\"" -DEBUGFLAGS += "-DJL_LIBJULIA_SONAME=\"$(LIBJULIA_PATH_REL)-debug.$(JL_MAJOR_SHLIB_EXT)\"" +ADDL_SHIPFLAGS := "-DJL_SYSTEM_IMAGE_PATH=\"$(build_private_libdir_rel)/sys.$(SHLIB_EXT)\"" \ + "-DJL_LIBJULIA_SONAME=\"$(LIBJULIA_PATH_REL).$(JL_MAJOR_SHLIB_EXT)\"" +ADDL_DEBUGFLAGS := "-DJL_SYSTEM_IMAGE_PATH=\"$(build_private_libdir_rel)/sys-debug.$(SHLIB_EXT)\"" \ + "-DJL_LIBJULIA_SONAME=\"$(LIBJULIA_PATH_REL)-debug.$(JL_MAJOR_SHLIB_EXT)\"" + +SHIPFLAGS += $(FLAGS) $(ADDL_SHIPFLAGS) +DEBUGFLAGS += $(FLAGS) $(ADDL_DEBUGFLAGS) +SHIPFLAGS_GCC += $(FLAGS) $(ADDL_SHIPFLAGS) +DEBUGFLAGS_GCC += $(FLAGS) $(ADDL_DEBUGFLAGS) +SHIPFLAGS_CLANG += $(FLAGS) $(ADDL_SHIPFLAGS) +DEBUGFLAGS_CLANG += $(FLAGS) $(ADDL_DEBUGFLAGS) ifeq ($(USE_CROSS_FLISP), 1) FLISPDIR := $(BUILDDIR)/flisp/host @@ -481,36 +483,36 @@ clang-sagc-%: $(SRCDIR)/%.c $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) .F @$(call PRINT_ANALYZE, $(build_depsbindir)/clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text --analyzer-no-default-checks \ -Xclang -load -Xclang $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) -Xclang -analyzer-checker=core$(COMMA)julia.GCChecker \ $(SA_EXCEPTIONS-$(notdir $<)) \ - $(CLANGSA_FLAGS) $(JCPPFLAGS) $(JCFLAGS) $(JL_CFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -x c $<) + $(CLANGSA_FLAGS) $(JCPPFLAGS_CLANG) $(JCFLAGS_CLANG) $(JL_CFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics -x c $<) clang-sagc-%: 
$(SRCDIR)/%.cpp $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check @$(call PRINT_ANALYZE, $(build_depsbindir)/clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text --analyzer-no-default-checks \ -Xclang -load -Xclang $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) -Xclang -analyzer-checker=core$(COMMA)julia.GCChecker \ $(SA_EXCEPTIONS-$(notdir $<)) \ - $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(JL_CXXFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -x c++ $<) + $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS_CLANG) $(JCXXFLAGS_CLANG) $(JL_CXXFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics -x c++ $<) clang-sa-%: JL_CXXFLAGS += -UNDEBUG clang-sa-%: $(SRCDIR)/%.c .FORCE | analyzegc-deps-check @$(call PRINT_ANALYZE, $(build_depsbindir)/clang --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text \ -Xanalyzer -analyzer-disable-checker=deadcode.DeadStores \ $(SA_EXCEPTIONS-$(notdir $<)) \ - $(CLANGSA_FLAGS) $(JCPPFLAGS) $(JCFLAGS) $(JL_CFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -Werror -x c $<) + $(CLANGSA_FLAGS) $(JCPPFLAGS_CLANG) $(JCFLAGS_CLANG) $(JL_CFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics -Werror -x c $<) clang-sa-%: $(SRCDIR)/%.cpp .FORCE | analyzegc-deps-check @$(call PRINT_ANALYZE, $(build_depsbindir)/clang --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text \ -Xanalyzer -analyzer-disable-checker=deadcode.DeadStores \ $(SA_EXCEPTIONS-$(notdir $<)) \ - $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(JL_CXXFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -Werror -x c++ $<) + $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS_CLANG) $(JCXXFLAGS_CLANG) $(JL_CXXFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics -Werror -x c++ $<) clang-tidy-%: $(SRCDIR)/%.c $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check @$(call PRINT_ANALYZE, 
$(build_depsbindir)/clang-tidy $< -header-filter='.*' --quiet \ -load $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) --checks='-clang-analyzer-*$(COMMA)-clang-diagnostic-*$(COMMA)concurrency-implicit-atomics' --warnings-as-errors='*' \ - -- $(CLANGSA_FLAGS) $(JCPPFLAGS) $(JCFLAGS) $(JL_CFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -fno-caret-diagnostics -x c) + -- $(CLANGSA_FLAGS) $(JCPPFLAGS_CLANG) $(JCFLAGS_CLANG) $(JL_CFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics -fno-caret-diagnostics -x c) clang-tidy-%: $(SRCDIR)/%.cpp $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check @$(call PRINT_ANALYZE, $(build_depsbindir)/clang-tidy $< -header-filter='.*' --quiet \ -load $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) --checks='-clang-analyzer-*$(COMMA)-clang-diagnostic-*$(COMMA)concurrency-implicit-atomics' --warnings-as-errors='*' \ - -- $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(JL_CXXFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics --system-header-prefix=llvm -Wno-deprecated-declarations -fno-caret-diagnostics -x c++) + -- $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS_CLANG) $(JCXXFLAGS_CLANG) $(JL_CXXFLAGS) $(DEBUGFLAGS_CLANG) -fcolor-diagnostics --system-header-prefix=llvm -Wno-deprecated-declarations -fno-caret-diagnostics -x c++) # set the exports for the source files based on where they are getting linked -clang-sa-% clang-sagc-% clang-tidy-%: DEBUGFLAGS += -DJL_LIBRARY_EXPORTS +clang-sa-% clang-sagc-% clang-tidy-%: DEBUGFLAGS_CLANG += -DJL_LIBRARY_EXPORTS # Add C files as a target of `analyzesrc` and `analyzegc` and `tidysrc` tidysrc: $(addprefix clang-tidy-,$(filter-out $(basename $(SKIP_IMPLICIT_ATOMICS)),$(CODEGEN_SRCS) $(SRCS))) From 7e25ebf5af1b7b346dc1f337e7ab5267c214e8e6 Mon Sep 17 00:00:00 2001 From: DilumAluthgeBot <43731525+DilumAluthgeBot@users.noreply.github.com> Date: Wed, 31 May 2023 05:32:18 -0400 Subject: [PATCH 087/290] 
=?UTF-8?q?=F0=9F=A4=96=20[master]=20Bump=20the=20?= =?UTF-8?q?Pkg=20stdlib=20from=20daf02a458=20to=209c01707a2=20(#50005)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🤖 [master] Bump the Pkg stdlib from daf02a458 to 9c01707a2 * remove Pkg.LazilyInitializedFields from docs --------- Co-authored-by: Dilum Aluthge Co-authored-by: Ian --- .../Pkg-9c01707a20478b859e6fd995b5e16c82e7096d92.tar.gz/md5 | 1 + .../sha512 | 1 + .../Pkg-daf02a458ae6daa402a5dd6683c40d6910325c4e.tar.gz/md5 | 1 - .../sha512 | 1 - doc/make.jl | 6 ------ stdlib/Pkg.version | 2 +- 6 files changed, 3 insertions(+), 9 deletions(-) create mode 100644 deps/checksums/Pkg-9c01707a20478b859e6fd995b5e16c82e7096d92.tar.gz/md5 create mode 100644 deps/checksums/Pkg-9c01707a20478b859e6fd995b5e16c82e7096d92.tar.gz/sha512 delete mode 100644 deps/checksums/Pkg-daf02a458ae6daa402a5dd6683c40d6910325c4e.tar.gz/md5 delete mode 100644 deps/checksums/Pkg-daf02a458ae6daa402a5dd6683c40d6910325c4e.tar.gz/sha512 diff --git a/deps/checksums/Pkg-9c01707a20478b859e6fd995b5e16c82e7096d92.tar.gz/md5 b/deps/checksums/Pkg-9c01707a20478b859e6fd995b5e16c82e7096d92.tar.gz/md5 new file mode 100644 index 0000000000000..dc9660f97087f --- /dev/null +++ b/deps/checksums/Pkg-9c01707a20478b859e6fd995b5e16c82e7096d92.tar.gz/md5 @@ -0,0 +1 @@ +16da059096b4981e389884a5232fb3c6 diff --git a/deps/checksums/Pkg-9c01707a20478b859e6fd995b5e16c82e7096d92.tar.gz/sha512 b/deps/checksums/Pkg-9c01707a20478b859e6fd995b5e16c82e7096d92.tar.gz/sha512 new file mode 100644 index 0000000000000..c7e946d70bbdb --- /dev/null +++ b/deps/checksums/Pkg-9c01707a20478b859e6fd995b5e16c82e7096d92.tar.gz/sha512 @@ -0,0 +1 @@ +d162771efdbae9dd5c349319595b094577b4612c11b163cfbf16242a818769975cb7a8c5188c1cba75ae804b430d52e9a08c1f394286f2ae239bec7f8c7e3b71 diff --git a/deps/checksums/Pkg-daf02a458ae6daa402a5dd6683c40d6910325c4e.tar.gz/md5 b/deps/checksums/Pkg-daf02a458ae6daa402a5dd6683c40d6910325c4e.tar.gz/md5 deleted file mode 
100644 index 08f5ccda57979..0000000000000 --- a/deps/checksums/Pkg-daf02a458ae6daa402a5dd6683c40d6910325c4e.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -c135dc6ed97656fe956d9ee5cf3cbc55 diff --git a/deps/checksums/Pkg-daf02a458ae6daa402a5dd6683c40d6910325c4e.tar.gz/sha512 b/deps/checksums/Pkg-daf02a458ae6daa402a5dd6683c40d6910325c4e.tar.gz/sha512 deleted file mode 100644 index 957075f0f281a..0000000000000 --- a/deps/checksums/Pkg-daf02a458ae6daa402a5dd6683c40d6910325c4e.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -2ae67fd4c5e1bf83df5df836fcd69afc0fb8454723043d32de9c7bc29feedf390adb76efda52e79937ea801ff21b5f4ea875469136424e2889904130b247b52a diff --git a/doc/make.jl b/doc/make.jl index a472c85e969f9..a9343a3133a63 100644 --- a/doc/make.jl +++ b/doc/make.jl @@ -265,12 +265,6 @@ DocMeta.setdocmeta!( maybe_revise(:(using Base.BinaryPlatforms)); recursive=true, warn=false, ) -DocMeta.setdocmeta!( - Pkg.LazilyInitializedFields, - :DocTestSetup, - maybe_revise(:(using Pkg.LazilyInitializedFields)); - recursive=true, warn=false, -) let r = r"buildroot=(.+)", i = findfirst(x -> occursin(r, x), ARGS) global const buildroot = i === nothing ? (@__DIR__) : first(match(r, ARGS[i]).captures) diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version index 7b5006f2141ff..d9eba65f4ce77 100644 --- a/stdlib/Pkg.version +++ b/stdlib/Pkg.version @@ -1,4 +1,4 @@ PKG_BRANCH = master -PKG_SHA1 = daf02a458ae6daa402a5dd6683c40d6910325c4e +PKG_SHA1 = 9c01707a20478b859e6fd995b5e16c82e7096d92 PKG_GIT_URL := https://github.com/JuliaLang/Pkg.jl.git PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1 From ca3270b06f41195c0e773a55ce0333bddbe9b461 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 31 May 2023 05:33:03 -0400 Subject: [PATCH 088/290] lowering: apply let hygiene in let environment (#49999) Not just a regression, since this test also fails on old versions, but #49897 applied the wrong environment to make this hygiene correct which broke it worse. 
Fix #49984 --- src/macroexpand.scm | 2 +- test/syntax.jl | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/macroexpand.scm b/src/macroexpand.scm index 89c9564e2e24a..e0e809eee08f1 100644 --- a/src/macroexpand.scm +++ b/src/macroexpand.scm @@ -447,7 +447,7 @@ newenv m parent-scope inarg)) ;; expand initial values in old env (resolve-expansion-vars- (caddr bind) env m parent-scope inarg)) - (resolve-expansion-vars- bind env m parent-scope inarg))) + (resolve-expansion-vars- bind newenv m parent-scope inarg))) binds)) ,body))) ((hygienic-scope) ; TODO: move this lowering to resolve-scopes, instead of reimplementing it here badly diff --git a/test/syntax.jl b/test/syntax.jl index aa854bfa0d19b..af9344c5217e3 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -3479,3 +3479,7 @@ end @test @_macroexpand(global (; x::S, $(esc(:y))::$(esc(:T))) = a) == :(global (; x::$(GlobalRef(m, :S)), y::T) = $(GlobalRef(m, :a))) end + +# issue #49984 +macro z49984(s); :(let a; $(esc(s)); end); end +@test let a = 1; @z49984(a) === 1; end From da98f540724ccc2d1296091452c4565ed7cb3f17 Mon Sep 17 00:00:00 2001 From: KristofferC Date: Wed, 31 May 2023 12:17:23 +0200 Subject: [PATCH 089/290] move `jl_timing_apply_env` to a later place where it uses initialized data Also, move it after `DISABLE_SUBSYSTEM` to avoid it clobbering the results. --- src/jlapi.c | 3 --- src/timing.c | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/jlapi.c b/src/jlapi.c index 001253fed71a8..0dffaac627288 100644 --- a/src/jlapi.c +++ b/src/jlapi.c @@ -690,9 +690,6 @@ static void rr_detach_teleport(void) { JL_DLLEXPORT int jl_repl_entrypoint(int argc, char *argv[]) { #ifdef USE_TRACY - // Apply e.g. 
JULIA_TIMING_SUBSYSTEMS="+GC,-INFERENCE" and - // JULIA_TIMING_METADATA_PRINT_LIMIT=20 - jl_timing_apply_env(); if (getenv("JULIA_WAIT_FOR_TRACY")) while (!TracyCIsConnected) jl_cpu_pause(); // Wait for connection #endif diff --git a/src/timing.c b/src/timing.c index ef57972ffc574..5f0548ed21c3c 100644 --- a/src/timing.c +++ b/src/timing.c @@ -131,6 +131,9 @@ void jl_init_timing(void) DISABLE_SUBSYSTEM(AST_UNCOMPRESS); #endif + // Apply e.g. JULIA_TIMING_SUBSYSTEMS="+GC,-INFERENCE" and + // JULIA_TIMING_METADATA_PRINT_LIMIT=20 + jl_timing_apply_env(); } void jl_destroy_timing(void) From b050ca9a135bcfb0723662282deed4518567ee45 Mon Sep 17 00:00:00 2001 From: KristofferC Date: Wed, 31 May 2023 12:17:45 +0200 Subject: [PATCH 090/290] only add an entry for `METHOD_MATCH` if it does some work --- src/gf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gf.c b/src/gf.c index e69e7f16756d5..ac2ac9ff4a7a6 100644 --- a/src/gf.c +++ b/src/gf.c @@ -3576,9 +3576,9 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int intersections, size_t world, int cache_result, size_t *min_valid, size_t *max_valid, int *ambig) { - JL_TIMING(METHOD_MATCH, METHOD_MATCH); if (world > jl_atomic_load_acquire(&jl_world_counter)) return jl_nothing; // the future is not enumerable + JL_TIMING(METHOD_MATCH, METHOD_MATCH); int has_ambiguity = 0; jl_value_t *unw = jl_unwrap_unionall((jl_value_t*)type); assert(jl_is_datatype(unw)); From faca5bde2cffdc00fc8c7a97288741edde3b0d5d Mon Sep 17 00:00:00 2001 From: KristofferC Date: Wed, 31 May 2023 12:18:36 +0200 Subject: [PATCH 091/290] disable `TYPE_CACHE_INSERT` profile zone by default This one tend to be very fast, produce many entries and it tends to be not very interesting in most cases --- src/timing.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/timing.c b/src/timing.c index 5f0548ed21c3c..9a1af1bdf0ac1 100644 --- a/src/timing.c +++ b/src/timing.c @@ -125,6 +125,7 @@ void jl_init_timing(void) #define 
DISABLE_SUBSYSTEM(subsystem) jl_atomic_fetch_or_relaxed(jl_timing_disable_mask + (JL_TIMING_##subsystem / (sizeof(uint64_t) * CHAR_BIT)), 1 << (JL_TIMING_##subsystem % (sizeof(uint64_t) * CHAR_BIT))) DISABLE_SUBSYSTEM(ROOT); DISABLE_SUBSYSTEM(TYPE_CACHE_LOOKUP); + DISABLE_SUBSYSTEM(TYPE_CACHE_INSERT); DISABLE_SUBSYSTEM(METHOD_MATCH); DISABLE_SUBSYSTEM(METHOD_LOOKUP_FAST); DISABLE_SUBSYSTEM(AST_COMPRESS); From d5145deed79eca044c541ba5bb021b5834569e98 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Wed, 31 May 2023 08:44:55 -0500 Subject: [PATCH 092/290] fix and test sorting arrays with >1 dimension and custom `similar` (#49392) * fix and test sorting arrays with >1 dimension and custom `similar` * add PR number --------- Co-authored-by: Lilith Hafner --- base/sort.jl | 2 +- test/sorting.jl | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/base/sort.jl b/base/sort.jl index b78f773ad9f72..99f2ed3e1aeb8 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -1809,7 +1809,7 @@ function sort!(A::AbstractArray{T}; by=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward, # TODO stop eagerly over-allocating. - scratch::Union{Vector{T}, Nothing}=similar(A, size(A, dims))) where T + scratch::Union{Vector{T}, Nothing}=Vector{T}(undef, size(A, dims))) where T __sort!(A, Val(dims), maybe_apply_initial_optimizations(alg), ord(lt, by, rev, order), scratch) end function __sort!(A::AbstractArray{T}, ::Val{K}, diff --git a/test/sorting.jl b/test/sorting.jl index 0528d9d81c296..cf98182307088 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -1011,6 +1011,20 @@ end end end +struct MyArray49392{T, N} <: AbstractArray{T, N} + data::Array{T, N} +end +Base.size(A::MyArray49392) = size(A.data) +Base.getindex(A::MyArray49392, i...) = getindex(A.data, i...) +Base.setindex!(A::MyArray49392, v, i...) = setindex!(A.data, v, i...) 
+Base.similar(A::MyArray49392, ::Type{T}, dims::Dims{N}) where {T, N} = MyArray49392(similar(A.data, T, dims)) + +@testset "Custom matrices (#49392)" begin + x = rand(10, 10) + y = MyArray49392(copy(x)) + @test all(sort!(y, dims=2) .== sort!(x,dims=2)) +end + # This testset is at the end of the file because it is slow. @testset "searchsorted" begin numTypes = [ Int8, Int16, Int32, Int64, Int128, From d3d09c13f8ce1108cedbf2ab3e3ce34304af584a Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 31 May 2023 16:04:41 +0200 Subject: [PATCH 093/290] add a timing zone for `call_require` (#49723) --- src/timing.h | 1 + src/toplevel.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/src/timing.h b/src/timing.h index a3ae283e98b3d..4f94bbc3050cf 100644 --- a/src/timing.h +++ b/src/timing.h @@ -182,6 +182,7 @@ JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str); X(CODEGEN_LLVM) \ X(CODEGEN_Codeinst) \ X(CODEGEN_Workqueue) \ + X(LOAD_Require) \ X(LOAD_Sysimg) \ X(LOAD_Pkgimg) \ X(LOAD_Processor) \ diff --git a/src/toplevel.c b/src/toplevel.c index 5daf27043e938..cf0104879a7b0 100644 --- a/src/toplevel.c +++ b/src/toplevel.c @@ -461,6 +461,9 @@ static void body_attributes(jl_array_t *body, int *has_ccall, int *has_defs, int static jl_module_t *call_require(jl_module_t *mod, jl_sym_t *var) JL_GLOBALLY_ROOTED { + JL_TIMING(LOAD_IMAGE, LOAD_Require); + jl_timing_printf(JL_TIMING_DEFAULT_BLOCK, "%s", jl_symbol_name(var)); + static jl_value_t *require_func = NULL; int build_mode = jl_generating_output(); jl_module_t *m = NULL; From 9b27a8f8a5b8a96782379b8ce32fe670d131012b Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Thu, 1 Jun 2023 00:49:36 +0900 Subject: [PATCH 094/290] errorshow: simplify printing of keyword argument types using a new macro format (#49959) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Julia, keyword arguments are represented 
as `Base.Pairs` objects. However, the object type often appears unnecessarily complex, especially when printed in a stack trace. This commit aims to simplify the printing of stack traces that involve keyword method calls, while still allowing us to reconstruct the actual method signature types from the printed signature types. The approach is similar to #49117: this commit introduces a new macro called `Base.@Kwargs`. It follows the same syntax as `@NamedTuple` and returns a `Base.Pairs` type that is used for keyword method calls. We use this syntax when printing keyword argument types. Here's an example of a stack trace: ```diff diff --git a/b.jl b/a.jl index 91dd6f0464..b804ae4be5 100644 --- a/b.jl +++ b/a.jl @@ -22,12 +22,11 @@ Stacktrace: @ Base ./reduce.jl:44 [inlined] [6] mapfoldl(f::typeof(identity), op::typeof(Base.add_sum), itr::String; init::Int64) @ Base ./reduce.jl:175 [inlined] - [7] mapreduce(f::typeof(identity), op::typeof(Base.add_sum), itr::String; kw::Base.Pairs{…}) + [7] mapreduce(f::typeof(identity), op::typeof(Base.add_sum), itr::String; kw::@Kwargs{init::Int64}) @ Base ./reduce.jl:307 [inlined] - [8] sum(f::typeof(identity), a::String; kw::Base.Pairs{Symbol, Int64, Tuple{Symbol}, @NamedTuple{init::Int64}}) + [8] sum(f::typeof(identity), a::String; kw::@Kwargs{init::Int64}) @ Base ./reduce.jl:535 [inlined] - [9] sum(a::String; kw::Base.Pairs{Symbol, Int64, Tuple{Symbol}, @NamedTuple{init::Int64}}) + [9] sum(a::String; kw::@Kwargs{init::Int64}) @ Base ./reduce.jl:564 [inlined] [10] top-level scope ``` --- * RFC: errorshow: simplify printing of keyword argument types using a new macro format * export and document `Base.@Kwargs` and further simplify the stack trace view * use the `@Kwargs` syntax only when printing kwmethod signature within stack trace view And add tests. 
* add news entry * more type stability * Apply suggestions from code review * enable the type-repr simplification unconditionally in the stack trace Since keyword pairs can appear within positional arguments, it can be confusing if we print the same type with different representations. * omit type annotation for splat keyword argument * add test for `@Kwargs` * clean up test/errorshow.jl --- NEWS.md | 3 ++ base/exports.jl | 1 + base/namedtuple.jl | 59 ++++++++++++++++++++++++++++++ base/show.jl | 65 ++++++++++++++++++++++++--------- doc/src/base/base.md | 1 + test/errorshow.jl | 86 +++++++++++++++++++++++++++++--------------- test/namedtuple.jl | 6 ++++ test/show.jl | 2 ++ 8 files changed, 179 insertions(+), 44 deletions(-) diff --git a/NEWS.md b/NEWS.md index 404b2b11687af..2e1fa8c102461 100644 --- a/NEWS.md +++ b/NEWS.md @@ -18,6 +18,9 @@ Language changes that significantly improves load and inference times for heavily overloaded methods that dispatch on Types (such as traits and constructors). * The "h bar" `ℏ` (`\hslash` U+210F) character is now treated as equivalent to `ħ` (`\hbar` U+0127). +* When a method with keyword arguments is displayed in the stack trace view, the textual + representation of the keyword arguments' types is simplified using the new + `@Kwargs{key1::Type1, ...}` macro syntax ([#49959]). 
Compiler/Runtime improvements ----------------------------- diff --git a/base/exports.jl b/base/exports.jl index ec151df0bfde2..8d8983950fe74 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -1004,6 +1004,7 @@ export @v_str, # version number @raw_str, # raw string with no interpolation/unescaping @NamedTuple, + @Kwargs, @lazy_str, # lazy string # documentation diff --git a/base/namedtuple.jl b/base/namedtuple.jl index 320d068205a3d..5f6bdefbefd75 100644 --- a/base/namedtuple.jl +++ b/base/namedtuple.jl @@ -495,6 +495,65 @@ macro NamedTuple(ex) return :(NamedTuple{($(vars...),), Tuple{$(types...)}}) end +""" + @Kwargs{key1::Type1, key2::Type2, ...} + +This macro gives a convenient way to construct the type representation of keyword arguments +from the same syntax as [`@NamedTuple`](@ref). +For example, when we have a function call like `func([positional arguments]; kw1=1.0, kw2="2")`, +we can use this macro to construct the internal type representation of the keyword arguments +as `@Kwargs{kw1::Float64, kw2::String}`. +The macro syntax is specifically designed to simplify the signature type of a keyword method +when it is printed in the stack trace view. + +```julia +julia> @Kwargs{init::Int} # the internal representation of keyword arguments +Base.Pairs{Symbol, Int64, Tuple{Symbol}, @NamedTuple{init::Int64}} + +julia> sum("julia"; init=1) +ERROR: MethodError: no method matching +(::Char, ::Char) + +Closest candidates are: + +(::Any, ::Any, ::Any, ::Any...) 
+ @ Base operators.jl:585 + +(::Integer, ::AbstractChar) + @ Base char.jl:247 + +(::T, ::Integer) where T<:AbstractChar + @ Base char.jl:237 + +Stacktrace: + [1] add_sum(x::Char, y::Char) + @ Base ./reduce.jl:24 + [2] BottomRF + @ Base ./reduce.jl:86 [inlined] + [3] _foldl_impl(op::Base.BottomRF{typeof(Base.add_sum)}, init::Int64, itr::String) + @ Base ./reduce.jl:62 + [4] foldl_impl(op::Base.BottomRF{typeof(Base.add_sum)}, nt::Int64, itr::String) + @ Base ./reduce.jl:48 [inlined] + [5] mapfoldl_impl(f::typeof(identity), op::typeof(Base.add_sum), nt::Int64, itr::String) + @ Base ./reduce.jl:44 [inlined] + [6] mapfoldl(f::typeof(identity), op::typeof(Base.add_sum), itr::String; init::Int64) + @ Base ./reduce.jl:175 [inlined] + [7] mapreduce(f::typeof(identity), op::typeof(Base.add_sum), itr::String; kw::@Kwargs{init::Int64}) + @ Base ./reduce.jl:307 [inlined] + [8] sum(f::typeof(identity), a::String; kw::@Kwargs{init::Int64}) + @ Base ./reduce.jl:535 [inlined] + [9] sum(a::String; kw::@Kwargs{init::Int64}) + @ Base ./reduce.jl:564 [inlined] + [10] top-level scope + @ REPL[12]:1 +``` + +!!! compat "Julia 1.10" + This macro is available as of Julia 1.10. +""" +macro Kwargs(ex) + return :(let + NT = @NamedTuple $ex + Base.Pairs{keytype(NT),eltype(NT),typeof(NT.parameters[1]),NT} + end) +end + @constprop :aggressive function split_rest(t::NamedTuple{names}, n::Int, st...) where {names} _check_length_split_rest(length(t), n) names_front, names_last_n = split_rest(names, n, st...) 
diff --git a/base/show.jl b/base/show.jl index 2930b4951dd0a..45d6a502619db 100644 --- a/base/show.jl +++ b/base/show.jl @@ -1057,10 +1057,27 @@ function show_type_name(io::IO, tn::Core.TypeName) nothing end +function maybe_kws_nt(x::DataType) + x.name === typename(Pairs) || return nothing + length(x.parameters) == 4 || return nothing + x.parameters[1] === Symbol || return nothing + p4 = x.parameters[4] + if (isa(p4, DataType) && p4.name === typename(NamedTuple) && length(p4.parameters) == 2) + syms, types = p4.parameters + types isa DataType || return nothing + x.parameters[2] === eltype(p4) || return nothing + isa(syms, Tuple) || return nothing + x.parameters[3] === typeof(syms) || return nothing + return p4 + end + return nothing +end + function show_datatype(io::IO, x::DataType, wheres::Vector{TypeVar}=TypeVar[]) parameters = x.parameters::SimpleVector istuple = x.name === Tuple.name isnamedtuple = x.name === typename(NamedTuple) + kwsnt = maybe_kws_nt(x) n = length(parameters) # Print tuple types with homogeneous tails longer than max_n compactly using `NTuple` or `Vararg` @@ -1094,30 +1111,41 @@ function show_datatype(io::IO, x::DataType, wheres::Vector{TypeVar}=TypeVar[]) return elseif isnamedtuple syms, types = parameters - first = true if syms isa Tuple && types isa DataType print(io, "@NamedTuple{") - for i in 1:length(syms) - if !first - print(io, ", ") - end - print(io, syms[i]) - typ = types.parameters[i] - if typ !== Any - print(io, "::") - show(io, typ) - end - first = false - end + show_at_namedtuple(io, syms, types) print(io, "}") return end + elseif get(io, :backtrace, false)::Bool && kwsnt !== nothing + # simplify the type representation of keyword arguments + # when printing signature of keyword method in the stack trace + print(io, "@Kwargs{") + show_at_namedtuple(io, kwsnt.parameters[1]::Tuple, kwsnt.parameters[2]::DataType) + print(io, "}") + return end show_type_name(io, x.name) show_typeparams(io, parameters, 
(unwrap_unionall(x.name.wrapper)::DataType).parameters, wheres) end +function show_at_namedtuple(io::IO, syms::Tuple, types::DataType) + first = true + for i in 1:length(syms) + if !first + print(io, ", ") + end + print(io, syms[i]) + typ = types.parameters[i] + if typ !== Any + print(io, "::") + show(io, typ) + end + first = false + end +end + function show_supertypes(io::IO, typ::DataType) print(io, typ) while typ != Any @@ -2508,7 +2536,7 @@ function show_tuple_as_call(out::IO, name::Symbol, sig::Type; print_within_stacktrace(io, argnames[i]; color=:light_black) end print(io, "::") - print_type_bicolor(env_io, sig[i]; use_color = get(io, :backtrace, false)) + print_type_bicolor(env_io, sig[i]; use_color = get(io, :backtrace, false)::Bool) end if kwargs !== nothing print(io, "; ") @@ -2517,8 +2545,13 @@ function show_tuple_as_call(out::IO, name::Symbol, sig::Type; first || print(io, ", ") first = false print_within_stacktrace(io, k; color=:light_black) - print(io, "::") - print_type_bicolor(io, t; use_color = get(io, :backtrace, false)) + if t == pairs(NamedTuple) + # omit type annotation for splat keyword argument + print(io, "...") + else + print(io, "::") + print_type_bicolor(io, t; use_color = get(io, :backtrace, false)::Bool) + end end end print_within_stacktrace(io, ")", bold=true) diff --git a/doc/src/base/base.md b/doc/src/base/base.md index 5556578bcc245..3d17665190e21 100644 --- a/doc/src/base/base.md +++ b/doc/src/base/base.md @@ -234,6 +234,7 @@ Core.Tuple Core.NTuple Core.NamedTuple Base.@NamedTuple +Base.@Kwargs Base.Val Core.Vararg Core.Nothing diff --git a/test/errorshow.jl b/test/errorshow.jl index 5c6d8e3bea08c..9be3e675cede3 100644 --- a/test/errorshow.jl +++ b/test/errorshow.jl @@ -957,43 +957,73 @@ end f_internal_wrap(g, a; kw...) = error(); @inline f_internal_wrap(a; kw...) 
= f_internal_wrap(identity, a; kw...); -bt = try - f_internal_wrap(1) -catch - catch_backtrace() +let bt + @test try + f_internal_wrap(1) + false + catch + bt = catch_backtrace() + true + end + @test !occursin("#f_internal_wrap#", sprint(Base.show_backtrace, bt)) end -@test !occursin("#f_internal_wrap#", sprint(Base.show_backtrace, bt)) g_collapse_pos(x, y=1.0, z=2.0) = error() -bt = try - g_collapse_pos(1.0) -catch - catch_backtrace() +let bt + @test try + g_collapse_pos(1.0) + false + catch + bt = catch_backtrace() + true + end + bt_str = sprint(Base.show_backtrace, bt) + @test occursin("g_collapse_pos(x::Float64, y::Float64, z::Float64)", bt_str) + @test !occursin("g_collapse_pos(x::Float64)", bt_str) end -bt_str = sprint(Base.show_backtrace, bt) -@test occursin("g_collapse_pos(x::Float64, y::Float64, z::Float64)", bt_str) -@test !occursin("g_collapse_pos(x::Float64)", bt_str) g_collapse_kw(x; y=2.0) = error() -bt = try - g_collapse_kw(1.0) -catch - catch_backtrace() +let bt + @test try + g_collapse_kw(1.0) + false + catch + bt = catch_backtrace() + true + end + bt_str = sprint(Base.show_backtrace, bt) + @test occursin("g_collapse_kw(x::Float64; y::Float64)", bt_str) + @test !occursin("g_collapse_kw(x::Float64)", bt_str) end -bt_str = sprint(Base.show_backtrace, bt) -@test occursin("g_collapse_kw(x::Float64; y::Float64)", bt_str) -@test !occursin("g_collapse_kw(x::Float64)", bt_str) g_collapse_pos_kw(x, y=1.0; z=2.0) = error() -bt = try - g_collapse_pos_kw(1.0) -catch - catch_backtrace() -end -bt_str = sprint(Base.show_backtrace, bt) -@test occursin("g_collapse_pos_kw(x::Float64, y::Float64; z::Float64)", bt_str) -@test !occursin("g_collapse_pos_kw(x::Float64, y::Float64)", bt_str) -@test !occursin("g_collapse_pos_kw(x::Float64)", bt_str) +let bt + @test try + g_collapse_pos_kw(1.0) + false + catch + bt = catch_backtrace() + true + end + bt_str = sprint(Base.show_backtrace, bt) + @test occursin("g_collapse_pos_kw(x::Float64, y::Float64; z::Float64)", bt_str) + 
@test !occursin("g_collapse_pos_kw(x::Float64, y::Float64)", bt_str) + @test !occursin("g_collapse_pos_kw(x::Float64)", bt_str) +end + +simplify_kwargs_type(pos; kws...) = (pos, sum(kws)) +let bt + res = try + simplify_kwargs_type(0; kw1=1.0, kw2="2.0") + false + catch + bt = catch_backtrace() + true + end + @test res + bt_str = sprint(Base.show_backtrace, bt) + @test occursin("simplify_kwargs_type(pos::$Int; kws::@Kwargs{kw1::Float64, kw2::String})", bt_str) +end # Test Base.print_with_compare in convert MethodErrors struct TypeCompareError{A,B} <: Exception end diff --git a/test/namedtuple.jl b/test/namedtuple.jl index ea3a5cdbb8ee4..eb3846c8cbffd 100644 --- a/test/namedtuple.jl +++ b/test/namedtuple.jl @@ -342,6 +342,12 @@ end @test_throws LoadError include_string(Main, "@NamedTuple(a::Int, b)") end +# @Kwargs +@testset "@Kwargs" begin + @test @Kwargs{a::Int,b::String} == typeof(pairs((;a=1,b="2"))) + @test @Kwargs{} == typeof(pairs((;))) +end + # issue #29333, implicit names let x = 1, y = 2 @test (;y) === (y = 2,) diff --git a/test/show.jl b/test/show.jl index 6949db4bb9956..f2c553b3ff49a 100644 --- a/test/show.jl +++ b/test/show.jl @@ -1369,6 +1369,8 @@ test_repr("(:).a") @test repr(@NamedTuple{kw::NTuple{7, Int64}}) == "@NamedTuple{kw::NTuple{7, Int64}}" @test repr(@NamedTuple{a::Float64, b}) == "@NamedTuple{a::Float64, b}" +# Test general printing of `Base.Pairs` (it should not use the `@Kwargs` macro syntax) +@test repr(@Kwargs{init::Int}) == "Base.Pairs{Symbol, $Int, Tuple{Symbol}, @NamedTuple{init::$Int}}" @testset "issue #42931" begin @test repr(NTuple{4, :A}) == "NTuple{4, :A}" From dd441136914e7f5d01495b1787730db1aa538021 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Thu, 1 Jun 2023 02:25:03 +0900 Subject: [PATCH 095/290] add `@inline` annotation to `Core.Compiler.specialize_method` (#50015) Since the return type of `Core.Compiler.specialize_method` relies on the boolean keyword argument 
`preexisting`, it is profitable to constant propagate it to get better inferrability. --- base/compiler/utilities.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl index e7ce41a3be92a..f3c5694535ce6 100644 --- a/base/compiler/utilities.jl +++ b/base/compiler/utilities.jl @@ -211,7 +211,7 @@ function normalize_typevars(method::Method, @nospecialize(atype), sparams::Simpl end # get a handle to the unique specialization object representing a particular instantiation of a call -function specialize_method(method::Method, @nospecialize(atype), sparams::SimpleVector; preexisting::Bool=false) +@inline function specialize_method(method::Method, @nospecialize(atype), sparams::SimpleVector; preexisting::Bool=false) if isa(atype, UnionAll) atype, sparams = normalize_typevars(method, atype, sparams) end From 304e9a01134d56a7d9a93915590fa9a617d60ff3 Mon Sep 17 00:00:00 2001 From: Kiran Date: Wed, 31 May 2023 16:16:01 -0400 Subject: [PATCH 096/290] Add GC metric `last_full_sweep` (#50018) Records the time that the last full sweep ran. --- base/timing.jl | 1 + src/gc.c | 3 +++ src/gc.h | 1 + 3 files changed, 5 insertions(+) diff --git a/base/timing.jl b/base/timing.jl index 7428fd36c6253..d166b4162db59 100644 --- a/base/timing.jl +++ b/base/timing.jl @@ -24,6 +24,7 @@ struct GC_Num mark_time ::Int64 total_sweep_time ::Int64 total_mark_time ::Int64 + last_full_sweep ::Int64 end gc_num() = ccall(:jl_gc_num, GC_Num, ()) diff --git a/src/gc.c b/src/gc.c index a7ff7c6c06201..b92324bde6c63 100644 --- a/src/gc.c +++ b/src/gc.c @@ -3392,6 +3392,9 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) uint64_t sweep_time = gc_end_time - start_sweep_time; gc_num.total_sweep_time += sweep_time; gc_num.sweep_time = sweep_time; + if (sweep_full) { + gc_num.last_full_sweep = gc_end_time; + } // sweeping is over // 7. 
if it is a quick sweep, put back the remembered objects in queued state diff --git a/src/gc.h b/src/gc.h index eb20dd0ac36f6..f75ec26bc9017 100644 --- a/src/gc.h +++ b/src/gc.h @@ -81,6 +81,7 @@ typedef struct { uint64_t mark_time; uint64_t total_sweep_time; uint64_t total_mark_time; + uint64_t last_full_sweep; } jl_gc_num_t; // Array chunks (work items representing suffixes of From e0761b3b953fc4b1fb27dac840158119f26882cd Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Thu, 1 Jun 2023 13:37:51 +0900 Subject: [PATCH 097/290] inlining: some refactoring on the inlining code (#50016) - clean up the edge tracking: * removed `valid_worlds` tracking since it is never updated * removed `EdgeTracker` object and make `InliningEdgeTracker` update inlining edges always - clean up dead `OptimizationParams` arguments --- base/compiler/optimize.jl | 28 ++++------------------ base/compiler/ssair/inlining.jl | 42 ++++++++++++++++----------------- base/compiler/ssair/passes.jl | 2 +- base/compiler/typeinfer.jl | 3 +-- 4 files changed, 26 insertions(+), 49 deletions(-) diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index 32386691554ff..8810857ce81a7 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -66,26 +66,6 @@ is_declared_noinline(@nospecialize src::MaybeCompressed) = # OptimizationState # ##################### -struct EdgeTracker - edges::Vector{Any} - valid_worlds::RefValue{WorldRange} - EdgeTracker(edges::Vector{Any}, range::WorldRange) = - new(edges, RefValue{WorldRange}(range)) -end -EdgeTracker() = EdgeTracker(Any[], 0:typemax(UInt)) - -intersect!(et::EdgeTracker, range::WorldRange) = - et.valid_worlds[] = intersect(et.valid_worlds[], range) - -function add_backedge!(et::EdgeTracker, mi::MethodInstance) - push!(et.edges, mi) - return nothing -end -function add_invoke_backedge!(et::EdgeTracker, @nospecialize(invokesig), mi::MethodInstance) - push!(et.edges, invokesig, mi) - return 
nothing -end - is_source_inferred(@nospecialize src::MaybeCompressed) = ccall(:jl_ir_flag_inferred, Bool, (Any,), src) @@ -125,16 +105,16 @@ function inlining_policy(interp::AbstractInterpreter, end struct InliningState{Interp<:AbstractInterpreter} - et::Union{EdgeTracker,Nothing} + edges::Vector{Any} world::UInt interp::Interp end function InliningState(sv::InferenceState, interp::AbstractInterpreter) - et = EdgeTracker(sv.stmt_edges[1]::Vector{Any}, sv.valid_worlds) - return InliningState(et, sv.world, interp) + edges = sv.stmt_edges[1]::Vector{Any} + return InliningState(edges, sv.world, interp) end function InliningState(interp::AbstractInterpreter) - return InliningState(nothing, get_world_counter(interp), interp) + return InliningState(Any[], get_world_counter(interp), interp) end # get `code_cache(::AbstractInterpreter)` from `state::InliningState` diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index 3c444894dd4b6..aebcc7394e309 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -57,18 +57,17 @@ struct UnionSplit end struct InliningEdgeTracker - et::Union{Nothing,EdgeTracker} + edges::Vector{Any} invokesig::Union{Nothing,Vector{Any}} + InliningEdgeTracker(state::InliningState, invokesig::Union{Nothing,Vector{Any}}=nothing) = + new(state.edges, invokesig) end -InliningEdgeTracker(et::Union{Nothing,EdgeTracker}) = InliningEdgeTracker(et, nothing) -function add_inlining_backedge!((; et, invokesig)::InliningEdgeTracker, mi::MethodInstance) - if et !== nothing - if invokesig === nothing - add_backedge!(et, mi) - else - add_invoke_backedge!(et, invoke_signature(invokesig), mi) - end +function add_inlining_backedge!((; edges, invokesig)::InliningEdgeTracker, mi::MethodInstance) + if invokesig === nothing + push!(edges, mi) + else # invoke backedge + push!(edges, invoke_signature(invokesig), mi) end return nothing end @@ -871,9 +870,8 @@ end function resolve_todo(mi::MethodInstance, 
result::Union{MethodMatch,InferenceResult}, argtypes::Vector{Any}, @nospecialize(info::CallInfo), flag::UInt8, state::InliningState; invokesig::Union{Nothing,Vector{Any}}=nothing) - et = InliningEdgeTracker(state.et, invokesig) + et = InliningEdgeTracker(state, invokesig) - #XXX: update_valid_age!(min_valid[1], max_valid[1], sv) if isa(result, InferenceResult) src = result.src effects = result.ipo_effects @@ -916,7 +914,7 @@ function resolve_todo(mi::MethodInstance, argtypes::Vector{Any}, return nothing end - et = InliningEdgeTracker(state.et, nothing) + et = InliningEdgeTracker(state) cached_result = get_cached_result(state, mi) if cached_result isa ConstantCase @@ -1003,7 +1001,7 @@ function flags_for_effects(effects::Effects) end function handle_single_case!(todo::Vector{Pair{Int,Any}}, - ir::IRCode, idx::Int, stmt::Expr, @nospecialize(case), params::OptimizationParams, + ir::IRCode, idx::Int, stmt::Expr, @nospecialize(case), isinvoke::Bool = false) if isa(case, ConstantCase) ir[SSAValue(idx)][:inst] = case.val @@ -1191,13 +1189,13 @@ function handle_invoke_call!(todo::Vector{Pair{Int,Any}}, validate_sparams(mi.sparam_vals) || return nothing if Union{} !== argtypes_to_type(argtypes) <: mi.def.sig item = resolve_todo(mi, result.result, argtypes, info, flag, state; invokesig) - handle_single_case!(todo, ir, idx, stmt, item, OptimizationParams(state.interp), true) + handle_single_case!(todo, ir, idx, stmt, item, true) return nothing end end item = analyze_method!(match, argtypes, info, flag, state; allow_typevars=false, invokesig) end - handle_single_case!(todo, ir, idx, stmt, item, OptimizationParams(state.interp), true) + handle_single_case!(todo, ir, idx, stmt, item, true) return nothing end @@ -1451,7 +1449,7 @@ function handle_call!(todo::Vector{Pair{Int,Any}}, cases === nothing && return nothing cases, all_covered, joint_effects = cases handle_cases!(todo, ir, idx, stmt, argtypes_to_type(sig.argtypes), cases, - all_covered, joint_effects, 
OptimizationParams(state.interp)) + all_covered, joint_effects) end function handle_match!(cases::Vector{InliningCase}, @@ -1490,7 +1488,7 @@ function semiconcrete_result_item(result::SemiConcreteResult, @nospecialize(info::CallInfo), flag::UInt8, state::InliningState) mi = result.mi if !OptimizationParams(state.interp).inlining || is_stmt_noinline(flag) - et = InliningEdgeTracker(state.et, nothing) + et = InliningEdgeTracker(state) return compileable_specialization(mi, result.effects, et, info; compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes) else @@ -1524,7 +1522,7 @@ may_inline_concrete_result(result::ConcreteResult) = function concrete_result_item(result::ConcreteResult, @nospecialize(info::CallInfo), state::InliningState; invokesig::Union{Nothing,Vector{Any}}=nothing) if !may_inline_concrete_result(result) - et = InliningEdgeTracker(state.et, invokesig) + et = InliningEdgeTracker(state, invokesig) return compileable_specialization(result.mi, result.effects, et, info; compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes) end @@ -1534,12 +1532,12 @@ end function handle_cases!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, stmt::Expr, @nospecialize(atype), cases::Vector{InliningCase}, fully_covered::Bool, - joint_effects::Effects, params::OptimizationParams) + joint_effects::Effects) # If we only have one case and that case is fully covered, we may either # be able to do the inlining now (for constant cases), or push it directly # onto the todo list if fully_covered && length(cases) == 1 - handle_single_case!(todo, ir, idx, stmt, cases[1].item, params) + handle_single_case!(todo, ir, idx, stmt, cases[1].item) elseif length(cases) > 0 isa(atype, DataType) || return nothing for case in cases @@ -1572,7 +1570,7 @@ function handle_opaque_closure_call!(todo::Vector{Pair{Int,Any}}, item = analyze_method!(info.match, sig.argtypes, info, flag, state; allow_typevars=false) end end - handle_single_case!(todo, ir, idx, stmt, 
item, OptimizationParams(state.interp)) + handle_single_case!(todo, ir, idx, stmt, item) return nothing end @@ -1584,7 +1582,7 @@ function handle_modifyfield!_call!(ir::IRCode, idx::Int, stmt::Expr, info::Modif length(info.results) == 1 || return nothing match = info.results[1]::MethodMatch match.fully_covers || return nothing - case = compileable_specialization(match, Effects(), InliningEdgeTracker(state.et), info; + case = compileable_specialization(match, Effects(), InliningEdgeTracker(state), info; compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes) case === nothing && return nothing stmt.head = :invoke_modify diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 44409cfbcd486..3ac11b7bd232a 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -1124,7 +1124,7 @@ function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int, mi::MethodInstance, @nospecialize(info::CallInfo), inlining::InliningState, attach_after::Bool) code = get(code_cache(inlining), mi, nothing) - et = InliningEdgeTracker(inlining.et) + et = InliningEdgeTracker(inlining) if code isa CodeInstance if use_const_api(code) # No code in the function - Nothing to do diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl index 7d983ec5420db..757f9f567a0c2 100644 --- a/base/compiler/typeinfer.jl +++ b/base/compiler/typeinfer.jl @@ -269,8 +269,7 @@ function _typeinf(interp::AbstractInterpreter, frame::InferenceState) for (caller, _, _) in results opt = caller.src if opt isa OptimizationState{typeof(interp)} # implies `may_optimize(interp) === true` - analyzed = optimize(interp, opt, caller) - caller.valid_worlds = (opt.inlining.et::EdgeTracker).valid_worlds[] + optimize(interp, opt, caller) end end for (caller, edges, cached) in results From f61bbfb61970d650427a571f226c0ca38d7dd296 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Thu, 1 Jun 2023 13:45:04 +0100 Subject: [PATCH 098/290] Some mailmap 
updates (#49997) --- .mailmap | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.mailmap b/.mailmap index 5335c88a63d7d..e27453c63d2b5 100644 --- a/.mailmap +++ b/.mailmap @@ -283,3 +283,9 @@ Daniel Karrasch Roger Luo Roger Luo + +Frames Catherine White +Frames Catherine White +Frames Catherine White + +Claire Foster From cb0537fcf318ad053883860234f9206a36aaa9da Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Fri, 19 May 2023 03:46:07 -0400 Subject: [PATCH 099/290] timing: Allocate all timing events dynamically This paves the way for a Julia-side API that can create new events on-the-fly without having to modify timing.h The core of the change is to introduce two different structs: - An "event" stores all of the statically-determined attributes of a profiler event (typically, zone name and source location info) - A "timing block" stores the dynamic information relevant to a particular span/measurement in the timing run Events and timing blocks have a one-to-many relationship. The intended pattern for a Julia-side API is to construct an event once at parse-time using `jl_timing_event_create` (since this is relatively expensive due to profiler traffic and allocations) and then to create its own timing block on-the-fly for each block entry/exit. 
This also re-factors the API a bit to hopefully be more consistently named --- src/julia.expmap | 1 + src/julia.h | 1 + src/task.c | 12 +-- src/timing.c | 235 +++++++++++++++++++++++++++++++++-------- src/timing.h | 264 +++++++++++++++++------------------------------ 5 files changed, 297 insertions(+), 216 deletions(-) diff --git a/src/julia.expmap b/src/julia.expmap index 447c3c4d8a5f5..ee35997827221 100644 --- a/src/julia.expmap +++ b/src/julia.expmap @@ -30,6 +30,7 @@ _Z24jl_coverage_data_pointerN4llvm9StringRefEi; _Z22jl_coverage_alloc_lineN4llvm9StringRefEi; _Z22jl_malloc_data_pointerN4llvm9StringRefEi; + _jl_timing_*; LLVMExtra*; JLJIT*; llvmGetPassPluginInfo; diff --git a/src/julia.h b/src/julia.h index d214509c7d0b6..2140b0ad0ab90 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1982,6 +1982,7 @@ JL_DLLEXPORT void jl_sigatomic_end(void); // tasks and exceptions ------------------------------------------------------- typedef struct _jl_timing_block_t jl_timing_block_t; +typedef struct _jl_timing_event_t jl_timing_event_t; typedef struct _jl_excstack_t jl_excstack_t; // info describing an exception handler diff --git a/src/task.c b/src/task.c index 477ae481071a0..1dab8688cb079 100644 --- a/src/task.c +++ b/src/task.c @@ -646,7 +646,7 @@ JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER int finalizers_inhibited = ptls->finalizers_inhibited; ptls->finalizers_inhibited = 0; - jl_timing_block_t *blk = jl_timing_block_exit_task(ct, ptls); + jl_timing_block_t *blk = jl_timing_block_task_exit(ct, ptls); ctx_switch(ct); #ifdef MIGRATE_TASKS @@ -666,7 +666,7 @@ JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER 0 != ct->ptls && 0 == ptls->finalizers_inhibited); ptls->finalizers_inhibited = finalizers_inhibited; - jl_timing_block_enter_task(ct, ptls, blk); (void)blk; + jl_timing_block_task_enter(ct, ptls, blk); (void)blk; sig_atomic_t other_defer_signal = ptls->defer_signal; ptls->defer_signal = defer_signal; @@ 
-705,7 +705,7 @@ JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e, jl_task_t *ct) #define pop_timings_stack() \ jl_timing_block_t *cur_block = ptls->timing_stack; \ while (cur_block && eh->timing_stack != cur_block) { \ - cur_block = jl_pop_timing_block(cur_block); \ + cur_block = jl_timing_block_pop(cur_block); \ } \ assert(cur_block == eh->timing_stack); #else @@ -1084,7 +1084,7 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion t->ptls = NULL; t->world_age = ct->world_age; t->reentrant_timing = 0; - jl_timing_init_task(t); + jl_timing_task_init(t); #ifdef COPY_STACKS if (!t->copy_stack) { @@ -1221,7 +1221,7 @@ CFI_NORETURN ct->started = 1; JL_PROBE_RT_START_TASK(ct); - jl_timing_block_enter_task(ct, ptls, NULL); + jl_timing_block_task_enter(ct, ptls, NULL); if (jl_atomic_load_relaxed(&ct->_isexception)) { record_backtrace(ptls, 0); jl_push_excstack(&ct->excstack, ct->result, @@ -1693,7 +1693,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) ct->ctx.asan_fake_stack = NULL; #endif - jl_timing_block_enter_task(ct, ptls, NULL); + jl_timing_block_task_enter(ct, ptls, NULL); #ifdef COPY_STACKS // initialize the base_ctx from which all future copy_stacks will be copies diff --git a/src/timing.c b/src/timing.c index 9a1af1bdf0ac1..8af101c0f8026 100644 --- a/src/timing.c +++ b/src/timing.c @@ -24,24 +24,21 @@ extern "C" { static uint64_t t0; -JL_DLLEXPORT _Atomic(uint64_t) jl_timing_disable_mask[(JL_TIMING_LAST + sizeof(uint64_t) * CHAR_BIT - 1) / (sizeof(uint64_t) * CHAR_BIT)]; +JL_DLLEXPORT _Atomic(uint64_t) jl_timing_disable_mask[(JL_TIMING_SUBSYSTEM_LAST + sizeof(uint64_t) * CHAR_BIT - 1) / (sizeof(uint64_t) * CHAR_BIT)]; -JL_DLLEXPORT _Atomic(uint64_t) jl_timing_self_counts[(int)JL_TIMING_EVENT_LAST]; -JL_DLLEXPORT _Atomic(uint64_t) jl_timing_full_counts[(int)JL_TIMING_EVENT_LAST]; +static arraylist_t jl_timing_counts_events; // Used to as an item limit when several strings of metadata can 
// potentially be associated with a single timing zone. JL_DLLEXPORT uint32_t jl_timing_print_limit = 10; -static const char *jl_timing_names[(int)JL_TIMING_EVENT_LAST] = +const char *jl_timing_subsystems[(int)JL_TIMING_SUBSYSTEM_LAST] = { #define X(name) #name, - JL_TIMING_EVENTS + JL_TIMING_SUBSYSTEMS #undef X }; -static int jl_timing_names_sorted[(int)JL_TIMING_EVENT_LAST]; - JL_DLLEXPORT jl_timing_counter_t jl_timing_counters[JL_TIMING_COUNTER_LAST]; void jl_print_timings(void) @@ -49,19 +46,25 @@ void jl_print_timings(void) #ifdef USE_TIMING_COUNTS uint64_t total_time = cycleclock() - t0; uint64_t root_time = total_time; - for (int i = 0; i < JL_TIMING_EVENT_LAST; i++) { - root_time -= jl_atomic_load_relaxed(jl_timing_self_counts + i); + for (int i = 0; i < jl_timing_counts_events.len; i++) { + jl_timing_counts_event_t *other_event = (jl_timing_counts_event_t *)jl_timing_counts_events.items[i]; + root_time -= jl_atomic_load_relaxed(&other_event->self); } - jl_atomic_store_relaxed(jl_timing_self_counts + JL_TIMING_ROOT, root_time); - jl_atomic_store_relaxed(jl_timing_full_counts + JL_TIMING_ROOT, total_time); + jl_timing_counts_event_t *root_event = (jl_timing_counts_event_t *)jl_timing_counts_events.items[0]; + jl_atomic_store_relaxed(&root_event->self, root_time); + jl_atomic_store_relaxed(&root_event->total, total_time); + fprintf(stderr, "\nJULIA TIMINGS\n"); fprintf(stderr, "%-25s, %-30s, %-30s\n", "Event", "Self Cycles (% of Total)", "Total Cycles (% of Total)"); - for (int i = 0; i < JL_TIMING_EVENT_LAST; i++) { - int j = jl_timing_names_sorted[i]; - uint64_t self = jl_atomic_load_relaxed(jl_timing_self_counts + j); - uint64_t total = jl_atomic_load_relaxed(jl_timing_full_counts + j); + for (int i = 0; i < jl_timing_counts_events.len; i++) { + jl_timing_counts_event_t *event = (jl_timing_counts_event_t *)jl_timing_counts_events.items[i]; + uint64_t self = jl_atomic_load_relaxed(&event->self); + uint64_t total = jl_atomic_load_relaxed(&event->total); if 
(total != 0) - fprintf(stderr, "%-25s, %20" PRIu64 " (%5.2f %%), %20" PRIu64 " (%5.2f %%)\n", jl_timing_names[j], self, 100 * (((double)self) / total_time), total, 100 * (((double)total) / total_time)); + fprintf(stderr, "%-25s, %20" PRIu64 " (%5.2f %%), %20" PRIu64 " (%5.2f %%)\n", + event->name, + self, 100 * (((double)self) / total_time), + total, 100 * (((double)total) / total_time)); } fprintf(stderr, "\nJULIA COUNTERS\n"); @@ -77,22 +80,32 @@ void jl_print_timings(void) #endif } -int cmp_names(const void *a, const void *b) { - int ia = *(const int*)a; - int ib = *(const int*)b; - return strcmp(jl_timing_names[ia], jl_timing_names[ib]); +static const int indirect_strcmp(const void *a, const void *b) { + return strcmp(*(const char **)a, *(const char **)b); } void jl_init_timing(void) { t0 = cycleclock(); - _Static_assert((int)JL_TIMING_LAST <= (int)JL_TIMING_EVENT_LAST, "More owners than events!"); + _Static_assert(JL_TIMING_SUBSYSTEM_LAST < sizeof(uint64_t) * CHAR_BIT, "Too many timing subsystems!"); - for (int i = 0; i < JL_TIMING_EVENT_LAST; i++) { - jl_timing_names_sorted[i] = i; - } - qsort(jl_timing_names_sorted, JL_TIMING_EVENT_LAST, sizeof(int), cmp_names); +#ifdef USE_TIMING_COUNTS + // Create events list for counts backend + arraylist_new(&jl_timing_counts_events, 1); + + jl_timing_counts_event_t *new_event = (jl_timing_counts_event_t *)malloc(sizeof(jl_timing_counts_event_t)); + arraylist_push(&jl_timing_counts_events, (void *)new_event); + + new_event->name = "ROOT"; + jl_atomic_store_relaxed(&new_event->total, 0); +#endif + + // Sort the subsystem names for quick enable/disable lookups + qsort( + jl_timing_subsystems, JL_TIMING_SUBSYSTEM_LAST, + sizeof(const char *), indirect_strcmp + ); int i __attribute__((unused)) = 0; #ifdef USE_ITTAPI @@ -142,18 +155,157 @@ void jl_destroy_timing(void) jl_ptls_t ptls = jl_current_task->ptls; jl_timing_block_t *stack = ptls->timing_stack; while (stack) { - _jl_timing_block_destroy(stack); + 
jl_timing_block_end(stack); stack = stack->prev; } } -jl_timing_block_t *jl_pop_timing_block(jl_timing_block_t *cur_block) +static const int get_timing_subsystem(const char *subsystem) { + const char **match = (const char **)bsearch( + &subsystem, jl_timing_subsystems, JL_TIMING_SUBSYSTEM_LAST, + sizeof(const char *), indirect_strcmp + ); + if (!match) + return JL_TIMING_SUBSYSTEM_LAST; + + return (int)(match - &jl_timing_subsystems[0]); +} + +#ifdef USE_TIMING_COUNTS + +// This function is analogous to __itt_event_create but for the counts backend +// +// `event` is required to live forever +static jl_timing_counts_event_t *_jl_timing_counts_event_create(const char *event) { + const size_t n = jl_timing_counts_events.len; + for (size_t i = 0; i < n; i++) { + jl_timing_counts_event_t *other_event = (jl_timing_counts_event_t *)jl_timing_counts_events.items[i]; + if (strcmp(event, other_event->name) == 0) + return other_event; + } + + // No matching event found - create a new one + jl_timing_counts_event_t *new_event = (jl_timing_counts_event_t *)malloc(sizeof(jl_timing_counts_event_t)); + arraylist_push(&jl_timing_counts_events, (void *)new_event); + new_event->name = event; + jl_atomic_store_relaxed(&new_event->total, 0); + return new_event; +} + +STATIC_INLINE void _jl_timing_counts_pause(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { +#ifdef JL_DEBUG_BUILD + assert(block->running); + block->running = 0; +#endif + block->total += t - block->start; +} + +STATIC_INLINE void _jl_timing_counts_resume(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { +#ifdef JL_DEBUG_BUILD + assert(!block->running); + block->running = 1; +#endif + block->start = t; +} + +STATIC_INLINE void _jl_timing_counts_start(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { + block->total = 0; + block->start = t; + block->t0 = t; +#ifdef JL_DEBUG_BUILD + block->running = 1; +#endif +} + +STATIC_INLINE void _jl_timing_counts_stop(jl_timing_block_t *block, uint64_t t) 
JL_NOTSAFEPOINT { +#ifdef JL_DEBUG_BUILD + assert(block->counts_ctx.running); + block->counts_ctx.running = 0; +#endif + jl_timing_counts_event_t *event = block->event->counts_event; + block->counts_ctx.total += t - block->counts_ctx.start; + jl_atomic_fetch_add_relaxed(&event->self, block->counts_ctx.total); + jl_atomic_fetch_add_relaxed(&event->total, t - block->counts_ctx.t0); +} + +#endif // USE_TIMING_COUNTS + +JL_DLLEXPORT jl_timing_event_t *_jl_timing_event_create(const char *subsystem, const char *name, const char *function, const char *file, int line, int color) { + int maybe_subsystem = get_timing_subsystem(subsystem); + if (maybe_subsystem >= JL_TIMING_SUBSYSTEM_LAST) { + jl_errorf("invalid timing subsystem name: %s", subsystem); + return NULL; + } + + jl_timing_event_t *event = (jl_timing_event_t *) malloc(sizeof(jl_timing_event_t)); + event->subsystem = maybe_subsystem; + +#ifdef USE_TIMING_COUNTS + event->counts_event = _jl_timing_counts_event_create(name); +#endif // USE_TIMING_COUNTS + +#ifdef USE_TRACY + event->tracy_srcloc.name = name; + event->tracy_srcloc.function = function; + event->tracy_srcloc.file = file; + event->tracy_srcloc.line = line; + event->tracy_srcloc.color = color; +#endif // USE_TRACY + +#ifdef USE_ITTAPI + event->ittapi_event = __itt_event_create(name, strlen(name)); +#endif // USE_ITTAPI + + return event; +} + +JL_DLLEXPORT void _jl_timing_block_init(jl_timing_block_t *block, jl_timing_event_t *event) { + memset(block, 0, sizeof(*block)); + block->event = event; +} + +JL_DLLEXPORT void _jl_timing_block_start(jl_timing_block_t *block) { + assert(!block->is_running); + if (!_jl_timing_enabled(block->event->subsystem)) return; + + uint64_t t = cycleclock(); (void)t; + _COUNTS_START(&block->counts_ctx, t); + _ITTAPI_START(block); + _TRACY_START(block); + + jl_timing_block_t **prevp = &jl_current_task->ptls->timing_stack; + block->prev = *prevp; + block->is_running = 1; + if (block->prev) { + _COUNTS_PAUSE(&block->prev->counts_ctx, 
t); + } + *prevp = block; +} + +JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *block) { + if (block->is_running) { + uint64_t t = cycleclock(); (void)t; + _ITTAPI_STOP(block); + _TRACY_STOP(block->tracy_ctx); + _COUNTS_STOP(block, t); + + jl_task_t *ct = jl_current_task; + jl_timing_block_t **pcur = &ct->ptls->timing_stack; + assert(*pcur == block); + *pcur = block->prev; + if (block->prev) { + _COUNTS_RESUME(&block->prev->counts_ctx, t); + } + } +} + +jl_timing_block_t *jl_timing_block_pop(jl_timing_block_t *cur_block) { - _jl_timing_block_destroy(cur_block); + jl_timing_block_end(cur_block); return cur_block->prev; } -void jl_timing_block_enter_task(jl_task_t *ct, jl_ptls_t ptls, jl_timing_block_t *prev_blk) +void jl_timing_block_task_enter(jl_task_t *ct, jl_ptls_t ptls, jl_timing_block_t *prev_blk) { if (prev_blk != NULL) { assert(ptls->timing_stack == NULL); @@ -171,7 +323,7 @@ void jl_timing_block_enter_task(jl_task_t *ct, jl_ptls_t ptls, jl_timing_block_t #endif } -jl_timing_block_t *jl_timing_block_exit_task(jl_task_t *ct, jl_ptls_t ptls) +jl_timing_block_t *jl_timing_block_task_exit(jl_task_t *ct, jl_ptls_t ptls) { #ifdef USE_TRACY // Tracy is fairly strict about not leaving a fiber that hasn't @@ -323,7 +475,7 @@ JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str) #endif } -void jl_timing_init_task(jl_task_t *t) +void jl_timing_task_init(jl_task_t *t) { #ifdef USE_TRACY jl_value_t *start_type = jl_typeof(t->start); @@ -359,20 +511,12 @@ void jl_timing_init_task(jl_task_t *t) #endif } -int cmp_name_idx(const void *name, const void *idx) { - return strcmp((const char *)name, jl_timing_names[*(const int *)idx]); -} - JL_DLLEXPORT int jl_timing_set_enable(const char *subsystem, uint8_t enabled) { - const int *idx = (const int *)bsearch(subsystem, jl_timing_names_sorted, JL_TIMING_EVENT_LAST, sizeof(int), cmp_name_idx); - if (idx == NULL) - return -1; - int i = *idx; - // sorted names include events, so skip if we're 
looking at an event instead of a subsystem - // events are always at least JL_TIMING_LAST - if (i >= JL_TIMING_LAST) + int i = get_timing_subsystem(subsystem); + if (i >= JL_TIMING_SUBSYSTEM_LAST) return -1; + uint64_t subsystem_bit = 1ul << (i % (sizeof(uint64_t) * CHAR_BIT)); if (enabled) { jl_atomic_fetch_and_relaxed(jl_timing_disable_mask + (i / (sizeof(uint64_t) * CHAR_BIT)), ~subsystem_bit); @@ -445,6 +589,13 @@ void jl_timing_apply_env(void) void jl_init_timing(void) { } void jl_destroy_timing(void) { } + +JL_DLLEXPORT jl_timing_event_t *_jl_timing_event_create(const char *subsystem, const char *name, const char *function, const char *file, int line, int color) { return NULL; } + +JL_DLLEXPORT void _jl_timing_block_init(jl_timing_block_t *block, jl_timing_event_t *event) { } +JL_DLLEXPORT void _jl_timing_block_start(jl_timing_block_t *block) { } +JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *block) { } + JL_DLLEXPORT int jl_timing_set_enable(const char *subsystem, uint8_t enabled) { return -1; } JL_DLLEXPORT uint32_t jl_timing_print_limit = 0; diff --git a/src/timing.h b/src/timing.h index 4f94bbc3050cf..5ec6ea1fc33f8 100644 --- a/src/timing.h +++ b/src/timing.h @@ -31,7 +31,7 @@ void jl_init_timing(void); void jl_destroy_timing(void) JL_NOTSAFEPOINT; // Update the enable bit-mask to enable/disable tracing events for -// the subsystem in `jl_timing_names` matching the provided string. +// the subsystem in `jl_timing_subsystems` matching the provided string. // // Returns -1 if no matching sub-system was found. JL_DLLEXPORT int jl_timing_set_enable(const char *subsystem, uint8_t enabled); @@ -51,6 +51,11 @@ void jl_timing_apply_env(void); // when adding potentially many items of metadata to a single timing zone. 
extern JL_DLLEXPORT uint32_t jl_timing_print_limit; +JL_DLLEXPORT jl_timing_event_t *_jl_timing_event_create(const char *subsystem, const char *name, const char *function, const char *file, int line, int color); +JL_DLLEXPORT void _jl_timing_block_init(jl_timing_block_t *block, jl_timing_event_t *event); +JL_DLLEXPORT void _jl_timing_block_start(jl_timing_block_t *cur_block); +JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *cur_block); + #ifdef __cplusplus } #endif @@ -82,11 +87,13 @@ extern JL_DLLEXPORT uint32_t jl_timing_print_limit; #define jl_timing_show_macro(macro, lno, mod, b) #define jl_timing_printf(b, f, ...) #define jl_timing_puts(b, s) -#define jl_timing_init_task(t) +#define jl_timing_task_init(t) +#define jl_timing_event_create(blk) +#define jl_timing_block_init(blk) #define jl_timing_block_start(blk) -#define jl_timing_block_enter_task(ct, ptls, blk) -#define jl_timing_block_exit_task(ct, ptls) ((jl_timing_block_t *)NULL) -#define jl_pop_timing_block(blk) +#define jl_timing_block_task_enter(ct, ptls, blk) +#define jl_timing_block_task_exit(ct, ptls) ((jl_timing_block_t *)NULL) +#define jl_timing_block_pop(blk) #define jl_timing_counter_inc(counter, value) #define jl_timing_counter_dec(counter, value) @@ -102,6 +109,7 @@ extern JL_DLLEXPORT uint32_t jl_timing_print_limit; #include "julia_assert.h" #ifdef USE_TRACY #include "tracy/TracyC.h" +typedef struct ___tracy_source_location_data TracySrcLocData; #endif #ifdef USE_ITTAPI @@ -112,11 +120,11 @@ extern JL_DLLEXPORT uint32_t jl_timing_print_limit; extern "C" { #endif void jl_print_timings(void); -jl_timing_block_t *jl_pop_timing_block(jl_timing_block_t *cur_block); -void jl_timing_init_task(jl_task_t *t); -void jl_timing_block_enter_task(jl_task_t *ct, jl_ptls_t ptls, jl_timing_block_t *prev_blk); -jl_timing_block_t *jl_timing_block_exit_task(jl_task_t *ct, jl_ptls_t ptls); +void jl_timing_task_init(jl_task_t *t); +void jl_timing_block_task_enter(jl_task_t *ct, jl_ptls_t ptls, 
jl_timing_block_t *prev_blk); +jl_timing_block_t *jl_timing_block_task_exit(jl_task_t *ct, jl_ptls_t ptls); +jl_timing_block_t *jl_timing_block_pop(jl_timing_block_t *cur_block); // Add the output of `jl_static_show(x)` as a text annotation to the // profiling region corresponding to `cur_block`. @@ -133,6 +141,11 @@ JL_DLLEXPORT void jl_timing_show_macro(jl_method_instance_t *macro, jl_value_t* JL_DLLEXPORT void jl_timing_printf(jl_timing_block_t *cur_block, const char *format, ...); JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str); +#define jl_timing_event_create(subsystem, name, function, file, line, color) _jl_timing_event_create(subsystem, name, function, file, line, color) +#define jl_timing_block_init(blk, evt) _jl_timing_block_start(blk, evt) +#define jl_timing_block_start(blk) _jl_timing_block_start(blk) +#define jl_timing_block_end(blk) _jl_timing_block_end(blk) + #ifdef __cplusplus } #endif @@ -172,29 +185,6 @@ JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str); X(JULIA_INIT) \ -#define JL_TIMING_EVENTS \ - JL_TIMING_SUBSYSTEMS \ - X(GC_Stop) \ - X(GC_Mark) \ - X(GC_FullSweep) \ - X(GC_IncrementalSweep) \ - X(GC_Finalizers) \ - X(CODEGEN_LLVM) \ - X(CODEGEN_Codeinst) \ - X(CODEGEN_Workqueue) \ - X(LOAD_Require) \ - X(LOAD_Sysimg) \ - X(LOAD_Pkgimg) \ - X(LOAD_Processor) \ - X(VERIFY_Edges) \ - X(VERIFY_Methods) \ - X(VERIFY_Graph) \ - X(STACKWALK_Backtrace) \ - X(STACKWALK_Excstack) \ - X(NATIVE_Dump) \ - X(NATIVE_Create) \ - - #define JL_TIMING_COUNTERS \ X(Invalidations) \ X(HeapSize) \ @@ -204,18 +194,11 @@ JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str); X(ImageSize) \ -enum jl_timing_subsystems { +enum jl_timing_subsystem { #define X(name) JL_TIMING_ ## name, JL_TIMING_SUBSYSTEMS #undef X - JL_TIMING_LAST -}; - -enum jl_timing_events { -#define X(name) JL_TIMING_EVENT_ ## name, - JL_TIMING_EVENTS -#undef X - JL_TIMING_EVENT_LAST + JL_TIMING_SUBSYSTEM_LAST }; 
enum jl_timing_counter_types { @@ -232,16 +215,33 @@ enum jl_timing_counter_types { * Timing Backend: Aggregated timing counts (implemented in timing.c) **/ +typedef struct jl_timing_counts_event_t { + const char *name; + _Atomic(uint64_t) self; + _Atomic(uint64_t) total; +} jl_timing_counts_event_t; + +typedef struct _jl_timing_counts_t { + uint64_t total; + uint64_t start; + uint64_t t0; +#ifdef JL_DEBUG_BUILD + uint8_t running; +#endif +} jl_timing_counts_t; + #ifdef USE_TIMING_COUNTS -#define _COUNTS_CTX_MEMBER jl_timing_counts_t counts_ctx; -#define _COUNTS_START(block, t) _jl_timing_counts_start(block, t) -#define _COUNTS_STOP(block, event, t) _jl_timing_counts_stop(block, event, t) -#define _COUNTS_PAUSE(block, t) _jl_timing_counts_pause(block, t) -#define _COUNTS_RESUME(block, t) _jl_timing_counts_resume(block, t) +#define _COUNTS_EVENT_MEMBER jl_timing_counts_event_t *counts_event; +#define _COUNTS_BLOCK_MEMBER jl_timing_counts_t counts_ctx; +#define _COUNTS_START(block, t) _jl_timing_counts_start(block, t) +#define _COUNTS_STOP(block, t) _jl_timing_counts_stop(block, t) +#define _COUNTS_PAUSE(block, t) _jl_timing_counts_pause(block, t) +#define _COUNTS_RESUME(block, t) _jl_timing_counts_resume(block, t) #else -#define _COUNTS_CTX_MEMBER +#define _COUNTS_EVENT_MEMBER +#define _COUNTS_BLOCK_MEMBER #define _COUNTS_START(block, t) -#define _COUNTS_STOP(block, event, t) +#define _COUNTS_STOP(block, t) #define _COUNTS_PAUSE(block, t) #define _COUNTS_RESUME(block, t) #endif @@ -251,15 +251,13 @@ enum jl_timing_counter_types { **/ #ifdef USE_TRACY -#define _TRACY_CTX_MEMBER TracyCZoneCtx tracy_ctx; const struct ___tracy_source_location_data *tracy_srcloc; -#define _TRACY_CTOR(block, name) static const struct ___tracy_source_location_data TIMING_CONCAT(__tracy_source_location,__LINE__) = { name, __func__, TracyFile, (uint32_t)__LINE__, 0 }; \ - (block)->tracy_srcloc = &TIMING_CONCAT(__tracy_source_location,__LINE__); \ - (block)->tracy_ctx.active = 0 -#define 
_TRACY_START(block) (block)->tracy_ctx = ___tracy_emit_zone_begin( (block)->tracy_srcloc, 1 ); -#define _TRACY_STOP(ctx) TracyCZoneEnd(ctx) +#define _TRACY_EVENT_MEMBER TracySrcLocData tracy_srcloc; +#define _TRACY_BLOCK_MEMBER TracyCZoneCtx tracy_ctx; +#define _TRACY_START(block) (block)->tracy_ctx = ___tracy_emit_zone_begin( &(block)->event->tracy_srcloc, 1 ); +#define _TRACY_STOP(ctx) TracyCZoneEnd(ctx) #else -#define _TRACY_CTX_MEMBER -#define _TRACY_CTOR(block, name) +#define _TRACY_EVENT_MEMBER +#define _TRACY_BLOCK_MEMBER #define _TRACY_START(block) #define _TRACY_STOP(ctx) #endif @@ -269,133 +267,60 @@ enum jl_timing_counter_types { **/ #ifdef USE_ITTAPI -#define _ITTAPI_CTX_MEMBER __itt_event ittapi_event; -#define _ITTAPI_CTOR(block, name) static __itt_event TIMING_CONCAT(__itt_event,__LINE__) = INT_MAX; \ - if (TIMING_CONCAT(__itt_event,__LINE__) == INT_MAX) \ - TIMING_CONCAT(__itt_event,__LINE__) = __itt_event_create(name, strlen(name)); \ - (block)->ittapi_event = TIMING_CONCAT(__itt_event,__LINE__) -#define _ITTAPI_START(block) __itt_event_start((block)->ittapi_event) -#define _ITTAPI_STOP(block) __itt_event_end((block)->ittapi_event) +#define _ITTAPI_EVENT_MEMBER __itt_event ittapi_event; +#define _ITTAPI_BLOCK_MEMBER +#define _ITTAPI_START(block) __itt_event_start((block)->event->ittapi_event) +#define _ITTAPI_STOP(block) __itt_event_end((block)->event->ittapi_event) #else -#define _ITTAPI_CTX_MEMBER -#define _ITTAPI_CTOR(block, name) +#define _ITTAPI_EVENT_MEMBER +#define _ITTAPI_BLOCK_MEMBER #define _ITTAPI_START(block) #define _ITTAPI_STOP(block) #endif /** - * Implementation: Aggregated counts back-end + * Top-level jl_timing implementation **/ -extern JL_DLLEXPORT _Atomic(uint64_t) jl_timing_self_counts[(int)JL_TIMING_EVENT_LAST]; -extern JL_DLLEXPORT _Atomic(uint64_t) jl_timing_full_counts[(int)JL_TIMING_EVENT_LAST]; -typedef struct _jl_timing_counts_t { - uint64_t total; - uint64_t start; - uint64_t t0; -#ifdef JL_DEBUG_BUILD - uint8_t 
running; -#endif -} jl_timing_counts_t; - -STATIC_INLINE void _jl_timing_counts_pause(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { -#ifdef JL_DEBUG_BUILD - assert(block->running); - block->running = 0; -#endif - block->total += t - block->start; -} - -STATIC_INLINE void _jl_timing_counts_resume(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { -#ifdef JL_DEBUG_BUILD - assert(!block->running); - block->running = 1; -#endif - block->start = t; -} - -STATIC_INLINE void _jl_timing_counts_start(jl_timing_counts_t *block, uint64_t t) JL_NOTSAFEPOINT { - block->total = 0; - block->start = t; - block->t0 = t; -#ifdef JL_DEBUG_BUILD - block->running = 1; -#endif -} - -STATIC_INLINE void _jl_timing_counts_stop(jl_timing_counts_t *block, int event, uint64_t t) JL_NOTSAFEPOINT { -#ifdef JL_DEBUG_BUILD - assert(block->running); - block->running = 0; -#endif - block->total += t - block->start; - jl_atomic_fetch_add_relaxed(jl_timing_self_counts + event, block->total); - jl_atomic_fetch_add_relaxed(jl_timing_full_counts + event, t - block->t0); -} +extern JL_DLLEXPORT _Atomic(uint64_t) jl_timing_disable_mask[(JL_TIMING_SUBSYSTEM_LAST + sizeof(uint64_t) * CHAR_BIT - 1) / (sizeof(uint64_t) * CHAR_BIT)]; +extern const char *jl_timing_subsystems[(int)JL_TIMING_SUBSYSTEM_LAST]; /** - * Top-level jl_timing implementation + * Stores all static attributes associated with a profiling event. + * + * A single event can be used to create many timing blocks with + * the same name/source information. **/ +struct _jl_timing_event_t { // typedef in julia.h + _TRACY_EVENT_MEMBER + _ITTAPI_EVENT_MEMBER + _COUNTS_EVENT_MEMBER -extern JL_DLLEXPORT _Atomic(uint64_t) jl_timing_disable_mask[(JL_TIMING_LAST + sizeof(uint64_t) * CHAR_BIT - 1) / (sizeof(uint64_t) * CHAR_BIT)]; + int subsystem; +}; +/** + * Stores all dynamic attributes associated with a timing block. + * + * Every time the application enters an instrumented block of code, + * a new timing block is created. 
A timing block corresponds to one + * "span" of time in the profiler. + **/ struct _jl_timing_block_t { // typedef in julia.h struct _jl_timing_block_t *prev; + jl_timing_event_t *event; - _TRACY_CTX_MEMBER - _ITTAPI_CTX_MEMBER - _COUNTS_CTX_MEMBER + _TRACY_BLOCK_MEMBER + _ITTAPI_BLOCK_MEMBER + _COUNTS_BLOCK_MEMBER - int subsystem; - int event; - int8_t is_running; + uint8_t is_running; }; STATIC_INLINE int _jl_timing_enabled(int subsystem) JL_NOTSAFEPOINT { return (jl_atomic_load_relaxed(jl_timing_disable_mask + subsystem / (sizeof(uint64_t) * CHAR_BIT)) & (1 << (subsystem % (sizeof(uint64_t) * CHAR_BIT)))) == 0; } -STATIC_INLINE void jl_timing_block_start(jl_timing_block_t *block) { - assert(!block->is_running); - if (!_jl_timing_enabled(block->subsystem)) return; - - uint64_t t = cycleclock(); (void)t; - _COUNTS_START(&block->counts_ctx, t); - _ITTAPI_START(block); - _TRACY_START(block); - - jl_timing_block_t **prevp = &jl_current_task->ptls->timing_stack; - block->prev = *prevp; - block->is_running = 1; - if (block->prev) { - _COUNTS_PAUSE(&block->prev->counts_ctx, t); - } - *prevp = block; -} - -STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int subsystem, int event) JL_NOTSAFEPOINT { - block->subsystem = subsystem; - block->event = event; - block->is_running = 0; -} - -STATIC_INLINE void _jl_timing_block_destroy(jl_timing_block_t *block) JL_NOTSAFEPOINT { - if (block->is_running) { - uint64_t t = cycleclock(); (void)t; - _ITTAPI_STOP(block); - _TRACY_STOP(block->tracy_ctx); - _COUNTS_STOP(&block->counts_ctx, block->event, cycleclock()); - - jl_task_t *ct = jl_current_task; - jl_timing_block_t **pcur = &ct->ptls->timing_stack; - assert(*pcur == block); - *pcur = block->prev; - if (block->prev) { - _COUNTS_RESUME(&block->prev->counts_ctx, t); - } - } -} - typedef struct _jl_timing_suspend_t { jl_task_t *ct; } jl_timing_suspend_t; @@ -413,16 +338,19 @@ STATIC_INLINE void _jl_timing_suspend_destroy(jl_timing_suspend_t *suspend) JL_N #endif } 
-#define JL_TIMING(subsystem, event) \ - JL_TIMING_CREATE_BLOCK(__timing_block, subsystem, event); \ +#define JL_TIMING(subsystem, event) \ + JL_TIMING_CREATE_BLOCK(__timing_block, subsystem, event); \ jl_timing_block_start(&__timing_block) -#define JL_TIMING_CREATE_BLOCK(block, subsystem, event) \ - __attribute__((cleanup(_jl_timing_block_destroy))) \ - jl_timing_block_t block; \ - _jl_timing_block_ctor(&block, JL_TIMING_ ## subsystem, JL_TIMING_EVENT_ ## event); \ - _TRACY_CTOR(&block, #event); \ - _ITTAPI_CTOR(&block, #event) +#define JL_TIMING_CREATE_BLOCK(block, subsystem_name, event_name) \ + static jl_timing_event_t *TIMING_CONCAT(__timing_event, __LINE__) = 0; \ + if (!TIMING_CONCAT(__timing_event, __LINE__)) \ + TIMING_CONCAT(__timing_event, __LINE__) = jl_timing_event_create( \ + #subsystem_name, #event_name, __func__, __FILE__, __LINE__, 0 \ + ); \ + __attribute__((cleanup(_jl_timing_block_end))) \ + jl_timing_block_t block = { 0 }; \ + block.event = TIMING_CONCAT(__timing_event, __LINE__) #define JL_TIMING_SUSPEND_TASK(subsystem, ct) \ __attribute__((cleanup(_jl_timing_suspend_destroy))) \ From 5850227a3bcad04649a918a793e8205414cb6664 Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Thu, 25 May 2023 12:11:45 -0400 Subject: [PATCH 100/290] Add timing event locks + de-duplicate ITTAPI events --- src/timing.c | 83 +++++++++++++++++++++++++++++++++++++++++++++------- src/timing.h | 4 +-- 2 files changed, 73 insertions(+), 14 deletions(-) diff --git a/src/timing.c b/src/timing.c index 8af101c0f8026..7e4fe7fb5cccc 100644 --- a/src/timing.c +++ b/src/timing.c @@ -26,8 +26,6 @@ static uint64_t t0; JL_DLLEXPORT _Atomic(uint64_t) jl_timing_disable_mask[(JL_TIMING_SUBSYSTEM_LAST + sizeof(uint64_t) * CHAR_BIT - 1) / (sizeof(uint64_t) * CHAR_BIT)]; -static arraylist_t jl_timing_counts_events; - // Used to as an item limit when several strings of metadata can // potentially be associated with a single timing zone. 
JL_DLLEXPORT uint32_t jl_timing_print_limit = 10; @@ -41,12 +39,23 @@ const char *jl_timing_subsystems[(int)JL_TIMING_SUBSYSTEM_LAST] = JL_DLLEXPORT jl_timing_counter_t jl_timing_counters[JL_TIMING_COUNTER_LAST]; +#ifdef USE_TIMING_COUNTS +static arraylist_t jl_timing_counts_events; +static jl_mutex_t jl_timing_counts_events_lock; +#endif //USE_TIMING_COUNTS + +#ifdef USE_ITTAPI +static arraylist_t jl_timing_ittapi_events; +static jl_mutex_t jl_timing_ittapi_events_lock; +#endif //USE_ITTAPI + void jl_print_timings(void) { #ifdef USE_TIMING_COUNTS + JL_LOCK_NOGC(&jl_timing_counts_events_lock); uint64_t total_time = cycleclock() - t0; uint64_t root_time = total_time; - for (int i = 0; i < jl_timing_counts_events.len; i++) { + for (int i = 1; i < jl_timing_counts_events.len; i++) { jl_timing_counts_event_t *other_event = (jl_timing_counts_event_t *)jl_timing_counts_events.items[i]; root_time -= jl_atomic_load_relaxed(&other_event->self); } @@ -66,6 +75,7 @@ void jl_print_timings(void) self, 100 * (((double)self) / total_time), total, 100 * (((double)total) / total_time)); } + JL_UNLOCK_NOGC(&jl_timing_counts_events_lock); fprintf(stderr, "\nJULIA COUNTERS\n"); fprintf(stderr, "%-25s, %-20s\n", "Counter", "Value"); @@ -91,6 +101,8 @@ void jl_init_timing(void) _Static_assert(JL_TIMING_SUBSYSTEM_LAST < sizeof(uint64_t) * CHAR_BIT, "Too many timing subsystems!"); #ifdef USE_TIMING_COUNTS + JL_MUTEX_INIT(&jl_timing_counts_events_lock, "jl_timing_counts_events_lock"); + // Create events list for counts backend arraylist_new(&jl_timing_counts_events, 1); @@ -98,9 +110,16 @@ void jl_init_timing(void) arraylist_push(&jl_timing_counts_events, (void *)new_event); new_event->name = "ROOT"; + jl_atomic_store_relaxed(&new_event->self, 0); jl_atomic_store_relaxed(&new_event->total, 0); #endif +#ifdef USE_ITTAPI + // Create events list for ITTAPI backend + JL_MUTEX_INIT(&jl_timing_ittapi_events_lock, "jl_timing_ittapi_events_lock"); + arraylist_new(&jl_timing_ittapi_events, 0); 
+#endif + // Sort the subsystem names for quick enable/disable lookups qsort( jl_timing_subsystems, JL_TIMING_SUBSYSTEM_LAST, @@ -171,24 +190,60 @@ static const int get_timing_subsystem(const char *subsystem) { return (int)(match - &jl_timing_subsystems[0]); } +#ifdef USE_ITTAPI + +typedef struct { + __itt_event event; + const char *name; +} cached_ittapi_event_t; + +static __itt_event _jl_timing_ittapi_event_create(const char *event) { + JL_LOCK_NOGC(&jl_timing_ittapi_events_lock); + const size_t n = jl_timing_ittapi_events.len; + for (size_t i = 0; i < n; i++) { + cached_ittapi_event_t *other_event = (cached_ittapi_event_t *)jl_timing_ittapi_events.items[i]; + if (strcmp(event, other_event->name) == 0) { + JL_UNLOCK_NOGC(&jl_timing_ittapi_events_lock); + return other_event->event; + } + } + + // No matching event found - create a new one + cached_ittapi_event_t *new_event = (cached_ittapi_event_t *)malloc(sizeof(cached_ittapi_event_t)); + arraylist_push(&jl_timing_ittapi_events, (void *)new_event); + new_event->name = event; + new_event->event = __itt_event_create(event, strlen(event)); + JL_UNLOCK_NOGC(&jl_timing_ittapi_events_lock); + + return new_event->event; +} + +#endif // USE_ITTAPI + #ifdef USE_TIMING_COUNTS // This function is analogous to __itt_event_create but for the counts backend // // `event` is required to live forever static jl_timing_counts_event_t *_jl_timing_counts_event_create(const char *event) { + JL_LOCK_NOGC(&jl_timing_counts_events_lock); const size_t n = jl_timing_counts_events.len; for (size_t i = 0; i < n; i++) { jl_timing_counts_event_t *other_event = (jl_timing_counts_event_t *)jl_timing_counts_events.items[i]; - if (strcmp(event, other_event->name) == 0) + if (strcmp(event, other_event->name) == 0) { + JL_UNLOCK_NOGC(&jl_timing_counts_events_lock); return other_event; + } } // No matching event found - create a new one jl_timing_counts_event_t *new_event = (jl_timing_counts_event_t *)malloc(sizeof(jl_timing_counts_event_t)); 
arraylist_push(&jl_timing_counts_events, (void *)new_event); new_event->name = event; + jl_atomic_store_relaxed(&new_event->self, 0); jl_atomic_store_relaxed(&new_event->total, 0); + JL_UNLOCK_NOGC(&jl_timing_counts_events_lock); + return new_event; } @@ -244,6 +299,10 @@ JL_DLLEXPORT jl_timing_event_t *_jl_timing_event_create(const char *subsystem, c event->counts_event = _jl_timing_counts_event_create(name); #endif // USE_TIMING_COUNTS +#ifdef USE_ITTAPI + event->ittapi_event = _jl_timing_ittapi_event_create(name); +#endif // USE_ITTAPI + #ifdef USE_TRACY event->tracy_srcloc.name = name; event->tracy_srcloc.function = function; @@ -252,15 +311,17 @@ JL_DLLEXPORT jl_timing_event_t *_jl_timing_event_create(const char *subsystem, c event->tracy_srcloc.color = color; #endif // USE_TRACY -#ifdef USE_ITTAPI - event->ittapi_event = __itt_event_create(name, strlen(name)); -#endif // USE_ITTAPI - return event; } -JL_DLLEXPORT void _jl_timing_block_init(jl_timing_block_t *block, jl_timing_event_t *event) { - memset(block, 0, sizeof(*block)); +JL_DLLEXPORT void _jl_timing_block_init(char *buf, size_t size, jl_timing_event_t *event) { + if (size < sizeof(jl_timing_block_t)) { + jl_errorf("jl_timing_block_t buffer must be at least %d bytes", sizeof(jl_timing_block_t)); + return; + } + + jl_timing_block_t *block = (jl_timing_block_t *)buf; + memset(block, 0, sizeof(jl_timing_block_t)); block->event = event; } @@ -592,7 +653,7 @@ void jl_destroy_timing(void) { } JL_DLLEXPORT jl_timing_event_t *_jl_timing_event_create(const char *subsystem, const char *name, const char *function, const char *file, int line, int color) { return NULL; } -JL_DLLEXPORT void _jl_timing_block_init(jl_timing_block_t *block, jl_timing_event_t *event) { } +JL_DLLEXPORT void _jl_timing_block_init(char *buf, size_t size, jl_timing_event_t *event) { } JL_DLLEXPORT void _jl_timing_block_start(jl_timing_block_t *block) { } JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *block) { } diff --git 
a/src/timing.h b/src/timing.h index 5ec6ea1fc33f8..30f6ad0ab3b5c 100644 --- a/src/timing.h +++ b/src/timing.h @@ -52,7 +52,7 @@ void jl_timing_apply_env(void); extern JL_DLLEXPORT uint32_t jl_timing_print_limit; JL_DLLEXPORT jl_timing_event_t *_jl_timing_event_create(const char *subsystem, const char *name, const char *function, const char *file, int line, int color); -JL_DLLEXPORT void _jl_timing_block_init(jl_timing_block_t *block, jl_timing_event_t *event); +JL_DLLEXPORT void _jl_timing_block_init(char *buf, size_t size, jl_timing_event_t *event); JL_DLLEXPORT void _jl_timing_block_start(jl_timing_block_t *cur_block); JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *cur_block); @@ -89,7 +89,6 @@ JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *cur_block); #define jl_timing_puts(b, s) #define jl_timing_task_init(t) #define jl_timing_event_create(blk) -#define jl_timing_block_init(blk) #define jl_timing_block_start(blk) #define jl_timing_block_task_enter(ct, ptls, blk) #define jl_timing_block_task_exit(ct, ptls) ((jl_timing_block_t *)NULL) @@ -142,7 +141,6 @@ JL_DLLEXPORT void jl_timing_printf(jl_timing_block_t *cur_block, const char *for JL_DLLEXPORT void jl_timing_puts(jl_timing_block_t *cur_block, const char *str); #define jl_timing_event_create(subsystem, name, function, file, line, color) _jl_timing_event_create(subsystem, name, function, file, line, color) -#define jl_timing_block_init(blk, evt) _jl_timing_block_start(blk, evt) #define jl_timing_block_start(blk) _jl_timing_block_start(blk) #define jl_timing_block_end(blk) _jl_timing_block_end(blk) From 3b90d5edd83630c64e2b037efb1c425d7d974f4b Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Thu, 25 May 2023 20:49:51 -0400 Subject: [PATCH 101/290] Use `jl_timing_set_enable` instead of `DISABLE_SUBSYSTEM` --- src/timing.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/timing.c b/src/timing.c index 7e4fe7fb5cccc..19357c017cfa1 100644 --- 
a/src/timing.c +++ b/src/timing.c @@ -154,14 +154,15 @@ void jl_init_timing(void) * files, so we disable them by default. **/ #ifdef DISABLE_FREQUENT_EVENTS -#define DISABLE_SUBSYSTEM(subsystem) jl_atomic_fetch_or_relaxed(jl_timing_disable_mask + (JL_TIMING_##subsystem / (sizeof(uint64_t) * CHAR_BIT)), 1 << (JL_TIMING_##subsystem % (sizeof(uint64_t) * CHAR_BIT))) - DISABLE_SUBSYSTEM(ROOT); - DISABLE_SUBSYSTEM(TYPE_CACHE_LOOKUP); - DISABLE_SUBSYSTEM(TYPE_CACHE_INSERT); - DISABLE_SUBSYSTEM(METHOD_MATCH); - DISABLE_SUBSYSTEM(METHOD_LOOKUP_FAST); - DISABLE_SUBSYSTEM(AST_COMPRESS); - DISABLE_SUBSYSTEM(AST_UNCOMPRESS); + uint8_t error = 0; + error |= jl_timing_set_enable("ROOT", 0); + error |= jl_timing_set_enable("TYPE_CACHE_LOOKUP", 0); + error |= jl_timing_set_enable("METHOD_MATCH", 0); + error |= jl_timing_set_enable("METHOD_LOOKUP_FAST", 0); + error |= jl_timing_set_enable("AST_COMPRESS", 0); + error |= jl_timing_set_enable("AST_UNCOMPRESS", 0); + if (error) + jl_error("invalid timing subsystem encountered in jl_init_timing"); #endif // Apply e.g. 
JULIA_TIMING_SUBSYSTEMS="+GC,-INFERENCE" and From 3c915f304d186cf82fa561f96346280ecf14d38f Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Thu, 25 May 2023 22:30:52 -0400 Subject: [PATCH 102/290] Sort jl_timing counts events before printing --- src/timing.c | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/src/timing.c b/src/timing.c index 19357c017cfa1..d933f082c816e 100644 --- a/src/timing.c +++ b/src/timing.c @@ -49,17 +49,32 @@ static arraylist_t jl_timing_ittapi_events; static jl_mutex_t jl_timing_ittapi_events_lock; #endif //USE_ITTAPI +#ifdef USE_TIMING_COUNTS +static int cmp_counts_events(const void *a, const void *b) { + jl_timing_counts_event_t *event_a = *(jl_timing_counts_event_t **)a; + jl_timing_counts_event_t *event_b = *(jl_timing_counts_event_t **)b; + return strcmp(event_a->name, event_b->name); +} +#endif + void jl_print_timings(void) { #ifdef USE_TIMING_COUNTS + qsort(jl_timing_counts_events.items, jl_timing_counts_events.len, + sizeof(jl_timing_counts_event_t *), cmp_counts_events); + JL_LOCK_NOGC(&jl_timing_counts_events_lock); uint64_t total_time = cycleclock() - t0; uint64_t root_time = total_time; - for (int i = 1; i < jl_timing_counts_events.len; i++) { + jl_timing_counts_event_t *root_event; + for (int i = 0; i < jl_timing_counts_events.len; i++) { jl_timing_counts_event_t *other_event = (jl_timing_counts_event_t *)jl_timing_counts_events.items[i]; - root_time -= jl_atomic_load_relaxed(&other_event->self); + if (strcmp(other_event->name, "ROOT") == 0) { + root_event = other_event; + } else { + root_time -= jl_atomic_load_relaxed(&other_event->self); + } } - jl_timing_counts_event_t *root_event = (jl_timing_counts_event_t *)jl_timing_counts_events.items[0]; jl_atomic_store_relaxed(&root_event->self, root_time); jl_atomic_store_relaxed(&root_event->total, total_time); @@ -90,7 +105,7 @@ void jl_print_timings(void) #endif } -static const int indirect_strcmp(const void *a, const 
void *b) { +static int indirect_strcmp(const void *a, const void *b) { return strcmp(*(const char **)a, *(const char **)b); } @@ -106,12 +121,12 @@ void jl_init_timing(void) // Create events list for counts backend arraylist_new(&jl_timing_counts_events, 1); - jl_timing_counts_event_t *new_event = (jl_timing_counts_event_t *)malloc(sizeof(jl_timing_counts_event_t)); - arraylist_push(&jl_timing_counts_events, (void *)new_event); + jl_timing_counts_event_t *root_event = (jl_timing_counts_event_t *)malloc(sizeof(jl_timing_counts_event_t)); + arraylist_push(&jl_timing_counts_events, (void *)root_event); - new_event->name = "ROOT"; - jl_atomic_store_relaxed(&new_event->self, 0); - jl_atomic_store_relaxed(&new_event->total, 0); + root_event->name = "ROOT"; + jl_atomic_store_relaxed(&root_event->self, 0); + jl_atomic_store_relaxed(&root_event->total, 0); #endif #ifdef USE_ITTAPI @@ -121,10 +136,8 @@ void jl_init_timing(void) #endif // Sort the subsystem names for quick enable/disable lookups - qsort( - jl_timing_subsystems, JL_TIMING_SUBSYSTEM_LAST, - sizeof(const char *), indirect_strcmp - ); + qsort(jl_timing_subsystems, JL_TIMING_SUBSYSTEM_LAST, + sizeof(const char *), indirect_strcmp); int i __attribute__((unused)) = 0; #ifdef USE_ITTAPI From be2c35a8d2c29acdf9f250ba85f106bbac3dc143 Mon Sep 17 00:00:00 2001 From: Steve Kelly Date: Fri, 2 Jun 2023 09:26:11 -0400 Subject: [PATCH 103/290] mention `Cmd(::Vector{String})` in `Cmd` docstring (#49684) * mention `Cmd(::Vector{String})` in `Cmd` docstring This seems like a cyclical definition without it, and naively `Cmd` and `String` seem on the surface to be similar so it can be surprising that `Cmd("a string")` does not work. Adding this signature in the docstring should obviate the issues for first time users that read the docstring without having reviewed the manual. We also clarify the behavior of `exec` and command literals. --------- Co-authored-by: Steven G. 
Johnson --- base/cmd.jl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/base/cmd.jl b/base/cmd.jl index 9e274b61b5e9e..475a62a82d4d7 100644 --- a/base/cmd.jl +++ b/base/cmd.jl @@ -41,6 +41,7 @@ has_nondefault_cmd_flags(c::Cmd) = """ Cmd(cmd::Cmd; ignorestatus, detach, windows_verbatim, windows_hide, env, dir) + Cmd(exec::Vector{String}) Construct a new `Cmd` object, representing an external program and arguments, from `cmd`, while changing the settings of the optional keyword arguments: @@ -70,8 +71,15 @@ while changing the settings of the optional keyword arguments: * `dir::AbstractString`: Specify a working directory for the command (instead of the current directory). -For any keywords that are not specified, the current settings from `cmd` are used. Normally, -to create a `Cmd` object in the first place, one uses backticks, e.g. +For any keywords that are not specified, the current settings from `cmd` are used. + +Note that the `Cmd(exec)` constructor does not create a copy of `exec`. Any subsequent changes to `exec` will be reflected in the `Cmd` object. + +The most common way to construct a `Cmd` object is with command literals (backticks), e.g. + + `ls -l` + +This can then be passed to the `Cmd` constructor to modify its settings, e.g. Cmd(`echo "Hello world"`, ignorestatus=true, detach=false) """ From 0efb7c5d953afd1a656bf76a0d8df4ec353e9ddc Mon Sep 17 00:00:00 2001 From: Steve Kelly Date: Fri, 2 Jun 2023 09:26:41 -0400 Subject: [PATCH 104/290] add docs for `Base.shred!` (#50032) It was mentioned in `Base.SecretBuffer`. 
--- base/secretbuffer.jl | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/base/secretbuffer.jl b/base/secretbuffer.jl index 935c50fb80fd6..eedfd8cbe84c1 100644 --- a/base/secretbuffer.jl +++ b/base/secretbuffer.jl @@ -179,6 +179,21 @@ function final_shred!(s::SecretBuffer) shred!(s) end +""" + shred!(s::SecretBuffer) + +Shreds the contents of a `SecretBuffer` by securely zeroing its data and +resetting its pointer and size. +This function is used to securely erase the sensitive data held in the buffer, +reducing the potential for information leaks. + +# Example +```julia +s = SecretBuffer() +write(s, 's', 'e', 'c', 'r', 'e', 't') +shred!(s) # s is now empty +``` +""" function shred!(s::SecretBuffer) securezero!(s.data) s.ptr = 1 @@ -188,6 +203,13 @@ end isshredded(s::SecretBuffer) = all(iszero, s.data) +""" + shred!(f::Function, x) + +Applies function `f` to the argument `x` and then shreds `x`. +This function is useful when you need to perform some operations on e.g. a +`SecretBuffer` and then want to ensure that it is securely shredded afterwards. +""" function shred!(f::Function, x) try f(x) From f09e46d163f633da606271a07cceacb151e37da5 Mon Sep 17 00:00:00 2001 From: Steve Kelly Date: Fri, 2 Jun 2023 09:27:26 -0400 Subject: [PATCH 105/290] add Compiler diagram (#49893) This is inspired by the diagram presented by Lionel Zoubritzky in his 2018 JuliaCon talk. This gives a visual overview of what is happening in the compiler. 
--- doc/src/devdocs/eval.md | 3 +++ doc/src/devdocs/img/compiler_diagram.png | Bin 0 -> 71878 bytes doc/src/devdocs/img/compiler_diagram.svg | 1 + 3 files changed, 4 insertions(+) create mode 100644 doc/src/devdocs/img/compiler_diagram.png create mode 100644 doc/src/devdocs/img/compiler_diagram.svg diff --git a/doc/src/devdocs/eval.md b/doc/src/devdocs/eval.md index 6a153c67daa13..8f2fd68159676 100644 --- a/doc/src/devdocs/eval.md +++ b/doc/src/devdocs/eval.md @@ -18,6 +18,9 @@ function, and primitive function, before turning into the desired result (hopefu Abstract Syntax Tree The AST is the digital representation of the code structure. In this form the code has been tokenized for meaning so that it is more suitable for manipulation and execution. + +![Diagram of the compiler flow](./img/compiler_diagram.png) + ## Julia Execution The 10,000 foot view of the whole process is as follows: diff --git a/doc/src/devdocs/img/compiler_diagram.png b/doc/src/devdocs/img/compiler_diagram.png new file mode 100644 index 0000000000000000000000000000000000000000..5c4b780bca4553efb8f707dddb90e25da094611c GIT binary patch literal 71878 zcmeFZ2T)Uc8#f4|pwd*NHx&V;hTg#fBGN1ucV*|qtH^nY2a_Ml5wi$;>dBwKE@n9C5jF=x3adj zk$ZRg%DubSE?s(e=`y|0t9R08g|586#1?kt@+))neuc_0nMawh0`()VAWWqQWYzwK z@QnSw)F~+?XcIC0(?g9nSGE4vzyF&haQ%wppKo}m_2Mv7efDhPpK_nQz6-tZQdsBD zUwM^}CTd+R2K}j+>zOu9g0E{@{``|%|5yLh4>7uJA1wC#<5l;d118+VhbOlWNjR8$ z)8UiTm!F@z;`jXJ&nJUDZ;3zu!o@!AaF5#P#~n|8ofXp63x;elOo5lx=f6;BK9WS< z0`9yJw*K|$jM4{aFH34t%cs4eWC8A|o1I}ItGsMu?wEaUar(7_By#X~y3%vNo!NUS zzfQ;^xP9wI&uhx~yr*H2z@77kEo~wCS3D!j^$+FeJPm6 z?T2o5shNqgE2qWkHO}6+OboO+CFN z<)==TgJ{f}!`C_M60vHGue-RO0o^a7YI|7*Mx`x}6hfN@m(YL#_B=ns)ZnGQCV#Nr ziU{NdS%tAejt@ytPZV!x11l?khh}hr0aYU z1eosr)nsZm7X;od?|6#{UDy{NSV*nz>0~vte`4;sqaOT)f|cjjS=BT8)s83AeTKBK zL@y1;fp_7C@GqGUrI7B8`*{VA^7V$uzf-bYse4}V%Am2P1@NSWL^Dn+kC4MB*nlJ2 zbZD@5iP+OH+Js+9&Fg_eUKa>LL;b-w9_ z2E9j#mbOj>7W~FRX^;Icg{h}%@&vSz52*`_;AxKN()Y%VLA8*cy$yV?Tm9-hHm~MTG20eb=#rh+bd=Gla 
zPPM{z&>|?Clt4p@&sbw6~a z0#Y@es5;{-&fvqjW#VNvUh99%Tn&-QHxEoK2$-RtZwM59)QIZxz}4*UrstcP;R>S< z6F#SIH&XZRUGwDXxChNjz*fg;KVS^bZ%9O|SxwN)&-XE=RTR59#jE0Ny!)!EVWw-` znLfprQa*lm!!zt2X8Lu;%?}A==p+ly2YOm>hkO_H21noY+s^~{eRS4q`ux5C^AX{w z;!)0OdiPg^JO6mVJT1S6*sB;;zPtvNjrW0B6fM?{cRd4$eLM6 zy1~1uak^@H7J!DMlo?kWK+c-HY8ivYlsC7#<+!ZrH@AjbsEx*7|1~!9s2(C3Rt9joE2gcBJ2zG_ns!o42 zvEaI~>8IYS$S!IdDYh;+T08L>{++`@Azl=Y88^eh6Cf^^fBTu_Gz=6-limniX`JlD zm!R)U`4%pUPG3J-qfS}+&qU^FCh)NH?|`I17atcs+3Dl`mI98KNEFoHjb}W8??9P* z_saO;nDmvc>z{6glx}ozno51^+TX(`6kWeXkVBpL9DLBh>F&P(`TE&-Sc!THW!+U= zGkX^EA)OS#m!LeR#t&#A`H)hfi{bQl%G-S60JJH(ob<+<^z9HoixSRwYSSm9fHQMrA3M=4I-U64T2VMCUy+6L`3bXJT z)UYrMFlZBru+It}bwTWmVgrW5qknghp@-*DYuSel)+K-PO) zAPuP5?l_UAASAdH;d4biyGmfCVIH3)wf~pjF8|1loAFww=#+UpJlL7TyG0VFJ;&Lm zQ9RHd)rRTSo;;YF6N`wE)tz`Q$5AtHU3awzG! z4aAgH65M|KUVzo0CvNvqoplV0SgFyJ@!sf#q&l1KNaufbwHh_CR}rU{c2FA-mSX~O z?&g`g+K-y!LK&TltKz2?kb9;OGg)eM*=`*Hv-V}rg%vm1XE5buDlTx^#9iWueFK#d z9MDxfV@j;4C-K~<;yYNgmOpx7vXJU*vI{blB_zRPc67O;{paRr|H!(pfeItA$&f-y z+1o`7xz{ZUN4F^P-q4n=du0>!%{rQUeih=gf*ur>drRr}PN{;BMh~_uGLaD%_#yyv zvEh(&b3rxQh=}w^ppch7IPK`!hH)>-#<1LuEkpp$H=HJ^Bb4;%gVGo^n|a0HY-gE6 znNJokIO`Gn-p-h%nyaus+oBNTs5!*EWRhNP5>&?fQ^rcNxFBz(UDhmm@T8||{F!nf zFpd*ViCBd0>IC3%6xtCRP@^&%jV>A?|8?&v-?8y62w}9B%j?T!3p$f=``)ytSV3BFsriBMj)1* z3Q%sV-e1c6W{a$p1^Q;?ea@iTOq%6m`RC=oT7TLYUd-$*D^N9T{3C2#T*5Uqk)WqG z--fmBZPZc-uzg>t=zn9Auao*_=xWPvHn-kexhPW6wT81SScwS4Pk42IRmt_6@-^&7 zt8Cw2Zn_Bs^DR|NMXH7=KTLr3F7igc)Z+p>TwfJs)cJ4n4f-N-7h}n@G2%V!_USY2 z+1iT9#^;3;X?@_-{y?Ir2A4&Y<=lzeoaIUpiQ`qz8@~ULKJN8=ogH12tP& zR0`OWN9@rH8>T#=hx@Z9)H?fQGT%rTnnaoR4_RynF zuw997SJy14bIBotPJe5rEq^$cGVGP3j}c z7OlgyX62u}u(|MeQCNpnk2^9d2}_Kgn%|7LI6;xm*?sm$g&q@qeZJ<^{kv4L&reiH zc#Ft?J9+#vu+DSJVQJ22z_IOfZjAvr-km5jA>9AkDd4k@Io><&k6QGuB2{{)NFTH9 z;{9WHjtjve+-+qyfwF#qJ348?N=K1O*>UJv`zqWYK9?o0Izq*A`IT16r@M18!a%1OtlHT@0oGYm;j`TA z($RcR4;t;5lc(unLm8@VJ{2#%0>t2wrU#G3`#ircu?B3&=qcc0ymOZ&gGo8!S9eL7 z4PP;gX(S#(k^Tzu!d)&)Ec6TiuQv9@6{F5@uk@ayNK 
zYx>+3sd}bTxs5`T!4k45c&?E`L(;7EqJr34N(R_Ct8XugqOy(}4;W|JHWTU{^P91L z`e(N3dnwZw9x{LUje73|U;O$kSAmIhnT0eeJYZ`+pzIR*!qN7yK9np`oJ^3q8^a>n z03JfnTz;kTw*zb0hM7BGu9Iq=Zt1bIxf)LeiepqJ!wubpgq*_Mm*vqhB`+vDHr2Qm z?#k%N=u!K3uR=Cvxlmsl+hR(9!q*}l94B*F?0S=G@@U2pd(>5mI*Nua^wIq5*x-)X zjgUf$x6bb&0=+yE*phONV)l&bx}&b~wB+M0m-PjAI#uF0qRu=WQ~cYX`%) zJ}Yb!hG7U8jq!fhEsH{RQn=T5TUeyy(Nf|&v-(XdcH({K?ZQkM)4#2j&>6u}`!g8- zD7rO7(JT6RPUHAeRT!&>m?K4F@m_k40_pXnRM=4qzymV|GfQ|`rq;ox&QjdAz1_6Z1 zu|J6l*P#Oy%MnZH3Od4RALz6gXb4&Ugfh0Y@~GK>4xy;Bdu7Gq5G3V4kHSjae8@ii zqjG9jC^cY28Maa2cBZ`nsg1CW$g^90k&{iTC4sGka=*5 z>1dt!IH2f#f&AR>Z<&N`bcmgTSuTOQD@$>!8n^lvn(1oHSQ3j*vEe1mb%dyTz)}%F zqnMu=`03}d@?i&GUKgKA5xfF8sh-SQjignOY?j(1z&X2Nj}_ihcC0LN(MU=lYn>=N zQk+$r&b43tThD3GMnUBxtXJhHa;^VqI8c906)N;Ss4i2xqeYEwXYQkb@HQaR&xtM*?X~fu^8GLrAQ)Eek?fs_+vsiI|IgmT3Ip#FWc^%@hQuavk`L{?$`{TeHu6 zRwqW3s0qB@W@7Z9q?5BLKl%YvlI)Mxyf;l|YnIDX0tsLu@M>OMdnvK@+ylYC%d&%l zG5zSh`S%wIRBQ_h4jr0U35vu^p2|96sPN0;L1~*@4`h)GBU{ODP9*a2NSOwWfrC-lwWsN}?F0fM3 ztq(+if#z=hEo%^?CU0x^`shU) z{5{l*wi?j%vg18{vb|;bqdUnl6o42ClAcU>_AhHmfOgl8JtdKHzFf2I)8FM*C5D7c zc95Od>pQ_u+~a%y72zZu&Z}43A5EPg6Pubcrq2>!S6H0eZGxu^2rY`BZpkdFbl*{9 z3z4w*b21KP9V>lG#Fh5q`n5Xj=A8#}NKW^LvQ$aidk;lMxuW(Lj6gPRr%P6VoE~Nz zrNE$l`MtIh%I2YTyawTLbFLeK*3qi^JQT@lk%I1Lw3vMwR`&cn>Rn{;+GMRoYE2YZ zjM$O)e9wkW*r=`^8lCsM+wt8%GKZMQe-maaBbRnsi~Zx&id1 zPua3e|Gmm9mq1eGhdzy?=Vjbg*_IbSI(kXlo90}EZ_lHupe_NraV<=?OIX>;h2&1@ zHA#{4ynhGJi;D5(p2Yy?*72*vZQjB49mg{KQlR8z++prSut z6Gg{vW9p?KOUJe#hYK1O*8kspbc#H2bHl|z$j;X6qoO9*s~_;CPk>FaeM*i?*dQX{ zCl*2fte-fGw+p+8FHZ0u%T*pVa$fZs3;Z)P)80Ljn;bZXUFdkN$+ILH{Cj*fZ;1RH z+tCLtf*&IvE&*ShO?!Dhs+NGME-}2rCQnm0mY4QT=(NCM8p!$?B*1DhZs{+1y{q;+=rr`uD0?F)qb6ZHFhZE#r2czvoiIO_V{ZIi#qam-~~4nWk&)3LZT1 z?#JuW;>D6yNh{x1X?_oebFSDc zVf(e)B0Yk@_7k*3y<;eG9T^sjiR?cE;@NwJsZT4;vDlcUjr;@aKiy#!&#g3C>53UO z3)!E`bDOG5x)aASUiI{m?((AtphQsY{q7`Drny#!=G?}E&2Ebzp>@?k6Sk8C8t-(f zI}vyQ3S4bh7!Xaxmrc^6K1dgJ8Le4KRm5zy?F}PTEsRj}3k@M?$$SKWgt0S`lfmdD9 zeG=aDI^wF25zh>8Qb^AN$nNU#Us 
zcK~wfQ8^}2`9G*4^(8)lFS_=)<@c4{tNQ;z3O)G#1^CN*Z$bR^DxFOK!DRie3eZ_E z9`fsSy6JEJCuj8`0f5l64gP0d3E&G7Wo>cbgS`!lbbzjk`~MeR^_SxQFC!5R@~HQU zs<9V-kdN1{r6+*?8~Ow$F%sG7HhO!$A0isynB{E266a<#J+e$aZ?S*mxY+TGr`)T6Q5@?w-QQK{7n z^Nz4W*r&DBc5&Xi&nIqi`f@hIKbc^*U0AY+u?q*OsU>~DN!*+Kj-VT@=`P3WtI(W2 zxMn3~j};NOQ2&8IO&8ycvs0QNtgnANdg%&k=E^!#L-IKIvcR*ud*-rw<3E|d!B;vE z9Wiy4vC59rZfKvfA;lwC(s)$vxOcb}z1Ooi<6-5+5Q{ZB?; zaE*OBnYBkkvkE;PoA&($yL?%+YrA6O2+#cQN+db`UM_IFYi+&j<>E1N5bXCH=j}mJ zKM7w&Zrcg9p|4!yL7Lu<)h2dW&JuwxF8Ph4@D?ZA+BQVcT9t~YmglIN(>H1G@Xlxwj-$w_F)a1 zYoJ**-AU0<*Dr|(rYzD5gXUEm7E$A;iYlYoD?!3>s$C7cN8UZd6#`e^MndU*U)9~> z>ROh`OC+&BKUe#YVd;S+@~{JE-@A4oXHj9tdS>-@B6+;+qTNL;!C2Nev_Z3w6 z;Ogt#uo3E^(`{b#{zea9KVA9!1+$?dyDKlNvu5H#>3s{|%M~fxr#3gNU>|*UY$ikn zi)*z_tJIO6jaHB0(%qSY8`c!+a^k`}Pd73)*t7XJbVP*nYa8m_-8o3NUs;H-Mr?$~ ztlmC(z)SSr-fCr9@F@xgQL;{XKa-g#J~;n~r&40?bKf-n@O|HXcuXRt7|+Mh6dZN1 z*u5ag1bzL;5vMd#G+Me8v#!!X>YvhVCR$cB%b~%Oki!N&)YT3qMrh!TD-2`2UAPTMP5{ z)DWxiL*@M|Hdfd&dYrGaOWp2K#j5ipRfPr25$wTR+y9bGi>fCERljyc^k07dh5MiK z!>c!xcOJ$+$w%eURn61AbSsg_C^{bzpkA&3Ia+y68M5}F^X0dl*ZDL_{7tK?$4bgx zFj2-kY>5uFPq~Y37t65Oabj3RL!_)P4tT}5I=u7~*xXm>7r{mYc_JO?q!d(>SifE1FzzmKsNHo1?L9G zdCNzDoR)K5k}gx3Dc?T^MF95ae4H>EVg@B4Zqo5Yp#d#$_kLqVVO&AUf^aOxEo;AP zOUpKC>=Z{)_MSz!AxlpcW?w?v-rHBhIC$?Lm85ITg!;4H>| zMMZYLr5q}1lqrSWGut!w_uGABx165eD^-!D)Ao*Z%Eru`0sseYXVD0%Sb+!X;-Ji! 
zb%h6FA!^cFwdeLD{wa?G4&xwE_u=^M==K$M6g<8ahADb`{_vCjuSFQAeVIIRRHmGUXYgH?E2o9n&A*C;!yh+718UKwN zQ%6H5ZZjwpuc%>eHn1FtfvW|AoA(aT4fOt=>Np7V_!Yi7}iql~Bi7|Ny}#JV2p zb3Ieu+h=H}rIo&+gMT(5d)6YuYIppH=+Gv3iR8nmUH3(?Ox^19i-=+J}ecI!?u7kvRy zfJXgS>DJMqCQ{badxxMtiX7%t5@iGLie};cflKwH@;=QSgua9+Ys>tc{u(i@xw=uo zFm<&W@eE2ofs+d@&apIp^SlC-zB949DU^4z6I>m_I|yQYCgWp^_f~CwrAG&LL^+v# zw?3Li0XKkms)ow+_7 z1LC4w@KI43VxQg=7`Uy^6l?5!8-SE{wK<#7umU|g{iMukKNTJ&*XL97bfxtni)sC+{FWUZ|LxS9 zSXH{)NYz-TJMOeDv}CdAnREJ}g}b(4Qy`kL$Go`E237JJhu;fIxHbAD{P9-`y#J&e zn>%Ag44J2iwU4e|YjzTPqyLQ9?bEL<|EUyb=BJ7kYjY@Dl`OB4$#EOvbYT=20*-$5 zi~ni|!sdXNP8M$60D`~&&=ZGg*C_=tEcU$=8w6F{e8g@S77n{cGbJODBZcLwQv}1L zia}@Hc1?POV%CQCf)+XAJO`RLt@O^*;Nv~}_SD{Z%N6ug{8N|+<02%JL6i2-RPK-s1{uGr+6q>$cG z-tpoQq0K{PxNmPIm<*qvBrcz@t*rql4hMQYhgP$J+x)B17Y`o1|jTG!IvI0?pHAZ^iqpHLSJ&HG%*oa9EiylZ>_N z!yY)tWP)cA@LD#V<>$xXA)y|BuJj#tcibvp56N#8yxzZ1MEC)p5I ztU5GyE*)hzx6;r0P-@J^oq6G5QR(v`jhgY;`e&LmIi~0{Q~NIP~JZvzcl0sGLvcyyZ^hd(S0TFXfRKsT<(<(1x=v*~Zkv zMY66e+n26WO+f6nHaE|mOJX%ipRb903_TZwenvinc_tM`)sx3{a5O{t(1Y#9x!Bt% z2s??U>nxU+t(=0X)(hu5pK7c>k@TSLl>HNGf6E#_3cSS)cDAh$(z;#d zbre)H*&&@Ni;^*O(jHPFO?ALxW%_A!{*wQVlZmmLAZwNm?)>GvI2L!3Quv1vp0uxW z1;vr)YNP)#)k!0-cn`$^vyAb%8~_rt=id5{?eergfYaPN1B~#V{BOSyAy&v%eHvuU zMr6IBXN9I{>|T7+LwsmC)3SF(Eo7ofznc6Oi*6Dp?m-%#T9C&hkLHfy?qT|D|C3S2GL zcP_rAX#)PcAN6-peXjwcr|Z+O54Y}88&MQno_F^N{l)@Z#rFbVb@5S|J^P7x@j`$+ z6GvBSa2gnAM(zmD#;qkxwgA^tmii9@o`z}P|A}tlff?z_*OW*4Lddcoq6L9Nz-ixxIQb%|EMg9-H*=H#z2 z(=~ug;$*ifI0uY_hY<|hIH!1PBY`VzV`lc|7mDO}KL=e=+<*uHV_?1|i9F$P_{u+r z%vb&6+C28i02rUWaPjAu=>HPh|4W4y(~F9rfn_nONb}w$1E`S z;)V;xi2XVXm959EQAzINjpzf93CDS&%$RhOdv%8K?so##^Z4=qo@a2%G}i6gVd{1Q z$h%lolv0IVY8Io~WCudZKEC?6>G@B$F?BhD4BVVgD1vXoBl;D6%jfroc~lyL|BoIW zg0W?VeLeQ*x(+-V!fIv(KAQ9gyW>LXe;RXjiil|g`T(&zF^@`5=6b!+4hS}jhW2WEj?qp5vHc8Qp;au1hYcoYZtpXb%@b- zU}M;0Y|NqfrN82amur|Ml&IvreqJ+KyJ?200e`l=W@)RpfidK6gX;2glRV>kHw!a^ z-~NmmIc=B{gk6tNp%Iu&;DP#+o5@T9Y!%C&<`a75m^#T3(gW33v8@gs{G7TEEvo9Vw58^p 
zYmf^l-}lamTamzJ1?24qlUI9jwxsd=sd~ZqLyxx8Tc|u(np*eX`zNAXgtdCiRlYOROz|Ntiac@0e--S43SnD<|`x!5DXKW3ZJG^X(v? zNw2h1!K6@ret|C@)MQX-cQpBw306ZxYCatl01Og7CTuZq_b8P#%yKum3|`sNDzT8I z?l=*pSi{?SKx6w-HU2O=z`G(_7uH4&yS!iS_y{Uemn8n1Z4`)q=JM zZg-N89NQ0}J0q)z_0gdZfN^9>g_e{>$&@%K_?6+Zg5fgddsw_oycDpauuC~ilcR|N zZ>Sutwb1suE3H1odZCgI$xT!YrFI-8$^B`z^qcycyso?kwWN(!cKZ}H_@v|u;&jb;7!4m5S)7?{RB8ZWR^^fueR*!2eo8F4r9Jd8eHZ@H(uJp?F zCJ1Ict!_T|Zii5^Gg=|AfB2G#36-N<>CtR;EN|M>Hk%?_ z=x?lu)d-#}7lFna&;u;;bz;x9oT_x9rHPAMQ(;W)(k|R?G37eelL9Yo;#1TA)!i$4 zX=nNxhMyCI(u4M<2ORP5B6fH;a#QzOIQ^7DzIy1C(xnsn+=7d&-7q_pQocv^TI3B? z9uB3Eqo!jpQ*T^#RoOz&S{0;$lqmXVMtA@HMs-)ys3lHdXLOpn?n(A3T2TYF%pk88 ziY6&2e2U~7K|T5C(FPX?xEaRy*JiAO?lqLZyyZ-hO`eFcSc`E-|gE|v})N$ zgk{C6jPkUqT>t6fH$tKt%Dl5^5W!aH=S%aMG(3{txFg9j`T*e$TKD?H_+7MbR|qZ0 z&Ho_wlol$}XJUMQ$k~0fOTAk%YN2+FJCF_(&F$L|;C~=pG0tL|3V1nA+HJgf4My%W z%(Jdg>g}svb?mRge&@s}Z3%1%r&FS{E+T8E3R|{os+*G#TbWB#O zSY=R3MHF&7xO+{mizBL9%tCJSRUA=sPT)ZyR013*57tFh#)LApOX!e+8O`xyJc@v& zY-N(zE4#@`G_lXLA2oq;SM>TRp-HhdGy1*Ts`|2j)m;xxoRP1S#~(Y)eb3g2X;al% zi4e<7@ma{n$5)72yYbd}g?x#6*VB+qS2;PmlTX6AGwtZ}_+PrtK=>Ro{_*wUv7idH z*u`2*s=@78NxaDgr4>R-c+K!^EvuRIfbI|dhIi(z-={H839uA$F+Z~Tbr9Ds~Z_m#wrxCig=VaH#MbO*r(#U7Uw$kU1c^yjp?sCTCb4~6f5X< zN5Fpzn5)CTOeQ>g<~>bAEmr~ynr;kF7?-b{sPh7ivPrm^ei9qFNu@kDQr$ZNgcRHu zZH#Ie#!hs9k}DucmrS*2@@fMd<+a`_1)kCL!!SLXpWx+I9>n)2(*NbK1~;6=-mH|A zG+`U>2!DmJnpb+^giFvH(Ks_`QE;4A`Lhc3CUbabdj@1;6isvA$yR>PPGE{;#uEz_VhK`_`p{om2A^XNz;y-rsK6qyehVtHn)DrhD+=fin&$KJv@o#hrZKnZ4GpS+C{A~qU39+5Lt&ZCX(t+Pr%!Hm zwXPcTq;tU9;-48jnNm+Pag>{>j9CJ?Fp7}8KEFn$8DDJ`L|9p7`A-TV zJF?c@!XE=*Uy_m{`fyQVOE)-)9q zhzo|BO3zv`oFfsiP{A&3nq6@4%(Sd&pHSgdU6F4{j-sZO`jTqIZ@*f>_Su*uQA1V# z8Qa*06p63U2{iQgTyB$&EE(i4E%6u)!i!cf#Ia0GZ#1oz-nNk}QcrD-Dr(?8mgsoV zOj2QsfYM7ed$zU%4x|yLrLK?k7He#AsmAxcOlLuy96UAA{=QI(!@`X3-ft#V1hkpg zcY-jn`xP)nzF{?aD%sM$dTU)NRR~IdMQItLi)%mtZE5{j>9Y!-& zJ!OI!PJS3?OCs9kZ-wPE(;?gaSoU2R6W9Ph2i|v3h#XMw=U4Ci+82DuI&iR@+&5XP zHFxN-T;La5?7Y#Q3nVA5*qab1pVx5enJlqk%eU-UtDdnj3+MXmO_AypU)GL2!uOp- 
zI4j4m65QzEemy*+!D+lD601th7zLW)tP{&&W~)dBmseXtFs^edGOlTHttnHuF8IcV zJhf-a%>GtA5e|;^BwF_<*Xi<6Grk1{jP}bop`lFXMV_%^LS|2aT8V1@*-eXA#e-jr z2Oo1_Buu3_!16p+W*{$3IfLSC>J(0UkRBU#dIICKCr(`TS|hzH>rIy&4%=1PtX0K; zE!Tfl3rreRvHA8cVG*LJ>7)1hSJ|yT_eKf2oBfog1On#P_yNL&4#_v~Igd(rI407q z$Hb`Vz6z&IrWE4^x?^Aio1z%^F|5;P;1$6FTyTYrLdV8-?4(}>I@{Uq9WyQd;_E${ z?pve>vC-=QGddkGUWpo&RO|V3)(-$LV2;wR@TKK`s{kyG@g=b<*HB9N=3rz zsc@Qy&F?8FM0?dT{j*&~kIL+Hl|nMRugeChv?Bd2Qu?iE#fJ;V&cPUM%0(LPh$D;O zE_^Ox}G}XO9TVQC=4<0Hoo^raAyE+&NWgb-C8BB;#`A8HVDB@7g=dT17@~MPG^i3MjpUt=W?-J{`8yQ3rgV-ADAkd=$W6glmF zt#PvOi0vua9+zmmFfiR;^C42EBqzV|&5KloPh>_7vWl*r_w{rjFw7r2msFZnZ(4um z)TB#*B=YUAE&V>Op6g*W&ixHsnfI-0r4KBuCfP!UCm=*3zi@(@5(*{);%){4J#vTh z)IcPY=;M^gao5&zO?1?gLSW0MWA=$i%EQWg?{{bkZ(G`I3a$c%hX*}5-$AO*fWV7v zc^P;!(Im$No_S2SnD#4()%DyGw}wFu%XAv&0vqxrrd+Y~eEIK(>alV=#51 zSzp<;+WL$xtO8!Abe+XKZ_r#(UcV=kY*5W)a7r-0|UzB z^{hgvo5JwdO1QifXpI+f)8m-$^6Vjl@>8Yn&rM_*rsdiXd7Bhcu77R6vIHTwAfE>X z6`TD){dC_G8Fe}$nvxyfw%q2$0=9o?d!hxx(W%hEXR~+S0`DJd>Qh#yrd~WvNIkPx zTqolv0cRC0^hZByI4|0Y=IMdFrpkHDN-t^jiOG2tzWP0El_k#j9dGKMI|N+Gf%o5z zQrCO+(rC~j!b?g5>F~DWTOqLMU1zT3k9?8uaunm)Y((A3?kv|VY!@8m*ukZk@*6G2 zDGTzFlqz-ePt&}YZh5TfeV}kGmBB;q!X(E7qra2C`M-gEs@D)&Zw*s>zDMe07wymuR4?~N(r&FXWlKBan49)-**#U~Y76>=nLwc*^^9OU#Q<4GtQ zzB}mg&M8ULD;(=PJr}>d|Ds(z6@tp85vwO6vdkYP8}i;H*(+aa$DMAQ$wX2;*xo(U z@m_<)(}DdCm%s<7(PEg)XHcmaLn!38gly(i9A}_nHl6hDyI7e~qhW05B9<$bFaW;M z@}=JUvSpDD$^I|Yhrh3JUNb`f{%&=vf!d~_H?-fqflXkH+UVQA7nAPvmweS&$-SjrIg)*>j_yFq*hoi*$T!}csiUCC-Cfk#V4lk0A}TOFckwYYH8+^2XHe$|B=&W5~@imZ1fq&@UT_VQTMPm zeB}C5=HmdBxe(rTDpb7Nwrb>97Us5-Z@Yj9m4ouE!^TOaxSj|3hyCTIRqN$X%zIP# z^baBFrGK4iGd#RKb|*!>&DFcxvSx1ZnN<(l1)mj~6nO&jwljJZ%!S&x`6^?&egk3D zg(6ekI;iSXw0N_0W7G0Mi0byh*)6gx#(?dJ>q8f-9B$Se9~35$u(%WES?h*DJ8rE| z@%6MfZ#e9?KfQ$}uDxD`OtibTQt`zzKKx7uH*c2oR{IhH1z5Z7JrPPFsw*IO*fpN4 zsiC3z;%u)!TXy>h=dwsoUjddVmJSx`u~c(5+(nYtlWDnw=iZ^R34KYF@shtfF*8-S zjzAs%3zP95_(3tEZ@g5gm6g{5>Ko%ssysHd5+sUrTVmCzWSWfMKpGA4Podic?GcU~)hkxedo?e^DD2?6 za(?ATBpRfeaAoTY& 
z#{1Z-!1BxBzcX6nzv)w%WiHVJ-z@M4GbgXI1DmvFTH}d$1QFCIWZkVd&M=4EUlZeQ z(|z;SX({F-|6Y8xCA2pvkcgNkOZ7{Kx|KFJX*DL@>PjhbuW8mxsswOhcT3f4ZHJzZ zBLsHI6QscntZG;$0~$wYez5Bs%|o?H*bLN74_VWY6&O)$0pe&_wN471NA7=!@!JH2 zr6-2Oo{GKX2|_a2HM-fGwaY6iYjtd1SuppiO5K~iB`(Y4G*S#sc0wMPrFS-Ka;?Uh zzdHNN%F{VSI-lXVL;{}U&SDusj*Le>R%YTB;08RfBI622J8?>dbes_95w+wrocg*_ zPPV|Ro|`+VcJp1~_LwA+eQWEyW{Tw}LlS%;CJ4uCG~_(oTP+Q9EqgrS`(Ut0VK2CY z#K~mmTC*YG*J~NitFpsf0RU{d<~AY>q-ajT1KZ&|tm*^hy}%QV@j@( zsp=Xtr1_>y(&oxWWHGQGP~H)*`8m&VPHETGb`8p3MSz~}F-l+p7foAIgT*Y|m<2{m ziZlBWyYW-Nl7u~!%P#be^AiAK(2ZdrrEZDWkNSPMI%yv+_tQnMXVBY z1ep$>2O0|5CgA(3_UF@_KRdSbVtM=I`5rLXSjP^k?2{*|ddGGMuw!no7jdSZ3mM0m zL^#HUx+J zXps^nhHg-il2S21xCN1{mUZqwce}=d;f~=ktH_ zfAf7czK|!@v+i}TwXW;FR%!3!ZM!R^lhWm=I96a=T}zA2kg9J^qZCtp0tptw%T5mR z7H&36b~AGqp2ZaOtFu1$8b6wopXeJ>K6N;nobj+Vc$)erc0cnHK^Bzja$S*&4o-+2 z!Bj~Vejp?hw;OzVgCIT@CY;FniTnqY4f-RT^!7>(!Ht{!sz>D3W960nRl{K~B{Oz> z{8?KpOC>@~BcB8DD6${5zmQi|$JGzA{q6P&rFTwcjp^fepE1Azyy?hFC%0}Nh)2u- zv`Wsx%?&t{N$;1w9(~Q_BT|v3*tY3B5Q*=kdbHtnrQ|UvdTy_XhH*@{YEwrVEgvZ|9&V!C(h3V+hbK;8f+hwi6{O^VnJR;lDya_ zzre6ju~8L&PKY#+4~VAdJslkxSt?d>Ks$HOAV%MQYfMZqptk8~32Pg z`!3mM8+rTt{XEu&*Kf_elkbL&l*zl#cnqG?Wqs;he|~gW*vef3tHe%)%L7@s@e`QF znK-xEn}dDO=$NO_eo1f|a=#F5Wn53|H1IYXHW&puVoNQERGmtu?sy^D1d+Fo=K)AR z1V^C@6eLVyxEq9DYNZmt_!rLxWpS(a>_+28Lk92MN^moII=#JGSIY?iNOi^P!Zie! z$i%OZ`$yi>9L``)M9WXxXq$Lz`H-`yLlDF_NZ-2&utZN{Ljg$ltK2zud<1I>vOsXt zAu!ej*D_gOpE#JsY)T@5*;hc);U4BqpNs~${I9@zOs=UP50~(GOx4y%r!aKr8B<(8 zFoH#RY>!z^HpEo6oRuUgK@gisM7_jp8UcG+650hm!q9+Btf5|K$9b6vo@! 
zwTzQ|Y$&fR$O0Wnb)kPOUR_bTN^jW4jfAnhoBS3RHx_B=%~g76jB{PechO#AGfA-a zJ8`vt+5^1pFDg?asQBh%@|OJb1uF?ukXfJmuW?J=@Pu@X5GVvJfwsIOt$;miwJJY| zcR!W+0O3gkqI;+FUG;9`xe%hp!|I*d}F zIK78@YMC*fo1J&m)lQtrtfv>>dG8U(+3p-@_YJy)1K4`_(_k-=JyRUr%hj#rT}-Bn zjDPM(q2b#E8{(0K5w$U=Rz>cXYFyQkPvh)gs7iz1OK777(r@CB?HeftI|8eiPY8>r4Nb5W@Q1=u*5>E#x=nC;uTgqGgj-tnY;%ET6)>c zC3t_00sXP)kG&(K6<5?aAq^Df4W-POoa=av!UO^C_8alcW+XO%jKWH(%Xd-lFpk>F zOz(-Le0|UEs_V3&&%YoY0A5C;a?W%wFF6y@dPZp7W%@=YN%u%_WSEm@r4Qf;XT;7B zH-Hvgp}IpsFVf0wIc|UjQ2qfE4yb(sdYk{5rrOS=qN5m)_Yu$=mE;5=vW%Aas4BMmn)=qDq^J#=69bc1UaD!)r z2})4^PSHmh4fL}&dNS!S)>}SORSYBx!BVzYF+Usn=Xa}7CHC)&D%6>w<9-e z$(u!naMg@6!TocQZ{jfkwJlfc|Ll|1F?S-6@cqNZY6BkjI$)fymCgrK8G_|%`TI%y zF-0c+f3<~QcT%lK-^uLKQP-KCJgyv-+~W+skx$p=sDoSe{8#8)@W3Lqpx`lp$t#x( zWUF52UKXcd{i{VLkz@tt=*I@5hdQ_aMb{v91Y(*dB9ZCxrs4?tP;l~-ye-g+N9NvMAGEVZK23{cgMbM%EDq+`aM3jfkN zfaIK(`V%9(F~@m;D@4bFoZl69sPLC1afyrTXo&Aw=cqtMl?-0Sy#KVIDAuLJpD0rV zn@_^eNmskM;xf*_MQV;OY6l4tnz?i&0u1l00_g`(~Y@D%HIpr#qWE5L8d;YJ9`(?d9 zt@}GE^-qc>_>9ufe__)9i53brYF%>Q|4K#uhiCsQlM|%B|MzHr5_td5YQIsC|Nrr| zBwcU%_<|ZSkyJj@t*WAw_5#U4FBct6%EyE{c|ymE6!pD$Tt7+T338Z;9ND&L66?aF zjf&6JTQ`6T^&R}*4qN0goF)7ganD2M7`_Gka3NCn-W-O*jUg|W#A5;Mn%|D*N>CdP z*!nN<0(pXCsB#yCA8@q2iS+kN-ty%r`Ye(BQvx_%@`qsdo2Bu;o=yJ03;lny(S5ln zc6I$@VX~7z&=#wot2a^eT7LRi|JI)^7)V^O&>kJ@s&Yq819@Ym2vO_NvX34@Dr;r_ z6kOXu!RIwqh%d9R=^rZmc3uGFy%xpNTa+F@@|W>_5Z7N_W6Co@3H0lDdA!WhDs|-^ zTm$N@tdWvNEf={6k0pC&zBbZf3Tu(4f`2GrtEvO-?!G(4PC#3^tHO`Y_1b(HSJ@g+ zKjC)90CFWzb^UJH=;J$%TOQ0yrNHRC{OzoPMTlN2#~0Mxu?sij&60rw7k3!g3h`xH zr$66MR-9}IRKm=(l}{RZ9~l9O;BE2bEayas4bN{!N=p=g~LgE0YJS2 zsLR_NHud3E9!-TCAG1Lm8D7ar3lKtC$QupzIagaFU<8`v!*}lSnzZK=hOiduxA0c& zvg~N~ddb98e9v)n$UO6wAjg25VM%R7Ud(^;#R1ns`V(#co z?$l4sYa9SXJjD@{v}^T#60*`sXhteYvr1XEd2h3MLXUH%12wPL^XaK{MWqQeJzwd0 z_sdejb`G#|wa=%=t8)dq#+xlVZl1*svNl|#SSg6kc*l2kOZYTHq@N{s6-;(#S9c0? 
zMm+0tx*JcyEe>mf;R5r1xGhu*E~!~mA+V`d5}bJ&D5&3pTVrjk;f+8B+0noMY+k@* z4NsNbEKnq}L^t%jpp@O;^wf;&CRlw>0vjc6AL~d}p%4Bl!}S%n^G>Enmd~@xGx$Ll zes>qi-j$A=*Tj=%l6+N;?+Cx`pYFzsZpf8OUP!F)Fgi$Zy8C#fBv+t1@ZxPb{lUuw z5D}*p8eW}CGR<09fm2bC9G@|o@mxUu)6I|1ov8I|q`?iEB-YcU_frNn;)&Wh@n3V& zA-=g3uWbV9cC4iH4K3BN2I@H6jY)pJS}TG?11P*nVqz!Z+`E&|S?iafjuqoJ+O-Kg z3e>ug=0hM6n=}o58aOetBI3$-^S_uoq6nYm)j2`*GjCQE#FDTddPkS^b7_VcNe%&z zf`M~3V%DEYZN;E-afvgy5igohy9h{db8nS8EIxiVW6A`PQpr~q`E=9{!Y^xz*c*Hq zn&H~Wb!J~r5AT5z6n^(&gc-qbwvHrfoq!PXCe3GJzC>C2!#{dJ2i}F-*TXFNtCX`p zP-9_+@Vidx(okI@xQOAShMbAfayK{M>8q4crJhirSeRPkbVoE5FUaug|$hkKjm_n}QWMxg3b+l2=H(nUouwLvvCsbiTuFfo5DIl{gw z-`+>!G@r#pjq?y0Nlro%J|~c%IKyzMxe%%g3GHnpaAFz11ODkC^Zi=oN!6yDCBR@K z2(V~fafQ>De^P82?^cnF1PZh5*r&Bi_zlIwMi*EV*J#LeXGX;9;VR3(4jHqj383aq zw5*d(SO59eFR8|dCxcVLr?KEi>wD`BZLW`}r6S(-rI`{yl4m)IU~hIr^Vho-uASnq zf(tHY`P_ND;_}r>m$UOmX7;7p#!jCMooq3ZwYs+##2ZEX=LD+cY^K2@981lj*5L-r zSyBY?HGl8T&%CR{9{656e_PIsDG$Hz%r~3NmC+|mi?-2=diA#;Fl}@K-`R#v_3$FT zoS-b6JFFaPyA-ilsY<3ws_(sZ&NKmn#D7~-XirL|dtF9E_i4#3`jb?JHhN;qeuM+c zop{modeHGtp)zBY<*qSy87qBChwiK5>KT2NOs~#`m=X+wfe)rJ;7D&iw>`BklfG^A z{BARdvG3T(TJa1JU+HQx+${K45YOO72|2`=7bCOXEWHVa#F*DxyRAG`YA2ete1*}j zN`^4QrVzNRm6QsBV!a%ZqJrR@&&}G)oB^o^9e8Lqs__lR&-jN18-~1O1Mc67n?%2# z-Zif~t>ST8ZknZ&a3JC4;s-yvcT~KMSE7u(zeWU?Dq_19Hek6oCtdArIgpXk#J87|q~oBQg(BInD|aKGXuoCa_oc%4Ch)M|nlgktet#H+Pbi|mUm<1_X9L%A3Un3crdgM)lJxJA%nWpr1CO3&2?03 zvf8kc+d;*gZID}YyR|zS)oHVd^mRd^z;0vC!l~`X}9^gSJ(K?QOYFOr3;k ztfsa$QS8@S@oG1#Pc9hHhUm6fuIO$LXKRHLLKlxpbX0tDffAccQRn1h1s%KS$6)*e z95%S2rd)?w7f&_a&&s@mSaaySEML{bfWKXxCn(2vG16ZihE6AX%&-wu%2I{Zqpi8T zqPo5@yn3j7-qM>JdAU4pWvO;&qWNY2^ zTR+H>Hfab<|KoS$uDY_iR%@gcCxAF0echYhdY3;x=-ln|ZD7=iMbx>M_}$GJg@l_q z-<=^_4l|8IXR|J0eDuCJ@UTmsO9e|H)?mGqbL^?^=k;l#ECNZ(;K9Ow-a3Kjq$AwB zAU@iF3vVL)F_M5N){ZMtY|xGdjQa_@KDYg*L1k$5{@uS_led7*s_5O)bs4*$n)2hw~I}`H8--c~1yQ=F{e| zM9+;LW>5$|I5R@mxg(DgzbpT@E0Uc+5!7}MAKSa#nPgqYUSs}S58U9XYBg?;kZcSL zkF+kU*470zBU_+Vu<8EC1G-Ni0riM_^VWMkVv2@tPgC#ZizGI?^B55 
z+jTLi(1L4x{@?}%VqLb9nUT3bIpIt)A5?4A)T1_|aa z3H3XR%5q^RaLNWT`9EAE>1n~8FOMUWq)53h(s|@xjCVpz>A0p%@td<0V@Ge8c^xea zgN1TBjr|||E?gM_IDqq#KEEHJXa(5$) zZbBT(XDn2tB`S}8w?%#inC;;|tSmh$UB7(EBfj#gdbd@M`ra=wboyF99`v7Hkewj@ za56Y-x0y9;y`Ca$8zpJ1Gt?3pb}$tlR%j8EDslX@>TCb`Y3~sP=%K)z2#H|<)iO-G zF~|7kDPpm$ z{lY&unW1-o=7qG!O1XR6fh#wJ7TKF!w;svkCdn{v6O<;-!9To|{2wl$?;W~yw$WHI z=KRPkRCgb7cz{h2^H|YDeFe9cGphcFI0k`T6PdfxY>5^qSr(|mlls6bxl{Ec1pXIu z#UFh4zxpWs?zSB&hN07nAfDSUMK1Ds*m|4b|KiuWA?Pfv=6ka`Ui^eY+3LK_E>q-e zqfR<(-*)stPI0`VC;oFpgF#?0jCX%V=sqC{a`zo}K+0C5C@R15X!}Rl{6}lIf{Ynl z-u@qh9|gj*WJ<^7)0F-4^%#T3yF>3TAw+%{^B-InYzjd)gm4yYpu1v6chA1(ewfq2 z77O>|nUC@;!ch|c!!ivz&s|KZsP%OJ3(g%OQ++haOikFz!-$boGlPyOeLGa?ng zJswsaFKV~%tU~$UR+S6Yc>b7IbUZ5}B>$6wxY6Ia<9PlY-0C~gv;Jn@^*R+3`fux= z4gHsY?L+qwgz)1QCtuZzPrvpleA0F!r94GQYtmuCAYxKZ|NW1EGx4&x7*=L{CK=Pt zOMho)eo3lW!uR-3*MIjT8=I1{;!y+Vk%wv5H}*d}{N)N^xGI8CGDeDP{f)Bm9hBeC=Ogo;*}+E0`0 zSeQZi{3MTkc;fqSE5t@f7iu5)!1vbJAoGZ7G}GmucalKpbv*h4Poz0K&J|wy{}#-u zCO-JxdNNijN2sK;oF;po_-8ps1cU6o_t95`{kqZbo2!lUoL-00im{^QtDhwbnd`qE z>EHk7H@{ofl~P~J4tJ?kXv3U;eb)DWx9oO!k1ZwfZ`nKct&pO5ap1$;swO&Fx)&|&GI zfbv)WM5y+?@XssL|Mki_)p%ra1y4s0SRA0+TU4kDL*qKZw8-YzpRdaZzFoEOtj&)% zc%b~({ZrQmpHAF5aDV3s%hw3?%GAE64d6AdtCOrrns>JQ?B3Ut2#Ni6E@|GlKpo4dwWdfJm{vdUHc>1 zgRT1z!oQsE#Jg~OHEId^iKIWS3T9rs|Dz(Psw1P){c`mVfueU1m%P=E`zwF&%ei^L zv<~Xqmw(i^co#r1pZ4w?2@GGPf~j}{L(X~p+kp+QBO|dnd-qo&`hO4i_m)$Lh>BtX z=_rbF)|*P?k! ziM1+%Puk_KB~8zi5MqN+ELY|J0}AGc|K0tQbE(Gj)2| zvxF+2Zq)JE>Yc&9CZ=eSrqFtLJy>i*Zj(mna{q%=Wm=5L6V2zZ-N93JZaDuBs}PuH zrR$UbJhvs7@ac{naVin|l99na=!sjNUaKXG1}gQ6$MZ0Yf!KN5*I@B&wfxcV_5Oir zzQPSO-?Ui7IYC4|^JxK6 zm?pW>^T5?02RxsS3-Ayl01C{+gVBF^`}-JFC&nuMe{;FH=daIQ^} zXOJ-|H08N+?9+2Y=Gl={{Lk)S6Tqke+oi9KGH$vI`xW55WoM^3!N8_6x-jeF-W)xt z9|LGtVF>4PFytu4Qj;-&pGTzhb-%5vGXn%Iqr(4+y-RdCKiqz5)AMgq@Z30Q! z5intb#jNk$r5j1O4pHq=M@zHbFOTfU3f~dJ@o06zhSzpaQ-EL1Ih_1=KSH5X|4K$! 
zJRdp|ES(n5f)+xO-!)z=Pg$WugRW~F9U9DGYjcQzs`~FyQc|+GT=t|Fwl!3wodZBY z#PQtB{lmHoD)3Q5OXL1b4OXv%t>|6a@)mI4@UpTpxJc09A3 z%yJHVO+oZ(k#5T(2vbbJKSI?wou>h(1jbGYm=jl5(3j91P8Qu1L%N*1q}_MH86JYTW+X zr2-lhIF>LSdpysmFk!a@bgN-#HHCr9^~uIKa~bPGxAt|=C|WKy7Hq$_@YC0=c|v!0 zXTy3cD=h-tW#)2u-u9s0iTM|2K9z5u3|80!2%Cuf*&ku4ASM9B%~DyiX3LSMwyWh& za={7h{Ky;PTaff8ttm}M(eX`3t!2h-OsRGz&GBM`8Y-c?gC;XEt#IB(Rb$3I&#;{j z_{z8b=oosxrH|~zGa%)fDJgMt84)=Gr*mOOCZ@c;EMqpcBwwPSstZs9h_0Nc@Y&d0(0*+1grk&g1xb2Vejnb*&p1B@#0o5RaH-kn)8Zh;7r*A`#N&O zfu~{RKF?E`2hK}hAK1@*2s<2cLWnf~GDGArZskJS1L=3zVb!xCcQ$frk#1#k-ULhr zEfMgNdCC1N(th-G9)xQ&msTrE2ir>zT?fB8X3p+kv0T?@8?_WvB_q^`M!)&H7|&lN zC-%CLyp=GLWwQbti;f=R%E#dX6PMcU32r@#bH_57ks z82bWznbjLJqgTl(a``8Ra>z=!x(2H5nIu?JVIrNtaKSkjuHvcl!i-(tY+kYV1XA(8 zvtqnOahPHdBrMY>Lq+>k4*9yj-nGd}PfpO)pWN@4O}cW711}ofJwdfpg7kX{a(I#> z%N1ee6F;()>_7`D@shd-i8XH*&iDh$?o@^QQ(n|JgR}E0$7_f`32ju8Om|}LwAh7+ z@z^5bS4PCM@i(k0wT>&pvsaJ&i8)J}RIPwx5E{+Y98HyIJZ*}WJS5Na`d(Dod;zr? zX6!V~rKB-(gUre0u|)i~&#bfG>fyk(Y%=N_PDO5-RAU30D`rQpHeJjE+;KP&6)i!# z%h^$M<%CUn9)xZ!G`;}|Oil&MbA$93|J<@ad$VsSXit7RMC8KKG6fmYaj7;W;nfr6a*#<#xJp1Rk?Hlhw}W!n!$fav zhA}Hlfq|=UNVy$-uhyQHQcF55UJ6fzd?0ctB*nw?#*@Eu>){is6lw=$@vWXY3AiEL z$P^?m#_&AeL<%pitV`-2L&wkB1tuW04B@^x4=-dDfVsc-RmGowztc@UQU*k+XtRF= zc%{ocYn0?Jqq&K0?W%;ZP2PjlL?+ALF9W$-i{JwZhr9J_Aq?&r2dS40Xr#ym_sIK; z<+*w#{md@~!n(5(TrPJV?$oMA1sdV58bM2zmM*wIw{PV@l)Rt&ocs~CA*r~E+SxOI zyHCK&LfLp(`cP*}Ua{;&Jj`Ox>(1Wk@i=C4!y^LoW=uBOR>Y|ne}?zZ?1Vd? 
z4T8B`av161I{P_DRnn}om6<9BA@Ri7{vSpQmuU2CwmTrhlo#S=5!m;6{(l4uZP(gHxLi0*}=#k4#7wha>*1L#=OE<)} zETqOzSKfCU;g+4=%n#D;RU3Av2&J{h@EErgbsi+P?>$=3ag5yc@;FM?GHAE^o|YPi z5!>CMZphH5_mtRb!5cK2)lx)nqd%F8XOk_&dt}Ai?hQTwzA?RFZnYU9`kvP`n+n&o znRv7jQBVA5O>-{>vc}*?ZwB<2P2AjC)^k>K7qfE5@QGr`-T;KPf7G3t#3n3ND|p>W%m3_QzHARUl4XWi3U2N1;1sIvqL6Yr(PgO!x2*-{F?UShAQ2{TI_3&QMMk9%!$86tXla621-Y+&p@I~d zi%hV6rQ9T8How!#Fg*8Hw9NIc6Ayj-@uRc^bxwk?*@L%i;xo`@%WfH-t0=>U0LllN zc_~vmp2cZC9@lt{_11Ko<6$4BG?ilI);UY%9#8R}2;2*3;?;DnIrBg*Re5s@X5GbI zzWYS$k*)ib-EEyG|5}#YvxyJvH#=tzV!Yb;XFReOzco|w$$fnG$iOs@@yMY2@DQ!q z*v68PbWYf5+n?)jb*D)HA*|Ieuz3KvT~5puO3YP!)p)FpV?|)OCOGA2E-iQhX*zkr zWQCkv?*oh(Y;%SISslRco*!F zp7{M+nVIxO=Czm0wP!fJNZo&ZXx*Asy3EoqX#VI6KAq6Q1v7`ET4RY*zV73Cjx8|z zl6LhiL+pYZJomY3r9XGosuG;pNqiGN>}o=d5gE&t!mzU6ARy_OE z;^mJDf9USe#`H}2+xzF76O$SRjvH;_L}iuSWONSeV!HMxUHu)z1A?k@OvjO24a0gE zu?0d@gUmn-QWKw`8;+2-tFbPT#?JxBh3<9FD)K_JQa|wzU6T zMH#g`$i$dEwaKEOWbA zZZL(MF$uR8pm#bSSDbsvLvr#IEe~26agXq6k%AvJl$zWkb5Vmfm`t>(zLwi#a_LJU;11 z>yjU^zi+PMu{ozdm~0+5YUTPoVGq`&MkU`bOLR^oeAv!2LAaVipsY)E4Byhyesu2O zH}lC51dKk$!c3w1toYt&ykzVpDaB_!!zie+;Lf>Q;;iEPQ4pI_WmKJ}AMZ)SCiq)p zYI=Ra;h`bBav`n-24eb(0^Y{Y7co0kHCvr{)?f?@Acuicd(7}k<#U5$0Oh!n8EmH! 
z%=(zB(B^`)>}@f8;W&f4`j%|r6PG45Q}tnb6eZ!Z1Zk~Y;DpEi^Sr>1YBT#_LVH%d zz|W%L(Vfx$dfbXeu0>~w1H!h>Q$Zm8ep(I--~a24Rt4SC%+nM@g-Wi%n_pREZGh3smh8Dq+tPq&iWgO(tkh z>d|{n0DcyluL@20qgVfPm73MBDVvBAE&4pke3m;=vUmw$`|hG9NS{SlXW~AAno)mY z%6`c-sBK{copD4Q?$4}$HdsQWL=h78_#J1=!Ege;AOu#4FJ7$WT47vKZ_Bd0 z$k1_3O3On6>F+J?Fr!*$?!9ga_5D1DnXEmtMHsL@z*JSUI!EHzAG0y?ATN>78T}JE z0?qXy(iws8iPv1;$9wDsQc>0}WJUY#R){X?_F)~i_0pj1dKc~5d()M8IN+x+DB%-S zNAv!dr5C3PEZC&Mc+BF<)IZCZK637&LC`PkJjBV4RpPxJu|*g1r}S${f?^1X`EKa% zSQRa;TqDRzAflK*yCpEmF6wJWSb}5$A&)3uw6pb#9~J7*vS2=6IDPar!hj}Tz;t$2 zn;#85cw=d3+vh+F>xVY<4mR}ML|)+Jh~srl{hD+(z{U&{*eMn8NOkk@w>={r-*cu{ zu#M=nul}%Ozfb4E5T1b&|K>SSJ4gKU_ZaZ;=?rHH-pl7jb*MXyFfB}cLG12RJtgonqpJ$B9foD5`?L*w&-X_zkSe8&td>HjK0*B ze~crU+j;yL77SfXjVigQc)t2aGux8oU^Y=H+iDLxCYwrRaRzFD{D9ioP>|Qity4u@ zKMq#~q~gO&vb{J|rdJww^-A3r(z!WM(ix~89!^;!B}{g&!XG5ur-D!&6hF*-%0GnO zZ)fdR@)*G_5gL6?jy8QMEHL}bwR9H#Zw6?%M-HKv`Ig{Pc9UAyP|J6>u1gkH3*FYl zy;2Quar6!G7N9MSn+)k#m>(r58p4|-AnNzU7+mA?O}&ySHbcnRim;=ZvDS5AVrnZ% zJ-A>y*xMf$f%!%X@(`aNa1aqex6s<)OU3y1ifE!TgYC#uzL}9x7n$$>j@h7`tM_&6 z5mgUMum-*4_6_6Z{sLU*)h1eL*yCOmvTTppDm$1-r# zxk;alTf_IQuT8)RmQEhR`Mqh^s(r27?-7}w@e*)*F}QNnE{iD9yG@|@=#>SmYNXUD zT6E=`)J%KAA{+XAb4Qgd#5E-?|7oU__sG3qX(oMr*@kH#pvhV zCC>W-_=#p+j0|M1#5@9%}UES5AHSM>&)1@FG!=@DA99r&rQ4t z=a`-i%!y#c-qXo2>Cf?=HxO2^w>rc0PFf|el*8YIY}YB}Xg@mDQ|vFmS7be2IOqog zAWZ-|-bn8?yg%6}xpLe+Z%Aja_!J5u0X0a)c-pEr*XBKwIihfJqaTmz6JAeo=Gz7% z%CFs4i|WU|9L*Huj37GB^inSQUM?>xLE_ul=xK+_8Y?XBK7&2Nt4)^N-81R_n%p{h zD&g*N*2?#+t4H-R*WciEgNEKC@y_4hZh*reO$)v0;#;?J1=m6|FF3f8P))NboD`|3 zqe~+3q<9@V%XnkM{fbL;7TGVT0lTQS4<}?FiA*vd5gmxE?&0jMZ;9A(XtgQk3EzU-!Sv5jNc#9Zh_6fKBv_V2ifk zuOh_(bw)5-{85yp_PYj#80sCJgOV=8BrfxjhIcY!7xc+K@$b;SV?+@}dD%uivpr6p z_Yfg<6E1XL9=<))&L8vqBmurIA%5|*AmKx%;^mb>1W#O*joDOV@p_@CxY4a&8Lk-t zF=wZWs*^Z-6(u`GqACE4 z$+h7wgt@xw{)#w}%x&Py?1!fXaF-hJpr>4nj&pRygXhJm$jc#SR5#+T2nKNy|512S zH(ddQYstNeQcLlwt!zt%SuGd4Vw-K|{=3WF3#Yy#OJ?05aDo7YfR`S=&%4IBkA!g6 z;SxZ8TMwp9)G^{;Qy_?$<*L|!=Hf7iL)bB&=VNE2kUL+Me_F-rFs-#PrU?^D?ILkF 
zjUm78E)n)>xzK-PbYBcvWTBpE==-ZPMPhi)6!f&?2`sV!Na92GFoxT}mr_5|Ps{I= zukYf-cRikGJ=b_qg|m~0^c^M@?H`0^e~8b#RhZceN$UFa|o{pQNpT``o@l=`(%vs9i$uuA0j|FR1Ce!$OeTQ(mj0)qoc zX+`FeK8Cm1c^Aw(gjS$C2q&3WTw0z#5Jup|04afY=nN0X_X z1g4v%)L7Dz(A8g1_)Zg?r@qX(d$tV|Vad@sTg{K{Uv>Fgq`FWQ0HL-`41RDKDqkzs zQVZTHqZn<$=7B0AwH19(X2MTOT6^t$N)-cfdq}Z*cUGYqaIdymZvaYp>$9oSUpTk~ zjH}2QC2ln7OM=MFWJb*1`W=nGeVs~|{aNrP||lT*R6fDHXBhL3#m@2e(ijF6G$IKhR=MGxWi4u$j-~ zF1HFRctM8n^7zaqxdERv%wo3dws*G#?bYk@NAs-5t^VfHU1Ry9-JfZ5G~mCD?uDB0 z9`x%dcAK7n6}Z?R=OC3g>aN=LRcmN*AD3+yM0-R10H@GUH0IipU_Qv_nx7WF z+bNs-yp2jFA+sKZXCjRV&NTbukll#3nHb8CM z$5C|W&tmRl628^Y1|3Fa@twJKSc9N|Am6v7ugwvD2Y&Pp8ScB2^qqTe1*#H}`_hi+ z@;!lU-Zzz>uFhQR1iW^Wojd}h22U#?0S#I6a23&PUu93vtr-oisME(5PS>d%U zAfq_|m_DV+p%K-iUes==+&C>xo~PIUbbI?p(K#Yd3gD4*XP3S_O9LHC*rer zFaEie0q(4B#zbmsNqI*`T9ap|8rQ8u zR~`2$!q*)eY15m(rH#3?+@PY>(Ri+ylUH^I2zixb+TpInmP625UDxu*nLH_Yfh$T< zzVMCBG0*TCgP_o)Cd?8Xtz=zp)SiIrkm}s6pDPbrE{e{m@JOC*x9*WUyV%2BUNxhM znW1D1@#=kqSRA1;^)6rAeNzS|#f?FgAk+e@1x>h%&0}kHZGgzu(_Fc67b6ku8G3;` zx7caKD5w=KQEoUkgO&89tBnvzX5O$WX%@QxD;(eyJ9KU?U2hmLhYDm&ZAlz{o7&>6 z94_nUmE6zy*gjMc2IQfcQA0C`@qEccAg-fxWkR?v@14m>@j{{g6sNJR{mkjWip012 z3WPfm-KZ#(G1r@kZuQC*fFZ>hRd;yO@$03`y1(`Q3j$$52pt~&Zl!t%FbkxBFTKH= z#)#D@nZ@xB)?>Sr#f9_BT`2}mlN( z-kYY59b?Eui(StqrL^>odk<-xj5E?y>3)oDxnUg?K<8(tS32#nJk=+Pc82f-y}+00 zYr)aaUu|3Yrh`}!&*?EgeS##%!`4@_l=q@_(TJnQ7QV$#XuseRub)k@xW20)F%@6P z%e}2+G!$Xxnpff^h?Xn!@%q+8wJh;uTY)PQZU$)#fD&!KpBA*3b5TMH`_2bifx#`@ z#AY`h-@#U^Eov8KGDniwe0e{qL4J|WDgGkD=628f6A-kY{A-30&r-(chzlPlwHiwwvbn&z;h;PAurT6P(7my( zVM}dY>BWY)Qt(F{r?eFe4W3vU19x_+ryGdi9C}m+2LgzS63m>aG1X6W=tS6)&a35E``(W~+faos2}OJ#m0&+GSi9N^S|>9EkFuuv3E=7}l!BqI9wUu< z&TNO|CddTynf>ohP_6Q%yMR1uJP)Kv$O*i+Wc2Df8H+v@*J%J+;KAHZl%TW9_XGIB zJ_f-<1brG#;xQtk5<_`7R#LD#Qg*a1akP*Q7jX_6Zk}4xZnQBr5d~-U+9>1!Qhm-t zylmBu&Vz58@-5UdUUG4(U!lFb*sd8<=yF(2;2Q4_)a#9Pq(6X&{|21vvz#@zD(O{? 
zI{nw`#?;${bH6`A$4P-WFOvioa2NYr1saFrXWH>6n&3fg)A9O0r@IPWJ#Z+~ZuAYbMI2j@&qIYYGyph`7nAcV>vyAg>LL2`a#+n0s zu;fXN(f3a%RvQDjDmNp9$NecG@_jgN9({OwjNL`zUHCqgQ0 zK2SCU6t5YJcw<}}>C9(H7foszMA&hlHR~28s0uj8+>C73&w~gqEk+1syn283t%<8k z%X$32Kqr(WYefqEu^X+~#hn(>#@^|xUsEB_-RJh7H;}K7q}{afAvT#(r2d0M9k_Se z+WQg*vz1d0hVB0Q_WjQT*nK$-G^lms?Er=bXVPAo)U7DpbXRIX!L19QLtGIf4!Sj8 zD^VH-`rAY=2nRUlmrz!YgGLl=?Cd>$Os{{SB?aUr*HU?Vx~gL|-e~8&v}D`(9?nB#vAK4kWB68J% z%Lk8OIH%JAVOLa=T|R%xF#uC0DFAmVgQxprXgr4BreG;%564?wQi8zoL;m39LY2+6 z_%V`&VNaDMO$gGw!IKHwu{8lQVF_+MxntEAHG8eIfqbzNE${tiv_JHWqyI z9RjxrEF?uLrrQ9nad$L0(Ti@x??V$b3}W}r5~3%Q{lzT^=4!=FTNU7Zv(MO2Ue?+s znv;9BG3@S)x-D~fZM2x&PE>bhoJ&H6gol1mvvo%m@{A){al?933c_u*oEaTAOJESg zjQt9vzxT!I-OuG!CUyJ_ztMz48L0{CPv}57Ojn4FcY^H~Tm!*0=sUHk!m^viUt zHA*Tob$3chg`S}@f*&o81{WxQr7tg^J0*3M9%^Qo*;ctdWU9h_o=A_4anrUMV(p5v zWc&O?BF(roQHWw_5$DkALoG?0u2oFu+7zZkAT@GGS5**`O7&Z59qvGcZURxjc{(H3|9;4V*t%hBR=1x!+nHJo6Mqd`nJqkmDLZ$=Z& zO7Bt*PC^-}PB|gTIY&+C^XlMvo-o(X>#pr(dVU#Cbog=H9t^R!N>2RR>Pr1?CX@IF zLG2gf?_=Gw2+A()oS_T8F7=ZF`l`j$`|HW`bf%jp!7?AmvU)KBJdWWhy?SIQIh8{Q z2Nh1&JJk7ZBJO)@c9ayUWY_+Rp;N;K@ruZ@NMw9#Olmp_%r%g^2%Cal-H1N|c;Yd! zg1>SS3R-5YgCsc`DiXUNi^l+AFfo8mdswF zbe7~ib4i|C$(=zAv2ry&BG93?Q{r*z_+jJ6C!tR*`a0|XAA9c^*3{Ouje;l^nhilf*eYV7OYbU*A|N7Cq(h`b z=q(U}BA^rzQHoSSKx*h6f*`$wng9VpZy^L}0TRer=yq@S`#kRN{c*1AoFCu6?X`7f ztvSaSbCmnOr|jCxtBoX`%LbVBE}2K3I}4ZGY*2h&f|VVYqlD|p29~cb9Oirf&KqFg zqdTmzwNK^Y*>h5}8yDspM6AY0Vm2`@iL$za2^>gn5iI*Y5TN?7FW z0x{Y_>I(3~Kykt0j7nLJU(<(HD%gG%a-C0qdI?Qij`|oBM~__Q)_stWxEmO2&uYS5 z?kpJog42){T{Y}3ieWVi1EqHwpU4#qhih59T;!wZHk?Mt_QbMp7T>c60r}Fveaf zZKU$2V>1A^6}-z3Q2A#{KfcQ(t;?9>_Ruunp?W2GIcWySzF(ZXx>8mq3E7~6OR zMTRRD!m9n8bKwK&oC)Mj0U62`^M3v{bcRTBUqryyH%j6wqTn_r1pq0M#9? 
zzu6=aEM5zusKwV@4dpZ#S=n;Y3MD`jn)D}t`IVVIgOHNyBr9L%s|W0ciNEFyz>)~% zhdJ1B($I7Btc1s|=4Qwj3a7pnxy(`*e?dibRxTcD1g)U-bYT=$Nm1tj=Lj&02k%jxrYp zT_fwR$$=h_W2bJ+$Sx$Q)5v7A;Yby6{}coOHpu{i@&{#{6-u-p2P*4sD#QRn6zDHS~|$qaDp0I27yAO2oYeUH&Au`J&Xe2PV!cMkt{zKTrg7- z;8EOdj^<|h4;7z5zKPBF1>?UNC%0j!sw+x0*3 z=7$;bYf`8m$l8qu0epmi`Sn*^K*|(MEBP(|85clF_%!+V7ybt*7oe>3vWPXI~!$;zL{DyGq&k93j|Y+G&oB@NZnAZ}bibEmqY1^bpA5G8)2$pUbb zgwAK)0{-=_d#i6wd2RN*Y*aT?BL9}z`(mG1cI%piXx*28(5rrB|Ng)AcPLX+L#gH+ zIu7jw82{$GfG?j2^5SzITUMxh>Isjhn=iR^VrA_2xYf^oq~`fls7S*>|KF*L=8@Ea zHzAPzi@h5O;@4N!qGt${z4iX9>UoeIdZ~=5FRP8jBuHC*a$~m6hcy||CEt{c2iKl^ zze1FaCSzpHU(m+9X=7w?6#A7W_BUnz9RscBZs0~Qezert2|e74PdJ=LysLh;=!oJD zCiqa|&dePzc9V?5<>;VMYd_`FzyiLF42>1Jl;Q2sS~JDiYK=82H1>Vc3v{yk*MKO? z`VU&%UjjO9v@|1xzC`B@5lgnBs%d1v^{I^3LZ6xI3$kSS1)+6D_1)E!YKHuV_3cQj zKYu3#Xhyvu2Qq|N^M^gBW#HZ=5#*Zfj{^O;Ux{mfOMPVC29$AJYCo?DuP#V{o{n^Q zOjloKl#u%p?J9G#mSX=pp7QlXp~a|;rpM?`NiQ*S?o&@+2D_r$jAlk_HrK|>ckowI zbHort1PNQN+yx~&W)cTKI_qF@PrKlKpvCccIVCwh7FX$Ot88~aVBwyNkHe{rYTVyf z%jW49=X@)&Y)w8|^RxkIup`WEzlqc-@a<`HvN{&MI+RE0Lc`w=<>3$+*bV2-BFlCz zURbnOh(#P$7FCkj59+V&EW6f* zBtYhMmV$S13hT4E!W&9HQa3)x!t$;*$U)p)?`}JEDhRvDC&@g>^Ull5>?6MqTpiT+ z0F+p$0Z&o>y0h`fadYSb#{Tj?|Ng8ojNh79=l+hmg7R#+Q@VsMz~2B`x0hU2pAx>M z-T19VzPF+wf9W{UbY!mzlD54)+>C5{QNh(_uAazei^>ykT8K7R_)s~Dg|^tC36EsV zj=_bSm@z`|u5DjKaV)FV65?9`-E>qmdkhvyaQ);{wSu#f zJsl|QDe$2zq}HtLpszhyWPeGowo?giOELV7QJzNR}3nHweM=PM?Et%`DV|9vX9?nb+vN*G=@s9OcF=v!_X>&l4o6=#@Pd zB^Aq@_+JLNBrwrivE{CfI=FPOFb%>Kr=`KZnZD}=qIGGCG^+Pj^8scuNV^C^+Z0-&BrX@Nv{MNx9r!cIjaJfcQk;EeeVOt|HpO`6pks5up?5Bbf4VV0{ z8f-Nq9}x=_Rm10z2#U$;fT8Pwd}jAeqYB{dFAt|!v;((<5O_9A&JA|hQHpHL`&)dk z&q0frYBGgh4FaV3cc$!0ZVSeUJI1Hbew7c9>!pvPr~@Z%pM@AD;0VH5HA85NI(`dw z_nlD>5Q~DksAoHD#;t-o?az)FMA{|&cxI1{b0csvDJuBJYC_kri%pkn4J@n&Jms}4_5C~@6 zu`N{_+WS5Fp?Q&GS3t`KSM#*hoVh&fG}Av)GyX=B;C?ai&bA8W0m3!&318*uzcl8X z$2^D;U@a*yW{}`Hrf4(T4BK5}vFOSdnMN;sA(JsoW{W0Vs=TGb9K*uiI5a#2)#u`u zU}Q$m+th7)c%2qRDWkK5>?P)YR-9~Xn3hX_WIdDLV{Dl`orLu>|Cm(bl*ha3L8h`}WQ(!zKZ 
zLMI%=%yV?jn2X&#dLb8Txp+f_!n_d(>%qQDEz?bE2orgCHXS`A0bB5};DQL6dS>hi zeX#>2W|MU7{bZ^KhV2}VW?XjF2YdrGFVSYQm;jv*a5Fl!mMq-Cj!3m+wQwJwgU8r+ zXf+{Ut;t_+KL2{>!>B6B$!uMWgND8+3x3pOtVn*0A&+_^kKmO!P5RW|lG z-gdpND}JU&WGr|&PSl}#$YUrnBgUSdU&$dqGW1wO+zH-h9|5C+?tYxqtWy!5{*?fo z0!?Ve@RerUsP|O#F8&%0EuZ9jprUo=-@W+FZm-G(ndK`BbmZvyb~YhD+VTUxNd|zy zo9^-v#bEC+95_&#vWGS+;!@DrBkB}$&c+!HE?9v+HTlZb#|SmQJAd{RL`egV4pRRB zRWq*CjoPiby+;NBmha@m3Tra`D<4FGxZUdwjOb&PZt*R-&^jF@&X zE2-nUmEm4{XB47x4Qyt6&6e{jmch+&_AA&AFtu~s8Mn@c%9{Bh*c;*tnAU6)Tuc+5 zE-CWZ*NX6; z5-^tJzI4VwrG}RvR_}Hl_CJq?Rz;xjdQ?4^_{j0gp|`UjyES;NUhr$_wj$%-Cnmr# zKrjD6tnKNU+8W}_es+@HeDYPKYmG++y~ncbq%UZErKlJp6rqbdqnKGh z{}7>`D1l?ZL0I)dF9p1DIy=ia6#i{TK_Hf0(*Y&eHyuml6oYSX<$J3li)6gcborl0 zr%liU0@IA~2+?kR5&}m-cT~9LakZwK@QtEHx2wWCowin~FfnVPz^v98O*7xBRBAQ1 z)8oHf>$`%QuATKH^Y3^{w6uhAI_7AS=euQ8)vwpOI-+kPWde<-Ho7l{ZcU7MGC<`4 zG}FE7Z^aywPZNqGh4TV3;>`+y+ec?hZ7&1JTD#4Qrl=;d0$hJI?QajMcl@-Hu{V(= z%gk<$QBN*BRoP$C_)ipa(R7K``9|+0Q~DgmZ?&ZR4>BGUI@!1Oa{x?x|9{F;{30FZ zFXGX|*C%4gri<1^U;fGfWFPLD56kkV$GqWBYqrxrA;Zp{pQaLt|27l-767a#UjNs} z9aZKNt$Q)ZaMFmGP>G{JJc)x1d;#wO-jy9>?wqo5af?ew>F@IufVz0kY(Jh!wZw?2 z08%yY$-H?l&#ux316}h~cCokd;8AYqm@Klu{N`_yp1&UA)v`6q4s@p#3xO-#>4SjY zzCJ+m6lwU>bNr4O-@ApWw&%Z1f&rlLo3H!ZwfY8~6FWWXQ)H>o0!SKNm8AG6lSmHg zPoDh#ya0$#`$`%|l{r@h5eCkL>b@c@wF;N{#xj1$_{{&grgVVjI z_lyisldESt!!|HX@h6vHU*M^L$8Lt`R-7y7p%f7?-$ZCta9l5ua>(S2air^V2lKa9MR7q8=pMUhN)|?kZc_Z44|de28`9; z7dM8;HUEoW{{8yri64Ww2~F&jGKU7*)*p5(hNA(!`tcwF4Z{BNA575R1!sT~9$M2Kq0VGt* zw`MNEvN1@uPnA@Wmjh#tx;V^@WQ>AmC(hD$4@lgPX-V zTl-Fs@o> zpxY+Ww*8;a1L)Dz(4k-{gPBJ1_FAVmpy+J0_U`>1paz@)Bp6er*&MRu>Z8dao+2fG&)IwzF16aCR11xL3O;#wY}lxu3o5>( z!VQqIkiP$BAS+50t8Khtru`&p)uVy8DWy_Ft>FrLS2WXAg#!VdUTieL(>1EynxsQI zjRQL(t{9SXf-WF+uZcwqUTI8>iM3y5OB2IORb2E-%x z@ZO)AFn={5f@wWdWxstrr9o48DRR-VQw-g5H6#AQJ`j@wy50j^A4a-^rCWvwr4Ile z+EfV!#C-vzZ402sdW}~*=OyY%iOu^F&#AE4Ek!1|{{GxYX~DuB4-#Cuq?*U=n7HH( z+_6v(7cjlk7@&gODTLqKCEdlobuo#$9_E#zHaOrp%#Ih0yGcO@*gC`NYxD?<#CxmqJ5u5 
z++BO+|7tLlJra^tZ=|n*NnvEYsXl%YLdA2OwpTdb7L^mhn^#^R^N_o@z82Ws!Tx;> z4mH7Rn*cTf3EaC%YT{0@8OVAswG1d=ha}=w+XbV6Kex8M=QSU#bZe5t!YI4xK4aBh z={d@gJhLAPn9yE(+jGt9CJI~Q{!RUpK7+M@AW2y40E})6u$bwkB4m`I$Ea4iy;0@V zcNK_zYCW4fd4D(3Zu-+Ey1X&j&o{_VJ3d?rxEya1c-Qhl{QOlxy6pGy=8+j}IS`=Ha`TPkb!j)waRsK!4 zrNN#U#nGY=m*g>k$bHG7W+Q6*oW%)RE&q~0C*TqyEO-{Zh<5W&9Ie?L=B*q}xkKl& z3;`~n$Dwl5^vl2&dBXGpfpV#gZ>>HuZi2i4J!}h7!@bl_=<+5W6SoT}y1yDCtV=i4 z;cO3=gP{rBtoyN@3E+F5PQdANZ2NRM3_f(0c73Cks-&zm4WmLG>xK1-};WnvYo<47%%l8KE; z@T$L;KJ!5OkWg{CX2yf-ru7GlpgqKCL^rj46W;HTjs@rIxYQ zR4<$M#Y}}t0~2+5mK#h(Bp=3@c5=^Mu2z6BJt-A<5KzK;!)-Py4RQr~Oi|TQ{Anwn zA^`!-un%TKzTU0mEfB%U{QEg6B_BTg{HHe7Us~2#9ki@RKe=P;_Fr;k7$9`>Hi505VRv zzC#WST%6EBwm3}AEeBu;mAn8Fj0Tnw`Q*ck!$20+s-`#X(#IFRZ)RdoUuKLj?R$4C ze&h3_|Il%LYz_!I+7c#>011<47DoK4M>?(+-fXy1@0r4R^5I9$w<@N-j)bwAG(e=B zXS=_V#h9bWr#CIy$R_aqlmT>SU06A@6bR^6rNCWB1419>CmQvt07avhtw-?m!cD^n z&zcC}S}c3+&Rfu@c1pVvW1fnO{h6RV_Ko-#T0;03mZj>hn}^RmeXA!0e}%hcv9PZ+ zBT_sh+}R=>hB((+E~LQ9!v>!)N__SG<1(osfdCHdMsF0~w|DQqr$W<(kjK{l={JS! zCVpOpXhV+#4yUgtc$AraFZ78(ToR;#y9|Vh9BV#K*nR1xNW-&=Q3+|UIl8?^@(gAA z)`CsDba?;~gdMjCS-M$PH=&Vj;Cq9+1OvY{_NoHcZ@flv0Sf%WHJ`($9$NN))<_?# z51{KT5JI$Agzd~5DxJ+chY=bd6ky=~>TGm)&TBT-@)JN&m@RoI4_UWDZbUkMPap0X z=ilfxxac8zh=J|i>$cqSAG!4sXWCEUpkLH>S5Q6aYV1Q^a|s{_ws4zub;*^tZ{;|}*(E`p_xOXPtpmxlu?Diu+o~E)Rh-d*Y z+-gWiqaus9eVLJVkNeC3sk+~FB~z$354)Jbb`~ygJTk5P1S{t+tdXJs8SZ=lyvui7SH>Na6(0Jv|+J>QP$I^E%k;=TuK zPld;{1*(<`W6b>~sZ|btDLyy@`Sdb3hfg>QV1;4lc7AT(! 
z|7i}7dZmq*8!KWNri zBH8%S%E^QGwEoDsV3xAUN1tvud3ZcAsC4`2T)Uls5$Pn05G}yhJ$RlGp^3zW1n$V# zL{*C}!MoRyvTKhA0^3pa#JD?lFJS3(mmueMlaAKCY;^6`cc}PS<6RX&!p|vrEH4Eb zIzBU&FI{JsvFN!2!ss8Z8&Wm9V8yO)az_6n^1`#++DD1o#PHL81G&1?I+QaBO5xPn zbRX@-?BF(;#ktXVE&lXU=8~GM8B}1b=(RtFy8Dfo`|Uh!;m1P7Z7Vo~iha@PZIXY; zqJVja@8sYj?@TQ0Xx;V;s?e7&b>oiJ`KDwCak)=f#C!=$uosuu3Seo&9jyzKI2uQ+ zGokA+<6VEkoj;({CPF#sw|118^~8w+ab~|}zJeDN{V(V;+UVyiZ@x9K9{E4~lT>Qm zMeVzwGe(0IS2Nb9V9k6L+|=b?cudzgS^u###s?hb&r@BzZ~>LhQ1^&Pi^7MGJdLby zN{~L`$8L90obc?+nV3fH`J#I!+3vO`u{!P#YCpWUl}dsF zG|M-as3cI_c4IXD{6ZnQ-Ift`+QwIrWEHBe%Lh$v-~KASg>eAiMka#i!z7x}CFlQr zq4AD&RM?X_b<&fcQ%CZ7;jT1@J?UD*u>V-(!m70hf1mBvfN0K{c1=p4xpOx3Dz^IG z&!1)6-L@o>hHXi8Y;Gg(G%HUmvKi`ec|TwdN`U#Q_z6fsX5R10uRG|bZ){5Gj{f=q zRPmz%7{(&3f-rhsT6R;`?)AZOpIcN)w{PckPFN=4N2VjIOrwpLY4=wA*?nhY!5t|f zl@X++sgcu~<=86sFu9Bm7l{}_BkQow|Lx(4I(rrf!E5oJC+8j@Hqe7>`BH!HI}B@e zQ6fvfY!!59yeB(vk;3VWg?6c3f5pC9M^iBOBnV&{R8O*L;P5W_mA_gnwPY1qo|qcv z6eYYVle%;=;cio+e`n-h7Q9W`!BuI<5Da`d80r97K3qiviNNJ;)Im)%GB{)rqGP1; zI?hQ6A-~CyWAm#GQ00Z&yZOtnrcf8)3%ve*)1V}uLUxJWb#S*gp+r`k+Vd%K8Qi~# zX-?GXL{;7l9|46AmJa^&9n0lQQ49cSTdBFUarXYty&C4&!k2+Np3_wpxFGSg3VWV+ zs5sXrg1}21B2^O^Nu1tBV){6M>pr*n4^Q*MrB>eb$m(T)5EK2)FlH`)87iJ}XA2|& z*`9%RYf)*>4|H@-rN?!JJ$ZV9>n+|TD+}CRgf!B4c21&C)fEQL_{Tby`woM<`;d|- z!vRXhp!=Y~PgfrFWp)i3aS3Oy8MXnhf3U4QA?IEB4C`|WlgDo_F&{x%X2;NC#DDc_ zDzrL=0kS4{wNLi1ubs83y4kl|vHK6d9N#&MrTClaBkHIB_EG zQ{_#ha1av$WEmT@WrM4YX13u@oaa^+Z41o3M8oIq{$g}7K~z4ZY=tTb#M$Rk8{5b|UE*~f zs=3G%+Hr#KSymZKp_)>k0^Q-MR|^9kA^gUCrJJlE$eO1w_piauMD;BdeV89rXm6C@ z85M6?;To@;agaD(xM_X%8~NTp7BYsLnw5>3nl%l0Sk$Jc8DvfOOS9cDD0HE7YCGcB z`0gjHd_YL_zyYEA`n9@z_-|j=XA7Q(4;44F_Qf@?UHr%PgWeuAu{wCrACxQj2Fo8hbg_frs&K#i)4$d90VL z!rOc=Ci>DkzS3JscMb^gC{qP0-JnwXxl3cR+AG3w|G!^YnYNpRnz_I+M}KyJO8~xy ztzCvG?6Cr_2nS5 zy~y7za#yqAz&Zcl8xJIx~`eEgV+_YhM(`E`zZvmve9s|Bh<-Fa$TaTG}7Lzniprax!h- zE|n6wNC3zOXQlmHY!N0U9qoKi~%>%-!#c_iHTwnw{hdD(;DakMdMm-hv4Xt)u$ zLRV+zyayJq1h1`h__7PF*%%poV&SCdP}RBX03;lDygMGLR5PBFF~)rk=THJ2JCuQ)KnqOnjbmWYiv`(O*t42FXo@9qTH&UDQ 
zx_3XMTttTJsr@DZVj-V{3o7Na<1!4iU&HqEUdWL7XtyogP0^vUHSCGAGMD_} zJtATT-^5oM!@f&^kpw;*YiNLMID?{|G%0wD6R~C0L4vi8p!>MyF6nHh6N<}~?PaLS zRvs5cPFQs{JmN6te3L-FAA8Y!$}&++!Y$Q(q(=dLcPs^Dz_`?fsA}Uie(W zVj=+QVOnL71P3x5f){9r}vWe)_rDvwmNKGcr|Md_w+GzE?MxTxUPx7NNIFI}x+ zN?=+nl!K(bkB@V{FX2As)ow^BNpjLuo52=A+~(ugNtJsL%8uS9DUf{!wlO9u;kS$M zc(ET^*}Z)G|Cw(6auA>z&J(fN-si5v*39h#W@)IA&x_aa=;&K%*P2shJU01td*-eb z165IdRCAhNc5~L?U79pK+l>z&Z^_$K;mNo4KWd48zLA^E&dJMN{_3)OLtb@ZwJNg5 zyMO>2Jj7C54d_d3HQT6d-kccozPtoKeY~GdZY9ovN49JorkMu&dOqm!5}b2C=-59# zWX$4nn&&P)U$N0SECM~JEXNgS=u^N{yLj$nlrBG4dem@!IH0Lbd~>9|-~XnmcFl$Q zfIk9sDI?i%u$R%*j>VFoBoCu)uO0Hv+D5z!dFn*axmlu!Bl)22;<4O(xvf~sHso3I zQV~B=>f6`VLSZ`oX{@4GTt?F7GhP0Mj{#2zwjI4HBqJ8ns@MJdW}Mm;z8Uk|u=3DU zOTpD7~d9kZTZ+@RtK-{XMK(|2=n^DS=vU9ylHG;`EfHO=sgkfd|C+_ieM z`N^RV7hlU3nqxB(7rAh?^*zw+8PD}&yv5aBV(71lTv~=XC8Ogk#&%vzfB<9x{3alg z()(O?)OBX=N;NN^t*p~vm&ZbA$2*zHK@bQI)bWwW22%EqT_vp*Hw+(?b7{*nOh+dq zQ_niJeZVG5xcFj~2XSg@!1m^7cnK49wm7WoFlB3Nir3iI2j}Ekz49{k^=kXN7iR@h zes9glx66;ZcJ74{FJB)d!NaCNdz{fQyCnR0jv|~X?9?XV34dp#+N`(YZOe*H5tD40 zjjyMgpF7>P0y3;Jjk&wt|IC*EPV%q-(4^2uqU#ziji>M1`SF?PT~HVcs{9`2PUVOe z#3$OUq<%*`ZGO*pcc5(V^@xK4MK`zMyvgA17BmdL!c?{X2$8@*1=`yX#2(Vi?VU-^ zMaTH=1cS5L9L_S79AogUHh+&^hXZ`t_k=3OKaGMy%ro5ed@?G2rP6>O&SuPRn123% z>2)#ars&bAN?Z`qv%E!gJ&;{b9*~CCk~L0J5Bpd~_(HV067WPJK6yQsfz-pgIbV^z zyRAGv8{4cdn_)_==uO-N&7{_F^)u^z6jyZ=+j@0bPhiKcA}xu0ZPux|aZK}H*W*i) z6jhSjHUpTT_}YoM2@@$5@LX@5gWy;7pfhXOYn)szi`&bCeS)F(Xgm9Dl;R;dtre(w z?ddCc$hLR1K&@FWxCOH0Z-4dSFkbAamgv)K^tYQ=?z^4Xc)$4##I2Zr4v>3i#$=|U zktm=a&rce&FB`OvaYdkPCZIr9pp*~LnvcRag`+uqo6gSIlLwVMm)K(m^s*Y&BM%PM zX)@uzbxaP`fY;|5iwH0leU^Lw*e+KhuDaiO*8V1?!Bn}iCdVh~$j<*C~6i(?I4IJYz%u8glVJqfIN zD_iEP-1PYCsI06+x(Oc*7H9x+K7iHGueW3FgMmsobKY*nsY=CY`Lf}=tV|sJl17I4 z$HF(*YDpVg&5E4f9fZ3q4JpXJ+xm+eClcOeUo2^s%vP`2c#o?d9?rw!lC<~`yAdoS zoQF9V#0Txz(dLo-qp!U=#`7w7CkEI2k+7{)nTZ?~L$tkzv#ou8Ca$vc_41a8pwljP zo2S(NaYEeA5&<@SbU6%uxEZCApw8}40nNts4S!xsmHV|OJ2-xy3o-#M+j!tzjzG#s z%YRNIY+oJiZFh5@Gjy}H!|(~z_9I3GNE{sciFfxakG&3RPdr2A4k4~F%Al!c&wOLI 
zQ-q>W?e4{IG*T6=@A}I8S`>eNx5DVHkUic~`=XK=eZjjNPPbL)y4^8qpO>NzBsXo@ zpq?tEE@fp$m7#NWeUkg_jf-<0M=%+RiOFlUKX%gL)pVUaceaTmNdNTJ@z=Zo)CxDd zw{8fnqx7LF(MB?|>SdBU(4x+3n==m7nh=H(_W*MnWo=2ctra;Pi#_L2{CGu$#P5KH z8@Ia*oXq{h_!#k6hmCn3<47?7evD6RpZZ7JipvIvlc#Ms9w#6kuPtVRN1z-SXwU|tmG3RNt}8sfWH^BvJnW+oq>Vo0iVk9z9=3~EoiZvL;qo3$7C5P9Ae}zE*~?CNl^YHB6|U=) z!yed6$u^8`REp;U{MB66issLzC$;DEXaiVuT;=*D`36eod?mgTF7qwEee_Fn{G+`S zBU8z#W=Q4iT07T(P61@<_tJJeDtPnR-K5j{f_J$@dhJo|FBzflZ z#bnul7Y`DyMJy}$w15W)!mF#_zq~APW(u~|% zs`8AwR>H57I}gpZkP>~{5v2!|PSE7@1oq8ck;awN^TRje zkCbuu9<;HM8jwmurpout)MdAh+&}*$2{Z2^CNBZ0wc;yinqG|Wp6d^91cqqlXV$;T zv5U&1kzeI~xqJ;)w?d?cjpp>q??iS@(^>`uPLXl@>E zxzE9W_HL<0QfhCc$GYng!(jr;i9T!X!ESKqH-)mc4~ArxW;YK7PH!;V(YinTil-{1 zW70m-+2Z{NCgJuwFmG7eVa;6rGdfQ3L4s(Z|xfV^enYv%SMIW zRL;nJd^l)yQv<3f!`8#^nvZ|(KkE*yAwE_I{_*uTcKD8nANAnH&7=fy++@4so`zj~zk$XaabgRKy7vZ(c)MW5X z*#>kB@A389p32Leu`Sunk!Xe8#tqr23)n25X=Tf8UZ;Tr`AvHBcl8YOiE?B~ zghLhF;@1+KcKLsw@59d=x^AEmf`XjOm}w(=0)rw9dVEM`^bjc2Eg zV|nu?U+*BLe7Do}3FG1zuqCCe$=5pJn&M{7LY39_d-N5LxIW_0@?nk%|7S*;Boxuu zmfsdX@S{z)zq91^DvF!t1F;hQRV?pUlybsdX;=#Asv;H2p=#3@U*GRTC$I!767%`^ zlA4u0Tdv8vW@7B}u4V7$q{i`qRTP`8DC1VVk;Hp%DXX!g_HZYJyMCtjG$kJ09v)tX zE5+I?Pz>->!&RJXFoM@gPXYe9jaMRg=PcP1R3#9$-y5@1_UgX&vpMD%QiPN$Lkl>l z7;bB!KYPs@cjs23AbZW~xh*|y83(>q)A3EiJavqULMR3O%;nY@akUqyu%0j&5q(x> zJ2=O8T})h^B>%a&wE%5j{3CR}@^OfKm?=5_`4n$y0rggoj{N3?%j^?Nmmp4$5AaV{ zau&UBGkdAgf5MA~DzXsvu6K&ovS-vkkGk7TWm^y2C-dBXUCelINypfpEED2viO!A4Fh}p1_-Ao0{r*MV>-RAS}HoxTBh_ObJnvmVcz9)ndo+;mP&pi z`+tg4o>>(ary^8hP4}eG z*tOe@wW6uTQFpzwAL9<#U;ffd3>P?{WV!fQ<=LiN7qbMZ+2$$=B*R`Z=3kp_=l3A> zM+exVV2pYz7{j@a$wX_rR?L#RS3@AgOkK_c`Fp6RF{SLqT{pwnQBsOjO?t)6lDTYo z83e~FgGBcpG2f*;`MyVPj{@{}c^+5Y+aAnLDxZ;tYmXUG>md%05~V&3P@VH-7d@)2 zo4(dYF?xp@!lyw@=RI!83E(S5_j}X7j8LgPU&0-;2JyPVr6X1$;MxI2e(s)`8p6UH zF--t@tklL$Lq*w@{m{(S^!O|ezlQKKd|Qf1UUu*1c39N+{`bg9zYX2dR-a2xzooUc z+fhYjl@CG_yoNS>joGT0qXtg;4T$L>`)q^EvqUrRNR8Z5CiOhMx*2u<5%cv8$AWfD z_TNjQdsAy_3GY?hag3Mr?G=DZ7NrUj{@MfX9!5X#J~vI_p#t#(5l`Fj4?~xlFi^X{ 
zpwD_FS6S7>EM_}IbZ4pU0W7Z3_(&n9J&tpp;E?c1m|$aw+B}w{?~3D;k?2p=*w76a z&d(vJaP5rUWV;R`p6iz?oXoOqjLHz+@0s&oWzuW>b391})-a65NL%Ob6l9A>-Fv1R z^pL-SO(F;39q(*pC$pO$NabUN)ZBGvBW5^xYG@5Tj#ea*mF|C!Xo<)DW*Dmogy;ZN zmJPM8h)q^V(em170{4b^kISbf{y@CV(YnQwl>6_S-fTBJ!0^os+0FL{l?+EjdtQMf z0oV(Z*2@%lgwjvXs-ek$91XSiZdXKZH7_#e-uEr3lD$on-$a}33!deOk*b@gByx-}?|TDxeP99}M`k_@fH`96kH#_cNP>W%T79U#d*5wEs+(yr)e( zF4qiBJ(lypSBp)IRy}C#dnK2V54cmUZZS}OaLzZgK54KOSNY*c=2X55kx5YVo97=F z*$-3YMCoZgJuU54z~`e7aAtgy)+Dp;V*$u!Vjw^Ue_8r$((}Ac5F#~2i5UIYLfk!Q z()lR6tx7;>p+LT9(O$zIHDW~-waCK;k(%F@R%nnuAcRT*Vs?L{GH@vE!o=Jqq>+kZ zZeJcF2gnQ%A9pzWu8UqaDJwvSMSGy(G9QN0a*8Z3N_kOpJj=XRzN(qJ0IJMDu+GJ~ zd+*O@zHY@F&Ncljtb87d)mqPG=t6559l{*f8`yZ79&4G)DtI}yuRi-g_8Df>!Jv8szlD+p~)9Wv3~iL23e#3jUx=y zUn?rCt*dQIg@Rt%7zt4!J(Q5F@?DtGS$%(=f$&lw`omoEFyGc+huPo>YTYbSIuA}G z&6s(WS^J4dX;?9}E=$pdG<2tCvg&;x%0&4`3<=vwc&HvPBxU+gWF0s8y0!}M@|?xU zBZER8Bz2*5$V;74o?Btz`r7KP;_f+~s^0Dx8wAbbq0A=kOd<$vYgZ^RI6PzB@n6=% zsbc42UyeIlGrM61Z$p-6R*3OD%Bk&;&W>PMWS4zK@EQ8DM-M$t$r;)1s*%IZ)&r+A zjD2!p;3IX5!6|uN2|D^!X7K?Qktp{KE3MWG-)Ci#oPKEn$<_&>v?frUtEVw|vFK-| za{0}$w)V#wj{>+W4NItUZX^fS9C#+MwNP*33bm5UCqB}0U@3WQo02@gD*9u7WqC&C z8kOxM342(-vLy*7<`h4w1Q;IH<`B~%D>*^wic8e2wmqCv#fxPoUjkUd_?ZxwDntD2 z2eeW4X@?vg==EUWAvVr=5N+c2lj_#st4^sKkY^Rq2X?SR<>ett7U)9Z4t$6W*us=ju+Q98dKbMg!^CU9?RU@ z{3BBJk??kCeNmpMZIopvYvw6q5O)fH-6~L?UsZ>sl0GlA-^rE9`h>cE33~TOh{@yh zQ`E7_4W}JH&e+eaNp!EC-l-yg(ps#~nW>yJ8`Q>%>7m>(3FJeP{Ml#x(kz@&rN@fh z<71ivtW{%biSA>hKNQ$&^&0y()G(NkAnPAfJS1NrvRtotiOhOE6vIHRo1|E!_AqHQ z-?+pe0CQ&X)}s>T(N6%fui^BQVeCSq7(kSV_7MkUo zRhhAvq6coSazI155C&ngmCP}V$vfw|y;5gBEb)s@&Uf+Xxi>)7flksxIi8ZvNfkl; z0t1AG9;dH=;fMWG{TJ}f_5(9S9h~U9qI<^1K~}dY@YW6UMKZFq|H`nfZocn^bIGjrE2h;aONz2{XmSGk+g$XKSt2oKjZ}Pzc)ZbmnS*aoF(rgNo&rcWBY) zqmKt@?v-6C#I2@V#G!;@ZECLe>EX#%rOJ=f1%;gIqH57n?7wDPEsIJ4_0xdnfa1I%s7%rTb?`LM+(Fn2i z#=Q1Ow&7Gv(2t^u@1!5aZy#TAJgzZS;Ouw)&~_CW|eN~SyMM{-lr3~uXTa8W;K|-0+-jNCS{&;z&h=aq%o=}0EYyB9>@8x zmh5hJsmXgStvLYy1MBSDuYO>K>p0NpVK)g80Qs%uo&#iK8@MX@Hy0-?=0%Gh%w}D) 
z8lH-0IZG3-&I&-X^-5_JC?jE#%aRiAQBQatud3O`TzkJ6WAfdGYX_JH~>bu28X$ zp#r9dtjv4yw)>;gfn)M>il!y|bMwXRp4*Y1RlScle9!bk1gIskrDyz7IlVNv%D&#m zy=y*n%I{;%cL+oYV+BE;2sENQ4kT#q&;C%4_U$`re;i^frNCzYH?AZETywxNT(c{? z{2Np(Ty`O6aBcO|GJ3P6ovSfQqEGLDqfqFnF`J~s0=Mr@b);?p4|EcjlQ}O}g_Vtr z<(78Qf-#1h&MXGrK}pp8;J5)z!Rlc|l;PMt`|VN-*2}Ww7pKf{`Ch|QLwroKJKP=} znRC%xwmic%Oj7Au>hhFh3LoE$s*BjGA4*#L$T|!f;iK5i-A(;vPSKOALnRN?giu^6 zn7($u|F`kn791`oC^)v)LI++%kQ={uzAG(;6R8XienJ)YSfU7taO_S$YBV&#XH~rWTa1G7Me#=x}=b-nfGXh3AIR5(m zcc|HW@P4;~bR)NxNuFZlwN1yhJMBIQG<3j2UD-{E(E!fGVt^BAF{F10R?(H+SUOP2 z>)_qeDNL;Pjk6^{_)678{>QJrSx^2^ha0=bo$wP}9&O&8t2U~=el{voWx|@paH^|j zH7eNNjj3pqNwVG^19kAIABwI3dK;HY74UN}iUiy+gwiC$kO#qg=RI0(dp3Hn#11_x zziafZePhNM+viY`K$LGmz~q4DjvaBBh2mmq;GBSQ3a2590|}Vr(uOg4>$1nBR5WQh z0;u102!Lp1%L(4rAODFx}KU;1w>qZWru)CP+UiO{xoMwvZVicf%=oc14MuDEmL zLg3ID%PraC*;4^6Z=Z358t>$jtGyE;4V|mRp;6=X*e0F(lyAt!U6)O{oD5@FLyh`Q z^Umta+8{;m#KmZZwXrf^L(egwd*5zfw3g`0?&f3w6ode1QFTH4Nl}Qt?~IsFbjtwB z2wSt$NQDTiFRN`ykMpH1Thzl<%xLS4I;0W}!jG%|sGi1YLHHrH|bu>;h$$wms4c0 z<+&ci4V{Av4!%huNa;AlJ!0?WjqS10KXbz(Rn0Fq=zvwtG3%AYvp`XMSxmF#--CJwG(SewuD5d6mS|5oFki0%S800H=((0CFzhk4bHebN z3TNPJHn!2x{3T1l@>F;AJ+(~xYsIW@@KfuCXS+Qcb6msUe?Pe3V=lp`8B*M2Hc!cJ zwz-2^TlQMqSQWQ4a<4Jg4o98;UN5a2+&GuIO;vvxeXdEmv99iu_Q%c)VI z6R!Xl=nt@OXsm_7%Y$Y1I#fpX?k0{4CNGGkQg2l-)q1RTP?#iup0iWcvPTRyqv;Z$ zfOxt#rC$%{dgccbtmsK;*>kvyeJ+Fe1}5GW3?JOFi1~pHo3d!#?h)qcdApjd^&R0` zJCBl~$_}XS+T#e-zA|91`@U-XjKSddvCV#mDmq}p=BAddT?ty_lBTIH|Cb%oIOstp zF#cY@^l=$!O_Q(ZemTz08Eub-f(SRM4fuO@^r;PznwK`WnPBVfMiiTe+k~7SRJea3s_Zl! 
zQuTV1_qEN;p_G#9i`Q=bK*kO6>1U54Mb~l)T7}Vs$`Ux26?AR@ovDJ%g#7ErqI(ZX zd+t}F~a$mp|(HzTME4M^Z zv((CzHnq(?3qe#$L~+5<%o!=s6i`uc8B-xeO+-QAC2gLWXP)=G&->5sch2v3hCevm z=WzJm-;3+M?(67_P{jdX4{(HIPMZnb{TXZMoQiT15Ah=XYBba&Mh&wV{xUFGw@w%HryFxPI_BDV ztis0&emE+utZ_PZdOp2k>Yk7MrEh$|e-E(7H&mLJdS2!klWV$g~B#;eLsC$s!AyIes^47kr#i?76BKtbwgPW zar;+UfDSX^ZSLuL0YvzttrxmiN39+dA)&*)err=3gV3r?W=YS|#CYBpdpkx!f$N23;UD z1A~cC2K@txdyJPygk!v*CC6Yig6Ut8FyfV|KYSPX2yVxCS_J8nH(=2ypWxjabj~bD zz*}(0o6$U(>Rm~~2GNtmvUUtYL130De#J!kd`}d8lF46qcy$t;iwOJu_Kv)tDN7HD zhR&1{8t2COUrWYF@Dr!-?RB|_WrnO_k=FU~VXb^y*FpH9jlNLV?$$H%@;Cr9`RPsa z^xl8|W5K0EGEsaPtP8h%;t1-(HzMY@#WoDmsLEW;##h7jhZkjG@d(&*6Oy_@vOm7m zgvnOG(x+~^uDc6Sin0uOq6}H^pEz;mZZ#M~9UXrN#MmHk{RnI~X=nq4Bt%kk*e^C0| z^1ABV=CkwN7xqGMH1x>U!p;5(`Wx!s2_VmMV{_#VzOVuuNJ5y|hQ_>$di`fJxObwj zLu?|e9w>qR)Ad1B&^+)OcomX8RXSt?t)#*2d@s@U$={ft1~whY%%iunAIV_XFk4Fk zb6pMgY~_jc+-muCU~%er{*_DNq|Z&o4s%H5%K-16jBIO> zLDgdtZ55qr9{_D$y`V2Q3UfloXklJ%g?~FViyHdigqk_a0A7|<##ORQR#vB!Oeqye z8Ye)&DhfHgvEqEbpriZh;DL@tLfP*-P)PV!=2OpJTT*wmHTW+%(Los#)bH*9m-fxT z;os0bqP!GyCy3w&p4Xx(SQDm^3U**Yv&AuX`&T}~qDPMY(xYjLN9gD0p1B5+>ImsW zdvNqDR=aS&2ky|?&$n}lFc?4?6L{d`5 z*2r9G_-80!$~GF0KHw~$WG_@qO$n_2b%p%-48W6Z>OH#Vr=#eYHp$t07NjxX{SWF> z%+Y3catI+&QmI^SH35fJVJ~od9FakQmJ{XLTU$C!^({a@#|~TOpn9hZ0nC`-@J=fK zU{|$lFYzC}uUOhR1P%1N^P&eh5vm%&w;sT+PiE^zGf?c3lBu5J=JMdGZ;bD-?B|;5 z{@*G4DU5u}7$m&{UTcUa1C9CewYcWRe2UMcP~$g7djzv-LDh8I2K@ibnubM6vhOvF z{XcTdyZ(8>|41CSl=K~i|L@q~0Obk!vEf*W^ow(2Or-0ZU66WL|Q7;KY*{j=cm5)J~23@ z1oh#}x=V@t>GAM)G-`|7^a_%dIP>1L?QM0eIGvzfeIEg<{k~dVON9G_W&Lf2ibO)V zBxP8lI3Q#&|3gBB*#voHkixu$<^1?>c8Ha466{%_VABMUTy0?2m#GlcQsdEW48o(xe zHX9&o8Uq-O#h)b)+593-7%>&F5ixP%U@!S5Gx6L*meq8`5@h3$St+s+4x1#5D2lzV zd?*G8Nsv8z3-Q!e;FB0~z}WN`vr)nS;{$sfGObmFsbWskVVO7(&qt;Ce59 z>Bhwd0i!%;!=|k>*!P7GShS_tpL4hZ`tlru;?L>l6juzgf}}FxGhic#d{a=C%V}E5 zjfpvKqJXBSz&ZrfTv_3SlNPhbxCzVEuJ6oh0j$LSl|`}wj^6SK6EV~o9DRk8S6QRb zG(Lz(<|^Y_cWKkd-nd~hBn6q-Li(vW;-EigPSW04u)3XnbFJTWq%c~fzDwGIb`)_E 
zt%YsJ07Y%gn(z=|=4-5Mf<~x=RCZ!d`C#HttfkNLgb}BRS_8A#*4*eer?8AXxjLxs zKqoeKpm`eW0E^J(Kc=Uh48^FpN(UegBnx@{)sed3O*g#qi0#;@0j;r6nxQB^!6R~a zZkktaZBXp9T)o~V!YRfV(4YohSMU^FdGXSHvT719v1=}r;`8SG(ys)KPdRDaM;+E5 zrYN~kz5ZT3dQ4~bsF>oFhr1)3%+d6(xcz~5*9f`LX2lB7jGxUEV>_jh8!9F~#VPbn z8J$}{BSE~R*9ySjT)<(SyYdro<`Y&-Vk1_pA68O4&dUCA4$sOk&+sg1hNtYA(tdP9goLZP+CX|Ceus$DZ@spaU(?CLWn2yQ-LNeUlAlBvIDp+n!WOzHO@G-D^Q=mPV;xUOKY7T_yy#79Bkxzt zSbN0*>XsdxGWh%Trl(S;dSqXW^|ws}!acjRRDg@JG`PC>X$h~+yT<9z5z4qG=2(DO zFSImC6L=h5b*P68(_#G9GuE#BrY^s(v>uA&@O26Hr6J}&ps02fw`&XHw2yHgM}4b! zIva-^pEw8JYOG*wT83;p>d@FlsIMQ-5H1CvEm8x5u5S%^$uFV(XadQx5QVp@0LfwcsTT;ihioL^$GQ^Q_wu+A*+ei=`HH=f`6y{WGeP1p77Y zWfZ)O1~bVHPZSVgFHieCmC!{I>GRLW$>e!PfyUms z3?yt}u(>6U${o?j34tVti74d`X{h%$lOIkf9TJGZ(|+Qo&p_m=#LykODreVU{&}^L zA8t`5Nv%zPb&-M)ET9>*x2EWOLC^DmTZ3Ai57HDcK&ouDz{{*1{eoRfAqgMBK6%}? zIb%=GtO+w9eMT{Nyd!E1#_5r4IrHw<_f)}4Ep5rBOM;fG1=}R-(yB?ErzeQ-hu<0a z1o%VsSgj673UYdjKlOx$liwLK2y?@8tLr)vf5~gbAMEJ*L-)yQ{X!FHLCZe(lTtQd zS-d?K053aIPLr%sfVYlIiesBK*DatR0Qj);7}JUHZs`q%g44Z;qF>i-4t(NS5UB-2W@PweL1AqpmSgMPJLLzQiVY%@Q4 zbCV!!TYle>g5lerK5#)_)asrFg;^xE z)dq^aQAqtqRVMZPHHB(tP2y3E9L#-TiI7j+oj)<=1yepa|2(i{uX}}5nQSgF7M|<) zvLE8M*m9D66%_!r_bw_rZ0;5Ye)l=yXdUtNi?AmO_1Dt$#u@pI9fT80v;yZ=Xud+| ztMZSYn#3QgFHEQxs&rM_4s!4kUs}BH#^oWY$M*=l;}X=ajV*|9aDYW!pA0PcycBKR zJ>L#IPd$&iSKfpPtMc2u{xZ&ZqFsGiaa#&=rq7X^-(pkf4)BV+;zTy5+(hWHh|At9 zrZ-u4Di24-(QjV$>${;hIGHo~1=BX{?PzmxzM=PM6I=ma-@U|fi#JP^!xDRUArf>B zNc%#iHj^~j4aIV=hp+7HP?T@#I@@d>%$0ZWp4LoD_Yj1YE{LLhCXz4qx>X5J_$SG& zA%kma^x3@imB44{U)ZL}=zG9~@UleEk4^x6>D*3LhSNIt)L$RELR1y7B&vx?nOndw zB5^U=sA%x}3vN(ju#?-Z?r|s3hRDOGobN_tLZKJnc3n60`&|@Hjc2mm05WUps^0hEaxtT@XPQULoGKysx zmuzv_mvS{Qj2sQ^z|>ulCGt&Af3Zduk4oGuSZbT{tdBx2lxbB6BcES)Egpb(^av)u zkWy=Rdw92$2MXAcYir}@&Iz?tlq9rUI+qGnUQYCe#UVqYEL}PEnO%J`y!r9BX9;Q} zni$^k5I|U32?7oF_}L>-NTKf?7luaareq_cwou0pdn?_})0Exj*PCR1%XHL`JHN~R z_?GO{GZW_Sn`r`X4sNRsfvBjM9cq>^r`}*~vaMxPvMQ}WW#~wB-zG_k7S|&!53O=l zcrxRmaK;|6rR8!)r`N&$8u+@q96W;3?PSS06D^TkNL-FB7IS=$qL_ulg-FtiOC_l~?zt 
zoxlyaa!tEn68#!z;1_!$X{oQM&Ecn_J9WTGzkvrp(D0oY6e@TtE@39%vAP?lD#N^j zXcqj$vu)lzYy60I2x=&;9SQ7`5K*>g=y(6*7;P+hc<-hm$D>_$bKzxjQp3%sYdkew z_gj$15PAcjyqJ+$rmC5)ne2oZNWQ;>#kl)r+kyrT7#Fo?E7$r>o)sOlEZMl)os=t% z_|)AeXf7Q`{qZ?y0yl5*mynbrxAj<37DVfMU zl4{~^`6hNrG3)GL%4sAo(NUXN9XC%EE+4~}X)9skd5l87raJSx0|MtEItL}ZHnGcG zN_Xd9mWZ=irE2gC?FH3t)HG!w+T@t+{0?xInjJ*By6b(HL|@E_s&n|gA`mpDMbtDR zV_nlxgE_8Jmnd%!;5Ye7!-SrNSvrOh;YVBtih+-A^13_>i`n8}zB2HpXvN~)J{c+z zplfb{4@l>^Wx>Foz_kezC8o-Nv?8EVpm+ZI39JKj>U-(ej34ngCyO55t-fOk9&-mk zd%m2`^2@-`ISk=hZDZ7!ZAB?=q|jMHc2&yVr;WaVmS;%lmO+rte-3ap?iG$~;{7Q*U_>oc1DCXOnB z0q2Xx-FCXD^>EVCL0yF>3&)f3*z(@cQa%8_-;%m7vukR8m%{Y{k(I@U%a0d0t2chYc)(olQ#0g`ix74%dwzaazNXH11^+pc#FRVxSTp`}WB z);l&&a)Yj)Ib%*B-;+fVn=9yq|L1_6A3ZZR+!0r zeHN_`CKtOc@Qm#iwUm3510_g*{)*Yb8BSh>>W!DvN$9s0+SBMqeVuz+-oJ9v@37&V zm~{>w_Bnowy%_j87Z^=Ecz+?jBXz?TE_r0`6%vJp{NHHmr-n`a`(?My)j zjm}fkE%>e5xG_ zKw93((3nLA)eyq`uo|S68<}_Kop`=gq z=l>1^+rB;hPhIP43D&CK)h$c?KNL`Ix!3(J9`m;75BZ005p`99>b%o`1CU8%Z*Zvp zlMvqD(_a$+`+v>)o1|7>t#4N~kT$FilFpxB6*ybBIy!w6x2kEg_Qit7YnpCrBS+gc znX \ No newline at end of file From 19610191c8b518385750d490472d8f5024465ade Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Fri, 2 Jun 2023 09:45:38 -0500 Subject: [PATCH 106/290] Optimize top_set_bit(::BigInt) and hash(::Real) (#49996) * implement top_set_bit in Julia for increased performance * simplify and optimize hash(::Real) --------- Co-authored-by: Lilith Hafner --- base/float.jl | 28 +++++++++++----------------- base/gmp.jl | 6 +++--- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/base/float.jl b/base/float.jl index eb30c087c0f8c..0f05027b522a2 100644 --- a/base/float.jl +++ b/base/float.jl @@ -674,34 +674,28 @@ function hash(x::Real, h::UInt) num = -num den = -den end - z = trailing_zeros(num) - if z != 0 - num >>= z - pow += z - end - z = trailing_zeros(den) - if z != 0 - den >>= z - pow -= z - end + num_z = trailing_zeros(num) + den_z = trailing_zeros(den) + den >>= den_z + pow += num_z - den_z # 
handle values representable as Int64, UInt64, Float64 if den == 1 - left = ndigits0z(num,2) + pow - right = trailing_zeros(num) + pow + left = top_set_bit(abs(num)) - den_z + right = pow if -1074 <= right - if 0 <= right && left <= 64 - left <= 63 && return hash(Int64(num) << Int(pow), h) - signbit(num) == signbit(den) && return hash(UInt64(num) << Int(pow), h) + if 0 <= right + left <= 63 && return hash(Int64(num) << Int(pow-num_z), h) + left <= 64 && !signbit(num) && return hash(UInt64(num) << Int(pow-num_z), h) end # typemin(Int64) handled by Float64 case - left <= 1024 && left - right <= 53 && return hash(ldexp(Float64(num),pow), h) + left <= 1024 && left - right <= 53 && return hash(ldexp(Float64(num), pow-num_z), h) end end # handle generic rational values h = hash_integer(den, h) h = hash_integer(pow, h) - h = hash_integer(num, h) + h = hash_integer(num >> num_z, h) return h end diff --git a/base/gmp.jl b/base/gmp.jl index 69926f4ad0d06..3f809fd99d1cc 100644 --- a/base/gmp.jl +++ b/base/gmp.jl @@ -606,9 +606,9 @@ Number of ones in the binary representation of abs(x). count_ones_abs(x::BigInt) = iszero(x) ? 
0 : MPZ.mpn_popcount(x) function top_set_bit(x::BigInt) - x < 0 && throw(DomainError(x, "top_set_bit only supports negative arguments when they have type BitSigned.")) - x == 0 && return 0 - Int(ccall((:__gmpz_sizeinbase, :libgmp), Csize_t, (Base.GMP.MPZ.mpz_t, Cint), x, 2)) + isneg(x) && throw(DomainError(x, "top_set_bit only supports negative arguments when they have type BitSigned.")) + iszero(x) && return 0 + x.size * sizeof(Limb) << 3 - leading_zeros(GC.@preserve x unsafe_load(x.d, x.size)) end divrem(x::BigInt, y::BigInt) = MPZ.tdiv_qr(x, y) From f4a264a7a489d7f01ed0803e1790aa330967885e Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Fri, 2 Jun 2023 23:31:22 +0000 Subject: [PATCH 107/290] Rename LLVM passes to be suffixed with Pass (#50043) --- src/llvm-cpufeatures.cpp | 2 +- src/llvm-demote-float16.cpp | 2 +- src/llvm-julia-passes.inc | 16 +++++++-------- src/llvm-late-gc-lowering.cpp | 2 +- src/llvm-lower-handlers.cpp | 2 +- src/llvm-muladd.cpp | 2 +- src/llvm-multiversioning.cpp | 2 +- src/llvm-remove-ni.cpp | 2 +- src/llvm-simdloop.cpp | 2 +- src/passes.h | 18 ++++++++--------- src/pipeline.cpp | 38 +++++++++++++++++------------------ 11 files changed, 44 insertions(+), 44 deletions(-) diff --git a/src/llvm-cpufeatures.cpp b/src/llvm-cpufeatures.cpp index 45637a4c567f6..77f1baf6237c4 100644 --- a/src/llvm-cpufeatures.cpp +++ b/src/llvm-cpufeatures.cpp @@ -118,7 +118,7 @@ bool lowerCPUFeatures(Module &M) JL_NOTSAFEPOINT } } -PreservedAnalyses CPUFeatures::run(Module &M, ModuleAnalysisManager &AM) +PreservedAnalyses CPUFeaturesPass::run(Module &M, ModuleAnalysisManager &AM) { if (lowerCPUFeatures(M)) { return PreservedAnalyses::allInSet(); diff --git a/src/llvm-demote-float16.cpp b/src/llvm-demote-float16.cpp index 6ff7feaa550c8..4875abb390e21 100644 --- a/src/llvm-demote-float16.cpp +++ b/src/llvm-demote-float16.cpp @@ -187,7 +187,7 @@ static bool demoteFloat16(Function &F) } // end anonymous namespace 
-PreservedAnalyses DemoteFloat16::run(Function &F, FunctionAnalysisManager &AM) +PreservedAnalyses DemoteFloat16Pass::run(Function &F, FunctionAnalysisManager &AM) { if (demoteFloat16(F)) { return PreservedAnalyses::allInSet(); diff --git a/src/llvm-julia-passes.inc b/src/llvm-julia-passes.inc index 18e0f645d5445..cf7fdf0430c83 100644 --- a/src/llvm-julia-passes.inc +++ b/src/llvm-julia-passes.inc @@ -1,10 +1,10 @@ //Module passes #ifdef MODULE_PASS -MODULE_PASS("CPUFeatures", CPUFeatures()) -MODULE_PASS("RemoveNI", RemoveNI()) -MODULE_PASS("LowerSIMDLoop", LowerSIMDLoop()) +MODULE_PASS("CPUFeatures", CPUFeaturesPass()) +MODULE_PASS("RemoveNI", RemoveNIPass()) +MODULE_PASS("LowerSIMDLoop", LowerSIMDLoopPass()) MODULE_PASS("FinalLowerGC", FinalLowerGCPass()) -MODULE_PASS("JuliaMultiVersioning", MultiVersioning()) +MODULE_PASS("JuliaMultiVersioning", MultiVersioningPass()) MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass()) MODULE_PASS("RemoveAddrspaces", RemoveAddrspacesPass()) MODULE_PASS("LowerPTLSPass", LowerPTLSPass()) @@ -12,12 +12,12 @@ MODULE_PASS("LowerPTLSPass", LowerPTLSPass()) //Function passes #ifdef FUNCTION_PASS -FUNCTION_PASS("DemoteFloat16", DemoteFloat16()) -FUNCTION_PASS("CombineMulAdd", CombineMulAdd()) -FUNCTION_PASS("LateLowerGCFrame", LateLowerGC()) +FUNCTION_PASS("DemoteFloat16", DemoteFloat16Pass()) +FUNCTION_PASS("CombineMulAdd", CombineMulAddPass()) +FUNCTION_PASS("LateLowerGCFrame", LateLowerGCPass()) FUNCTION_PASS("AllocOpt", AllocOptPass()) FUNCTION_PASS("PropagateJuliaAddrspaces", PropagateJuliaAddrspacesPass()) -FUNCTION_PASS("LowerExcHandlers", LowerExcHandlers()) +FUNCTION_PASS("LowerExcHandlers", LowerExcHandlersPass()) FUNCTION_PASS("GCInvariantVerifier", GCInvariantVerifierPass()) #endif diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 038948839f725..f1cef798224d2 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2764,7 +2764,7 @@ bool 
LateLowerGCFrameLegacy::runOnFunction(Function &F) { return modified; } -PreservedAnalyses LateLowerGC::run(Function &F, FunctionAnalysisManager &AM) +PreservedAnalyses LateLowerGCPass::run(Function &F, FunctionAnalysisManager &AM) { auto GetDT = [&AM, &F]() -> DominatorTree & { return AM.getResult(F); diff --git a/src/llvm-lower-handlers.cpp b/src/llvm-lower-handlers.cpp index 57fb6ab1c7ed6..146c0fe701e9b 100644 --- a/src/llvm-lower-handlers.cpp +++ b/src/llvm-lower-handlers.cpp @@ -236,7 +236,7 @@ static bool lowerExcHandlers(Function &F) { } // anonymous namespace -PreservedAnalyses LowerExcHandlers::run(Function &F, FunctionAnalysisManager &AM) +PreservedAnalyses LowerExcHandlersPass::run(Function &F, FunctionAnalysisManager &AM) { bool modified = lowerExcHandlers(F); #ifdef JL_VERIFY_PASSES diff --git a/src/llvm-muladd.cpp b/src/llvm-muladd.cpp index efe0acb36f1fc..98e56e344f7af 100644 --- a/src/llvm-muladd.cpp +++ b/src/llvm-muladd.cpp @@ -109,7 +109,7 @@ static bool combineMulAdd(Function &F) JL_NOTSAFEPOINT return modified; } -PreservedAnalyses CombineMulAdd::run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT +PreservedAnalyses CombineMulAddPass::run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT { if (combineMulAdd(F)) { return PreservedAnalyses::allInSet(); diff --git a/src/llvm-multiversioning.cpp b/src/llvm-multiversioning.cpp index 814b13554358c..e4ebbe9d3838a 100644 --- a/src/llvm-multiversioning.cpp +++ b/src/llvm-multiversioning.cpp @@ -1140,7 +1140,7 @@ void multiversioning_preannotate(Module &M) M.addModuleFlag(Module::ModFlagBehavior::Error, "julia.mv.enable", 1); } -PreservedAnalyses MultiVersioning::run(Module &M, ModuleAnalysisManager &AM) +PreservedAnalyses MultiVersioningPass::run(Module &M, ModuleAnalysisManager &AM) { if (runMultiVersioning(M, external_use)) { auto preserved = PreservedAnalyses::allInSet(); diff --git a/src/llvm-remove-ni.cpp b/src/llvm-remove-ni.cpp index b767074202eb2..5e8f54b98e417 100644 --- 
a/src/llvm-remove-ni.cpp +++ b/src/llvm-remove-ni.cpp @@ -36,7 +36,7 @@ static bool removeNI(Module &M) JL_NOTSAFEPOINT } } -PreservedAnalyses RemoveNI::run(Module &M, ModuleAnalysisManager &AM) +PreservedAnalyses RemoveNIPass::run(Module &M, ModuleAnalysisManager &AM) { if (removeNI(M)) { return PreservedAnalyses::allInSet(); diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp index fcb05ba7c6805..9a7f61410ba1d 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -282,7 +282,7 @@ static bool markLoopInfo(Module &M, Function *marker, function_ref { +struct DemoteFloat16Pass : PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; -struct CombineMulAdd : PassInfoMixin { +struct CombineMulAddPass : PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT; }; -struct LateLowerGC : PassInfoMixin { +struct LateLowerGCPass : PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; @@ -33,7 +33,7 @@ struct PropagateJuliaAddrspacesPass : PassInfoMixin { +struct LowerExcHandlersPass : PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; @@ -47,17 +47,17 @@ struct GCInvariantVerifierPass : PassInfoMixin { }; // Module Passes -struct CPUFeatures : PassInfoMixin { +struct CPUFeaturesPass : PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; -struct RemoveNI : PassInfoMixin { +struct RemoveNIPass : PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; -struct LowerSIMDLoop : PassInfoMixin { +struct LowerSIMDLoopPass : PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) 
JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; @@ -67,9 +67,9 @@ struct FinalLowerGCPass : PassInfoMixin { static bool isRequired() { return true; } }; -struct MultiVersioning : PassInfoMixin { +struct MultiVersioningPass : PassInfoMixin { bool external_use; - MultiVersioning(bool external_use = false) : external_use(external_use) {} + MultiVersioningPass(bool external_use = false) : external_use(external_use) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT; static bool isRequired() { return true; } }; diff --git a/src/pipeline.cpp b/src/pipeline.cpp index 1007dfd35c1d6..0a5e718d28d67 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -252,7 +252,7 @@ static void buildBasicPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimiza invokePipelineStartCallbacks(MPM, PB, O); MPM.addPass(ConstantMergePass()); if (!options.dump_native) { - JULIA_PASS(MPM.addPass(CPUFeatures())); + JULIA_PASS(MPM.addPass(CPUFeaturesPass())); if (O.getSpeedupLevel() > 0) { MPM.addPass(createModuleToFunctionPassAdaptor(InstSimplifyPass())); } @@ -276,7 +276,7 @@ static void buildBasicPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimiza MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); } invokeOptimizerEarlyCallbacks(MPM, PB, O); - JULIA_PASS(MPM.addPass(LowerSIMDLoop())); + JULIA_PASS(MPM.addPass(LowerSIMDLoopPass())); { FunctionPassManager FPM; { @@ -293,21 +293,21 @@ static void buildBasicPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimiza //TODO no barrier pass? 
{ FunctionPassManager FPM; - JULIA_PASS(FPM.addPass(LowerExcHandlers())); + JULIA_PASS(FPM.addPass(LowerExcHandlersPass())); JULIA_PASS(FPM.addPass(GCInvariantVerifierPass(false))); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } - JULIA_PASS(MPM.addPass(RemoveNI())); - JULIA_PASS(MPM.addPass(createModuleToFunctionPassAdaptor(LateLowerGC()))); + JULIA_PASS(MPM.addPass(RemoveNIPass())); + JULIA_PASS(MPM.addPass(createModuleToFunctionPassAdaptor(LateLowerGCPass()))); JULIA_PASS(MPM.addPass(FinalLowerGCPass())); JULIA_PASS(MPM.addPass(LowerPTLSPass(options.dump_native))); } else { - JULIA_PASS(MPM.addPass(RemoveNI())); + JULIA_PASS(MPM.addPass(RemoveNIPass())); } - JULIA_PASS(MPM.addPass(LowerSIMDLoop())); // TODO why do we do this twice + JULIA_PASS(MPM.addPass(LowerSIMDLoopPass())); // TODO why do we do this twice if (options.dump_native) { - JULIA_PASS(MPM.addPass(MultiVersioning(options.external_use))); - JULIA_PASS(MPM.addPass(CPUFeatures())); + JULIA_PASS(MPM.addPass(MultiVersioningPass(options.external_use))); + JULIA_PASS(MPM.addPass(CPUFeaturesPass())); if (O.getSpeedupLevel() > 0) { FunctionPassManager FPM; FPM.addPass(InstSimplifyPass()); @@ -317,7 +317,7 @@ static void buildBasicPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimiza } invokeOptimizerLastCallbacks(MPM, PB, O); addSanitizerPasses(MPM, O); - JULIA_PASS(MPM.addPass(createModuleToFunctionPassAdaptor(DemoteFloat16()))); + JULIA_PASS(MPM.addPass(createModuleToFunctionPassAdaptor(DemoteFloat16Pass()))); } //Use for O2 and above @@ -355,9 +355,9 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); } if (options.dump_native) { - JULIA_PASS(MPM.addPass(MultiVersioning(options.external_use))); + JULIA_PASS(MPM.addPass(MultiVersioningPass(options.external_use))); } - JULIA_PASS(MPM.addPass(CPUFeatures())); + JULIA_PASS(MPM.addPass(CPUFeaturesPass())); { FunctionPassManager FPM; 
FPM.addPass(SROAPass()); @@ -371,7 +371,7 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat invokePeepholeEPCallbacks(FPM, PB, O); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } - MPM.addPass(LowerSIMDLoop()); + JULIA_PASS(MPM.addPass(LowerSIMDLoopPass())); { FunctionPassManager FPM; { @@ -443,14 +443,14 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat //TODO barrier pass? { FunctionPassManager FPM; - JULIA_PASS(FPM.addPass(LowerExcHandlers())); + JULIA_PASS(FPM.addPass(LowerExcHandlersPass())); JULIA_PASS(FPM.addPass(GCInvariantVerifierPass(false))); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } // Needed **before** LateLowerGCFrame on LLVM < 12 // due to bug in `CreateAlignmentAssumption`. - JULIA_PASS(MPM.addPass(RemoveNI())); - JULIA_PASS(MPM.addPass(createModuleToFunctionPassAdaptor(LateLowerGC()))); + JULIA_PASS(MPM.addPass(RemoveNIPass())); + JULIA_PASS(MPM.addPass(createModuleToFunctionPassAdaptor(LateLowerGCPass()))); JULIA_PASS(MPM.addPass(FinalLowerGCPass())); { FunctionPassManager FPM; @@ -467,11 +467,11 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } } else { - JULIA_PASS(MPM.addPass(RemoveNI())); + JULIA_PASS(MPM.addPass(RemoveNIPass())); } { FunctionPassManager FPM; - JULIA_PASS(FPM.addPass(CombineMulAdd())); + JULIA_PASS(FPM.addPass(CombineMulAddPass())); FPM.addPass(DivRemPairsPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } @@ -479,7 +479,7 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat addSanitizerPasses(MPM, O); { FunctionPassManager FPM; - JULIA_PASS(FPM.addPass(DemoteFloat16())); + JULIA_PASS(FPM.addPass(DemoteFloat16Pass())); FPM.addPass(GVNPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } From c3d70e83e76f2f4c87ad0d69b4dc7e27abe3275a Mon Sep 17 
00:00:00 2001 From: Colin Caine Date: Sat, 3 Jun 2023 06:02:18 +0100 Subject: [PATCH 108/290] Update Printf.jl reference link (#50029) The old link described %g inaccurately. The new one is better (tho it mentions %n, which we don't support). --- stdlib/Printf/src/Printf.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/Printf/src/Printf.jl b/stdlib/Printf/src/Printf.jl index 62a84d7d36984..8600af8227d7f 100644 --- a/stdlib/Printf/src/Printf.jl +++ b/stdlib/Printf/src/Printf.jl @@ -975,7 +975,7 @@ Use shorter of decimal or scientific 1.23 1.23e+07 julia> @printf "Use dynamic width and precision %*.*f" 10 2 0.12345 Use dynamic width and precision 0.12 ``` -For a systematic specification of the format, see [here](https://www.cplusplus.com/reference/cstdio/printf/). +For a systematic specification of the format, see [here](https://en.cppreference.com/w/c/io/fprintf). See also [`@sprintf`](@ref) to get the result as a `String` instead of it being printed. # Caveats From 0c774c7fb8815cc700e2828066d3e9053f7c3a11 Mon Sep 17 00:00:00 2001 From: Alexander Plavin Date: Sat, 3 Jun 2023 08:13:14 +0300 Subject: [PATCH 109/290] fix empty string error in Printf (#50011) * fix empty string error in Printf * Update runtests.jl * Update runtests.jl --- stdlib/Printf/src/Printf.jl | 1 - stdlib/Printf/test/runtests.jl | 5 ++++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/stdlib/Printf/src/Printf.jl b/stdlib/Printf/src/Printf.jl index 8600af8227d7f..cb336a8d9c18b 100644 --- a/stdlib/Printf/src/Printf.jl +++ b/stdlib/Printf/src/Printf.jl @@ -122,7 +122,6 @@ end # parse format string function Format(f::AbstractString) - isempty(f) && throw(InvalidFormatStringError("Format string must not be empty", f, 1, 1)) bytes = codeunits(f) len = length(bytes) pos = 1 diff --git a/stdlib/Printf/test/runtests.jl b/stdlib/Printf/test/runtests.jl index 96d61b61d02e3..33970f78648e2 100644 --- a/stdlib/Printf/test/runtests.jl +++ 
b/stdlib/Printf/test/runtests.jl @@ -339,7 +339,6 @@ end @test Printf.@sprintf("1%%2%%3") == "1%2%3" @test Printf.@sprintf("GAP[%%]") == "GAP[%]" @test Printf.@sprintf("hey there") == "hey there" - @test_throws Printf.InvalidFormatStringError Printf.Format("") @test_throws Printf.InvalidFormatStringError Printf.Format("%+") @test_throws Printf.InvalidFormatStringError Printf.Format("%.") @test_throws Printf.InvalidFormatStringError Printf.Format("%.0") @@ -488,6 +487,10 @@ end @test @sprintf("%d", 3//1) == "3" @test @sprintf("%d", Inf) == "Inf" @test @sprintf(" %d", NaN) == " NaN" + + # 50011 + @test Printf.@sprintf("") == "" + @test Printf.format(Printf.Format("")) == "" end @testset "integers" begin From ff23b3769f558a935e823a921b006390088ae099 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Sat, 3 Jun 2023 21:27:33 -0400 Subject: [PATCH 110/290] add docs on task migration (#50047) --- base/threadingconstructs.jl | 13 ++++++++++--- doc/src/manual/multi-threading.md | 12 ++++++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/base/threadingconstructs.jl b/base/threadingconstructs.jl index 714a7054313d0..a59c0046dd07b 100644 --- a/base/threadingconstructs.jl +++ b/base/threadingconstructs.jl @@ -22,6 +22,11 @@ julia> Threads.@threads for i in 1:4 5 4 ``` + +!!! note + The thread that a task runs on may change if the task yields, which is known as [`Task Migration`](@ref man-task-migration). + For this reason in most cases it is not safe to use `threadid()` to index into, say, a vector of buffer or stateful objects. + """ threadid() = Int(ccall(:jl_threadid, Int16, ())+1) @@ -229,7 +234,7 @@ For example, the above conditions imply that: - Write only to locations not shared across iterations (unless a lock or atomic operation is used). - The value of [`threadid()`](@ref Threads.threadid) may change even within a single - iteration. + iteration. 
See [`Task Migration`](@ref man-task-migration) ## Schedulers @@ -355,8 +360,10 @@ the _value_ of a variable, isolating the asynchronous code from changes to the variable's value in the current task. !!! note - See the manual chapter on [multi-threading](@ref man-multithreading) - for important caveats. See also the chapter on [threadpools](@ref man-threadpools). + The thread that the task runs on may change if the task yields, therefore `threadid()` should not + be treated as constant for a task. See [`Task Migration`](@ref man-task-migration), and the broader + [multi-threading](@ref man-multithreading) manual for further important caveats. + See also the chapter on [threadpools](@ref man-threadpools). !!! compat "Julia 1.3" This macro is available as of Julia 1.3. diff --git a/doc/src/manual/multi-threading.md b/doc/src/manual/multi-threading.md index be64390e473f2..afb1c749f9447 100644 --- a/doc/src/manual/multi-threading.md +++ b/doc/src/manual/multi-threading.md @@ -388,6 +388,18 @@ threads in Julia: This may require some transitional work across the ecosystem before threading can be widely adopted with confidence. See the next section for further details. +## [Task Migration](@id man-task-migration) + +After a task starts running on a certain thread (e.g. via [`@spawn`](@ref Threads.@spawn) or +[`@threads`](@ref Threads.@threads)), it may move to a different thread if the task yields. + +This means that [`threadid()`](@ref Threads.threadid) should not be treated as constant within a task, and therefore +should not be used to index into a vector of buffers or stateful objects. + +!!! compat "Julia 1.7" + Task migration was introduced in Julia 1.7. Before this tasks always remained on the same thread that they were + started on. 
+ ## Safe use of Finalizers Because finalizers can interrupt any code, they must be very careful in how From f407a4cac3d1c660d1f8f1a9b367eec108d98178 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Sun, 4 Jun 2023 12:20:02 +0900 Subject: [PATCH 111/290] inlining: allow callsite inlining with cached results (#50048) In some rare cases with callsite inlining, we try to inline an inferred result from a local cache (`inf_result::InferenceResult`), whose source has been transformed by `transform_result_for_cache`. At present, `inf_result.src` stays to be `OptimizationState` in such cases, causing `inlining_policy` to handle the callsite inlining. This commit adjusts `transform_result_for_cache` so that it stores the transformed source in `inf_result.src`, letting the callsite inliner use it. Down the line, we might revisit this change to align it with 532125d51d23f22c3fd117fe8a37c158fe16ac62, which isn't enabled yet. --- base/compiler/typeinfer.jl | 5 ++--- test/compiler/inline.jl | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl index 757f9f567a0c2..77e1fd02de8d0 100644 --- a/base/compiler/typeinfer.jl +++ b/base/compiler/typeinfer.jl @@ -369,10 +369,9 @@ end function transform_result_for_cache(interp::AbstractInterpreter, linfo::MethodInstance, valid_worlds::WorldRange, result::InferenceResult) inferred_result = result.src - # If we decided not to optimize, drop the OptimizationState now. - # External interpreters can override as necessary to cache additional information if inferred_result isa OptimizationState{typeof(interp)} - inferred_result = ir_to_codeinf!(inferred_result) + # TODO respect must_be_codeinf setting here? 
+ result.src = inferred_result = ir_to_codeinf!(inferred_result) end if inferred_result isa CodeInfo inferred_result.min_world = first(valid_worlds) diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index 7920212537608..be821a88f00cc 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -705,6 +705,27 @@ mktempdir() do dir end end +# callsite inlining with cached frames +issue49823_events = @NamedTuple{evid::Int8, base_time::Float64}[ + (evid = 1, base_time = 0.0), (evid = -1, base_time = 0.0)] +issue49823_fl1(t, events) = @inline findlast(x -> x.evid ∈ (1, 4) && x.base_time <= t, events) +issue49823_fl3(t, events) = @inline findlast(x -> any(==(x.evid), (1,4)) && x.base_time <= t, events) +issue49823_fl5(t, events) = begin + f = let t=t + x -> x.evid ∈ (1, 4) && x.base_time <= t + end + @inline findlast(f, events) +end +let src = @code_typed1 issue49823_fl1(0.0, issue49823_events) + @test count(isinvoke(:findlast), src.code) == 0 # successful inlining +end +let src = @code_typed1 issue49823_fl3(0.0, issue49823_events) + @test count(isinvoke(:findlast), src.code) == 0 # successful inlining +end +let src = @code_typed1 issue49823_fl5(0.0, issue49823_events) + @test count(isinvoke(:findlast), src.code) == 0 # successful inlining +end + # Issue #42264 - crash on certain union splits let f(x) = (x...,) # Test splatting with a Union of non-{Tuple, SimpleVector} types that require creating new `iterate` calls From 3c299427a2fd121a97b2d50df25c0870427f020a Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Mon, 5 Jun 2023 11:18:44 -0600 Subject: [PATCH 112/290] Remove printlns from test/atexit.jl (#50066) --- test/atexit.jl | 8 -------- 1 file changed, 8 deletions(-) diff --git a/test/atexit.jl b/test/atexit.jl index 5b4fbc0b44a40..64b56e32466df 100644 --- a/test/atexit.jl +++ b/test/atexit.jl @@ -202,15 +202,11 @@ using Test # This will run in a concurrent task, testing that we can register atexit # hooks from another task while running atexit 
hooks. Threads.@spawn begin - Core.println("INSIDE") take!(c) # block on c - Core.println("go") atexit() do - Core.println("exit11") exit(11) end take!(c) # keep the _atexit() loop alive until we've added another item. - Core.println("done") end end exit(0) @@ -224,7 +220,6 @@ using Test # Block until the atexit hooks have all finished. We use a manual "spin # lock" because task switch is disallowed inside the finalizer, below. while !atexit_has_finished[] end - Core.println("done") try # By the time this runs, all the atexit hooks will be done. # So this will throw. @@ -241,13 +236,10 @@ using Test # task above gets a chance to run. x = [] finalizer(x) do x - Core.println("FINALIZER") # Allow the spawned task to finish atexit_has_finished[] = true - Core.println("ready") # Then spin forever to prevent exit. while atexit_has_finished[] end - Core.println("exiting") end exit(0) """ => 22, From 270a1d8bdcb593a56e037e5a40ff66aa039655b0 Mon Sep 17 00:00:00 2001 From: Fons van der Plas Date: Mon, 5 Jun 2023 20:26:51 +0200 Subject: [PATCH 113/290] Docs: simple docstring for `write(filename::AbstractString, x)` (#49835) * Docs: simple docstring for `write(filename::AbstractString, x)` * use `content` instead of `x` * Update base/io.jl Co-authored-by: Jameson Nash * Update base/io.jl --------- Co-authored-by: Jameson Nash --- base/io.jl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/base/io.jl b/base/io.jl index 4163a5e803676..60a24831587cb 100644 --- a/base/io.jl +++ b/base/io.jl @@ -224,7 +224,6 @@ read(stream, ::Type{Union{}}, slurp...; kwargs...) = error("cannot read a value """ write(io::IO, x) - write(filename::AbstractString, x) Write the canonical binary representation of a value to the given I/O stream or file. Return the number of bytes written into the stream. 
See also [`print`](@ref) to @@ -458,6 +457,14 @@ wait_close(io::AbstractPipe) = (wait_close(pipe_writer(io)::IO); wait_close(pipe # Exception-safe wrappers (io = open(); try f(io) finally close(io)) + +""" + write(filename::AbstractString, content) + +Write the canonical binary representation of `content` to a file, which will be created if it does not exist yet or overwritten if it does exist. + +Return the number of bytes written into the file. +""" write(filename::AbstractString, a1, args...) = open(io->write(io, a1, args...), convert(String, filename)::String, "w") """ From 3051f47a40c37a2e898b9814856f7d4173b6d8c4 Mon Sep 17 00:00:00 2001 From: Takafumi Arakaki Date: Mon, 5 Jun 2023 11:29:23 -0700 Subject: [PATCH 114/290] Use xchg to swap boxed values (#45147) * Use xchg to swap boxed values * Update src/cgutils.cpp Co-authored-by: Valentin Churavy * Update src/cgutils.cpp Co-authored-by: Valentin Churavy --------- Co-authored-by: Jameson Nash Co-authored-by: Valentin Churavy --- src/cgutils.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 21d58cc200e48..b400e510e0cde 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -2008,10 +2008,8 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, emit_unbox_store(ctx, rhs, ptr, tbaa, alignment); } } - else if (isswapfield && !isboxed) { + else if (isswapfield && isStrongerThanMonotonic(Order)) { assert(Order != AtomicOrdering::NotAtomic && r); - // we can't handle isboxed here as a workaround for really bad LLVM - // design issue: plain Xchg only works with integers auto *store = ctx.builder.CreateAtomicRMW(AtomicRMWInst::Xchg, ptr, r, Align(alignment), Order); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); ai.noalias = MDNode::concatenate(aliasscope, ai.noalias); From 32e29864e2d38117e7e003f834530ab1087e065c Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Mon, 5 Jun 2023 22:44:49 +0000 
Subject: [PATCH 115/290] Add LLVMExtra stubs for newpm (#50042) --- src/codegen-stubs.c | 16 ++++++++++++++++ src/jl_exported_funcs.inc | 16 ++++++++++++++++ src/llvm-julia-passes.inc | 32 ++++++++++++++++---------------- src/llvm_api.cpp | 35 ++++++++++++++++++++++++++++++++++- src/pipeline.cpp | 14 +++++++------- 5 files changed, 89 insertions(+), 24 deletions(-) diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c index 9de831fa30a9c..0853a090183dd 100644 --- a/src/codegen-stubs.c +++ b/src/codegen-stubs.c @@ -139,6 +139,22 @@ JL_DLLEXPORT void LLVMExtraAddDemoteFloat16Pass_fallback(void *PM) UNAVAILABLE JL_DLLEXPORT void LLVMExtraAddCPUFeaturesPass_fallback(void *PM) UNAVAILABLE +#define MODULE_PASS(NAME, CLASS, CREATE_PASS) \ + JL_DLLEXPORT void LLVMExtraMPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE +#define CGSCC_PASS(NAME, CLASS, CREATE_PASS) \ + JL_DLLEXPORT void LLVMExtraCGPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE +#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) \ + JL_DLLEXPORT void LLVMExtraFPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE +#define LOOP_PASS(NAME, CLASS, CREATE_PASS) \ + JL_DLLEXPORT void LLVMExtraLPMAdd##CLASS##_fallback(void *PM) UNAVAILABLE + +#include "llvm-julia-passes.inc" + +#undef MODULE_PASS +#undef CGSCC_PASS +#undef FUNCTION_PASS +#undef LOOP_PASS + //LLVM C api to the julia JIT JL_DLLEXPORT void* JLJITGetLLVMOrcExecutionSession_fallback(void* JIT) UNAVAILABLE diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index b2216d10fb27d..898656c142480 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -573,6 +573,22 @@ YY(LLVMExtraAddGCInvariantVerifierPass) \ YY(LLVMExtraAddDemoteFloat16Pass) \ YY(LLVMExtraAddCPUFeaturesPass) \ + YY(LLVMExtraMPMAddCPUFeaturesPass) \ + YY(LLVMExtraMPMAddRemoveNIPass) \ + YY(LLVMExtraMPMAddLowerSIMDLoopPass) \ + YY(LLVMExtraMPMAddFinalLowerGCPass) \ + YY(LLVMExtraMPMAddMultiVersioningPass) \ + YY(LLVMExtraMPMAddRemoveJuliaAddrspacesPass) \ + 
YY(LLVMExtraMPMAddRemoveAddrspacesPass) \ + YY(LLVMExtraMPMAddLowerPTLSPass) \ + YY(LLVMExtraFPMAddDemoteFloat16Pass) \ + YY(LLVMExtraFPMAddCombineMulAddPass) \ + YY(LLVMExtraFPMAddLateLowerGCPass) \ + YY(LLVMExtraFPMAddAllocOptPass) \ + YY(LLVMExtraFPMAddPropagateJuliaAddrspacesPass) \ + YY(LLVMExtraFPMAddLowerExcHandlersPass) \ + YY(LLVMExtraFPMAddGCInvariantVerifierPass) \ + YY(LLVMExtraLPMAddJuliaLICMPass) \ YY(JLJITGetLLVMOrcExecutionSession) \ YY(JLJITGetJuliaOJIT) \ YY(JLJITGetExternalJITDylib) \ diff --git a/src/llvm-julia-passes.inc b/src/llvm-julia-passes.inc index cf7fdf0430c83..39030d60a44fc 100644 --- a/src/llvm-julia-passes.inc +++ b/src/llvm-julia-passes.inc @@ -1,27 +1,27 @@ //Module passes #ifdef MODULE_PASS -MODULE_PASS("CPUFeatures", CPUFeaturesPass()) -MODULE_PASS("RemoveNI", RemoveNIPass()) -MODULE_PASS("LowerSIMDLoop", LowerSIMDLoopPass()) -MODULE_PASS("FinalLowerGC", FinalLowerGCPass()) -MODULE_PASS("JuliaMultiVersioning", MultiVersioningPass()) -MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass()) -MODULE_PASS("RemoveAddrspaces", RemoveAddrspacesPass()) -MODULE_PASS("LowerPTLSPass", LowerPTLSPass()) +MODULE_PASS("CPUFeatures", CPUFeaturesPass, CPUFeaturesPass()) +MODULE_PASS("RemoveNI", RemoveNIPass, RemoveNIPass()) +MODULE_PASS("LowerSIMDLoop", LowerSIMDLoopPass, LowerSIMDLoopPass()) +MODULE_PASS("FinalLowerGC", FinalLowerGCPass, FinalLowerGCPass()) +MODULE_PASS("JuliaMultiVersioning", MultiVersioningPass, MultiVersioningPass()) +MODULE_PASS("RemoveJuliaAddrspaces", RemoveJuliaAddrspacesPass, RemoveJuliaAddrspacesPass()) +MODULE_PASS("RemoveAddrspaces", RemoveAddrspacesPass, RemoveAddrspacesPass()) +MODULE_PASS("LowerPTLSPass", LowerPTLSPass, LowerPTLSPass()) #endif //Function passes #ifdef FUNCTION_PASS -FUNCTION_PASS("DemoteFloat16", DemoteFloat16Pass()) -FUNCTION_PASS("CombineMulAdd", CombineMulAddPass()) -FUNCTION_PASS("LateLowerGCFrame", LateLowerGCPass()) -FUNCTION_PASS("AllocOpt", AllocOptPass()) 
-FUNCTION_PASS("PropagateJuliaAddrspaces", PropagateJuliaAddrspacesPass()) -FUNCTION_PASS("LowerExcHandlers", LowerExcHandlersPass()) -FUNCTION_PASS("GCInvariantVerifier", GCInvariantVerifierPass()) +FUNCTION_PASS("DemoteFloat16", DemoteFloat16Pass, DemoteFloat16Pass()) +FUNCTION_PASS("CombineMulAdd", CombineMulAddPass, CombineMulAddPass()) +FUNCTION_PASS("LateLowerGCFrame", LateLowerGCPass, LateLowerGCPass()) +FUNCTION_PASS("AllocOpt", AllocOptPass, AllocOptPass()) +FUNCTION_PASS("PropagateJuliaAddrspaces", PropagateJuliaAddrspacesPass, PropagateJuliaAddrspacesPass()) +FUNCTION_PASS("LowerExcHandlers", LowerExcHandlersPass, LowerExcHandlersPass()) +FUNCTION_PASS("GCInvariantVerifier", GCInvariantVerifierPass, GCInvariantVerifierPass()) #endif //Loop passes #ifdef LOOP_PASS -LOOP_PASS("JuliaLICM", JuliaLICMPass()) +LOOP_PASS("JuliaLICM", JuliaLICMPass, JuliaLICMPass()) #endif diff --git a/src/llvm_api.cpp b/src/llvm_api.cpp index 6d7b9b143ff0a..d56fb3a0497fa 100644 --- a/src/llvm_api.cpp +++ b/src/llvm_api.cpp @@ -9,7 +9,8 @@ #define __STDC_CONSTANT_MACROS #endif -#include +#include "jitlayers.h" +#include "passes.h" #include #include @@ -50,6 +51,15 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::OrcV2CAPIHelper::PoolEntry, DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::IRCompileLayer, LLVMOrcIRCompileLayerRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::MaterializationResponsibility, LLVMOrcMaterializationResponsibilityRef) + +typedef struct LLVMOpaqueModulePassManager *LLVMModulePassManagerRef; +typedef struct LLVMOpaqueFunctionPassManager *LLVMFunctionPassManagerRef; +typedef struct LLVMOpaqueLoopPassManager *LLVMLoopPassManagerRef; + +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::ModulePassManager, LLVMModulePassManagerRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::FunctionPassManager, LLVMFunctionPassManagerRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::LoopPassManager, LLVMLoopPassManagerRef) + extern "C" { JL_DLLEXPORT_CODEGEN JuliaOJITRef JLJITGetJuliaOJIT_impl(void) @@ -130,4 
+140,27 @@ JLJITGetIRCompileLayer_impl(JuliaOJITRef JIT) return wrap(&unwrap(JIT)->getIRCompileLayer()); } +#define MODULE_PASS(NAME, CLASS, CREATE_PASS) \ + JL_DLLEXPORT_CODEGEN void LLVMExtraMPMAdd##CLASS##_impl(LLVMModulePassManagerRef PM) \ + { \ + unwrap(PM)->addPass(CREATE_PASS); \ + } +#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) \ + JL_DLLEXPORT_CODEGEN void LLVMExtraFPMAdd##CLASS##_impl(LLVMFunctionPassManagerRef PM) \ + { \ + unwrap(PM)->addPass(CREATE_PASS); \ + } +#define LOOP_PASS(NAME, CLASS, CREATE_PASS) \ + JL_DLLEXPORT_CODEGEN void LLVMExtraLPMAdd##CLASS##_impl(LLVMLoopPassManagerRef PM) \ + { \ + unwrap(PM)->addPass(CREATE_PASS); \ + } + +#include "llvm-julia-passes.inc" + +#undef MODULE_PASS +#undef CGSCC_PASS +#undef FUNCTION_PASS +#undef LOOP_PASS + } // extern "C" diff --git a/src/pipeline.cpp b/src/pipeline.cpp index 0a5e718d28d67..e5822bbf74aeb 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -491,13 +491,13 @@ namespace { auto createPIC(StandardInstrumentations &SI) JL_NOTSAFEPOINT { auto PIC = std::make_unique(); //Borrowed from LLVM PassBuilder.cpp:386 -#define MODULE_PASS(NAME, CREATE_PASS) \ +#define MODULE_PASS(NAME, CLASS, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ PIC->addClassToPassName(CLASS, NAME); #define MODULE_ANALYSIS(NAME, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define FUNCTION_PASS(NAME, CREATE_PASS) \ +#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ PIC->addClassToPassName(CLASS, NAME); @@ -505,13 +505,13 @@ PIC->addClassToPassName(CLASS, NAME); PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define LOOPNEST_PASS(NAME, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define 
LOOP_PASS(NAME, CREATE_PASS) \ +#define LOOP_PASS(NAME, CLASS, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ PIC->addClassToPassName(CLASS, NAME); #define LOOP_ANALYSIS(NAME, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define CGSCC_PASS(NAME, CREATE_PASS) \ +#define CGSCC_PASS(NAME, CLASS, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ PIC->addClassToPassName(CLASS, NAME); @@ -684,7 +684,7 @@ void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { PB.registerPipelineParsingCallback( [](StringRef Name, FunctionPassManager &PM, ArrayRef InnerPipeline) { -#define FUNCTION_PASS(NAME, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } +#define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } #include "llvm-julia-passes.inc" #undef FUNCTION_PASS return false; @@ -693,7 +693,7 @@ void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { PB.registerPipelineParsingCallback( [](StringRef Name, ModulePassManager &PM, ArrayRef InnerPipeline) { -#define MODULE_PASS(NAME, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } +#define MODULE_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } #include "llvm-julia-passes.inc" #undef MODULE_PASS //Add full pipelines here @@ -714,7 +714,7 @@ void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { PB.registerPipelineParsingCallback( [](StringRef Name, LoopPassManager &PM, ArrayRef InnerPipeline) { -#define LOOP_PASS(NAME, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } +#define LOOP_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } #include "llvm-julia-passes.inc" #undef LOOP_PASS return false; 
From c16891d03f46f06393bb3be360454db2317d2f3f Mon Sep 17 00:00:00 2001 From: Zachary P Christensen Date: Mon, 5 Jun 2023 19:20:47 -0400 Subject: [PATCH 116/290] Atomic pointer operations APIs (#49811) This provides an interface to `Intrinsics.atomic_pointerref` and `Intrinsics.atomic_pointerset` through `unsafe_load` and `unsafe_store!`. Added the following atomic pointer operations: unsafe_modify! unsafe_replace! unsafe_swap! elsize(::Type{<:Ptr}) Tests previously using the explicit intrinsic method here are replaced by the new user facing variants. Add notes about atomic implementation and refs to relevant field and property method docs. Move general atomic doc strings to kw doc and ref in multi-threading. Co-authored-by: Jameson Nash Co-authored-by: Sukera <11753998+Seelengrab@users.noreply.github.com> --- base/array.jl | 9 ++- base/docs/basedocs.jl | 9 +++ base/exports.jl | 3 + base/pointer.jl | 119 +++++++++++++++++++++++++++++++- doc/src/base/base.md | 6 ++ doc/src/base/c.md | 3 + doc/src/base/multi-threading.md | 4 ++ test/intrinsics.jl | 112 ++++++++++++++++-------------- 8 files changed, 211 insertions(+), 54 deletions(-) diff --git a/base/array.jl b/base/array.jl index ce400a7fa7154..95c77ef8a0879 100644 --- a/base/array.jl +++ b/base/array.jl @@ -252,9 +252,14 @@ function bitsunionsize(u::Union) return sz end -# Deprecate this, as it seems to have no documented meaning and is unused here, -# but is frequently accessed in packages elsize(@nospecialize _::Type{A}) where {T,A<:Array{T}} = aligned_sizeof(T) +function elsize(::Type{Ptr{T}}) where T + # this only must return something valid for values which satisfy is_valid_intrinsic_elptr(T), + # which includes Any and most concrete datatypes + T === Any && return sizeof(Ptr{Any}) + T isa DataType || sizeof(Any) # throws + return LLT_ALIGN(Core.sizeof(T), datatype_alignment(T)) +end elsize(::Type{Union{}}, slurp...)
= 0 sizeof(a::Array) = Core.sizeof(a) diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl index 684ed8b48f734..fd8c35a5fdf76 100644 --- a/base/docs/basedocs.jl +++ b/base/docs/basedocs.jl @@ -3227,6 +3227,15 @@ See also [`"`](@ref \") """ kw"\"\"\"" +""" +Unsafe pointer operations are compatible with loading and storing pointers declared with +`_Atomic` and `std::atomic` type in C11 and C++23 respectively. An error may be thrown if +there is not support for atomically loading the Julia type `T`. + +See also: [`unsafe_load`](@ref), [`unsafe_modify!`](@ref), [`unsafe_replace!`](@ref), [`unsafe_store!`](@ref), [`unsafe_swap!`](@ref) +""" +kw"atomic" + """ Base.donotdelete(args...) diff --git a/base/exports.jl b/base/exports.jl index 8d8983950fe74..10f43825e12df 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -979,8 +979,11 @@ export reenable_sigint, unsafe_copyto!, unsafe_load, + unsafe_modify!, unsafe_pointer_to_objref, + unsafe_replace!, unsafe_store!, + unsafe_swap!, # implemented in Random module rand, diff --git a/base/pointer.jl b/base/pointer.jl index 62b34dd06d368..a47f1e38edb9b 100644 --- a/base/pointer.jl +++ b/base/pointer.jl @@ -98,32 +98,147 @@ unsafe_wrap(Atype::Union{Type{Array},Type{Array{T}},Type{Array{T,N}}}, """ unsafe_load(p::Ptr{T}, i::Integer=1) + unsafe_load(p::Ptr{T}, order::Symbol) + unsafe_load(p::Ptr{T}, i::Integer, order::Symbol) Load a value of type `T` from the address of the `i`th element (1-indexed) starting at `p`. -This is equivalent to the C expression `p[i-1]`. +This is equivalent to the C expression `p[i-1]`. Optionally, an atomic memory ordering can +be provided. The `unsafe` prefix on this function indicates that no validation is performed on the pointer `p` to ensure that it is valid. Like C, the programmer is responsible for ensuring that referenced memory is not freed or garbage collected while invoking this function. Incorrect usage may segfault your program or return garbage answers. 
Unlike C, dereferencing memory region allocated as different type may be valid provided that the types are compatible. + +!!! compat "Julia 1.10" + The `order` argument is available as of Julia 1.10. + +See also: [`atomic`](@ref) """ unsafe_load(p::Ptr, i::Integer=1) = pointerref(p, Int(i), 1) +unsafe_load(p::Ptr, order::Symbol) = atomic_pointerref(p, order) +function unsafe_load(p::Ptr, i::Integer, order::Symbol) + unsafe_load(p + (elsize(typeof(p)) * (Int(i) - 1)), order) +end """ unsafe_store!(p::Ptr{T}, x, i::Integer=1) + unsafe_store!(p::Ptr{T}, x, order::Symbol) + unsafe_store!(p::Ptr{T}, x, i::Integer, order::Symbol) Store a value of type `T` to the address of the `i`th element (1-indexed) starting at `p`. -This is equivalent to the C expression `p[i-1] = x`. +This is equivalent to the C expression `p[i-1] = x`. Optionally, an atomic memory ordering +can be provided. The `unsafe` prefix on this function indicates that no validation is performed on the pointer `p` to ensure that it is valid. Like C, the programmer is responsible for ensuring that referenced memory is not freed or garbage collected while invoking this function. Incorrect usage may segfault your program. Unlike C, storing memory region allocated as different type may be valid provided that that the types are compatible. + +!!! compat "Julia 1.10" + The `order` argument is available as of Julia 1.10. + +See also: [`atomic`](@ref) """ unsafe_store!(p::Ptr{Any}, @nospecialize(x), i::Integer=1) = pointerset(p, x, Int(i), 1) unsafe_store!(p::Ptr{T}, x, i::Integer=1) where {T} = pointerset(p, convert(T,x), Int(i), 1) +unsafe_store!(p::Ptr{T}, x, order::Symbol) where {T} = atomic_pointerset(p, x isa T ? 
x : convert(T,x), order) +function unsafe_store!(p::Ptr, x, i::Integer, order::Symbol) + unsafe_store!(p + (elsize(typeof(p)) * (Int(i) - 1)), x, order) +end + +""" + unsafe_modify!(p::Ptr{T}, op, x, [order::Symbol]) -> Pair + +These atomically perform the operations to get and set a memory address after applying +the function `op`. If supported by the hardware (for example, atomic increment), this may be +optimized to the appropriate hardware instruction, otherwise its execution will be +similar to: + + y = unsafe_load(p) + z = op(y, x) + unsafe_store!(p, z) + return y => z + +The `unsafe` prefix on this function indicates that no validation is performed on the +pointer `p` to ensure that it is valid. Like C, the programmer is responsible for ensuring +that referenced memory is not freed or garbage collected while invoking this function. +Incorrect usage may segfault your program. + +!!! compat "Julia 1.10" + This function requires at least Julia 1.10. + +See also: [`modifyproperty!`](@ref Base.modifyproperty!), [`atomic`](@ref) +""" +function unsafe_modify!(p::Ptr, op, x, order::Symbol=:not_atomic) + return atomic_pointermodify(p, op, x, order) +end + +""" + unsafe_replace!(p::Ptr{T}, expected, desired, + [success_order::Symbol[, fail_order::Symbol=success_order]]) -> (; old, success::Bool) + +These atomically perform the operations to get and conditionally set a memory address to +a given value. If supported by the hardware, this may be optimized to the appropriate +hardware instruction, otherwise its execution will be similar to: + + y = unsafe_load(p, fail_order) + ok = y === expected + if ok + unsafe_store!(p, desired, success_order) + end + return (; old = y, success = ok) + +The `unsafe` prefix on this function indicates that no validation is performed on the +pointer `p` to ensure that it is valid. Like C, the programmer is responsible for ensuring +that referenced memory is not freed or garbage collected while invoking this function. 
+Incorrect usage may segfault your program. + +!!! compat "Julia 1.10" + This function requires at least Julia 1.10. + +See also: [`replaceproperty!`](@ref Base.replaceproperty!), [`atomic`](@ref) +""" +function unsafe_replace!(p::Ptr{T}, expected, desired, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order) where {T} + @inline + xT = desired isa T ? desired : convert(T, desired) + return atomic_pointerreplace(p, expected, xT, success_order, fail_order) +end +function unsafe_replace!(p::Ptr{Any}, @nospecialize(expected), @nospecialize(desired), success_order::Symbol=:not_atomic, fail_order::Symbol=success_order) + return atomic_pointerreplace(p, expected, desired, success_order, fail_order) +end + +""" + unsafe_swap!(p::Ptr{T}, x, [order::Symbol]) + +These atomically perform the operations to simultaneously get and set a memory address. +If supported by the hardware, this may be optimized to the appropriate hardware +instruction, otherwise its execution will be similar to: + + y = unsafe_load(p) + unsafe_store!(p, x) + return y + +The `unsafe` prefix on this function indicates that no validation is performed on the +pointer `p` to ensure that it is valid. Like C, the programmer is responsible for ensuring +that referenced memory is not freed or garbage collected while invoking this function. +Incorrect usage may segfault your program. + +!!! compat "Julia 1.10" + This function requires at least Julia 1.10. + +See also: [`swapproperty!`](@ref Base.swapproperty!), [`atomic`](@ref) +""" +function unsafe_swap!(p::Ptr{Any}, x, order::Symbol=:not_atomic) + return atomic_pointerswap(p, x, order) +end +function unsafe_swap!(p::Ptr{T}, x, order::Symbol=:not_atomic) where {T} + @inline + xT = x isa T ? 
x : convert(T, x) + return atomic_pointerswap(p, xT, order) +end # convert a raw Ptr to an object reference, and vice-versa """ diff --git a/doc/src/base/base.md b/doc/src/base/base.md index 3d17665190e21..e540f50ca2cb0 100644 --- a/doc/src/base/base.md +++ b/doc/src/base/base.md @@ -141,10 +141,16 @@ Base.copy Base.deepcopy Base.getproperty Base.setproperty! +Base.replaceproperty! +Base.swapproperty! +Base.modifyproperty! Base.propertynames Base.hasproperty Core.getfield Core.setfield! +Core.modifyfield! +Core.replacefield! +Core.swapfield! Core.isdefined Core.getglobal Core.setglobal! diff --git a/doc/src/base/c.md b/doc/src/base/c.md index bdc64fa6d98b4..e221a6432542f 100644 --- a/doc/src/base/c.md +++ b/doc/src/base/c.md @@ -10,6 +10,9 @@ Base.unsafe_convert Base.cconvert Base.unsafe_load Base.unsafe_store! +Base.unsafe_modify! +Base.unsafe_replace! +Base.unsafe_swap! Base.unsafe_copyto!{T}(::Ptr{T}, ::Ptr{T}, ::Any) Base.unsafe_copyto!{T}(::Array{T}, ::Any, ::Array{T}, ::Any, ::Any) Base.copyto! diff --git a/doc/src/base/multi-threading.md b/doc/src/base/multi-threading.md index fb75b21479707..45a60b14d541a 100644 --- a/doc/src/base/multi-threading.md +++ b/doc/src/base/multi-threading.md @@ -17,6 +17,10 @@ See also [Multi-Threading](@ref man-multithreading). 
## Atomic operations +```@docs +atomic +``` + ```@docs Base.@atomic Base.@atomicswap diff --git a/test/intrinsics.jl b/test/intrinsics.jl index 35ce05b61dc24..3c49afe2c4d7e 100644 --- a/test/intrinsics.jl +++ b/test/intrinsics.jl @@ -107,16 +107,28 @@ end const ReplaceType = ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T +@testset "elsize(::Type{<:Ptr})" begin + @test Base.elsize(Ptr{Any}) == sizeof(Int) + @test Base.elsize(Ptr{NTuple{3,Int8}}) == 3 + @test Base.elsize(Ptr{Cvoid}) == 0 + @test Base.elsize(Ptr{Base.RefValue{Any}}) == sizeof(Int) + @test Base.elsize(Ptr{Int}) == sizeof(Int) + @test_throws MethodError Base.elsize(Ptr) + @test_throws ErrorException Base.elsize(Ptr{Ref{Int}}) + @test_throws ErrorException Base.elsize(Ptr{Ref}) + @test_throws ErrorException Base.elsize(Ptr{Complex}) +end + # issue #29929 let p = Ptr{Nothing}(0) @test unsafe_store!(p, nothing) === C_NULL @test unsafe_load(p) === nothing - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing - @test Core.Intrinsics.atomic_pointerset(p, nothing, :sequentially_consistent) === p - @test Core.Intrinsics.atomic_pointerswap(p, nothing, :sequentially_consistent) === nothing - @test Core.Intrinsics.atomic_pointermodify(p, (i, j) -> j, nothing, :sequentially_consistent) === Pair(nothing, nothing) - @test Core.Intrinsics.atomic_pointerreplace(p, nothing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, true)) - @test Core.Intrinsics.atomic_pointerreplace(p, missing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, false)) + @test unsafe_load(p, :sequentially_consistent) === nothing + @test unsafe_store!(p, nothing, :sequentially_consistent) === p + @test unsafe_swap!(p, nothing, :sequentially_consistent) === nothing + @test unsafe_modify!(p, (i, j) -> j, nothing, :sequentially_consistent) === Pair(nothing, nothing) + @test unsafe_replace!(p, nothing, nothing, 
:sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, true)) + @test unsafe_replace!(p, missing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, false)) end struct GhostStruct end @@ -225,43 +237,43 @@ for TT in (Int8, Int16, Int32, Int64, Int128, Int256, Int512, Complex{Int32}, Co end @test Core.Intrinsics.pointerref(p, 1, 1) === T(10) === r[] if sizeof(r) > 8 - @test_throws ErrorException("atomic_pointerref: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) - @test_throws ErrorException("atomic_pointerset: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerset(p, T(1), :sequentially_consistent) - @test_throws ErrorException("atomic_pointerswap: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerswap(p, T(100), :sequentially_consistent) - @test_throws ErrorException("atomic_pointermodify: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) - @test_throws ErrorException("atomic_pointermodify: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent) - @test_throws ErrorException("atomic_pointerreplace: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerreplace(p, T(100), T(2), :sequentially_consistent, :sequentially_consistent) - @test_throws ErrorException("atomic_pointerreplace: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) + @test_throws ErrorException("atomic_pointerref: invalid pointer for atomic operation") unsafe_load(p, :sequentially_consistent) + @test_throws ErrorException("atomic_pointerset: invalid pointer for atomic operation") unsafe_store!(p, T(1), :sequentially_consistent) + @test_throws ErrorException("atomic_pointerswap: invalid pointer for 
atomic operation") unsafe_swap!(p, T(100), :sequentially_consistent) + @test_throws ErrorException("atomic_pointermodify: invalid pointer for atomic operation") unsafe_modify!(p, add, T(1), :sequentially_consistent) + @test_throws ErrorException("atomic_pointermodify: invalid pointer for atomic operation") unsafe_modify!(p, swap, S(1), :sequentially_consistent) + @test_throws ErrorException("atomic_pointerreplace: invalid pointer for atomic operation") unsafe_replace!(p, T(100), T(2), :sequentially_consistent, :sequentially_consistent) + @test_throws ErrorException("atomic_pointerreplace: invalid pointer for atomic operation") unsafe_replace!(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) @test Core.Intrinsics.pointerref(p, 1, 1) === T(10) === r[] else if TT !== Any - @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, swap, S(4), :sequentially_consistent) + @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent) @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, Returns(S(5)), T(10), :sequentially_consistent) end - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(10) - @test Core.Intrinsics.atomic_pointerset(p, T(1), :sequentially_consistent) === p - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(1) - @test Core.Intrinsics.atomic_pointerreplace(p, T(1), T(100), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(1), true)) - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(100) - @test Core.Intrinsics.atomic_pointerreplace(p, T(1), T(1), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(100), false)) - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(100) - @test Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) === Pair{TT,TT}(T(100), T(101)) - @test Core.Intrinsics.atomic_pointermodify(p, add, 
T(1), :sequentially_consistent) === Pair{TT,TT}(T(101), T(102)) - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(102) - @test Core.Intrinsics.atomic_pointerswap(p, T(103), :sequentially_consistent) === T(102) - @test Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(103), false)) - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(103) - @test Core.Intrinsics.atomic_pointermodify(p, Returns(T(105)), nothing, :sequentially_consistent) === Pair{TT,TT}(T(103), T(105)) - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(105) + @test unsafe_load(p, :sequentially_consistent) === T(10) + @test unsafe_store!(p, T(1), :sequentially_consistent) === p + @test unsafe_load(p, :sequentially_consistent) === T(1) + @test unsafe_replace!(p, T(1), T(100), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(1), true)) + @test unsafe_load(p, :sequentially_consistent) === T(100) + @test unsafe_replace!(p, T(1), T(1), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(100), false)) + @test unsafe_load(p, :sequentially_consistent) === T(100) + @test unsafe_modify!(p, add, T(1), :sequentially_consistent) === Pair{TT,TT}(T(100), T(101)) + @test unsafe_modify!(p, add, T(1), :sequentially_consistent) === Pair{TT,TT}(T(101), T(102)) + @test unsafe_load(p, :sequentially_consistent) === T(102) + @test unsafe_swap!(p, T(103), :sequentially_consistent) === T(102) + @test unsafe_replace!(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(103), false)) + @test unsafe_load(p, :sequentially_consistent) === T(103) + @test unsafe_modify!(p, Returns(T(105)), nothing, :sequentially_consistent) === Pair{TT,TT}(T(103), T(105)) + @test unsafe_load(p, :sequentially_consistent) === T(105) end if TT === Any - @test Core.Intrinsics.atomic_pointermodify(p, swap, S(105), 
:sequentially_consistent) === Pair{TT,TT}(T(105), S(105)) - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === S(105) - @test Core.Intrinsics.atomic_pointerset(p, S(1), :sequentially_consistent) === p - @test Core.Intrinsics.atomic_pointerswap(p, S(100), :sequentially_consistent) === S(1) - @test Core.Intrinsics.atomic_pointerreplace(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((S(100), false)) - @test Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((S(100), true)) - @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(2) + @test unsafe_modify!(p, swap, S(105), :sequentially_consistent) === Pair{TT,TT}(T(105), S(105)) + @test unsafe_load(p, :sequentially_consistent) === S(105) + @test unsafe_store!(p, S(1), :sequentially_consistent) === p + @test unsafe_swap!(p, S(100), :sequentially_consistent) === S(1) + @test unsafe_replace!(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((S(100), false)) + @test unsafe_replace!(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((S(100), true)) + @test unsafe_load(p, :sequentially_consistent) === T(2) end end)(TT,) end @@ -314,38 +326,38 @@ Base.show(io::IO, a::IntWrap) = print(io, "IntWrap(", a.x, ")") @test_throws TypeError Core.Intrinsics.atomic_pointerset(p, S(1), :sequentially_consistent) @test_throws TypeError Core.Intrinsics.atomic_pointerswap(p, S(100), :sequentially_consistent) @test_throws TypeError Core.Intrinsics.atomic_pointerreplace(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent) - r2 = Core.Intrinsics.pointerref(p, 1, 1) + r2 = unsafe_load(p, 1) @test r2 isa IntWrap && r2.x === 10 === r[].x && r2 !== r[] @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent) - r2 = Core.Intrinsics.atomic_pointerref(p, 
:sequentially_consistent) + r2 = unsafe_load(p, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 10 === r[].x && r2 !== r[] - @test Core.Intrinsics.atomic_pointerset(p, T(1), :sequentially_consistent) === p - r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) + @test unsafe_store!(p, T(1), :sequentially_consistent) === p + r2 = unsafe_load(p, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 1 === r[].x && r2 !== r[] - r2, succ = Core.Intrinsics.atomic_pointerreplace(p, T(1), T(100), :sequentially_consistent, :sequentially_consistent) + r2, succ = unsafe_replace!(p, T(1), T(100), :sequentially_consistent, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 1 && r[].x === 100 && r2 !== r[] @test succ - r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) + r2 = unsafe_load(p, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 100 === r[].x && r2 !== r[] - r2, succ = Core.Intrinsics.atomic_pointerreplace(p, T(1), T(1), :sequentially_consistent, :sequentially_consistent) + r2, succ = unsafe_replace!(p, T(1), T(1), :sequentially_consistent, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 100 === r[].x && r2 !== r[] @test !succ - r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) + r2 = unsafe_load(p, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 100 === r[].x && r2 !== r[] - r2, r3 = Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) + r2, r3 = unsafe_modify!(p, add, T(1), :sequentially_consistent) @test r2 isa IntWrap && r2.x === 100 !== r[].x && r2 !== r[] @test r3 isa IntWrap && r3.x === 101 === r[].x && r3 !== r[] - r2, r3 = Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) + r2, r3 = unsafe_modify!(p, add, T(1), :sequentially_consistent) @test r2 isa IntWrap && r2.x === 101 !== r[].x && r2 !== r[] @test r3 isa IntWrap && r3.x === 102 === r[].x && r3 !== r[] - r2 = Core.Intrinsics.atomic_pointerref(p, 
:sequentially_consistent) + r2 = unsafe_load(p, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 102 === r[].x && r2 !== r[] - r2 = Core.Intrinsics.atomic_pointerswap(p, T(103), :sequentially_consistent) + r2 = unsafe_swap!(p, T(103), :sequentially_consistent) @test r2 isa IntWrap && r2.x === 102 !== r[].x && r[].x == 103 && r2 !== r[] - r2, succ = Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) + r2, succ = unsafe_replace!(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 103 === r[].x && r2 !== r[] @test !succ - r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) + r2 = unsafe_load(p, :sequentially_consistent) @test r2 isa IntWrap && r2.x === 103 === r[].x && r2 !== r[] end end)() From 112554e1a533cebad4cb0daa27df59636405c075 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= Date: Tue, 6 Jun 2023 01:01:44 +0100 Subject: [PATCH 117/290] [test] Automatically install `llvm-tools` when running `llvmpasses` (#49985) --- test/llvmpasses/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/test/llvmpasses/Makefile b/test/llvmpasses/Makefile index ec0333178c225..7318d1b67da02 100644 --- a/test/llvmpasses/Makefile +++ b/test/llvmpasses/Makefile @@ -9,6 +9,7 @@ TESTS_jl := $(patsubst $(SRCDIR)/%,%,$(wildcard $(SRCDIR)/*.jl)) TESTS := $(TESTS_ll) $(TESTS_jl) . 
$(TESTS): + $(MAKE) -C $(JULIAHOME)/deps install-llvm-tools PATH=$(build_bindir):$(build_depsbindir):$$PATH \ LD_LIBRARY_PATH=${build_libdir}:$$LD_LIBRARY_PATH \ $(build_depsbindir)/lit/lit.py -v "$(addprefix $(SRCDIR)/,$@)" From 1bb3d26d80b32b05270c3be55ae85036b8d9f72c Mon Sep 17 00:00:00 2001 From: DilumAluthgeBot <43731525+DilumAluthgeBot@users.noreply.github.com> Date: Tue, 6 Jun 2023 07:39:36 -0400 Subject: [PATCH 118/290] =?UTF-8?q?=F0=9F=A4=96=20[master]=20Bump=20the=20?= =?UTF-8?q?Pkg=20stdlib=20from=209c01707a2=20to=204de1826bc=20(#50078)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Pkg-4de1826bcb49b8576603ba6a22ed1c6fecc66a10.tar.gz/md5 | 1 + .../Pkg-4de1826bcb49b8576603ba6a22ed1c6fecc66a10.tar.gz/sha512 | 1 + .../Pkg-9c01707a20478b859e6fd995b5e16c82e7096d92.tar.gz/md5 | 1 - .../Pkg-9c01707a20478b859e6fd995b5e16c82e7096d92.tar.gz/sha512 | 1 - stdlib/Pkg.version | 2 +- 5 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 deps/checksums/Pkg-4de1826bcb49b8576603ba6a22ed1c6fecc66a10.tar.gz/md5 create mode 100644 deps/checksums/Pkg-4de1826bcb49b8576603ba6a22ed1c6fecc66a10.tar.gz/sha512 delete mode 100644 deps/checksums/Pkg-9c01707a20478b859e6fd995b5e16c82e7096d92.tar.gz/md5 delete mode 100644 deps/checksums/Pkg-9c01707a20478b859e6fd995b5e16c82e7096d92.tar.gz/sha512 diff --git a/deps/checksums/Pkg-4de1826bcb49b8576603ba6a22ed1c6fecc66a10.tar.gz/md5 b/deps/checksums/Pkg-4de1826bcb49b8576603ba6a22ed1c6fecc66a10.tar.gz/md5 new file mode 100644 index 0000000000000..5bcff2bb6fe1a --- /dev/null +++ b/deps/checksums/Pkg-4de1826bcb49b8576603ba6a22ed1c6fecc66a10.tar.gz/md5 @@ -0,0 +1 @@ +44deb23c240d210544eea31317ef56ab diff --git a/deps/checksums/Pkg-4de1826bcb49b8576603ba6a22ed1c6fecc66a10.tar.gz/sha512 b/deps/checksums/Pkg-4de1826bcb49b8576603ba6a22ed1c6fecc66a10.tar.gz/sha512 new file mode 100644 index 0000000000000..b6407afe49ea4 --- /dev/null +++ 
b/deps/checksums/Pkg-4de1826bcb49b8576603ba6a22ed1c6fecc66a10.tar.gz/sha512 @@ -0,0 +1 @@ +9d043a647efad34be0fd68778f7ae9643c2620ec3b69e80868dee4d3a889aab90b9fdd1932de043f65ea29348d65ceee331aaba041a9f9773d90d0eb30d920af diff --git a/deps/checksums/Pkg-9c01707a20478b859e6fd995b5e16c82e7096d92.tar.gz/md5 b/deps/checksums/Pkg-9c01707a20478b859e6fd995b5e16c82e7096d92.tar.gz/md5 deleted file mode 100644 index dc9660f97087f..0000000000000 --- a/deps/checksums/Pkg-9c01707a20478b859e6fd995b5e16c82e7096d92.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -16da059096b4981e389884a5232fb3c6 diff --git a/deps/checksums/Pkg-9c01707a20478b859e6fd995b5e16c82e7096d92.tar.gz/sha512 b/deps/checksums/Pkg-9c01707a20478b859e6fd995b5e16c82e7096d92.tar.gz/sha512 deleted file mode 100644 index c7e946d70bbdb..0000000000000 --- a/deps/checksums/Pkg-9c01707a20478b859e6fd995b5e16c82e7096d92.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -d162771efdbae9dd5c349319595b094577b4612c11b163cfbf16242a818769975cb7a8c5188c1cba75ae804b430d52e9a08c1f394286f2ae239bec7f8c7e3b71 diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version index d9eba65f4ce77..dd65fe3340b77 100644 --- a/stdlib/Pkg.version +++ b/stdlib/Pkg.version @@ -1,4 +1,4 @@ PKG_BRANCH = master -PKG_SHA1 = 9c01707a20478b859e6fd995b5e16c82e7096d92 +PKG_SHA1 = 4de1826bcb49b8576603ba6a22ed1c6fecc66a10 PKG_GIT_URL := https://github.com/JuliaLang/Pkg.jl.git PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1 From 53bcb3987e64e72b6550b75c0d0c315cf15ad476 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Tue, 6 Jun 2023 08:25:00 -0500 Subject: [PATCH 119/290] Fixup `hash(::Real)` broken by #49996 (#50067) * fixup for 49996 and add test from 50065 --------- Co-authored-by: Lilith Hafner --- base/float.jl | 13 +++++++------ test/hashing.jl | 11 ++++++++--- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/base/float.jl b/base/float.jl index 0f05027b522a2..37f91c1f511bb 100644 --- a/base/float.jl +++ b/base/float.jl @@ -675,27 
+675,28 @@ function hash(x::Real, h::UInt) den = -den end num_z = trailing_zeros(num) + num >>= num_z den_z = trailing_zeros(den) den >>= den_z pow += num_z - den_z # handle values representable as Int64, UInt64, Float64 if den == 1 - left = top_set_bit(abs(num)) - den_z - right = pow + left = top_set_bit(abs(num)) + pow + right = pow + den_z if -1074 <= right if 0 <= right - left <= 63 && return hash(Int64(num) << Int(pow-num_z), h) - left <= 64 && !signbit(num) && return hash(UInt64(num) << Int(pow-num_z), h) + left <= 63 && return hash(Int64(num) << Int(pow), h) + left <= 64 && !signbit(num) && return hash(UInt64(num) << Int(pow), h) end # typemin(Int64) handled by Float64 case - left <= 1024 && left - right <= 53 && return hash(ldexp(Float64(num), pow-num_z), h) + left <= 1024 && left - right <= 53 && return hash(ldexp(Float64(num), pow), h) end end # handle generic rational values h = hash_integer(den, h) h = hash_integer(pow, h) - h = hash_integer(num >> num_z, h) + h = hash_integer(num, h) return h end diff --git a/test/hashing.jl b/test/hashing.jl index 5230442354d99..de64cd80cb49d 100644 --- a/test/hashing.jl +++ b/test/hashing.jl @@ -8,8 +8,14 @@ types = Any[ Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Float32, Float64, Rational{Int8}, Rational{UInt8}, Rational{Int16}, Rational{UInt16}, - Rational{Int32}, Rational{UInt32}, Rational{Int64}, Rational{UInt64} + Rational{Int32}, Rational{UInt32}, Rational{Int64}, Rational{UInt64}, + BigFloat, #BigInt, # TODO: BigInt hashing is broken on 32-bit systems ] +if Int === Int64 + push!(types, BigInt) +else + @test_broken hash(12345678901234) == hash(big(12345678901234)) +end vals = vcat( typemin(Int64), -Int64(maxintfloat(Float64)) .+ Int64[-4:1;], @@ -51,8 +57,7 @@ let collides = 0 collides += eq end end - # each pair of types has one collision for these values - @test collides <= (length(types) - 1)^2 + @test collides <= 452 end @test hash(0.0) != hash(-0.0) From 
4d2f35edfa05219380dd355df639e4c1f43817b3 Mon Sep 17 00:00:00 2001 From: Mike Boyle Date: Tue, 6 Jun 2023 16:23:45 +0000 Subject: [PATCH 120/290] Update URL for "lexical scoping" (#50085) --- doc/src/manual/variables-and-scoping.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/manual/variables-and-scoping.md b/doc/src/manual/variables-and-scoping.md index 8bd62fe7ee5bf..c763d62680091 100644 --- a/doc/src/manual/variables-and-scoping.md +++ b/doc/src/manual/variables-and-scoping.md @@ -33,7 +33,7 @@ Notably missing from this table are which do *not* introduce new scopes. The three types of scopes follow somewhat different rules which will be explained below. -Julia uses [lexical scoping](https://en.wikipedia.org/wiki/Scope_%28computer_science%29#Lexical_scoping_vs._dynamic_scoping), +Julia uses [lexical scoping](https://en.wikipedia.org/wiki/Scope_(computer_science)#Lexical_scope_vs._dynamic_scope), meaning that a function's scope does not inherit from its caller's scope, but from the scope in which the function was defined. For example, in the following code the `x` inside `foo` refers to the `x` in the global scope of its module `Bar`: From e1c0d83692accffcc63191233f7f9dd758c23f1b Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Fri, 2 Jun 2023 17:36:45 -0400 Subject: [PATCH 121/290] Prevent registering static FASTTLS with multiple libjulias When there are multiple copies of libjulia present in the application, they can both end up trying to use the same static fast tls storage. The alternative options I considered for this were: - Add `jl_options.use_static_fasttls` and update PackageCompiler.jl to set the flag to FALSE - Use `dlvsym` in the CLI loader and attach to the exported jl_pgcstack_static symbols `__attribute__((symver(...)))` The second approach is not available on musl, but the first one seems like the ideal way to handle this long-term. 
--- cli/loader_lib.c | 9 +++++++-- src/julia_fasttls.h | 8 ++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/cli/loader_lib.c b/cli/loader_lib.c index a344910478ccd..50519db29d811 100644 --- a/cli/loader_lib.c +++ b/cli/loader_lib.c @@ -527,8 +527,13 @@ __attribute__((constructor)) void jl_load_libjulia_internal(void) { } void *fptr = lookup_symbol(RTLD_DEFAULT, "jl_get_pgcstack_static"); void *(*key)(void) = lookup_symbol(RTLD_DEFAULT, "jl_pgcstack_addr_static"); - if (fptr != NULL && key != NULL) - jl_pgcstack_setkey(fptr, key); + _Atomic char *semaphore = lookup_symbol(RTLD_DEFAULT, "jl_pgcstack_static_semaphore"); + if (fptr != NULL && key != NULL && semaphore != NULL) { + char already_used = 0; + atomic_compare_exchange_strong(semaphore, &already_used, 1); + if (already_used == 0) // RMW succeeded - we have exclusive access + jl_pgcstack_setkey(fptr, key); + } #endif // jl_options must be initialized very early, in case an embedder sets some diff --git a/src/julia_fasttls.h b/src/julia_fasttls.h index 08f465badf8d3..1c0929717b293 100644 --- a/src/julia_fasttls.h +++ b/src/julia_fasttls.h @@ -3,6 +3,13 @@ #ifndef JL_FASTTLS_H #define JL_FASTTLS_H +#ifdef __cplusplus +#include <atomic> +#define _Atomic(T) std::atomic<T> +#else +#include <stdatomic.h> +#endif + // Thread-local storage access #ifdef __cplusplus @@ -25,6 +32,7 @@ typedef jl_gcframe_t **(jl_get_pgcstack_func)(void); #if !defined(_OS_DARWIN_) && !defined(_OS_WINDOWS_) #define JULIA_DEFINE_FAST_TLS \ static __attribute__((tls_model("local-exec"))) __thread jl_gcframe_t **jl_pgcstack_localexec; \ +JL_DLLEXPORT _Atomic(char) jl_pgcstack_static_semaphore; \ JL_DLLEXPORT jl_gcframe_t **jl_get_pgcstack_static(void) \ { \ return jl_pgcstack_localexec; \ From c3ea5dc9dc3f903a75107788858d20123bcfb0b4 Mon Sep 17 00:00:00 2001 From: Oscar Smith Date: Tue, 6 Jun 2023 13:26:20 -0400 Subject: [PATCH 122/290] fix `hash(::BigInt)` on 32 bit systems (#50076) * don't define 
hash(::BigInt) on 32 bit systems --- base/gmp.jl 
| 15 ++++++--------- test/hashing.jl | 9 ++------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/base/gmp.jl b/base/gmp.jl index 3f809fd99d1cc..b80be35c25fcd 100644 --- a/base/gmp.jl +++ b/base/gmp.jl @@ -843,8 +843,8 @@ Base.deepcopy_internal(x::BigInt, stackdict::IdDict) = get!(() -> MPZ.set(x), st ## streamlined hashing for BigInt, by avoiding allocation from shifts ## -if Limb === UInt - # this condition is true most (all?) of the time, and in this case we can define +if Limb === UInt64 === UInt + # On 64 bit systems we can define # an optimized version for BigInt of hash_integer (used e.g. for Rational{BigInt}), # and of hash @@ -854,7 +854,7 @@ if Limb === UInt GC.@preserve n begin s = n.size s == 0 && return hash_integer(0, h) - p = convert(Ptr{UInt}, n.d) + p = convert(Ptr{UInt64}, n.d) b = unsafe_load(p) h ⊻= hash_uint(ifelse(s < 0, -b, b) ⊻ h) for k = 2:abs(s) @@ -864,14 +864,11 @@ if Limb === UInt end end - _divLimb(n) = UInt === UInt64 ? n >>> 6 : n >>> 5 - _modLimb(n) = UInt === UInt64 ? 
n & 63 : n & 31 - function hash(x::BigInt, h::UInt) GC.@preserve x begin sz = x.size sz == 0 && return hash(0, h) - ptr = Ptr{UInt}(x.d) + ptr = Ptr{UInt64}(x.d) if sz == 1 return hash(unsafe_load(ptr), h) elseif sz == -1 @@ -880,8 +877,8 @@ if Limb === UInt end pow = trailing_zeros(x) nd = Base.ndigits0z(x, 2) - idx = _divLimb(pow) + 1 - shift = _modLimb(pow) % UInt + idx = (pow >>> 6) + 1 + shift = (pow & 63) % UInt upshift = BITS_PER_LIMB - shift asz = abs(sz) if shift == 0 diff --git a/test/hashing.jl b/test/hashing.jl index de64cd80cb49d..1c7c37d00f93b 100644 --- a/test/hashing.jl +++ b/test/hashing.jl @@ -9,13 +9,8 @@ types = Any[ Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Float32, Float64, Rational{Int8}, Rational{UInt8}, Rational{Int16}, Rational{UInt16}, Rational{Int32}, Rational{UInt32}, Rational{Int64}, Rational{UInt64}, - BigFloat, #BigInt, # TODO: BigInt hashing is broken on 32-bit systems + BigFloat, BigInt, Rational{BigInt} ] -if Int === Int64 - push!(types, BigInt) -else - @test_broken hash(12345678901234) == hash(big(12345678901234)) -end vals = vcat( typemin(Int64), -Int64(maxintfloat(Float64)) .+ Int64[-4:1;], @@ -57,7 +52,7 @@ let collides = 0 collides += eq end end - @test collides <= 452 + @test collides <= 516 end @test hash(0.0) != hash(-0.0) From bbbe8c3bf201ba44c22d4d8f9787af52869c77ad Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Wed, 7 Jun 2023 07:08:30 +0900 Subject: [PATCH 123/290] missing: improve effects for some missing operations (#50080) --- base/missing.jl | 12 +++++++----- test/missing.jl | 9 ++++++++- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/base/missing.jl b/base/missing.jl index 4544c2b38c460..f6f5fe507260b 100644 --- a/base/missing.jl +++ b/base/missing.jl @@ -12,7 +12,7 @@ where it is not supported. The error message, in the `msg` field may provide more specific details. 
""" struct MissingException <: Exception - msg::String + msg::AbstractString end showerror(io::IO, ex::MissingException) = @@ -136,11 +136,14 @@ max(::Missing, ::Missing) = missing max(::Missing, ::Any) = missing max(::Any, ::Missing) = missing +missing_conversion_msg(@nospecialize T) = + LazyString("cannot convert a missing value to type ", T, ": use Union{", T, ", Missing} instead") + # Rounding and related functions round(::Missing, ::RoundingMode=RoundNearest; sigdigits::Integer=0, digits::Integer=0, base::Integer=0) = missing round(::Type{>:Missing}, ::Missing, ::RoundingMode=RoundNearest) = missing round(::Type{T}, ::Missing, ::RoundingMode=RoundNearest) where {T} = - throw(MissingException("cannot convert a missing value to type $T: use Union{$T, Missing} instead")) + throw(MissingException(missing_conversion_msg(T))) round(::Type{T}, x::Any, r::RoundingMode=RoundNearest) where {T>:Missing} = round(nonmissingtype_checked(T), x, r) # to fix ambiguities round(::Type{T}, x::Rational{Tr}, r::RoundingMode=RoundNearest) where {T>:Missing,Tr} = round(nonmissingtype_checked(T), x, r) @@ -151,8 +154,7 @@ for f in (:(ceil), :(floor), :(trunc)) @eval begin ($f)(::Missing; sigdigits::Integer=0, digits::Integer=0, base::Integer=0) = missing ($f)(::Type{>:Missing}, ::Missing) = missing - ($f)(::Type{T}, ::Missing) where {T} = - throw(MissingException("cannot convert a missing value to type $T: use Union{$T, Missing} instead")) + ($f)(::Type{T}, ::Missing) where {T} = throw(MissingException(missing_conversion_msg(T))) ($f)(::Type{T}, x::Any) where {T>:Missing} = $f(nonmissingtype_checked(T), x) # to fix ambiguities ($f)(::Type{T}, x::Rational) where {T>:Missing} = $f(nonmissingtype_checked(T), x) @@ -265,7 +267,7 @@ keys(itr::SkipMissing) = Iterators.filter(i -> @inbounds(itr.x[i]) !== missing, keys(itr.x)) @propagate_inbounds function getindex(itr::SkipMissing, I...) v = itr.x[I...] 
- v === missing && throw(MissingException("the value at index $I is missing")) + v === missing && throw(MissingException(LazyString("the value at index ", I, " is missing"))) v end diff --git a/test/missing.jl b/test/missing.jl index 489d264192fef..36155eb32fe49 100644 --- a/test/missing.jl +++ b/test/missing.jl @@ -643,4 +643,11 @@ end @test isequal(sort(X, alg=MergeSort, rev=true), XRP) end -sortperm(reverse([NaN, missing, NaN, missing])) +@test (sortperm(reverse([NaN, missing, NaN, missing])); true) + +# use LazyString for MissingException to get the better effects +for func in (round, ceil, floor, trunc) + @testset let func = func + @test Core.Compiler.is_foldable(Base.infer_effects(func, (Type{Int},Union{Int,Missing}))) + end +end From e111c01d0483ac12885e3e1ea2132aa98edfb623 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 6 Jun 2023 19:34:53 -0400 Subject: [PATCH 124/290] add default order for atomics to docstring (#50086) --- base/expr.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/base/expr.jl b/base/expr.jl index 5952904b3d17b..e007306063db1 100644 --- a/base/expr.jl +++ b/base/expr.jl @@ -1044,6 +1044,7 @@ end @atomic order ex Mark `var` or `ex` as being performed atomically, if `ex` is a supported expression. +If no `order` is specified it defaults to :sequentially_consistent. @atomic a.b.x = new @atomic a.b.x += addend From 0b4754694e27991c39095433e6c915f9617f390a Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Wed, 7 Jun 2023 10:04:05 +0900 Subject: [PATCH 125/290] array: make `DimensionMismatch` lazy-string friendly (#50079) At the moment, the `msg` field of `DimensionMismatch` is typed as `String`. This means lazy strings passed to `DimensionMismatch` are instantly instantiated, defeating the whole purpose of using `LazyString`. This commit tweaks the field type to `AbstractString`. This way, lazy strings get instantiated during the exception handling later on. 
--- base/array.jl | 2 +- test/ranges.jl | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/base/array.jl b/base/array.jl index 95c77ef8a0879..fc9a2a8a23b6f 100644 --- a/base/array.jl +++ b/base/array.jl @@ -9,7 +9,7 @@ The objects called do not have matching dimensionality. Optional argument `msg` descriptive error string. """ struct DimensionMismatch <: Exception - msg::String + msg::AbstractString end DimensionMismatch() = DimensionMismatch("") diff --git a/test/ranges.jl b/test/ranges.jl index ec69c57fc0a8f..98233267d03a9 100644 --- a/test/ranges.jl +++ b/test/ranges.jl @@ -2477,3 +2477,13 @@ end @test !occursin("_colon", ir) @test !occursin("StepRange", ir) end + +# DimensionMismatch and LazyString +function check_ranges(rx, ry) + if length(rx) != length(ry) + throw(DimensionMismatch(lazy"length of rx, $(length(rx)), does not equal length of ry, $(length(ry))")) + end + rx, ry +end +@test Core.Compiler.is_foldable(Base.infer_effects(check_ranges, (UnitRange{Int},UnitRange{Int}))) +# TODO JET.@test_opt check_ranges(1:2, 3:4) From dbd82a4dbab0582a345679eb83b2d99d40c0356a Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Wed, 7 Jun 2023 04:40:07 +0000 Subject: [PATCH 126/290] Update newpm pass pipeline (#49747) Co-authored-by: Gabriel Baraldi --- src/pipeline.cpp | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/src/pipeline.cpp b/src/pipeline.cpp index e5822bbf74aeb..965b0079c3b30 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -34,17 +34,21 @@ // NewPM needs to manually include all the pass headers #include +#include #include +#include #include #include #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -52,18 +56,23 @@ #include #include #include +#include #include #include #include #include #include #include +#include +#include #include #include 
#include #include #include #include +#include +#include #include #include #include @@ -224,21 +233,12 @@ namespace { //The actual pipelines //TODO Things we might want to consider: -//? annotation2metadata pass -//? force function attributes pass -//? annotation remarks pass -//? infer function attributes pass -//? lower expect intrinsic pass -//? warn missed transformations pass //* For vectorization //? loop unroll/jam after loop vectorization //? optimization remarks pass //? cse/cvp/instcombine/bdce/sccp/licm/unswitch after loop vectorization ( // cleanup as much as possible before trying to slp vectorize) -//? vectorcombine pass //* For optimization -//? float2int pass -//? lower constant intrinsics pass //? loop sink pass //? hot-cold splitting pass @@ -249,7 +249,10 @@ static void buildBasicPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimiza #ifdef JL_DEBUG_BUILD addVerificationPasses(MPM, options.llvm_only); #endif + // Place after verification in case we want to force it anyways + MPM.addPass(ForceFunctionAttrsPass()); invokePipelineStartCallbacks(MPM, PB, O); + MPM.addPass(Annotation2MetadataPass()); MPM.addPass(ConstantMergePass()); if (!options.dump_native) { JULIA_PASS(MPM.addPass(CPUFeaturesPass())); @@ -259,6 +262,7 @@ static void buildBasicPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimiza } { FunctionPassManager FPM; + FPM.addPass(LowerExpectIntrinsicPass()); FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); if (O.getSpeedupLevel() > 0) { FPM.addPass(SROAPass()); @@ -316,6 +320,12 @@ static void buildBasicPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimiza } } invokeOptimizerLastCallbacks(MPM, PB, O); + { + FunctionPassManager FPM; + FPM.addPass(WarnMissedTransformationsPass()); + FPM.addPass(AnnotationRemarksPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } addSanitizerPasses(MPM, O); JULIA_PASS(MPM.addPass(createModuleToFunctionPassAdaptor(DemoteFloat16Pass()))); } @@ -325,10 +335,14 @@ 
static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat #ifdef JL_DEBUG_BUILD addVerificationPasses(MPM, options.llvm_only); #endif + // Place after verification in case we want to force it anyways + MPM.addPass(ForceFunctionAttrsPass()); invokePipelineStartCallbacks(MPM, PB, O); + MPM.addPass(Annotation2MetadataPass()); MPM.addPass(ConstantMergePass()); { FunctionPassManager FPM; + FPM.addPass(LowerExpectIntrinsicPass()); JULIA_PASS(FPM.addPass(PropagateJuliaAddrspacesPass())); //TODO consider not using even basic simplification //options here, and adding a run of CVP to take advantage @@ -348,6 +362,8 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat { FunctionPassManager FPM; JULIA_PASS(FPM.addPass(AllocOptPass())); + FPM.addPass(Float2IntPass()); + FPM.addPass(LowerConstantIntrinsicsPass()); FPM.addPass(InstCombinePass()); FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); @@ -398,6 +414,8 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat LPM.addPass(LoopIdiomRecognizePass()); LPM.addPass(IndVarSimplifyPass()); LPM.addPass(LoopDeletionPass()); + // This unroll will only unroll loops when the trip count is known and small, + // so that no loop remains LPM.addPass(LoopFullUnrollPass()); invokeLoopOptimizerEndCallbacks(LPM, PB, O); //We don't know if the loop end callbacks support MSSA @@ -427,6 +445,8 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat LPM.addPass(LoopInstSimplifyPass()); FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); } + FPM.addPass(LoopDistributePass()); + FPM.addPass(InjectTLIMappings()); invokeScalarOptimizerCallbacks(FPM, PB, O); //TODO look into loop vectorize options FPM.addPass(LoopVectorizePass()); @@ -435,8 +455,13 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat 
FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); FPM.addPass(SLPVectorizerPass()); invokeVectorizerCallbacks(FPM, PB, O); + FPM.addPass(VectorCombinePass()); FPM.addPass(ADCEPass()); //TODO add BDCEPass here? + // This unroll will unroll vectorized loops + // as well as loops that we tried but failed to vectorize + FPM.addPass(LoopUnrollPass(LoopUnrollOptions(O.getSpeedupLevel(), /*OnlyWhenForced = */ false, /*ForgetSCEV = */false))); + FPM.addPass(WarnMissedTransformationsPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } if (options.lower_intrinsics) { @@ -476,6 +501,7 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } invokeOptimizerLastCallbacks(MPM, PB, O); + MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass())); addSanitizerPasses(MPM, O); { FunctionPassManager FPM; From 36c595396200427f683f1d4d7b813b9706cb2710 Mon Sep 17 00:00:00 2001 From: Oscar Smith Date: Wed, 7 Jun 2023 09:26:17 -0400 Subject: [PATCH 127/290] More `hash` optimization (#50041) * speed up hash of Float32/Float16 since they never are big numbers, and hash of arbitrary real types by not hashing the denominator when it is one --- base/float.jl | 48 +++++++++++++++++++++++++++++------------------- base/gmp.jl | 1 - base/rational.jl | 9 ++++++--- 3 files changed, 35 insertions(+), 23 deletions(-) diff --git a/base/float.jl b/base/float.jl index 37f91c1f511bb..d5280ef74fbce 100644 --- a/base/float.jl +++ b/base/float.jl @@ -635,31 +635,40 @@ isinf(x::Real) = !isnan(x) & !isfinite(x) isinf(x::IEEEFloat) = abs(x) === oftype(x, Inf) const hx_NaN = hash_uint64(reinterpret(UInt64, NaN)) -let Tf = Float64, Tu = UInt64, Ti = Int64 - @eval function hash(x::$Tf, h::UInt) - # see comments on trunc and hash(Real, UInt) - if $(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti))) - xi = fptosi($Ti, x) - if isequal(xi, x) - return hash(xi, h) - end - elseif 
$(Tf(typemin(Tu))) <= x < $(Tf(typemax(Tu))) - xu = fptoui($Tu, x) - if isequal(xu, x) - return hash(xu, h) - end - elseif isnan(x) - return hx_NaN ⊻ h # NaN does not have a stable bit pattern +function hash(x::Float64, h::UInt) + # see comments on trunc and hash(Real, UInt) + if typemin(Int64) <= x < typemax(Int64) + xi = fptosi(Int64, x) + if isequal(xi, x) + return hash(xi, h) + end + elseif typemin(UInt64) <= x < typemax(UInt64) + xu = fptoui(UInt64, x) + if isequal(xu, x) + return hash(xu, h) end - return hash_uint64(bitcast(UInt64, x)) - 3h + elseif isnan(x) + return hx_NaN ⊻ h # NaN does not have a stable bit pattern end + return hash_uint64(bitcast(UInt64, x)) - 3h end hash(x::Float32, h::UInt) = hash(Float64(x), h) -hash(x::Float16, h::UInt) = hash(Float64(x), h) -## generic hashing for rational values ## +function hash(x::Float16, h::UInt) + # see comments on trunc and hash(Real, UInt) + if isfinite(x) # all finite Float16 fit in Int64 + xi = fptosi(Int64, x) + if isequal(xi, x) + return hash(xi, h) + end + elseif isnan(x) + return hx_NaN ⊻ h # NaN does not have a stable bit pattern + end + return hash_uint64(bitcast(UInt64, Float64(x))) - 3h +end +## generic hashing for rational values ## function hash(x::Real, h::UInt) # decompose x as num*2^pow/den num, pow, den = decompose(x) @@ -691,10 +700,11 @@ function hash(x::Real, h::UInt) end # typemin(Int64) handled by Float64 case left <= 1024 && left - right <= 53 && return hash(ldexp(Float64(num), pow), h) end + else + h = hash_integer(den, h) end # handle generic rational values - h = hash_integer(den, h) h = hash_integer(pow, h) h = hash_integer(num, h) return h diff --git a/base/gmp.jl b/base/gmp.jl index b80be35c25fcd..8a1451be7a590 100644 --- a/base/gmp.jl +++ b/base/gmp.jl @@ -891,7 +891,6 @@ if Limb === UInt64 === UInt if nd <= 1024 && nd - pow <= 53 return hash(ldexp(flipsign(Float64(limb), sz), pow), h) end - h = hash_integer(1, h) h = hash_integer(pow, h) h ⊻= hash_uint(flipsign(limb, sz) ⊻ h) for 
idx = idx+1:asz diff --git a/base/rational.jl b/base/rational.jl index 6ab022736388e..4ffae9043d10a 100644 --- a/base/rational.jl +++ b/base/rational.jl @@ -545,11 +545,14 @@ function hash(x::Rational{<:BitInteger64}, h::UInt) pow = trailing_zeros(den) den >>= pow pow = -pow - if den == 1 && uabs(num) < UInt64(maxintfloat(Float64)) - return hash(ldexp(Float64(num),pow),h) + if den == 1 + if uabs(num) < UInt64(maxintfloat(Float64)) + return hash(ldexp(Float64(num),pow),h) + end + else + h = hash_integer(den, h) end end - h = hash_integer(den, h) h = hash_integer(pow, h) h = hash_integer(num, h) return h From 13e40afdec396211efaeadf641cd0b37322ae378 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Wed, 7 Jun 2023 09:01:12 -0500 Subject: [PATCH 128/290] Skip flakey tests in CI (#50071) --- stdlib/LinearAlgebra/test/addmul.jl | 9 ++-- test/cmdlineargs.jl | 64 ++++++++++++++++------------- test/threads.jl | 2 +- 3 files changed, 42 insertions(+), 33 deletions(-) diff --git a/stdlib/LinearAlgebra/test/addmul.jl b/stdlib/LinearAlgebra/test/addmul.jl index 72fdf687bf5c3..3fff8289242f7 100644 --- a/stdlib/LinearAlgebra/test/addmul.jl +++ b/stdlib/LinearAlgebra/test/addmul.jl @@ -164,7 +164,8 @@ end Bc = Matrix(B) returned_mat = mul!(C, A, B, α, β) @test returned_mat === C - @test collect(returned_mat) ≈ α * Ac * Bc + β * Cc rtol=rtol + # This test is skipped because it is flakey, but should be fixed and put back (see #49966) + @test_skip collect(returned_mat) ≈ α * Ac * Bc + β * Cc rtol=rtol y = C[:, 1] x = B[:, 1] @@ -189,7 +190,8 @@ end returned_mat = mul!(C, Af, Bf, α, β) @test returned_mat === C - @test collect(returned_mat) ≈ α * Ac * Bc + β * Cc rtol=rtol + # This test is skipped because it is flakey, but should be fixed and put back (see #49966) + @test_skip collect(returned_mat) ≈ α * Ac * Bc + β * Cc rtol=rtol end end end @@ -201,7 +203,8 @@ end Bc = Matrix(B) returned_mat = mul!(C, A, B, α, zero(eltype(C))) @test returned_mat === C - @test 
collect(returned_mat) ≈ α * Ac * Bc rtol=rtol + # This test is skipped because it is flakey, but should be fixed and put back (see #49966) + @test_skip collect(returned_mat) ≈ α * Ac * Bc rtol=rtol end end diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl index 7ebed56227d03..9c8c0ac553c24 100644 --- a/test/cmdlineargs.jl +++ b/test/cmdlineargs.jl @@ -300,37 +300,43 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` @test errors_not_signals(`$exename -C invalidtarget`) @test errors_not_signals(`$exename --cpu-target=invalidtarget`) - # -t, --threads - code = "print(Threads.threadpoolsize())" - cpu_threads = ccall(:jl_effective_threads, Int32, ()) - @test string(cpu_threads) == - read(`$exename --threads auto -e $code`, String) == - read(`$exename --threads=auto -e $code`, String) == - read(`$exename -tauto -e $code`, String) == - read(`$exename -t auto -e $code`, String) - for nt in (nothing, "1") - withenv("JULIA_NUM_THREADS" => nt) do - @test read(`$exename --threads=2 -e $code`, String) == - read(`$exename -t 2 -e $code`, String) == "2" + if Sys.iswindows() + # -t, --threads + code = "print(Threads.threadpoolsize())" + cpu_threads = ccall(:jl_effective_threads, Int32, ()) + @test string(cpu_threads) == + read(`$exename --threads auto -e $code`, String) == + read(`$exename --threads=auto -e $code`, String) == + read(`$exename -tauto -e $code`, String) == + read(`$exename -t auto -e $code`, String) + for nt in (nothing, "1") + withenv("JULIA_NUM_THREADS" => nt) do + @test read(`$exename --threads=2 -e $code`, String) == + read(`$exename -t 2 -e $code`, String) == "2" + end end - end - # We want to test oversubscription, but on manycore machines, this can - # actually exhaust limited PID spaces - cpu_threads = max(2*cpu_threads, min(50, 10*cpu_threads)) - if Sys.WORD_SIZE == 32 - cpu_threads = min(cpu_threads, 50) - end - @test read(`$exename -t $cpu_threads -e $code`, String) == string(cpu_threads) - withenv("JULIA_NUM_THREADS" => 
string(cpu_threads)) do - @test read(`$exename -e $code`, String) == string(cpu_threads) - end - @test errors_not_signals(`$exename -t 0`) - @test errors_not_signals(`$exename -t -1`) + # We want to test oversubscription, but on manycore machines, this can + # actually exhaust limited PID spaces + cpu_threads = max(2*cpu_threads, min(50, 10*cpu_threads)) + if Sys.WORD_SIZE == 32 + cpu_threads = min(cpu_threads, 50) + end + @test read(`$exename -t $cpu_threads -e $code`, String) == string(cpu_threads) + withenv("JULIA_NUM_THREADS" => string(cpu_threads)) do + @test read(`$exename -e $code`, String) == string(cpu_threads) + end + @test errors_not_signals(`$exename -t 0`) + @test errors_not_signals(`$exename -t -1`) - # Combining --threads and --procs: --threads does propagate - withenv("JULIA_NUM_THREADS" => nothing) do - code = "print(sum(remotecall_fetch(Threads.threadpoolsize, x) for x in procs()))" - @test read(`$exename -p2 -t2 -e $code`, String) == "6" + # Combining --threads and --procs: --threads does propagate + withenv("JULIA_NUM_THREADS" => nothing) do + code = "print(sum(remotecall_fetch(Threads.threadpoolsize, x) for x in procs()))" + @test read(`$exename -p2 -t2 -e $code`, String) == "6" + end + else + @test_skip "Command line tests with -t are flakey on non-Windows OS" + # Known issue: https://github.com/JuliaLang/julia/issues/49154 + # These tests should be fixed and reenabled on all operating systems. 
end # Combining --threads and invalid -C should yield a decent error diff --git a/test/threads.jl b/test/threads.jl index af752fe715b0e..8189311739e31 100644 --- a/test/threads.jl +++ b/test/threads.jl @@ -312,7 +312,7 @@ close(proc.in) if ( !success(proc) ) || ( timeout ) @error "A \"spawn and wait lots of tasks\" test failed" n proc.exitcode proc.termsignal success(proc) timeout end - if Sys.iswindows() + if Sys.iswindows() || Sys.isapple() # Known failure: https://github.com/JuliaLang/julia/issues/43124 @test_skip success(proc) else From d7d7ffa7f964a9acf9261833bb0c98d442586ef9 Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Wed, 7 Jun 2023 16:46:41 +0000 Subject: [PATCH 129/290] Invoke the passbuilder's extension point callbacks when possible (#50095) Invoke the passbuilder's extension point callbacks when possible --- src/pipeline.cpp | 117 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 98 insertions(+), 19 deletions(-) diff --git a/src/pipeline.cpp b/src/pipeline.cpp index 965b0079c3b30..3ba05a21250a6 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -209,26 +209,105 @@ namespace { ; } - // TODO(vchuravy/maleadt): - // Since we are not using the PassBuilder fully and instead rolling our own, we are missing out on - // TargetMachine::registerPassBuilderCallbacks. We need to find a solution either in working with upstream - // or adapting PassBuilder (or subclassing it) to suite our needs. This is in particular important for - // BPF, NVPTX, and AMDGPU. 
- //TODO implement these once LLVM exposes - //the PassBuilder extension point callbacks - //For now we'll maintain the insertion points even though they don't do anything - //for the sake of documentation +// At any given time exactly one of each pair of overloads is strictly unused +#ifdef _COMPILER_GCC_ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +#endif + +#ifdef _COMPILER_CLANG_ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-function" +#endif + + // Version check for our patch to allow invoking pipeline callbacks + // won't work if built with our LLVM but linked with system LLVM + template<typename PB> std::true_type hasInvokeCallbacks_helper(decltype(&PB::invokePipelineStartEPCallbacks)) JL_NOTSAFEPOINT; + std::false_type hasInvokeCallbacks_helper(...) JL_NOTSAFEPOINT; + + // static constexpr bool hasInvokeCallbacks = decltype(hasInvokeCallbacks_helper<PassBuilder>(nullptr))::value; + //If PB is a nullptr, don't invoke anything (this happens when running julia from opt) - void invokePipelineStartCallbacks(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O) JL_NOTSAFEPOINT {} - void invokePeepholeEPCallbacks(FunctionPassManager &MPM, PassBuilder *PB, OptimizationLevel O) JL_NOTSAFEPOINT {} - void invokeEarlySimplificationCallbacks(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O) JL_NOTSAFEPOINT {} - void invokeCGSCCCallbacks(CGSCCPassManager &MPM, PassBuilder *PB, OptimizationLevel O) JL_NOTSAFEPOINT {} - void invokeOptimizerEarlyCallbacks(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O) JL_NOTSAFEPOINT {} - void invokeLateLoopOptimizationCallbacks(LoopPassManager &MPM, PassBuilder *PB, OptimizationLevel O) JL_NOTSAFEPOINT {} - void invokeLoopOptimizerEndCallbacks(LoopPassManager &MPM, PassBuilder *PB, OptimizationLevel O) JL_NOTSAFEPOINT {} - void invokeScalarOptimizerCallbacks(FunctionPassManager &MPM, PassBuilder *PB, OptimizationLevel O) JL_NOTSAFEPOINT {} - void 
invokeVectorizerCallbacks(FunctionPassManager &MPM, PassBuilder *PB, OptimizationLevel O) JL_NOTSAFEPOINT {} - void invokeOptimizerLastCallbacks(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O) JL_NOTSAFEPOINT {} + template<typename PB_t> + std::enable_if_t<decltype(hasInvokeCallbacks_helper<PB_t>(nullptr))::value, void> invokePipelineStartCallbacks(ModulePassManager &MPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same<PassBuilder, PB_t>::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokePipelineStartEPCallbacks(MPM, O); + } + template<typename PB_t> + std::enable_if_t<decltype(hasInvokeCallbacks_helper<PB_t>(nullptr))::value, void> invokePeepholeEPCallbacks(FunctionPassManager &FPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same<PassBuilder, PB_t>::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokePeepholeEPCallbacks(FPM, O); + } + template<typename PB_t> + std::enable_if_t<decltype(hasInvokeCallbacks_helper<PB_t>(nullptr))::value, void> invokeEarlySimplificationCallbacks(ModulePassManager &MPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same<PassBuilder, PB_t>::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokePipelineEarlySimplificationEPCallbacks(MPM, O); + } + template<typename PB_t> + std::enable_if_t<decltype(hasInvokeCallbacks_helper<PB_t>(nullptr))::value, void> invokeCGSCCCallbacks(CGSCCPassManager &CGPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same<PassBuilder, PB_t>::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokeCGSCCOptimizerLateEPCallbacks(CGPM, O); + } + template<typename PB_t> + std::enable_if_t<decltype(hasInvokeCallbacks_helper<PB_t>(nullptr))::value, void> invokeOptimizerEarlyCallbacks(ModulePassManager &MPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same<PassBuilder, PB_t>::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokeOptimizerEarlyEPCallbacks(MPM, O); + } + template<typename PB_t> + std::enable_if_t<decltype(hasInvokeCallbacks_helper<PB_t>(nullptr))::value, void> invokeLateLoopOptimizationCallbacks(LoopPassManager &LPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same<PassBuilder, PB_t>::value, "Expected PassBuilder as 
second argument!"); + if (!PB) return; + PB->invokeLateLoopOptimizationsEPCallbacks(LPM, O); + } + template<typename PB_t> + std::enable_if_t<decltype(hasInvokeCallbacks_helper<PB_t>(nullptr))::value, void> invokeLoopOptimizerEndCallbacks(LoopPassManager &LPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same<PassBuilder, PB_t>::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokeLoopOptimizerEndEPCallbacks(LPM, O); + } + template<typename PB_t> + std::enable_if_t<decltype(hasInvokeCallbacks_helper<PB_t>(nullptr))::value, void> invokeScalarOptimizerCallbacks(FunctionPassManager &FPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same<PassBuilder, PB_t>::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokeScalarOptimizerLateEPCallbacks(FPM, O); + } + template<typename PB_t> + std::enable_if_t<decltype(hasInvokeCallbacks_helper<PB_t>(nullptr))::value, void> invokeVectorizerCallbacks(FunctionPassManager &FPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same<PassBuilder, PB_t>::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokeVectorizerStartEPCallbacks(FPM, O); + } + template<typename PB_t> + std::enable_if_t<decltype(hasInvokeCallbacks_helper<PB_t>(nullptr))::value, void> invokeOptimizerLastCallbacks(ModulePassManager &MPM, PB_t *PB, OptimizationLevel O) JL_NOTSAFEPOINT { + static_assert(std::is_same<PassBuilder, PB_t>::value, "Expected PassBuilder as second argument!"); + if (!PB) return; + PB->invokeOptimizerLastEPCallbacks(MPM, O); + } + + // Fallbacks + void invokePipelineStartCallbacks(...) {} + void invokePeepholeEPCallbacks(...) {} + void invokeEarlySimplificationCallbacks(...) {} + void invokeCGSCCCallbacks(...) {} + void invokeOptimizerEarlyCallbacks(...) {} + void invokeLateLoopOptimizationCallbacks(...) {} + void invokeLoopOptimizerEndCallbacks(...) {} + void invokeScalarOptimizerCallbacks(...) {} + void invokeVectorizerCallbacks(...) {} + void invokeOptimizerLastCallbacks(...) 
{} + +#ifdef _COMPILER_CLANG_ +#pragma clang diagnostic pop +#endif + +#ifdef _COMPILER_GCC_ +#pragma GCC diagnostic pop +#endif } //The actual pipelines From 1f174a61f3e63b7a0a8bff8c74b8f17c5a5b8743 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Wed, 7 Jun 2023 14:14:20 -0500 Subject: [PATCH 130/290] Fix typo in CONTRIBUTING.md (#50104) --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 099ef6b03509b..f08b71e2f3c30 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -325,7 +325,7 @@ please remove the `backport-X.Y` tag from the originating pull request for the c - use lower case with underscores for method names - it is generally preferred to use ASCII operators and identifiers over Unicode equivalents whenever possible - - in docstring refer to the language as "Julia" and the executable as "`julia`" + - in docstrings refer to the language as "Julia" and the executable as "`julia`" #### General Formatting Guidelines For C code contributions From 2e76fc4157d8349b3e676de635d82c9a53aab06f Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Thu, 8 Jun 2023 00:57:36 +0530 Subject: [PATCH 131/290] Update return type in collect(::Dict) docs (#50056) --- base/array.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/array.jl b/base/array.jl index fc9a2a8a23b6f..3a12b38c5bc26 100644 --- a/base/array.jl +++ b/base/array.jl @@ -728,7 +728,7 @@ _array_for(::Type{T}, itr, isz) where {T} = _array_for(T, isz, _similar_shape(it collect(collection) Return an `Array` of all items in a collection or iterator. For dictionaries, returns -`Pair{KeyType, ValType}`. If the argument is array-like or is an iterator with the +`Vector{Pair{KeyType, ValType}}`. If the argument is array-like or is an iterator with the [`HasShape`](@ref IteratorSize) trait, the result will have the same shape and number of dimensions as the argument. 
From 4200aa9471bbe3af448931eb2b35fcfee0550e24 Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Wed, 7 Jun 2023 21:15:19 +0000 Subject: [PATCH 132/290] Unify and split the pass pipeline (#49798) Unify and split the pass pipeline --- src/pipeline.cpp | 274 ++++++++++++++++++++--------------------------- 1 file changed, 114 insertions(+), 160 deletions(-) diff --git a/src/pipeline.cpp b/src/pipeline.cpp index 3ba05a21250a6..e6b3b284a3768 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -208,6 +208,9 @@ namespace { // .sinkCommonInsts(true) ; } +#if JL_LLVM_VERSION < 150000 +#define LICMOptions() +#endif // At any given time exactly one of each pair of overloads is strictly unused #ifdef _COMPILER_GCC_ @@ -323,8 +326,7 @@ namespace { #define JULIA_PASS(ADD_PASS) if (!options.llvm_only) { ADD_PASS; } else do { } while (0) -//Use for O1 and below -static void buildBasicPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, OptimizationOptions options) JL_NOTSAFEPOINT { +static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { #ifdef JL_DEBUG_BUILD addVerificationPasses(MPM, options.llvm_only); #endif @@ -333,118 +335,32 @@ static void buildBasicPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimiza invokePipelineStartCallbacks(MPM, PB, O); MPM.addPass(Annotation2MetadataPass()); MPM.addPass(ConstantMergePass()); - if (!options.dump_native) { - JULIA_PASS(MPM.addPass(CPUFeaturesPass())); - if (O.getSpeedupLevel() > 0) { - MPM.addPass(createModuleToFunctionPassAdaptor(InstSimplifyPass())); - } - } { FunctionPassManager FPM; FPM.addPass(LowerExpectIntrinsicPass()); + if (O.getSpeedupLevel() >= 2) { + JULIA_PASS(FPM.addPass(PropagateJuliaAddrspacesPass())); + } FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); - if (O.getSpeedupLevel() > 0) { + if (O.getSpeedupLevel() >= 1) { + 
FPM.addPass(DCEPass()); FPM.addPass(SROAPass()); - FPM.addPass(InstCombinePass()); - FPM.addPass(EarlyCSEPass()); } - FPM.addPass(MemCpyOptPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } invokeEarlySimplificationCallbacks(MPM, PB, O); - MPM.addPass(AlwaysInlinerPass()); - { - CGSCCPassManager CGPM; - invokeCGSCCCallbacks(CGPM, PB, O); - MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); - } - invokeOptimizerEarlyCallbacks(MPM, PB, O); - JULIA_PASS(MPM.addPass(LowerSIMDLoopPass())); - { - FunctionPassManager FPM; - { - LoopPassManager LPM; - invokeLateLoopOptimizationCallbacks(LPM, PB, O); - invokeLoopOptimizerEndCallbacks(LPM, PB, O); - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); - } - invokeScalarOptimizerCallbacks(FPM, PB, O); - invokeVectorizerCallbacks(FPM, PB, O); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - } - if (options.lower_intrinsics) { - //TODO no barrier pass? - { - FunctionPassManager FPM; - JULIA_PASS(FPM.addPass(LowerExcHandlersPass())); - JULIA_PASS(FPM.addPass(GCInvariantVerifierPass(false))); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - } - JULIA_PASS(MPM.addPass(RemoveNIPass())); - JULIA_PASS(MPM.addPass(createModuleToFunctionPassAdaptor(LateLowerGCPass()))); - JULIA_PASS(MPM.addPass(FinalLowerGCPass())); - JULIA_PASS(MPM.addPass(LowerPTLSPass(options.dump_native))); - } else { - JULIA_PASS(MPM.addPass(RemoveNIPass())); - } - JULIA_PASS(MPM.addPass(LowerSIMDLoopPass())); // TODO why do we do this twice - if (options.dump_native) { - JULIA_PASS(MPM.addPass(MultiVersioningPass(options.external_use))); - JULIA_PASS(MPM.addPass(CPUFeaturesPass())); - if (O.getSpeedupLevel() > 0) { - FunctionPassManager FPM; - FPM.addPass(InstSimplifyPass()); - FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - } - } - invokeOptimizerLastCallbacks(MPM, PB, O); - { - 
FunctionPassManager FPM; - FPM.addPass(WarnMissedTransformationsPass()); - FPM.addPass(AnnotationRemarksPass()); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - } - addSanitizerPasses(MPM, O); - JULIA_PASS(MPM.addPass(createModuleToFunctionPassAdaptor(DemoteFloat16Pass()))); } -//Use for O2 and above -static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, OptimizationOptions options) JL_NOTSAFEPOINT { -#ifdef JL_DEBUG_BUILD - addVerificationPasses(MPM, options.llvm_only); -#endif - // Place after verification in case we want to force it anyways - MPM.addPass(ForceFunctionAttrsPass()); - invokePipelineStartCallbacks(MPM, PB, O); - MPM.addPass(Annotation2MetadataPass()); - MPM.addPass(ConstantMergePass()); - { - FunctionPassManager FPM; - FPM.addPass(LowerExpectIntrinsicPass()); - JULIA_PASS(FPM.addPass(PropagateJuliaAddrspacesPass())); - //TODO consider not using even basic simplification - //options here, and adding a run of CVP to take advantage - //of the unsimplified codegen information (e.g. 
known - //zeros or ones) - FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); - FPM.addPass(DCEPass()); - FPM.addPass(SROAPass()); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - } - invokeEarlySimplificationCallbacks(MPM, PB, O); - MPM.addPass(AlwaysInlinerPass()); +static void buildEarlyOptimizerPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { invokeOptimizerEarlyCallbacks(MPM, PB, O); { CGSCCPassManager CGPM; invokeCGSCCCallbacks(CGPM, PB, O); - { + if (O.getSpeedupLevel() >= 2) { FunctionPassManager FPM; JULIA_PASS(FPM.addPass(AllocOptPass())); FPM.addPass(Float2IntPass()); FPM.addPass(LowerConstantIntrinsicsPass()); - FPM.addPass(InstCombinePass()); - FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); } MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); @@ -453,42 +369,52 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat JULIA_PASS(MPM.addPass(MultiVersioningPass(options.external_use))); } JULIA_PASS(MPM.addPass(CPUFeaturesPass())); - { + if (O.getSpeedupLevel() >= 1) { FunctionPassManager FPM; - FPM.addPass(SROAPass()); - // SROA can duplicate PHI nodes which can block LowerSIMD - FPM.addPass(InstCombinePass()); - FPM.addPass(JumpThreadingPass()); - FPM.addPass(CorrelatedValuePropagationPass()); - FPM.addPass(ReassociatePass()); - FPM.addPass(EarlyCSEPass()); - JULIA_PASS(FPM.addPass(AllocOptPass())); + if (O.getSpeedupLevel() >= 2) { + FPM.addPass(SROAPass()); + // SROA can duplicate PHI nodes which can block LowerSIMD + FPM.addPass(InstCombinePass()); + FPM.addPass(JumpThreadingPass()); + FPM.addPass(CorrelatedValuePropagationPass()); + FPM.addPass(ReassociatePass()); + FPM.addPass(EarlyCSEPass()); + JULIA_PASS(FPM.addPass(AllocOptPass())); + } else { // if (O.getSpeedupLevel() >= 1) (exactly) + FPM.addPass(InstCombinePass()); + 
FPM.addPass(EarlyCSEPass()); + } invokePeepholeEPCallbacks(FPM, PB, O); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } - JULIA_PASS(MPM.addPass(LowerSIMDLoopPass())); +} + +static void buildLoopOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { { - FunctionPassManager FPM; - { - LoopPassManager LPM1, LPM2; - LPM1.addPass(LoopRotatePass()); - invokeLateLoopOptimizationCallbacks(LPM1, PB, O); - //We don't know if the loop callbacks support MSSA - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), /*UseMemorySSA = */false)); -#if JL_LLVM_VERSION < 150000 -#define LICMOptions() -#endif - LPM2.addPass(LICMPass(LICMOptions())); - JULIA_PASS(LPM2.addPass(JuliaLICMPass())); - LPM2.addPass(SimpleLoopUnswitchPass(/*NonTrivial*/true, true)); - LPM2.addPass(LICMPass(LICMOptions())); - JULIA_PASS(LPM2.addPass(JuliaLICMPass())); - //LICM needs MemorySSA now, so we must use it - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), /*UseMemorySSA = */true)); + LoopPassManager LPM; + if (O.getSpeedupLevel() >= 2) { + LPM.addPass(LoopRotatePass()); } + invokeLateLoopOptimizationCallbacks(LPM, PB, O); + //We don't know if the loop callbacks support MSSA + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false)); + } + if (O.getSpeedupLevel() >= 2) { + LoopPassManager LPM; + LPM.addPass(LICMPass(LICMOptions())); + LPM.addPass(JuliaLICMPass()); + LPM.addPass(SimpleLoopUnswitchPass(/*NonTrivial*/true, true)); + LPM.addPass(LICMPass(LICMOptions())); + LPM.addPass(JuliaLICMPass()); + //LICM needs MemorySSA now, so we must use it + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */true)); + } + if (O.getSpeedupLevel() >= 2) { FPM.addPass(IRCEPass()); - { - LoopPassManager LPM; + } + { + LoopPassManager LPM; + if (O.getSpeedupLevel() >= 2) { LPM.addPass(LoopInstSimplifyPass()); 
LPM.addPass(LoopIdiomRecognizePass()); LPM.addPass(IndVarSimplifyPass()); @@ -496,10 +422,15 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat // This unroll will only unroll loops when the trip count is known and small, // so that no loop remains LPM.addPass(LoopFullUnrollPass()); - invokeLoopOptimizerEndCallbacks(LPM, PB, O); - //We don't know if the loop end callbacks support MSSA - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false)); } + invokeLoopOptimizerEndCallbacks(LPM, PB, O); + //We don't know if the loop end callbacks support MSSA + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false)); + } +} + +static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + if (O.getSpeedupLevel() >= 2) { JULIA_PASS(FPM.addPass(AllocOptPass())); FPM.addPass(SROAPass()); FPM.addPass(InstSimplifyPass()); @@ -511,9 +442,11 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat FPM.addPass(IRCEPass()); FPM.addPass(InstCombinePass()); FPM.addPass(JumpThreadingPass()); - if (O.getSpeedupLevel() >= 3) { - FPM.addPass(GVNPass()); - } + } + if (O.getSpeedupLevel() >= 3) { + FPM.addPass(GVNPass()); + } + if (O.getSpeedupLevel() >= 2) { FPM.addPass(DSEPass()); invokePeepholeEPCallbacks(FPM, PB, O); FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); @@ -525,24 +458,28 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); } FPM.addPass(LoopDistributePass()); - FPM.addPass(InjectTLIMappings()); - invokeScalarOptimizerCallbacks(FPM, PB, O); - //TODO look into loop vectorize options - FPM.addPass(LoopVectorizePass()); - FPM.addPass(LoopLoadEliminationPass()); - FPM.addPass(InstCombinePass()); - FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); - 
FPM.addPass(SLPVectorizerPass()); - invokeVectorizerCallbacks(FPM, PB, O); - FPM.addPass(VectorCombinePass()); - FPM.addPass(ADCEPass()); - //TODO add BDCEPass here? - // This unroll will unroll vectorized loops - // as well as loops that we tried but failed to vectorize - FPM.addPass(LoopUnrollPass(LoopUnrollOptions(O.getSpeedupLevel(), /*OnlyWhenForced = */ false, /*ForgetSCEV = */false))); - FPM.addPass(WarnMissedTransformationsPass()); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } + invokeScalarOptimizerCallbacks(FPM, PB, O); +} + +static void buildVectorPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + //TODO look into loop vectorize options + FPM.addPass(InjectTLIMappings()); + FPM.addPass(LoopVectorizePass()); + FPM.addPass(LoopLoadEliminationPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); + FPM.addPass(SLPVectorizerPass()); + invokeVectorizerCallbacks(FPM, PB, O); + FPM.addPass(VectorCombinePass()); + FPM.addPass(ADCEPass()); + //TODO add BDCEPass here? + // This unroll will unroll vectorized loops + // as well as loops that we tried but failed to vectorize + FPM.addPass(LoopUnrollPass(LoopUnrollOptions(O.getSpeedupLevel(), /*OnlyWhenForced = */ false, /*ForgetSCEV = */false))); +} + +static void buildIntrinsicLoweringPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { if (options.lower_intrinsics) { //TODO barrier pass? 
{ @@ -556,7 +493,7 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat JULIA_PASS(MPM.addPass(RemoveNIPass())); JULIA_PASS(MPM.addPass(createModuleToFunctionPassAdaptor(LateLowerGCPass()))); JULIA_PASS(MPM.addPass(FinalLowerGCPass())); - { + if (O.getSpeedupLevel() >= 2) { FunctionPassManager FPM; FPM.addPass(GVNPass()); FPM.addPass(SCCPPass()); @@ -564,7 +501,7 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } JULIA_PASS(MPM.addPass(LowerPTLSPass(options.dump_native))); - { + if (O.getSpeedupLevel() >= 1) { FunctionPassManager FPM; FPM.addPass(InstCombinePass()); FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); @@ -573,7 +510,10 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat } else { JULIA_PASS(MPM.addPass(RemoveNIPass())); } - { +} + +static void buildCleanupPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + if (O.getSpeedupLevel() >= 2) { FunctionPassManager FPM; JULIA_PASS(FPM.addPass(CombineMulAddPass())); FPM.addPass(DivRemPairsPass()); @@ -585,9 +525,30 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat { FunctionPassManager FPM; JULIA_PASS(FPM.addPass(DemoteFloat16Pass())); - FPM.addPass(GVNPass()); + if (O.getSpeedupLevel() >= 2) { + FPM.addPass(GVNPass()); + } + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } +} + +static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + buildEarlySimplificationPipeline(MPM, PB, O, options); + MPM.addPass(AlwaysInlinerPass()); + buildEarlyOptimizerPipeline(MPM, PB, O, options); + MPM.addPass(LowerSIMDLoopPass()); + { + FunctionPassManager FPM; + buildLoopOptimizerPipeline(FPM, PB, O, options); + buildScalarOptimizerPipeline(FPM, 
PB, O, options); + if (O.getSpeedupLevel() >= 2) { + buildVectorPipeline(FPM, PB, O, options); + } + FPM.addPass(WarnMissedTransformationsPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } + buildIntrinsicLoweringPipeline(MPM, PB, O, options); + buildCleanupPipeline(MPM, PB, O, options); } #undef JULIA_PASS @@ -665,10 +626,7 @@ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); ModulePassManager createMPM(PassBuilder &PB, OptimizationLevel O, OptimizationOptions options) JL_NOTSAFEPOINT { ModulePassManager MPM; - if (O.getSpeedupLevel() < 2) - buildBasicPipeline(MPM, &PB, O, options); - else - buildFullPipeline(MPM, &PB, O, options); + buildPipeline(MPM, &PB, O, options); return MPM; } } @@ -805,11 +763,7 @@ void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { auto julia_options = parseJuliaPipelineOptions(Name); if (julia_options) { ModulePassManager pipeline; - if (julia_options->first.getSpeedupLevel() < 2) { - buildBasicPipeline(pipeline, nullptr, julia_options->first, julia_options->second); - } else { - buildFullPipeline(pipeline, nullptr, julia_options->first, julia_options->second); - } + buildPipeline(pipeline, nullptr, julia_options->first, julia_options->second); PM.addPass(std::move(pipeline)); return true; } From 2c8045572b6e9dda6be1b8ba712ed9ea5e7252e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20G=C3=B6ttgens?= Date: Thu, 8 Jun 2023 16:54:42 +0200 Subject: [PATCH 133/290] Revert "allow artifact string macro to take an explicit path to the artifact file (#46755)" (#50036) This reverts commit 1720a54011f7520f30ae69088e5c1b6f9643b35e. 
--- stdlib/Artifacts/src/Artifacts.jl | 11 +++-------- stdlib/Artifacts/test/runtests.jl | 3 --- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/stdlib/Artifacts/src/Artifacts.jl b/stdlib/Artifacts/src/Artifacts.jl index 47812fb993428..70593bfadae05 100644 --- a/stdlib/Artifacts/src/Artifacts.jl +++ b/stdlib/Artifacts/src/Artifacts.jl @@ -654,18 +654,13 @@ access a single file/directory within an artifact. Example: !!! compat "Julia 1.6" Slash-indexing requires at least Julia 1.6. """ -macro artifact_str(name, platform=nothing, artifacts_toml_path=nothing) +macro artifact_str(name, platform=nothing) # Find Artifacts.toml file we're going to load from srcfile = string(__source__.file) if ((isinteractive() && startswith(srcfile, "REPL[")) || (!isinteractive() && srcfile == "none")) && !isfile(srcfile) srcfile = pwd() end - # Sometimes we know the exact path to the Artifacts.toml file, so we can save some lookups - local artifacts_toml = if artifacts_toml_path === nothing || artifacts_toml_path == :(nothing) - find_artifacts_toml(srcfile) - else - eval(artifacts_toml_path) - end + local artifacts_toml = find_artifacts_toml(srcfile) if artifacts_toml === nothing error(string( "Cannot locate '(Julia)Artifacts.toml' file when attempting to use artifact '", @@ -695,7 +690,7 @@ macro artifact_str(name, platform=nothing, artifacts_toml_path=nothing) # If `name` is a constant, (and we're using the default `Platform`) we can actually load # and parse the `Artifacts.toml` file now, saving the work from runtime. 
- if isa(name, AbstractString) && (platform === nothing || platform == :(nothing)) + if isa(name, AbstractString) && platform === nothing # To support slash-indexing, we need to split the artifact name from the path tail: platform = HostPlatform() artifact_name, artifact_path_tail, hash = artifact_slash_lookup(name, artifact_dict, artifacts_toml, platform) diff --git a/stdlib/Artifacts/test/runtests.jl b/stdlib/Artifacts/test/runtests.jl index 248d851ccad79..67117217be549 100644 --- a/stdlib/Artifacts/test/runtests.jl +++ b/stdlib/Artifacts/test/runtests.jl @@ -91,9 +91,6 @@ end HelloWorldC_exe_path = joinpath(HelloWorldC_dir, "bin", "hello_world$(exeext)") @test isfile(HelloWorldC_exe_path) - HelloWorldC_dir_explicit_artifact = eval(:(@artifact_str "HelloWorldC" nothing joinpath(@__DIR__, "Artifacts.toml"))) - @test isdir(HelloWorldC_dir_explicit_artifact) - # Simple slash-indexed lookup HelloWorldC_bin_path = artifact"HelloWorldC/bin" @test isdir(HelloWorldC_bin_path) From 0a2d6fc552e3675c2485cc56820af40df47461d6 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Thu, 8 Jun 2023 12:28:34 -0300 Subject: [PATCH 134/290] Don't use exchange in the hot path of the GC (#50021) * Don't use exchange in the hot path of the GC and save one extra load --- src/gc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gc.c b/src/gc.c index b92324bde6c63..a9bb584cfcfba 100644 --- a/src/gc.c +++ b/src/gc.c @@ -804,7 +804,7 @@ STATIC_INLINE void gc_queue_big_marked(jl_ptls_t ptls, bigval_t *hdr, FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_NOTSAFEPOINT { assert(gc_marked(mark_mode)); - uintptr_t tag = o->header; + uintptr_t tag = jl_atomic_load_relaxed((_Atomic(uintptr_t)*)&o->header); if (gc_marked(tag)) return 0; if (mark_reset_age) { @@ -818,9 +818,9 @@ FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_N tag = tag | mark_mode; assert((tag & 0x3) == mark_mode); } - tag = 
jl_atomic_exchange_relaxed((_Atomic(uintptr_t)*)&o->header, tag); - verify_val(jl_valueof(o)); - return !gc_marked(tag); + jl_atomic_store_relaxed((_Atomic(uintptr_t)*)&o->header, tag); //xchg here was slower than + verify_val(jl_valueof(o)); //potentially redoing work because of a stale tag. + return 1; } // This function should be called exactly once during marking for each big From c4d162ea08e37dfdf958a79051aa6f6e223606b2 Mon Sep 17 00:00:00 2001 From: Max Horn Date: Thu, 8 Jun 2023 17:55:41 +0200 Subject: [PATCH 135/290] mkpidlock: clarify that stale_age is given in seconds (#50014) --- stdlib/FileWatching/src/pidfile.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/FileWatching/src/pidfile.jl b/stdlib/FileWatching/src/pidfile.jl index 8416765a57b97..b78f7ef070018 100644 --- a/stdlib/FileWatching/src/pidfile.jl +++ b/stdlib/FileWatching/src/pidfile.jl @@ -31,7 +31,7 @@ your program, so the `finalizer` does not reclaim it early. Optional keyword arguments: - `mode`: file access mode (modified by the process umask). Defaults to world-readable. - `poll_interval`: Specify the maximum time to between attempts (if `watch_file` doesn't work) - - `stale_age`: Delete an existing pidfile (ignoring the lock) if its mtime is older than this. + - `stale_age`: Delete an existing pidfile (ignoring the lock) if it is older than this many seconds, based on its mtime. The file won't be deleted until 25x longer than this if the pid in the file appears that it may be valid. By default this is disabled (`stale_age` = 0), but a typical recommended value would be about 3-5x an estimated normal completion time. From 746a15bfca9b0f75f9a146912f3547ee1dd92ff0 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Thu, 8 Jun 2023 16:52:57 -0500 Subject: [PATCH 136/290] Fail gracefully when attempting pair destructuring (#49368) `a => b = x` was interpreted as a function definition. 
Now it's an error unless you `import Base: =>` --- src/module.c | 2 +- test/syntax.jl | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index 04d3970f9b460..89c4c6cdb674e 100644 --- a/src/module.c +++ b/src/module.c @@ -239,7 +239,7 @@ JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m, jl_sym_ } // TODO: we might want to require explicitly importing types to add constructors // or we might want to drop this error entirely - if (!b->imported && (!b2->constp || !jl_is_type(f))) { + if (!b->imported && !(b2->constp && jl_is_type(f) && strcmp(jl_symbol_name(var), "=>") != 0)) { jl_errorf("invalid method definition in %s: function %s.%s must be explicitly imported to be extended", jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var)); } diff --git a/test/syntax.jl b/test/syntax.jl index af9344c5217e3..f7a6d1c095c00 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -3483,3 +3483,9 @@ end # issue #49984 macro z49984(s); :(let a; $(esc(s)); end); end @test let a = 1; @z49984(a) === 1; end + +# issues #37783, #39929, #42552, #43379, and #48332 +let x = 1 => 2 + @test_throws ErrorException @eval a => b = 2 + @test_throws "function Base.=> must be explicitly imported to be extended" @eval a => b = 2 +end From 7ceea1350bf1693da51aa50ea111afcd11d279e0 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Thu, 8 Jun 2023 21:39:18 -0400 Subject: [PATCH 137/290] Update Unicode.jl (#50087) Correct confusing typo in documentation --- stdlib/Unicode/src/Unicode.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/Unicode/src/Unicode.jl b/stdlib/Unicode/src/Unicode.jl index 58b9ab41b790a..e0ae78bd911a7 100644 --- a/stdlib/Unicode/src/Unicode.jl +++ b/stdlib/Unicode/src/Unicode.jl @@ -229,7 +229,7 @@ to perform custom normalizations, such as [`Unicode.julia_chartransform`](@ref). 
For example, the string `"noël"` can be constructed in two canonically equivalent ways in Unicode, depending on whether `"ë"` is formed from a single codepoint U+00EB or -from the ASCII character `'o'` followed by the U+0308 combining-diaeresis character. +from the ASCII character `'e'` followed by the U+0308 combining-diaeresis character. ```jldoctest julia> s1 = "no\u00EBl" From d041162594de53547f5d80b10a660a0e8dca057c Mon Sep 17 00:00:00 2001 From: "Viral B. Shah" Date: Thu, 8 Jun 2023 21:42:26 -0400 Subject: [PATCH 138/290] Update mailmap (#50116) --- .mailmap | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.mailmap b/.mailmap index e27453c63d2b5..e91501651d065 100644 --- a/.mailmap +++ b/.mailmap @@ -289,3 +289,9 @@ Frames Catherine White Frames Catherine White Claire Foster + +Jishnu Bhattacharya +Jishnu Bhattacharya + +Shuhei Kadowaki +Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> From 43d7f881f8e26eb4bb7ef3d6f84b378d28125325 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Fri, 9 Jun 2023 13:28:41 +0900 Subject: [PATCH 139/290] effects: audit `EFFECTS_UNKNOWN` usages (#50106) --- base/compiler/abstractinterpretation.jl | 6 +++--- base/compiler/effects.jl | 2 +- base/compiler/tfuncs.jl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 9f23bf3a05b7e..368a4054fbc46 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -1875,7 +1875,7 @@ function abstract_call_unionall(interp::AbstractInterpreter, argtypes::Vector{An ret = canconst ? 
Const(body) : Type{body} return CallMeta(ret, Effects(EFFECTS_TOTAL; nothrow), NoCallInfo()) end - return CallMeta(Any, EFFECTS_UNKNOWN, NoCallInfo()) + return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo()) end function abstract_invoke(interp::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, si::StmtInfo, sv::AbsIntState) @@ -1987,7 +1987,7 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f), elseif f === TypeVar # Manually look through the definition of TypeVar to # make sure to be able to get `PartialTypeVar`s out. - (la < 2 || la > 4) && return CallMeta(Union{}, EFFECTS_UNKNOWN, NoCallInfo()) + (la < 2 || la > 4) && return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo()) n = argtypes[2] ub_var = Const(Any) lb_var = Const(Union{}) @@ -2301,7 +2301,7 @@ end function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing}, sv::AbsIntState) - effects = EFFECTS_UNKNOWN + effects = Effects() ehead = e.head 𝕃ᵢ = typeinf_lattice(interp) ⊑ᵢ = ⊑(𝕃ᵢ) diff --git a/base/compiler/effects.jl b/base/compiler/effects.jl index ec64b7601bc76..7d09769e5b31b 100644 --- a/base/compiler/effects.jl +++ b/base/compiler/effects.jl @@ -131,7 +131,7 @@ const INACCESSIBLEMEM_OR_ARGMEMONLY = 0x01 << 1 const EFFECTS_TOTAL = Effects(ALWAYS_TRUE, ALWAYS_TRUE, true, true, true, ALWAYS_TRUE, true, true) const EFFECTS_THROWS = Effects(ALWAYS_TRUE, ALWAYS_TRUE, false, true, true, ALWAYS_TRUE, true, true) -const EFFECTS_UNKNOWN = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, true, false) # unknown mostly, but it's not overlayed at least (e.g. it's not a call) +const EFFECTS_UNKNOWN = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, true, true) # unknown mostly, but it's not overlayed and noinbounds at least (e.g. 
it's not a call) const _EFFECTS_UNKNOWN = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, false, false) # unknown really function Effects(e::Effects = _EFFECTS_UNKNOWN; diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl index 20543b207895e..f10dd03a6a058 100644 --- a/base/compiler/tfuncs.jl +++ b/base/compiler/tfuncs.jl @@ -1374,7 +1374,7 @@ function abstract_modifyfield!(interp::AbstractInterpreter, argtypes::Vector{Any nargs = length(argtypes) if !isempty(argtypes) && isvarargtype(argtypes[nargs]) nargs - 1 <= 6 || return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo()) - nargs > 3 || return CallMeta(Any, EFFECTS_UNKNOWN, NoCallInfo()) + nargs > 3 || return CallMeta(Any, Effects(), NoCallInfo()) else 5 <= nargs <= 6 || return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo()) end @@ -2681,7 +2681,7 @@ end # a simplified model of abstract_call_gf_by_type for applicable function abstract_applicable(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::AbsIntState, max_methods::Int) - length(argtypes) < 2 && return CallMeta(Union{}, EFFECTS_UNKNOWN, NoCallInfo()) + length(argtypes) < 2 && return CallMeta(Bottom, EFFECTS_THROWS, NoCallInfo()) isvarargtype(argtypes[2]) && return CallMeta(Bool, EFFECTS_UNKNOWN, NoCallInfo()) argtypes = argtypes[2:end] atype = argtypes_to_type(argtypes) From 2584b595be8c27f129ff546c5e39026e0812f9f6 Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Fri, 9 Jun 2023 18:47:47 +0530 Subject: [PATCH 140/290] Fill zeros only if necessary in `Matrix(::Diagonal)` and friends (#50008) * delay filling zeros while convering (sym)tridiag to matrix * conditional zero fill for bidiagonal/diagonal --- stdlib/LinearAlgebra/src/bidiag.jl | 3 ++- stdlib/LinearAlgebra/src/diagonal.jl | 3 ++- stdlib/LinearAlgebra/src/tridiag.jl | 6 ++++-- stdlib/LinearAlgebra/test/bidiag.jl | 8 ++++++++ stdlib/LinearAlgebra/test/diagonal.jl | 6 ++++++ stdlib/LinearAlgebra/test/tridiag.jl | 23 ++++++++++++++++++++++- 6 files changed, 44 
insertions(+), 5 deletions(-) diff --git a/stdlib/LinearAlgebra/src/bidiag.jl b/stdlib/LinearAlgebra/src/bidiag.jl index 855d05c3f94e8..192272cc61e98 100644 --- a/stdlib/LinearAlgebra/src/bidiag.jl +++ b/stdlib/LinearAlgebra/src/bidiag.jl @@ -183,8 +183,9 @@ end #Converting from Bidiagonal to dense Matrix function Matrix{T}(A::Bidiagonal) where T n = size(A, 1) - B = zeros(T, n, n) + B = Matrix{T}(undef, n, n) n == 0 && return B + n > 1 && fill!(B, zero(T)) @inbounds for i = 1:n - 1 B[i,i] = A.dv[i] if A.uplo == 'U' diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl index 1813c04c46351..fb605a57ab5c6 100644 --- a/stdlib/LinearAlgebra/src/diagonal.jl +++ b/stdlib/LinearAlgebra/src/diagonal.jl @@ -111,7 +111,8 @@ Matrix(D::Diagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(D) Array(D::Diagonal{T}) where {T} = Matrix(D) function Matrix{T}(D::Diagonal) where {T} n = size(D, 1) - B = zeros(T, n, n) + B = Matrix{T}(undef, n, n) + n > 1 && fill!(B, zero(T)) @inbounds for i in 1:n B[i,i] = D.diag[i] end diff --git a/stdlib/LinearAlgebra/src/tridiag.jl b/stdlib/LinearAlgebra/src/tridiag.jl index 07a47d917d914..13f6a1bb70756 100644 --- a/stdlib/LinearAlgebra/src/tridiag.jl +++ b/stdlib/LinearAlgebra/src/tridiag.jl @@ -124,8 +124,9 @@ AbstractMatrix{T}(S::SymTridiagonal) where {T} = convert(AbstractVector{T}, S.ev)::AbstractVector{T}) function Matrix{T}(M::SymTridiagonal) where T n = size(M, 1) - Mf = zeros(T, n, n) + Mf = Matrix{T}(undef, n, n) n == 0 && return Mf + n > 2 && fill!(Mf, zero(T)) @inbounds for i = 1:n-1 Mf[i,i] = symmetric(M.dv[i], :U) Mf[i+1,i] = transpose(M.ev[i]) @@ -556,9 +557,10 @@ function size(M::Tridiagonal, d::Integer) end function Matrix{T}(M::Tridiagonal) where {T} - A = zeros(T, size(M)) + A = Matrix{T}(undef, size(M)) n = length(M.d) n == 0 && return A + n > 2 && fill!(A, zero(T)) for i in 1:n-1 A[i,i] = M.d[i] A[i+1,i] = M.dl[i] diff --git a/stdlib/LinearAlgebra/test/bidiag.jl 
b/stdlib/LinearAlgebra/test/bidiag.jl index 89f2b21a6a973..d13009780b975 100644 --- a/stdlib/LinearAlgebra/test/bidiag.jl +++ b/stdlib/LinearAlgebra/test/bidiag.jl @@ -797,6 +797,14 @@ end @test iszero(BL[i,j]) end end + + M = ones(2,2) + for n in 0:1 + dv = fill(M, n) + ev = fill(M, 0) + B = Bidiagonal(dv, ev, :U) + @test B == Matrix{eltype(B)}(B) + end end @testset "copyto! with UniformScaling" begin diff --git a/stdlib/LinearAlgebra/test/diagonal.jl b/stdlib/LinearAlgebra/test/diagonal.jl index 5f169d21ff6fb..2a8248d9ca716 100644 --- a/stdlib/LinearAlgebra/test/diagonal.jl +++ b/stdlib/LinearAlgebra/test/diagonal.jl @@ -755,6 +755,12 @@ end @test tr(D) == 10 @test det(D) == 4 + + M = [1 2; 3 4] + for n in 0:1 + D = Diagonal(fill(M, n)) + @test D == Matrix{eltype(D)}(D) + end end @testset "linear solve for block diagonal matrices" begin diff --git a/stdlib/LinearAlgebra/test/tridiag.jl b/stdlib/LinearAlgebra/test/tridiag.jl index e45fc9a65dba0..d4b2dd5e3f269 100644 --- a/stdlib/LinearAlgebra/test/tridiag.jl +++ b/stdlib/LinearAlgebra/test/tridiag.jl @@ -458,7 +458,7 @@ end end end -@testset "SymTridiagonal block matrix" begin +@testset "SymTridiagonal/Tridiagonal block matrix" begin M = [1 2; 2 4] n = 5 A = SymTridiagonal(fill(M, n), fill(M, n-1)) @@ -472,6 +472,27 @@ end @test_throws ArgumentError diag(A, 2) @test_throws ArgumentError diag(A, n+1) @test_throws ArgumentError diag(A, -n-1) + + A = Tridiagonal(fill(M, n-1), fill(M, n), fill(M, n-1)) + @test @inferred A[1,1] == M + @test @inferred A[1,2] == M + @test @inferred A[2,1] == M + @test @inferred diag(A, 1) == fill(M, n-1) + @test @inferred diag(A, 0) == fill(M, n) + @test @inferred diag(A, -1) == fill(M, n-1) + @test_throws MethodError diag(A, -2) + @test_throws MethodError diag(A, 2) + @test_throws ArgumentError diag(A, n+1) + @test_throws ArgumentError diag(A, -n-1) + + for n in 0:2 + dv, ev = fill(M, n), fill(M, max(n-1,0)) + A = SymTridiagonal(dv, ev) + @test A == Matrix{eltype(A)}(A) + + A = 
Tridiagonal(ev, dv, ev) + @test A == Matrix{eltype(A)}(A) + end end @testset "Issue 12068" begin From f8827f140c136051daa97986af29c33319d1ac21 Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Fri, 9 Jun 2023 20:16:41 +0530 Subject: [PATCH 141/290] Structured display for adjoint/transpose (#49979) * Structured display for adjoint/transpose * structured display for Symmetric/Hermitian * tests with Symmetric and Transpose --- stdlib/LinearAlgebra/src/adjtrans.jl | 5 +++ stdlib/LinearAlgebra/src/symmetric.jl | 7 ++++ stdlib/LinearAlgebra/test/adjtrans.jl | 28 +++++++++++++ stdlib/LinearAlgebra/test/symmetric.jl | 57 ++++++++++++++++++++++++++ 4 files changed, 97 insertions(+) diff --git a/stdlib/LinearAlgebra/src/adjtrans.jl b/stdlib/LinearAlgebra/src/adjtrans.jl index 7e650664906b9..875e8cefcb66e 100644 --- a/stdlib/LinearAlgebra/src/adjtrans.jl +++ b/stdlib/LinearAlgebra/src/adjtrans.jl @@ -505,3 +505,8 @@ pinv(v::TransposeAbsVec, tol::Real = 0) = pinv(conj(v.parent)).parent ## complex conjugate conj(A::Transpose) = adjoint(A.parent) conj(A::Adjoint) = transpose(A.parent) + +## structured matrix methods ## +function Base.replace_in_print_matrix(A::AdjOrTrans,i::Integer,j::Integer,s::AbstractString) + Base.replace_in_print_matrix(parent(A), j, i, s) +end diff --git a/stdlib/LinearAlgebra/src/symmetric.jl b/stdlib/LinearAlgebra/src/symmetric.jl index dc553a7492778..1fc0c6aa0d94d 100644 --- a/stdlib/LinearAlgebra/src/symmetric.jl +++ b/stdlib/LinearAlgebra/src/symmetric.jl @@ -856,3 +856,10 @@ function _hermitianpart!(A::AbstractMatrix) end return A end + +## structured matrix printing ## +function Base.replace_in_print_matrix(A::HermOrSym,i::Integer,j::Integer,s::AbstractString) + ijminmax = minmax(i, j) + inds = A.uplo == 'U' ? 
ijminmax : reverse(ijminmax) + Base.replace_in_print_matrix(parent(A), inds..., s) +end diff --git a/stdlib/LinearAlgebra/test/adjtrans.jl b/stdlib/LinearAlgebra/test/adjtrans.jl index e40beb29787cf..2362ec7fb28f2 100644 --- a/stdlib/LinearAlgebra/test/adjtrans.jl +++ b/stdlib/LinearAlgebra/test/adjtrans.jl @@ -643,4 +643,32 @@ end end end +@testset "structured printing" begin + D = Diagonal(1:3) + @test sprint(Base.print_matrix, Adjoint(D)) == sprint(Base.print_matrix, D) + @test sprint(Base.print_matrix, Transpose(D)) == sprint(Base.print_matrix, D) + D = Diagonal((1:3)*im) + D2 = Diagonal((1:3)*(-im)) + @test sprint(Base.print_matrix, Transpose(D)) == sprint(Base.print_matrix, D) + @test sprint(Base.print_matrix, Adjoint(D)) == sprint(Base.print_matrix, D2) + + struct OneHotVecOrMat{N} <: AbstractArray{Bool,N} + inds::NTuple{N,Int} + sz::NTuple{N,Int} + end + Base.size(x::OneHotVecOrMat) = x.sz + function Base.getindex(x::OneHotVecOrMat{N}, inds::Vararg{Int,N}) where {N} + checkbounds(x, inds...) + inds == x.inds + end + Base.replace_in_print_matrix(o::OneHotVecOrMat{1}, i::Integer, j::Integer, s::AbstractString) = + o.inds == (i,) ? s : Base.replace_with_centered_mark(s) + Base.replace_in_print_matrix(o::OneHotVecOrMat{2}, i::Integer, j::Integer, s::AbstractString) = + o.inds == (i,j) ? 
s : Base.replace_with_centered_mark(s) + + o = OneHotVecOrMat((2,), (4,)) + @test sprint(Base.print_matrix, Transpose(o)) == sprint(Base.print_matrix, OneHotVecOrMat((1,2), (1,4))) + @test sprint(Base.print_matrix, Adjoint(o)) == sprint(Base.print_matrix, OneHotVecOrMat((1,2), (1,4))) +end + end # module TestAdjointTranspose diff --git a/stdlib/LinearAlgebra/test/symmetric.jl b/stdlib/LinearAlgebra/test/symmetric.jl index 04621c4b49e86..c901f9b2facc3 100644 --- a/stdlib/LinearAlgebra/test/symmetric.jl +++ b/stdlib/LinearAlgebra/test/symmetric.jl @@ -824,4 +824,61 @@ end end end +@testset "Structured display" begin + @testset "Diagonal" begin + d = 10:13 + D = Diagonal(d) + for uplo in (:L, :U), SymHerm in (Symmetric, Hermitian) + S = SymHerm(D, uplo) + @test sprint(Base.print_matrix, S) == sprint(Base.print_matrix, D) + end + + d = (10:13) .+ 2im + D = Diagonal(d) + DR = Diagonal(complex.(real.(d))) + for uplo in (:L, :U) + H = Hermitian(D, uplo) + @test sprint(Base.print_matrix, H) == sprint(Base.print_matrix, DR) + + S = Symmetric(D, uplo) + @test sprint(Base.print_matrix, S) == sprint(Base.print_matrix, D) + end + end + @testset "Bidiagonal" begin + dv, ev = 1:4, 1:3 + ST = SymTridiagonal(dv, ev) + D = Diagonal(dv) + for B_uplo in (:L, :U) + B = Bidiagonal(dv, ev, B_uplo) + for Sym_uplo in (:L, :U), SymHerm in (Symmetric, Hermitian) + SB = SymHerm(B, Sym_uplo) + teststr = sprint(Base.print_matrix, Sym_uplo == B_uplo ? ST : D) + @test sprint(Base.print_matrix, SB) == teststr + SB = SymHerm(Transpose(B), Sym_uplo) + teststr = sprint(Base.print_matrix, Sym_uplo == B_uplo ? D : ST) + @test sprint(Base.print_matrix, SB) == teststr + end + end + end + @testset "Tridiagonal" begin + superd, d, subd = 3:5, 10:13, 1:3 + for uplo in (:U, :L), SymHerm in (Symmetric, Hermitian) + S = SymHerm(Tridiagonal(subd, d, superd), uplo) + ST = SymTridiagonal(d, uplo == :U ? 
superd : subd) + @test sprint(Base.print_matrix, S) == sprint(Base.print_matrix, ST) + end + + superd, d, subd = collect((3:5)*im), collect(Complex{Int}, 10:13), collect((1:3)*im) + for uplo in (:U, :L) + S = Symmetric(Tridiagonal(subd, d, superd), uplo) + ST = SymTridiagonal(d, uplo == :U ? superd : subd) + @test sprint(Base.print_matrix, S) == sprint(Base.print_matrix, ST) + + H = Hermitian(Tridiagonal(subd, d, superd), uplo) + T = Tridiagonal(uplo == :L ? subd : conj(superd), d, uplo == :U ? superd : conj(subd)) + @test sprint(Base.print_matrix, H) == sprint(Base.print_matrix, T) + end + end +end + end # module TestSymmetric From feaea2266eabe3d6714e8073ec57cb6bbf14e8ee Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Fri, 9 Jun 2023 21:23:54 +0200 Subject: [PATCH 142/290] print the skipping bytes notification when skipping printing parts of a string in color (#50096) --- base/strings/io.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/strings/io.jl b/base/strings/io.jl index 5ae67fc8c841c..987a64798d3da 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -237,7 +237,7 @@ function show( if 4t ≤ n || t ≤ n && t ≤ length(str, head, tail-1) skip = skip_text(n) show(io, SubString(str, 1:prevind(str, head))) - print(io, skip) # TODO: bold styled + printstyled(io, skip; color=:light_yellow, bold=true) show(io, SubString(str, tail)) else show(io, str) From 59bf9e84a25ff8756d2e5d0ac73825f3b66493dc Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Fri, 9 Jun 2023 21:24:52 +0200 Subject: [PATCH 143/290] use a fixed world for code loading (#49525) --- base/Base.jl | 1 + base/loading.jl | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/base/Base.jl b/base/Base.jl index 8c5b8e13d3fb5..1a677bf508977 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -597,6 +597,7 @@ function __init__() ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), PROFILE_PRINT_COND[].handle) 
errormonitor(Threads.@spawn(profile_printing_listener())) end + _require_world_age[] = get_world_counter() # Prevent spawned Julia process from getting stuck waiting on Tracy to connect. delete!(ENV, "JULIA_WAIT_FOR_TRACY") nothing diff --git a/base/loading.jl b/base/loading.jl index a7d05e5940ef3..db2a09752a2f1 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1653,6 +1653,8 @@ end # require always works in Main scope and loads files from node 1 const toplevel_load = Ref(true) +const _require_world_age = Ref{UInt}(typemax(UInt)) + """ require(into::Module, module::Symbol) @@ -1675,6 +1677,14 @@ For more details regarding code loading, see the manual sections on [modules](@r [parallel computing](@ref code-availability). """ function require(into::Module, mod::Symbol) + if _require_world_age[] != typemax(UInt) + Base.invoke_in_world(_require_world_age[], __require, into, mod) + else + @invokelatest __require(into, mod) + end +end + +function __require(into::Module, mod::Symbol) @lock require_lock begin LOADING_CACHE[] = LoadingCache() try @@ -1724,6 +1734,14 @@ require(uuidkey::PkgId) = @lock require_lock _require_prelocked(uuidkey) const REPL_PKGID = PkgId(UUID("3fa0cd96-eef1-5676-8a61-b3b8758bbffb"), "REPL") function _require_prelocked(uuidkey::PkgId, env=nothing) + if _require_world_age[] != typemax(UInt) + Base.invoke_in_world(_require_world_age[], __require_prelocked, uuidkey, env) + else + @invokelatest __require_prelocked(uuidkey, env) + end +end + +function __require_prelocked(uuidkey::PkgId, env=nothing) assert_havelock(require_lock) if !root_module_exists(uuidkey) newm = _require(uuidkey, env) From 0919cba14306a90f0ae6dc043c4bf336dc70aab5 Mon Sep 17 00:00:00 2001 From: Chris Hodapp Date: Fri, 9 Jun 2023 16:30:51 -0400 Subject: [PATCH 144/290] Fix typo in calling-c-and-fortran-code.md (#50121) --- doc/src/manual/calling-c-and-fortran-code.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/manual/calling-c-and-fortran-code.md 
b/doc/src/manual/calling-c-and-fortran-code.md index eab901adc2043..7b889589c592d 100644 --- a/doc/src/manual/calling-c-and-fortran-code.md +++ b/doc/src/manual/calling-c-and-fortran-code.md @@ -1000,7 +1000,7 @@ A table of translations between the macro and function interfaces is given below ## [Calling Convention](@id calling-convention) -The second argument to `ccall` (immediatel preceding return type) can optionally +The second argument to `ccall` (immediately preceding return type) can optionally be a calling convention specifier (the `@ccall` macro currently does not support giving a calling convention). Without any specifier, the platform-default C calling convention is used. Other supported conventions are: `stdcall`, `cdecl`, From a31c8ef47d446f071a7c48f332733e67e32a4dfe Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Sat, 10 Jun 2023 13:12:00 +0900 Subject: [PATCH 145/290] inlining: fix the `joint_effects` calculation (#50117) There are cases when `joint_effects` already has `:nothrow` tainted while the matches are fully covered. 
--- base/compiler/abstractinterpretation.jl | 4 +--- base/compiler/ssair/inlining.jl | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 368a4054fbc46..de8fd549400ef 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -2603,11 +2603,9 @@ function abstract_eval_globalref(interp::AbstractInterpreter, g::GlobalRef, sv:: nothrow = false if isa(rt, Const) consistent = ALWAYS_TRUE + nothrow = true if is_mutation_free_argtype(rt) inaccessiblememonly = ALWAYS_TRUE - nothrow = true - else - nothrow = true end elseif isdefined_globalref(g) nothrow = true diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index aebcc7394e309..17df27bd5f637 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -1406,7 +1406,7 @@ function compute_inlining_cases(@nospecialize(info::CallInfo), flag::UInt8, sig: fully_covered &= split_fully_covered end - joint_effects = Effects(joint_effects; nothrow=fully_covered) + fully_covered || (joint_effects = Effects(joint_effects; nothrow=false)) if handled_all_cases && revisit_idx !== nothing # we handled everything except one match with unmatched sparams, From c58e508fddba24a7508b3018aa7f413362b384ca Mon Sep 17 00:00:00 2001 From: Martin Kunz Date: Sat, 10 Jun 2023 18:12:48 +0200 Subject: [PATCH 146/290] fix(docs): typo in code-loading (#50123) --- doc/src/manual/code-loading.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/manual/code-loading.md b/doc/src/manual/code-loading.md index d3806ee180f32..743ee83c333a4 100644 --- a/doc/src/manual/code-loading.md +++ b/doc/src/manual/code-loading.md @@ -370,7 +370,7 @@ FooExt = "ExtDep" ... ``` -The keys under `extensions` are the name of the extensions. +The keys under `extensions` are the names of the extensions. 
They are loaded when all the packages on the right hand side (the extension dependencies) of that extension are loaded. If an extension only has one extension dependency the list of extension dependencies can be written as just a string for brevity. The location for the entry point of the extension is either in `ext/FooExt.jl` or `ext/FooExt/FooExt.jl` for From a81398f2d1be2a12c685dbb840adb0e639f45f1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= Date: Sun, 11 Jun 2023 21:47:32 +0100 Subject: [PATCH 147/290] [CompilerSupportLibraries_jll] Upgrade to v1.0.5 (#50135) This build includes a fix from GCC 12.2 for a bug in libgomp 12.1 which was causing frequent crashes on Windows in third-party libraries. --- deps/checksums/compilersupportlibraries | 184 +++++++++--------- .../CompilerSupportLibraries_jll/Project.toml | 2 +- 2 files changed, 93 insertions(+), 93 deletions(-) diff --git a/deps/checksums/compilersupportlibraries b/deps/checksums/compilersupportlibraries index 098c181ca5c87..4830109bd7aea 100644 --- a/deps/checksums/compilersupportlibraries +++ b/deps/checksums/compilersupportlibraries @@ -1,92 +1,92 @@ -CompilerSupportLibraries.v1.0.2+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/20ebaad57850393b6ac9fa924e511fe4 -CompilerSupportLibraries.v1.0.2+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/020de4d8b0ff6bedbadaa305ff8445e6849f12053762ea4aa68412d1ec763dbd86f479587a2fbb862487f1feb04d976c38099ddf3887817a3d32b3f029cf85b1 -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/3908fa1a2f739b330e787468c9bfb5c8 -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/1741e3403ac7aa99e7cfd9a01222c4153ed300f47cc1b347e1af1a6cd07a82caaa54b9cfbebae8751440420551621cc6524504413446d104f9493dff2c081853 -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/2444dbb7637b32cf543675cc12330878 
-CompilerSupportLibraries.v1.0.2+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/8537f0b243df8544350c884021b21c585fd302e8dd462a30a6ee84c7a36a049133262e5d1bc362f972066b8e8d6a091c32c3b746bab1feb9fccf2e7cca65756c -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/d79c1434594c0c5e7d6be798bf52c99e -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/7e71accc401a45b51b298702fb4c79a2fc856c7b28f0935f6ad3a0db5381c55fe5432daff371842930d718024b7c6c1d80e2bd09d397145203673bebbe3496ae -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/f212059053d99558a9b0bf54b20180e1 -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/5c104b1282cec8a944e5d008f44a4d60f4394fd5d797fec7d1f487d13e7328cd9c88ec4916dabf18596d87160756bda914e4f8c5a356b5577f9349d0d9e976d6 -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/3e3b3795ee93ef317223050e803a9875 -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/85d3c955e15f66bfe8bfec2f28c9160bc03d4d531ea4ffe6bc6b51e0d69ccea3ab67a16ca752dabc870861c407381c4519d75c6be3832e8dccd6122ec8c6ed75 -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/cf2d1315f6a348af2e6c065e2a286e7a -CompilerSupportLibraries.v1.0.2+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/58420377bc77aa7678034ee5f708eb6be7db359faef2c2638869765453633da9bf455512bd88e95b38ae0428ecc4053561517b176b2371129bdaef9d8d5dadfd -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/f5c09ed7e0eeb8d345d328f950582f26 -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/9c657f55c8fcdeb404be168a3a63a5e84304730fe34f25673d92cdae4b0a1fcc6a877ee1433f060e1be854c7811d66632e32510a2ed591d88330f1340b9c20de -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/c685518aca4721cd8621d510e2039683 
-CompilerSupportLibraries.v1.0.2+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/b760468c6377dcd2b8dd50200daaabe604006afc070984d78152b2becd0680b59036c9a6e91dea490121bd85b58d285bfc1e1cf696d29af236528400101de36c -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/8faf5c8ad62ab10f71dd2ec9683053e2 -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/921239f241a5c89710cf07272d7f6c3f10201a7533068ed1e9643f9fb2f439e1bb765a4966d913829866ee0ce4f1589d30d06e4b5c1361e3c016a9473f087177 -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/b38fcb70691ac2621379d298eef8c79e -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/06c7f64257ce721f5941f6e50a0d2717cdc9394fc532ded19ce3eaacd5e92a416969534227562e4fee04d2b6340c650d8bc9779e14519b90038bc41e8d1f5ce3 -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/cdfab2c7bc41765caf4441c3caeed761 -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/7109d4a7b32c00309c42685f54a86fc2cc63c0c00f65584ad296b6e44ad3320eed1aaf49684a8831841cdffa5555d72f89272fb722a780596e27ef020528026b -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/441980ebd23d72772cbe603f1c275336 -CompilerSupportLibraries.v1.0.2+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/e273d9f1af259a3080df8f173e1808a1ade976a943aba97216bf59a96178e7c052e7a048b0ceee53ab486ed577a2ecb92579857be2f7b29e76322ee1f13c9d76 -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/f5c09ed7e0eeb8d345d328f950582f26 -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/9c657f55c8fcdeb404be168a3a63a5e84304730fe34f25673d92cdae4b0a1fcc6a877ee1433f060e1be854c7811d66632e32510a2ed591d88330f1340b9c20de -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/c685518aca4721cd8621d510e2039683 
-CompilerSupportLibraries.v1.0.2+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/b760468c6377dcd2b8dd50200daaabe604006afc070984d78152b2becd0680b59036c9a6e91dea490121bd85b58d285bfc1e1cf696d29af236528400101de36c -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/8faf5c8ad62ab10f71dd2ec9683053e2 -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/921239f241a5c89710cf07272d7f6c3f10201a7533068ed1e9643f9fb2f439e1bb765a4966d913829866ee0ce4f1589d30d06e4b5c1361e3c016a9473f087177 -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/b38fcb70691ac2621379d298eef8c79e -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/06c7f64257ce721f5941f6e50a0d2717cdc9394fc532ded19ce3eaacd5e92a416969534227562e4fee04d2b6340c650d8bc9779e14519b90038bc41e8d1f5ce3 -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/cdfab2c7bc41765caf4441c3caeed761 -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/7109d4a7b32c00309c42685f54a86fc2cc63c0c00f65584ad296b6e44ad3320eed1aaf49684a8831841cdffa5555d72f89272fb722a780596e27ef020528026b -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/441980ebd23d72772cbe603f1c275336 -CompilerSupportLibraries.v1.0.2+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/e273d9f1af259a3080df8f173e1808a1ade976a943aba97216bf59a96178e7c052e7a048b0ceee53ab486ed577a2ecb92579857be2f7b29e76322ee1f13c9d76 -CompilerSupportLibraries.v1.0.2+0.i686-linux-gnu-libgfortran3.tar.gz/md5/6decf8fd5afb50451771c761e63a8917 -CompilerSupportLibraries.v1.0.2+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/4984724bcc847724b1bc005b6f760a18b68147f7d5402d0faf4e28fc0d14fa10975368a951f9caf2a8856500046dec8343043274557d58269e77492b929a9e4b -CompilerSupportLibraries.v1.0.2+0.i686-linux-gnu-libgfortran4.tar.gz/md5/39d1e8a3baa144c018d3eaf7f3806482 
-CompilerSupportLibraries.v1.0.2+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/fc4d429279c5a93b6c28b6e911b1e7cfd1c1cfe46f11f2e901b3832ce90d45f49d3d29f0ef18518a94af6cc8651f67c4ed81672680f9281ada390440b172a2af -CompilerSupportLibraries.v1.0.2+0.i686-linux-gnu-libgfortran5.tar.gz/md5/37dabd9cd224c9fed9633dedccb6c565 -CompilerSupportLibraries.v1.0.2+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/b253149e72eef9486888fbaace66e9b6945f4477f6b818f64f3047331165b0e2bc17aa6e3fc8c88686a72e478eb62c8f53883415d5419db448d8016fa3a1da5e -CompilerSupportLibraries.v1.0.2+0.i686-linux-musl-libgfortran3.tar.gz/md5/afdd32bfadd465848e6be458817a44ae -CompilerSupportLibraries.v1.0.2+0.i686-linux-musl-libgfortran3.tar.gz/sha512/eebd679c499143014514c7c9d1875dedbbab9e3af51526c4dd445a9e3dbade95d24522da8bbad0a50ab400755e47b018828b324c4ad7705e212ccd990e34439a -CompilerSupportLibraries.v1.0.2+0.i686-linux-musl-libgfortran4.tar.gz/md5/bc4a0f0b7cea328f7e8850583774496b -CompilerSupportLibraries.v1.0.2+0.i686-linux-musl-libgfortran4.tar.gz/sha512/82285b67946212b49cddf6259f2c60ff5469f8c5263ccefe44f1d93ace98ab68e2c152e1b54434b2f075fd8d192c06d5451bc8cca26d951ad15f3453102f02b5 -CompilerSupportLibraries.v1.0.2+0.i686-linux-musl-libgfortran5.tar.gz/md5/177f0232abce8d523882530ed7a93092 -CompilerSupportLibraries.v1.0.2+0.i686-linux-musl-libgfortran5.tar.gz/sha512/db80acf0f2434f28ee7680e1beb34f564940071815d1ad89fb5913cbd9ac24da528e826d0d54be6265a7340ebd661b6d308ed79d96b67fa5d8c98dc3f1bee8d6 -CompilerSupportLibraries.v1.0.2+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/756718e5eaa4547b874a71a8e3545492 -CompilerSupportLibraries.v1.0.2+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/c21c1be10ca8810f56e435b3629e2ab0678926ea9c4f4c3dd003f9e292c075493b83df04401d3bcf7738f1a44098f674f9b01bba9db4b9a9e45ad7af3497444e -CompilerSupportLibraries.v1.0.2+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/65ce0024bf8fe3276addbf185ed03e48 
-CompilerSupportLibraries.v1.0.2+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/5e8105a12ab04e2949e41eda50a060dea04ccd98660c7528cfc86e120fe61cca8bab878fd2c92a3858f02ac3f3c55d0e48789907e5fbd2392a8e84b183ed4636 -CompilerSupportLibraries.v1.0.2+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/b7727324d550f637209db795238c46a4 -CompilerSupportLibraries.v1.0.2+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/864b1db2642e68665b9d3322563c7ce964835d0e720325ea00b193e2cbf6791760e0014710e2a79876165ab0daffa6d53d61b87a5034f956ba6e255b0144652c -CompilerSupportLibraries.v1.0.2+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/4e5e4b23dc87450738da33926a07511d -CompilerSupportLibraries.v1.0.2+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/fc09879d94b750e75775d8b64a41ab9924d675fb53c5700467604412928fe7f5cb21911da0f64898d2463fa77ffbaf4c96c397b9060f4746eec152747930cddc -CompilerSupportLibraries.v1.0.2+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/9a92138ed69aa317a932a615c6e62d69 -CompilerSupportLibraries.v1.0.2+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/0b7785379936a2a209b074177b1424dd7e00b29b5165f564e799b0aa4e06a582e9d616525d97274ba2507cb88192028f1ac485d3f99bdc7ee53fc63c1a7e85de -CompilerSupportLibraries.v1.0.2+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/8ffee3d6de5197c7a1f354d72c8238fa -CompilerSupportLibraries.v1.0.2+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/deadc4d7224c84f9b82dc956b69e815c44ae036802838365d870ab9f58c8bcf8ce0645f2f387c8ff344ac2108fc8e7e1ee907fa55e93c91aa5d9fd921bf3fdcb -CompilerSupportLibraries.v1.0.2+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/87449e72e3f33dbb69b7053cdc2649d4 -CompilerSupportLibraries.v1.0.2+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/5ce02ad10c6f4686a476eb2a5de2988cd8b482f5e693db2880c84ad1c82f468ef03fe01b9d0feefe5d4ee741d1d16643d36b144e6261ed32311b3b6f312fac2f -CompilerSupportLibraries.v1.0.2+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/0407cde92cfa42fa89ac83217ca0ec16 
-CompilerSupportLibraries.v1.0.2+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/032c831f1166a336551138939ac40eb2c68a048ce786c0c1403b879a20c1b706caac16d22560b2c7f2b3d6373986c347188675674116005ca251336ee048d09f -CompilerSupportLibraries.v1.0.2+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/23418763b808371ee94772a90d501f4d -CompilerSupportLibraries.v1.0.2+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/7867b843551457b11bda7821dd384c1c1cf23b80a308b2058a693de7b7da099f0b37eb0a6de2b84c04b625a68c60eea55138e200d5d6ec6f6af09bd7ce406a96 -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/e3d33ae03c18affea74699bdc1fabb68 -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/42013f4921de5a69ad857195ce5c19ad1bca3c920d79699e5501f1f4534ab132fabd422362b2b5056f5d182215d6c069db5df460bafa700903faf962cc00f77b -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/d40c1e8c0393213c6057c53a12f44175 -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/fe7baa4de7490065ab7b953cc12f41462a24bcb49d0a4a64b23249e98e7569b19bb1cb455af2f76090e34066a7d3cdd7a48cae6515ce6c7a5c8486b0cacc5106 -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/48541b90f715c4c86ee4da0570275947 -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/7f2683fb98e80f12629f4ed3bea9fd59d32b7e7a9ed1699e782d8e238ff0915ecc61bf00adaf4597cfe41caf82cdca0f9be250f595f5f0bea6d8f77dba99eaf4 -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/4547059eb905995667be48bf85d49911 -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/7400fdabc924434ab4a4949248c3603887ac06ffd2f205ae33e14495d86cd4f816bbd1999eeafa0257f518df1e7f7c522f596e847a71dbfbfccff4859f50acc7 -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/46267543cad6584d7b7b9fcc8f18f21d 
-CompilerSupportLibraries.v1.0.2+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/0353d7d724be48d4185d3c181692970b7996f53f6a01723072aa5c94b53a8c5055faeed30df51659c252a46f4b941dec0cb24569323e3c85c166f14c5b7c8e9e -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/14dba2897a6e9d370fa9091c045375fc -CompilerSupportLibraries.v1.0.2+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/10b79f9c059839f5b57fa8d2a381a034c4067262c4088bd354d14ea56bec097878069383aa9cfadaa09d73bd20fc348fb61662d863a8d62cb25d7af6b8e29858 -CompilerSupportLibraries.v1.0.2+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/eed836d1addeb10d0901f836724aff1e -CompilerSupportLibraries.v1.0.2+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/e33eca424d1529a1fb23ba9cf7fac345ed1cfc8073c975b6b31ca44d2e8c3f5083af65433df009b22483dceb2e43149f3c1e8433681fec5fb812e1d5b4243ce4 -CompilerSupportLibraries.v1.0.2+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/d5ae9f9519341fdaabf62267c89461d2 -CompilerSupportLibraries.v1.0.2+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/6421aa5d1bd6f08ad43f59ed4dc1bef8b9b598ebbbd3e48149730f3bec3471f8e2c02ffb338427326924290b8f52ef9e626e3313448bc931a61d866c5dc544ae -CompilerSupportLibraries.v1.0.2+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/fc1df521395362a5aaa2e2aeef707207 -CompilerSupportLibraries.v1.0.2+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/f2e5a08e3cae171242ae6a20d2d4838c1529ce042745dc466148b7bbc06896d94476fd05c7787e6e8641bea752dfc0e6b09e95b160bede600d20d2ad68e7705f -CompilerSupportLibraries.v1.0.2+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/2338f8aa2696935f7460454e708ce308 -CompilerSupportLibraries.v1.0.2+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/5a4b0e97928c26eee16bbec4c3e69e55fa9c768101257c3e2f161118809c778aa0feaf21307198822c3172a58ed12ca0a49285b2941ed0b8f2b367e64ca1c51a -CompilerSupportLibraries.v1.0.2+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/b393d2bf0d181d218130ac572c17d369 
-CompilerSupportLibraries.v1.0.2+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/76e0f7caa24bb734c6f7542be9f834d5b912f082cb3c4c3c52a63e37d4b8c33dd94e576c43f4bee6c04bfb44af2f2b67ba70773fa52ad0de6c8c0059b3e51b83 -CompilerSupportLibraries.v1.0.2+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/23db836e6e4142f621862971017fe61e -CompilerSupportLibraries.v1.0.2+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/c0b04f7fe5aabfe6af509c77a1f68e0bcfd14714758042fe502b968c4cc272156fc84c8b4c1ee574754bb2fddaa810f6a4215cbd164ddc11b697b3adaef09a81 +CompilerSupportLibraries.v1.0.5+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/20ebaad57850393b6ac9fa924e511fe4 +CompilerSupportLibraries.v1.0.5+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/020de4d8b0ff6bedbadaa305ff8445e6849f12053762ea4aa68412d1ec763dbd86f479587a2fbb862487f1feb04d976c38099ddf3887817a3d32b3f029cf85b1 +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/3908fa1a2f739b330e787468c9bfb5c8 +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/1741e3403ac7aa99e7cfd9a01222c4153ed300f47cc1b347e1af1a6cd07a82caaa54b9cfbebae8751440420551621cc6524504413446d104f9493dff2c081853 +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/2444dbb7637b32cf543675cc12330878 +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/8537f0b243df8544350c884021b21c585fd302e8dd462a30a6ee84c7a36a049133262e5d1bc362f972066b8e8d6a091c32c3b746bab1feb9fccf2e7cca65756c +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/d79c1434594c0c5e7d6be798bf52c99e +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/7e71accc401a45b51b298702fb4c79a2fc856c7b28f0935f6ad3a0db5381c55fe5432daff371842930d718024b7c6c1d80e2bd09d397145203673bebbe3496ae +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/f212059053d99558a9b0bf54b20180e1 
+CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/5c104b1282cec8a944e5d008f44a4d60f4394fd5d797fec7d1f487d13e7328cd9c88ec4916dabf18596d87160756bda914e4f8c5a356b5577f9349d0d9e976d6 +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/3e3b3795ee93ef317223050e803a9875 +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/85d3c955e15f66bfe8bfec2f28c9160bc03d4d531ea4ffe6bc6b51e0d69ccea3ab67a16ca752dabc870861c407381c4519d75c6be3832e8dccd6122ec8c6ed75 +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/cf2d1315f6a348af2e6c065e2a286e7a +CompilerSupportLibraries.v1.0.5+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/58420377bc77aa7678034ee5f708eb6be7db359faef2c2638869765453633da9bf455512bd88e95b38ae0428ecc4053561517b176b2371129bdaef9d8d5dadfd +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/f5c09ed7e0eeb8d345d328f950582f26 +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/9c657f55c8fcdeb404be168a3a63a5e84304730fe34f25673d92cdae4b0a1fcc6a877ee1433f060e1be854c7811d66632e32510a2ed591d88330f1340b9c20de +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/c685518aca4721cd8621d510e2039683 +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/b760468c6377dcd2b8dd50200daaabe604006afc070984d78152b2becd0680b59036c9a6e91dea490121bd85b58d285bfc1e1cf696d29af236528400101de36c +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/8faf5c8ad62ab10f71dd2ec9683053e2 +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/921239f241a5c89710cf07272d7f6c3f10201a7533068ed1e9643f9fb2f439e1bb765a4966d913829866ee0ce4f1589d30d06e4b5c1361e3c016a9473f087177 +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/b38fcb70691ac2621379d298eef8c79e 
+CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/06c7f64257ce721f5941f6e50a0d2717cdc9394fc532ded19ce3eaacd5e92a416969534227562e4fee04d2b6340c650d8bc9779e14519b90038bc41e8d1f5ce3 +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/cdfab2c7bc41765caf4441c3caeed761 +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/7109d4a7b32c00309c42685f54a86fc2cc63c0c00f65584ad296b6e44ad3320eed1aaf49684a8831841cdffa5555d72f89272fb722a780596e27ef020528026b +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/441980ebd23d72772cbe603f1c275336 +CompilerSupportLibraries.v1.0.5+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/e273d9f1af259a3080df8f173e1808a1ade976a943aba97216bf59a96178e7c052e7a048b0ceee53ab486ed577a2ecb92579857be2f7b29e76322ee1f13c9d76 +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/f5c09ed7e0eeb8d345d328f950582f26 +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/9c657f55c8fcdeb404be168a3a63a5e84304730fe34f25673d92cdae4b0a1fcc6a877ee1433f060e1be854c7811d66632e32510a2ed591d88330f1340b9c20de +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/c685518aca4721cd8621d510e2039683 +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/b760468c6377dcd2b8dd50200daaabe604006afc070984d78152b2becd0680b59036c9a6e91dea490121bd85b58d285bfc1e1cf696d29af236528400101de36c +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/8faf5c8ad62ab10f71dd2ec9683053e2 +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/921239f241a5c89710cf07272d7f6c3f10201a7533068ed1e9643f9fb2f439e1bb765a4966d913829866ee0ce4f1589d30d06e4b5c1361e3c016a9473f087177 +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/b38fcb70691ac2621379d298eef8c79e 
+CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/06c7f64257ce721f5941f6e50a0d2717cdc9394fc532ded19ce3eaacd5e92a416969534227562e4fee04d2b6340c650d8bc9779e14519b90038bc41e8d1f5ce3 +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/cdfab2c7bc41765caf4441c3caeed761 +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/7109d4a7b32c00309c42685f54a86fc2cc63c0c00f65584ad296b6e44ad3320eed1aaf49684a8831841cdffa5555d72f89272fb722a780596e27ef020528026b +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/441980ebd23d72772cbe603f1c275336 +CompilerSupportLibraries.v1.0.5+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/e273d9f1af259a3080df8f173e1808a1ade976a943aba97216bf59a96178e7c052e7a048b0ceee53ab486ed577a2ecb92579857be2f7b29e76322ee1f13c9d76 +CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran3.tar.gz/md5/6decf8fd5afb50451771c761e63a8917 +CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/4984724bcc847724b1bc005b6f760a18b68147f7d5402d0faf4e28fc0d14fa10975368a951f9caf2a8856500046dec8343043274557d58269e77492b929a9e4b +CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran4.tar.gz/md5/39d1e8a3baa144c018d3eaf7f3806482 +CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/fc4d429279c5a93b6c28b6e911b1e7cfd1c1cfe46f11f2e901b3832ce90d45f49d3d29f0ef18518a94af6cc8651f67c4ed81672680f9281ada390440b172a2af +CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran5.tar.gz/md5/37dabd9cd224c9fed9633dedccb6c565 +CompilerSupportLibraries.v1.0.5+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/b253149e72eef9486888fbaace66e9b6945f4477f6b818f64f3047331165b0e2bc17aa6e3fc8c88686a72e478eb62c8f53883415d5419db448d8016fa3a1da5e +CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran3.tar.gz/md5/afdd32bfadd465848e6be458817a44ae 
+CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran3.tar.gz/sha512/eebd679c499143014514c7c9d1875dedbbab9e3af51526c4dd445a9e3dbade95d24522da8bbad0a50ab400755e47b018828b324c4ad7705e212ccd990e34439a +CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran4.tar.gz/md5/bc4a0f0b7cea328f7e8850583774496b +CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran4.tar.gz/sha512/82285b67946212b49cddf6259f2c60ff5469f8c5263ccefe44f1d93ace98ab68e2c152e1b54434b2f075fd8d192c06d5451bc8cca26d951ad15f3453102f02b5 +CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran5.tar.gz/md5/177f0232abce8d523882530ed7a93092 +CompilerSupportLibraries.v1.0.5+0.i686-linux-musl-libgfortran5.tar.gz/sha512/db80acf0f2434f28ee7680e1beb34f564940071815d1ad89fb5913cbd9ac24da528e826d0d54be6265a7340ebd661b6d308ed79d96b67fa5d8c98dc3f1bee8d6 +CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/f5795dada5360eb8422f45150b13bae9 +CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/6acd1bf7c81631cef9b8b0576ccece08723c5ae2f49de2487d3aefd25f9a0ad49df09e3782735267997d40687b04b85c89e00f6889b026af599bf1bbe91803a1 +CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/5e590f83161913f0145ba8d496b2504b +CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/4a3f36588afcdef26173764597054068e26f2376e6126a9a94c46b258b5d7a29951d47b5e1ba24df6c3d139bbc4decc5c501a266811692d7fadadc7bd7b6960d +CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/27da4a7c890fe1427c33fe214cc5feaf +CompilerSupportLibraries.v1.0.5+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/310ad00f053f9f3ec715ce2e8d20446f397728dff5acc787ea9c9332346607a3d42b678099c424e6d6e5294acddf2aa26051de657b48d34abfd04486951bf241 +CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/4e5e4b23dc87450738da33926a07511d 
+CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/fc09879d94b750e75775d8b64a41ab9924d675fb53c5700467604412928fe7f5cb21911da0f64898d2463fa77ffbaf4c96c397b9060f4746eec152747930cddc +CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/9a92138ed69aa317a932a615c6e62d69 +CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/0b7785379936a2a209b074177b1424dd7e00b29b5165f564e799b0aa4e06a582e9d616525d97274ba2507cb88192028f1ac485d3f99bdc7ee53fc63c1a7e85de +CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/8ffee3d6de5197c7a1f354d72c8238fa +CompilerSupportLibraries.v1.0.5+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/deadc4d7224c84f9b82dc956b69e815c44ae036802838365d870ab9f58c8bcf8ce0645f2f387c8ff344ac2108fc8e7e1ee907fa55e93c91aa5d9fd921bf3fdcb +CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/87449e72e3f33dbb69b7053cdc2649d4 +CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/5ce02ad10c6f4686a476eb2a5de2988cd8b482f5e693db2880c84ad1c82f468ef03fe01b9d0feefe5d4ee741d1d16643d36b144e6261ed32311b3b6f312fac2f +CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/0407cde92cfa42fa89ac83217ca0ec16 +CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/032c831f1166a336551138939ac40eb2c68a048ce786c0c1403b879a20c1b706caac16d22560b2c7f2b3d6373986c347188675674116005ca251336ee048d09f +CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/23418763b808371ee94772a90d501f4d +CompilerSupportLibraries.v1.0.5+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/7867b843551457b11bda7821dd384c1c1cf23b80a308b2058a693de7b7da099f0b37eb0a6de2b84c04b625a68c60eea55138e200d5d6ec6f6af09bd7ce406a96 +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/e3d33ae03c18affea74699bdc1fabb68 
+CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/42013f4921de5a69ad857195ce5c19ad1bca3c920d79699e5501f1f4534ab132fabd422362b2b5056f5d182215d6c069db5df460bafa700903faf962cc00f77b +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/d40c1e8c0393213c6057c53a12f44175 +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/fe7baa4de7490065ab7b953cc12f41462a24bcb49d0a4a64b23249e98e7569b19bb1cb455af2f76090e34066a7d3cdd7a48cae6515ce6c7a5c8486b0cacc5106 +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/48541b90f715c4c86ee4da0570275947 +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/7f2683fb98e80f12629f4ed3bea9fd59d32b7e7a9ed1699e782d8e238ff0915ecc61bf00adaf4597cfe41caf82cdca0f9be250f595f5f0bea6d8f77dba99eaf4 +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/4547059eb905995667be48bf85d49911 +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/7400fdabc924434ab4a4949248c3603887ac06ffd2f205ae33e14495d86cd4f816bbd1999eeafa0257f518df1e7f7c522f596e847a71dbfbfccff4859f50acc7 +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/46267543cad6584d7b7b9fcc8f18f21d +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/0353d7d724be48d4185d3c181692970b7996f53f6a01723072aa5c94b53a8c5055faeed30df51659c252a46f4b941dec0cb24569323e3c85c166f14c5b7c8e9e +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/14dba2897a6e9d370fa9091c045375fc +CompilerSupportLibraries.v1.0.5+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/10b79f9c059839f5b57fa8d2a381a034c4067262c4088bd354d14ea56bec097878069383aa9cfadaa09d73bd20fc348fb61662d863a8d62cb25d7af6b8e29858 +CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/eed836d1addeb10d0901f836724aff1e 
+CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/e33eca424d1529a1fb23ba9cf7fac345ed1cfc8073c975b6b31ca44d2e8c3f5083af65433df009b22483dceb2e43149f3c1e8433681fec5fb812e1d5b4243ce4 +CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/d5ae9f9519341fdaabf62267c89461d2 +CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/6421aa5d1bd6f08ad43f59ed4dc1bef8b9b598ebbbd3e48149730f3bec3471f8e2c02ffb338427326924290b8f52ef9e626e3313448bc931a61d866c5dc544ae +CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/fc1df521395362a5aaa2e2aeef707207 +CompilerSupportLibraries.v1.0.5+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/f2e5a08e3cae171242ae6a20d2d4838c1529ce042745dc466148b7bbc06896d94476fd05c7787e6e8641bea752dfc0e6b09e95b160bede600d20d2ad68e7705f +CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/0c2fc6fae4ebe293a7f0dc1e91f6531a +CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/fdb0ad061cacad0557fde3ec216fd3666284f24ad6a86f4a4b6f946dccb112c9704f52edba86f3b17d84c824affbcfef740720348ef227380cf6017811bda80b +CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/005e608dbef2b5cdb7624702ccc426be +CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/8bb2bcd0a6b1901e8a9be20f505bead5c78ecafbe5a8271cd13385553e5744e0c7bff62976ac9e7d74b8f3bd467603d4c0f5658e6b120bb23066c15e0a644ed4 +CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/d6c2c7ad72bff7f7e5c43678d716a57a +CompilerSupportLibraries.v1.0.5+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/36f5eba1b0be440797467cb7104652b74709913d2bad1b08ee2dc70f450fb8eab81b28f2b0bc8dfc238b3c46982c69aac831b4fad5bcee4e9dd114852fcb4a0b diff --git a/stdlib/CompilerSupportLibraries_jll/Project.toml b/stdlib/CompilerSupportLibraries_jll/Project.toml index fc5883cc79802..4c7aa35a99730 100644 --- 
a/stdlib/CompilerSupportLibraries_jll/Project.toml +++ b/stdlib/CompilerSupportLibraries_jll/Project.toml @@ -4,7 +4,7 @@ uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" # NOTE: When updating this, also make sure to update the value # `CSL_NEXT_GLIBCXX_VERSION` in `deps/csl.mk`, to properly disable # automatic usage of BB-built CSLs on extremely up-to-date systems! -version = "1.0.2+0" +version = "1.0.5+0" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" From 75bda64231eb1de7d09b14fe5e3b009caac8b421 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Mon, 12 Jun 2023 03:13:48 -0400 Subject: [PATCH 148/290] speed up expansion and lowering of ccall macro (#50077) --- base/c.jl | 33 +++++++-------------------------- src/julia-syntax.scm | 26 +++++++++++++++++--------- test/ccall.jl | 32 +++----------------------------- 3 files changed, 27 insertions(+), 64 deletions(-) diff --git a/base/c.jl b/base/c.jl index d94447650b9fb..662986501d59d 100644 --- a/base/c.jl +++ b/base/c.jl @@ -640,13 +640,11 @@ end function ccall_macro_lower(convention, func, rettype, types, args, nreq) - lowering = [] - realargs = [] - gcroots = [] + statements = [] - # if interpolation was used, ensure variable is a function pointer at runtime. + # if interpolation was used, ensure the value is a function pointer at runtime. 
if Meta.isexpr(func, :$) - push!(lowering, Expr(:(=), :func, esc(func.args[1]))) + push!(statements, Expr(:(=), :func, esc(func.args[1]))) name = QuoteNode(func.args[1]) func = :func check = quote @@ -655,31 +653,14 @@ function ccall_macro_lower(convention, func, rettype, types, args, nreq) throw(ArgumentError("interpolated function `$name` was not a Ptr{Cvoid}, but $(typeof(func))")) end end - push!(lowering, check) + push!(statements, check) else func = esc(func) end - for (i, (arg, type)) in enumerate(zip(args, types)) - sym = Symbol(string("arg", i, "root")) - sym2 = Symbol(string("arg", i, )) - earg, etype = esc(arg), esc(type) - push!(lowering, :(local $sym = $(GlobalRef(Base, :cconvert))($etype, $earg))) - push!(lowering, :(local $sym2 = $(GlobalRef(Base, :unsafe_convert))($etype, $sym))) - push!(realargs, sym2) - push!(gcroots, sym) - end - etypes = Expr(:call, Expr(:core, :svec), types...) - exp = Expr(:foreigncall, - func, - esc(rettype), - esc(etypes), - nreq, - QuoteNode(convention), - realargs..., gcroots...) - push!(lowering, exp) - - return Expr(:block, lowering...) + return Expr(:block, statements..., + Expr(:call, :ccall, func, Expr(:cconv, convention, nreq), esc(rettype), + Expr(:tuple, map(esc, types)...), map(esc, args)...)) end """ diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index c764577a6c89a..dd231ad13172f 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -1097,7 +1097,7 @@ ;; insert calls to convert() in ccall, and pull out expressions that might ;; need to be rooted before conversion. -(define (lower-ccall name RT atypes args cconv) +(define (lower-ccall name RT atypes args cconv nreq) (let loop ((F atypes) ;; formals (A args) ;; actuals (stmts '()) ;; initializers @@ -1114,13 +1114,15 @@ (if (null? A) `(block ,.(reverse! stmts) - (foreigncall ,name ,RT (call (core svec) ,@(reverse! T)) - ,(if isseq (- (length atypes) 1) 0) ; 0 or number of arguments before ... 
in definition + (foreigncall ,(expand-forms name) ,(expand-forms RT) (call (core svec) ,@(reverse! T)) + ;; 0 or number of arguments before ... in definition + ,(or nreq + (if isseq (- (length atypes) 1) 0)) ',cconv ,.(reverse! C) ,@GC)) ; GC root ordering is arbitrary - (let* ((a (car A)) - (ty (if isseq (cadar F) (car F)))) + (let* ((a (expand-forms (car A))) + (ty (expand-forms (if isseq (cadar F) (car F))))) (if (and isseq (not (null? (cdr F)))) (error "only the trailing ccall argument type should have \"...\"")) (if (eq? ty 'Any) (loop (if isseq F (cdr F)) (cdr A) stmts (list* '(core Any) T) (list* a C) GC) @@ -2616,7 +2618,9 @@ ((eq? f 'ccall) (if (not (length> e 4)) (error "too few arguments to ccall")) (let* ((cconv (cadddr e)) - (have-cconv (memq cconv '(cdecl stdcall fastcall thiscall llvmcall))) + (have-cconv-expr (and (pair? cconv) (eq? (car cconv) 'cconv))) + (have-cconv (or have-cconv-expr + (memq cconv '(cdecl stdcall fastcall thiscall llvmcall)))) (after-cconv (if have-cconv (cddddr e) (cdddr e))) (name (caddr e)) (RT (car after-cconv)) @@ -2629,9 +2633,13 @@ (eq? (car RT) 'tuple)) (error "ccall argument types must be a tuple; try \"(T,)\" and check if you specified a correct return type") (error "ccall argument types must be a tuple; try \"(T,)\""))) - (expand-forms - (lower-ccall name RT (cdr argtypes) args - (if have-cconv cconv 'ccall)))))) + (lower-ccall name RT (cdr argtypes) args + (if have-cconv + (if have-cconv-expr + (cadr cconv) + cconv) + 'ccall) + (and have-cconv-expr (caddr cconv)))))) ((any kwarg? (cddr e)) ;; f(..., a=b, ...) (expand-forms (lower-kw-call f (cddr e)))) ((has-parameters? (cddr e)) ;; f(...; ...) diff --git a/test/ccall.jl b/test/ccall.jl index 0266dabd6332b..7e166ddbd9041 100644 --- a/test/ccall.jl +++ b/test/ccall.jl @@ -1757,37 +1757,11 @@ end )::Cstring))...) 
@test call == Base.remove_linenums!( quote - local arg1root = $(GlobalRef(Base, :cconvert))($(Expr(:escape, :Cstring)), $(Expr(:escape, :str))) - local arg1 = $(GlobalRef(Base, :unsafe_convert))($(Expr(:escape, :Cstring)), arg1root) - local arg2root = $(GlobalRef(Base, :cconvert))($(Expr(:escape, :Cint)), $(Expr(:escape, :num1))) - local arg2 = $(GlobalRef(Base, :unsafe_convert))($(Expr(:escape, :Cint)), arg2root) - local arg3root = $(GlobalRef(Base, :cconvert))($(Expr(:escape, :Cint)), $(Expr(:escape, :num2))) - local arg3 = $(GlobalRef(Base, :unsafe_convert))($(Expr(:escape, :Cint)), arg3root) - $(Expr(:foreigncall, - :($(Expr(:escape, :((:func, libstring))))), - :($(Expr(:escape, :Cstring))), - :($(Expr(:escape, :(($(Expr(:core, :svec)))(Cstring, Cint, Cint))))), - 0, - :(:ccall), - :arg1, :arg2, :arg3, :arg1root, :arg2root, :arg3root)) + ccall($(Expr(:escape, :((:func, libstring)))), $(Expr(:cconv, :ccall, 0)), $(Expr(:escape, :Cstring)), ($(Expr(:escape, :Cstring)), $(Expr(:escape, :Cint)), $(Expr(:escape, :Cint))), $(Expr(:escape, :str)), $(Expr(:escape, :num1)), $(Expr(:escape, :num2))) end) - # pointer interpolation - call = ccall_macro_lower(:ccall, ccall_macro_parse(:( $(Expr(:$, :fptr))("bar"::Cstring)::Cvoid ))...) 
- @test Base.remove_linenums!(call) == Base.remove_linenums!( - quote - func = $(Expr(:escape, :fptr)) - begin - if !(func isa Ptr{Cvoid}) - name = :fptr - throw(ArgumentError("interpolated function `$(name)` was not a Ptr{Cvoid}, but $(typeof(func))")) - end - end - local arg1root = $(GlobalRef(Base, :cconvert))($(Expr(:escape, :Cstring)), $(Expr(:escape, "bar"))) - local arg1 = $(GlobalRef(Base, :unsafe_convert))($(Expr(:escape, :Cstring)), arg1root) - $(Expr(:foreigncall, :func, :($(Expr(:escape, :Cvoid))), :($(Expr(:escape, :(($(Expr(:core, :svec)))(Cstring))))), 0, :(:ccall), :arg1, :arg1root)) - end) - + local fptr = :x + @test_throws ArgumentError("interpolated function `fptr` was not a Ptr{Cvoid}, but Symbol") @ccall $fptr()::Cvoid end @testset "check error paths" begin From d69b1a228b4e0ac834c47a674725f0b08bd2da0e Mon Sep 17 00:00:00 2001 From: Christopher Rackauckas Date: Mon, 12 Jun 2023 10:26:22 -0400 Subject: [PATCH 149/290] Add check call to getrf! (#50134) * Add check call to getrf! `lu!(A; check=false)` is supposed to disable the checking and leave it to the user: > When check = true, an error is thrown if the decomposition fails. When check = false, responsibility for checking the decomposition's validity (via issuccess) lies with the user. However, this is not quite true since `lu!` calls `getrf!` which internally does a check for `chkfinite` which does throw an error. This updates the `getrf!` function to have a `check` argument which is then used by `lu!` to fully disable the error throwing checks. 
* Update lapack.jl --- stdlib/LinearAlgebra/src/lapack.jl | 4 ++-- stdlib/LinearAlgebra/src/lu.jl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/stdlib/LinearAlgebra/src/lapack.jl b/stdlib/LinearAlgebra/src/lapack.jl index 066a858cacb30..6353f9fa8d266 100644 --- a/stdlib/LinearAlgebra/src/lapack.jl +++ b/stdlib/LinearAlgebra/src/lapack.jl @@ -554,9 +554,9 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty # * .. Array Arguments .. # INTEGER IPIV( * ) # DOUBLE PRECISION A( LDA, * ) - function getrf!(A::AbstractMatrix{$elty}) + function getrf!(A::AbstractMatrix{$elty}; check = true) require_one_based_indexing(A) - chkfinite(A) + check && chkfinite(A) chkstride1(A) m, n = size(A) lda = max(1,stride(A, 2)) diff --git a/stdlib/LinearAlgebra/src/lu.jl b/stdlib/LinearAlgebra/src/lu.jl index a93803ca2ea45..5d69090f27e44 100644 --- a/stdlib/LinearAlgebra/src/lu.jl +++ b/stdlib/LinearAlgebra/src/lu.jl @@ -79,7 +79,7 @@ transpose(F::LU{<:Real}) = TransposeFactorization(F) # the following method is meant to catch calls to lu!(A::LAPACKArray) without a pivoting stategy lu!(A::StridedMatrix{<:BlasFloat}; check::Bool = true) = lu!(A, RowMaximum(); check=check) function lu!(A::StridedMatrix{T}, ::RowMaximum; check::Bool = true) where {T<:BlasFloat} - lpt = LAPACK.getrf!(A) + lpt = LAPACK.getrf!(A; check) check && checknonsingular(lpt[3]) return LU{T,typeof(lpt[1]),typeof(lpt[2])}(lpt[1], lpt[2], lpt[3]) end From bf9bbb2a1db284ee133f5d2e506a73b4f47cf80c Mon Sep 17 00:00:00 2001 From: Sheehan Olver Date: Mon, 12 Jun 2023 21:07:25 +0200 Subject: [PATCH 150/290] Add doc string for reflectorApply! 
(#50081) --- stdlib/LinearAlgebra/src/generic.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/stdlib/LinearAlgebra/src/generic.jl b/stdlib/LinearAlgebra/src/generic.jl index c66f59838e8ba..9cbe3f76ccfb9 100644 --- a/stdlib/LinearAlgebra/src/generic.jl +++ b/stdlib/LinearAlgebra/src/generic.jl @@ -1591,7 +1591,11 @@ end ξ1/ν end -# apply reflector from left +""" + reflectorApply!(x, τ, A) + +Multiplies `A` in-place by a Householder reflection on the left. It is equivalent to `A .= (I - τ*[1; x] * [1; x]')*A`. +""" @inline function reflectorApply!(x::AbstractVector, τ::Number, A::AbstractVecOrMat) require_one_based_indexing(x) m, n = size(A, 1), size(A, 2) From 970941cce8989acfc98303087c11be02c9b606bf Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Tue, 13 Jun 2023 07:38:40 +0900 Subject: [PATCH 151/290] effects: allow concrete-eval when `--check-bounds=no` if proven "safe" (#50107) From version 1.9 onwards, when `--check-bounds=no` is used, concrete-eval is completely disabled. However, it appears `--check-bounds=no` is still being used within the community, causing issues like the one reported in JuliaArrays/StaticArrays.jl#1155. Although we should move forward to a direction of eliminating the flag in the future (#48245), for the time being, there are many requests to carry out a certain level of compiler optimization, even when this flag is enabled. This commit aims to allow concrete-eval "safely" even under `--check-bounds=no`. Specifically, when the method call being analyzed is `:nothrow`, it should be predominantly safe to concrete-eval it under this flag. Technically, however, even `:nothrow` methods could trigger undefined behavior, since `:nothrow` isn't a strict constraint and it's possible for users to annotate potentially risky methods with `Base.@assume_effects :nothrow`. 
Nonetheless, since this possibility is acknowledged in `Base.@assume_effects` documentation, I feel it's fair to relegate it to user responsibility. --- base/compiler/abstractinterpretation.jl | 20 +++++++++++++------- test/boundscheck_exec.jl | 6 +++++- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index de8fd549400ef..5fe0014ef3e60 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -834,21 +834,27 @@ end function concrete_eval_eligible(interp::AbstractInterpreter, @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, sv::AbsIntState) + (;effects) = result if inbounds_option() === :off - # Disable concrete evaluation in `--check-bounds=no` mode, since we cannot be sure - # that inferred effects are accurate. - return :none - elseif !result.effects.noinbounds && stmt_taints_inbounds_consistency(sv) + if !is_nothrow(effects) + # Disable concrete evaluation in `--check-bounds=no` mode, + # unless it is known to not throw. + return :none + end + end + if !effects.noinbounds && stmt_taints_inbounds_consistency(sv) # If the current statement is @inbounds or we propagate inbounds, the call's consistency # is tainted and not consteval eligible. 
add_remark!(interp, sv, "[constprop] Concrete evel disabled for inbounds") return :none - elseif isoverlayed(method_table(interp)) && !is_nonoverlayed(result.effects) - # disable all concrete-evaluation if this function call is tainted by some overlayed + end + if isoverlayed(method_table(interp)) && !is_nonoverlayed(effects) + # disable concrete-evaluation if this function call is tainted by some overlayed # method since currently there is no direct way to execute overlayed methods + add_remark!(interp, sv, "[constprop] Concrete evel disabled for overlayed methods") return :none end - if result.edge !== nothing && is_foldable(result.effects) + if result.edge !== nothing && is_foldable(effects) if f !== nothing && is_all_const_arg(arginfo, #=start=#2) return :concrete_eval elseif !any_conditional(arginfo) diff --git a/test/boundscheck_exec.jl b/test/boundscheck_exec.jl index 403014c94ed0d..f2eb2ea630893 100644 --- a/test/boundscheck_exec.jl +++ b/test/boundscheck_exec.jl @@ -282,7 +282,6 @@ begin # Pass inbounds meta to getindex on CartesianIndices (#42115) end end - # Test that --check-bounds=off doesn't permit const prop of indices into # function that are not dynamically reachable (the same test for @inbounds # is in the compiler tests). @@ -294,4 +293,9 @@ function f_boundscheck_elim(n) end @test Tuple{} <: code_typed(f_boundscheck_elim, Tuple{Int})[1][2] +# https://github.com/JuliaArrays/StaticArrays.jl/issues/1155 +@test Base.return_types() do + typeintersect(Int, Integer) +end |> only === Type{Int} + end From c3cd3ebd0f652e59efb0b82d67a7109eb09a8efb Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 13 Jun 2023 01:23:20 +0000 Subject: [PATCH 152/290] Fix refcount violation for pending_nodes These nodes get passed through renaming, but with SSAValues already renamed. Due to a long-standing bug, we were failing to refcount those SSAValues on insertion. Fix that bug. 
--- base/compiler/ssair/ir.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index c5415add51cc5..debad8bfb0d66 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -1122,10 +1122,10 @@ end function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssas::Vector{Int}, new_new_used_ssas::Vector{Int}, do_rename_ssa::Bool) id = val.id - if id > length(ssanums) - return val - end if do_rename_ssa + if id > length(ssanums) + return val + end val = ssanums[id] end if isa(val, SSAValue) From 0083a04ab8a59536e079c622b41c0a089f7e1829 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 13 Jun 2023 01:24:42 +0000 Subject: [PATCH 153/290] Fix refcount violations in new ifelse sroa lifting Fixes a refcount violation in #49882. Unlike PhiNodes, the code for ifelse was referencing the old arguments in the newly inserted nodes, so they were getting counted. However, it did not update the count when replacing these arguments later. Fix this by aligning with PhiNode and leaving the arguments unset, pushing them in later. 
--- base/compiler/ssair/passes.jl | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 3ac11b7bd232a..518a6512fc166 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -720,12 +720,12 @@ function perform_lifting!(compact::IncrementalCompact, lifted_philikes[i] = LiftedPhilike(ssa, new_node, true) else @assert is_known_call(old_node, Core.ifelse, compact) - ifelse_func, condition, then_result, else_result = old_node.args + ifelse_func, condition = old_node.args if is_old(compact, old_ssa) && isa(condition, SSAValue) condition = OldSSAValue(condition.id) end - new_node = Expr(:call, ifelse_func, condition, then_result, else_result) + new_node = Expr(:call, ifelse_func, condition) # Renamed then_result, else_result added below new_inst = NewInstruction(new_node, result_t, NoCallInfo(), old_inst[:line], old_inst[:flag]) ssa = insert_node!(compact, old_ssa, new_inst) @@ -757,20 +757,24 @@ function perform_lifting!(compact::IncrementalCompact, end end elseif isa(lfnode, IfElseCall) - then_result, else_result = lfnode.call.args[3], lfnode.call.args[4] + old_node = compact[old_node_ssa][:inst]::Expr + then_result, else_result = old_node.args[3], old_node.args[4] then_result = lifted_value(compact, old_node_ssa, then_result, lifted_philikes, lifted_leaves, reverse_mapping) else_result = lifted_value(compact, old_node_ssa, else_result, lifted_philikes, lifted_leaves, reverse_mapping) - should_count && _count_added_node!(compact, then_result) - should_count && _count_added_node!(compact, else_result) - @assert then_result !== SKIP_TOKEN && then_result !== UNDEF_TOKEN @assert else_result !== SKIP_TOKEN && else_result !== UNDEF_TOKEN - lfnode.call.args[3], lfnode.call.args[4] = then_result, else_result + if should_count + _count_added_node!(compact, then_result) + _count_added_node!(compact, else_result) + end + + push!(lfnode.call.args, 
then_result) + push!(lfnode.call.args, else_result) end end From 7cd7a1b1b8d6af13047b3bd002f9d8e42c76d3dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= Date: Tue, 13 Jun 2023 07:54:21 +0100 Subject: [PATCH 154/290] Enable JITLink in aarch64 linux. (#49745) * Enable JITLink in aarch64 linux. * Simplify logic to enable JITLink * Do not enable JITLink on Aarch64 Linux with LLVM < 15 * Add NEWS entry and mention JITLink memory bug in ARM devdocs --------- Co-authored-by: Sunho Kim --- NEWS.md | 2 ++ doc/src/devdocs/build/arm.md | 10 ++++++++++ src/jitlayers.cpp | 2 +- src/jitlayers.h | 12 ++++++++++-- 4 files changed, 23 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index 2e1fa8c102461..6c60b56b7a028 100644 --- a/NEWS.md +++ b/NEWS.md @@ -27,6 +27,8 @@ Compiler/Runtime improvements * The `@pure` macro is now deprecated. Use `Base.@assume_effects :foldable` instead ([#48682]). * The mark phase of the Garbage Collector is now multi-threaded ([#48600]). +* [JITLink](https://llvm.org/docs/JITLink.html) is enabled by default on Linux aarch64 when Julia is linked to LLVM 15 or later versions ([#49745]). + This should resolve many segmentation faults previously observed on this platform. Command-line option changes --------------------------- diff --git a/doc/src/devdocs/build/arm.md b/doc/src/devdocs/build/arm.md index 9268da32d9c26..747ee25d22a04 100644 --- a/doc/src/devdocs/build/arm.md +++ b/doc/src/devdocs/build/arm.md @@ -68,6 +68,16 @@ Compilation on `ARMv8-A` requires that `Make.user` is configured as follows: MCPU=armv8-a ``` +Starting from Julia v1.10, [JITLink](https://llvm.org/docs/JITLink.html) is automatically enabled on this architecture for all operating systems when linking to LLVM 15 or later versions. 
+Due to a [bug in LLVM memory manager](https://github.com/llvm/llvm-project/issues/63236), non-trivial workloads may generate too many memory mappings that on Linux can exceed the limit of memory mappings (`mmap`) set in the file `/proc/sys/vm/max_map_count`, resulting in an error like +``` +JIT session error: Cannot allocate memory +``` +Should this happen, ask your system administrator to increase the limit of memory mappings for example with the command +``` +sysctl -w vm.max_map_count=262144 +``` + ### nVidia Jetson TX2 Julia builds and runs on the [nVidia Jetson TX2](https://www.nvidia.com/object/embedded-systems-dev-kits-modules.html) diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index c613180522ecc..925638af5fa53 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -807,7 +807,7 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin { PassConfig.PostAllocationPasses.push_back([&Info, this](jitlink::LinkGraph &G) -> Error { std::lock_guard lock(PluginMutex); for (const jitlink::Section &Sec : G.sections()) { -#ifdef _OS_DARWIN_ +#if defined(_OS_DARWIN_) // Canonical JITLink section names have the segment name included, e.g. // "__TEXT,__text" or "__DWARF,__debug_str". There are some special internal // sections without a comma separator, which we can just ignore. diff --git a/src/jitlayers.h b/src/jitlayers.h index 6d864640a2e24..344cba8c0f453 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -41,7 +41,7 @@ // However, JITLink is a relatively young library and lags behind in platform // and feature support (e.g. Windows, JITEventListeners for various profilers, // etc.). Thus, we currently only use JITLink where absolutely required, that is, -// for Mac/aarch64. +// for Mac/aarch64 and Linux/aarch64. 
// #define JL_FORCE_JITLINK #if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_) @@ -49,8 +49,16 @@ #endif // The sanitizers don't play well with our memory manager -#if defined(_OS_DARWIN_) && defined(_CPU_AARCH64_) || defined(JL_FORCE_JITLINK) || JL_LLVM_VERSION >= 150000 && defined(HAS_SANITIZER) +#if defined(JL_FORCE_JITLINK) || JL_LLVM_VERSION >= 150000 && defined(HAS_SANITIZER) # define JL_USE_JITLINK +#else +# if defined(_CPU_AARCH64_) +# if defined(_OS_LINUX_) && JL_LLVM_VERSION < 150000 +# pragma message("On aarch64-gnu-linux, LLVM version >= 15 is required for JITLink; fallback suffers from occasional segfaults") +# else +# define JL_USE_JITLINK +# endif +# endif #endif #ifdef JL_USE_JITLINK From f007c0152cd61b3c7af62d663b8cb894b11cc3a0 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Tue, 13 Jun 2023 19:46:51 +0900 Subject: [PATCH 155/290] fix `array_builtin_common_nothrow` for `arrayref` (#50152) --- base/compiler/tfuncs.jl | 9 +++++---- test/compiler/effects.jl | 26 ++++++++++++++++---------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl index f10dd03a6a058..79e3cfefc7ff1 100644 --- a/base/compiler/tfuncs.jl +++ b/base/compiler/tfuncs.jl @@ -2041,8 +2041,9 @@ function array_type_undefable(@nospecialize(arytype)) end end -function array_builtin_common_nothrow(argtypes::Vector{Any}, first_idx_idx::Int, isarrayref::Bool) - length(argtypes) >= 4 || return false +function array_builtin_common_nothrow(argtypes::Vector{Any}, isarrayref::Bool) + first_idx_idx = isarrayref ? 
3 : 4 + length(argtypes) ≥ first_idx_idx || return false boundscheck = argtypes[1] arytype = argtypes[2] array_builtin_common_typecheck(boundscheck, arytype, argtypes, first_idx_idx) || return false @@ -2086,11 +2087,11 @@ end @nospecs function _builtin_nothrow(𝕃::AbstractLattice, f, argtypes::Vector{Any}, rt) ⊑ = Core.Compiler.:⊑(𝕃) if f === arrayset - array_builtin_common_nothrow(argtypes, 4, #=isarrayref=#false) || return false + array_builtin_common_nothrow(argtypes, #=isarrayref=#false) || return false # Additionally check element type compatibility return arrayset_typecheck(argtypes[2], argtypes[3]) elseif f === arrayref || f === const_arrayref - return array_builtin_common_nothrow(argtypes, 3, #=isarrayref=#true) + return array_builtin_common_nothrow(argtypes, #=isarrayref=#true) elseif f === Core._expr length(argtypes) >= 1 || return false return argtypes[1] ⊑ Symbol diff --git a/test/compiler/effects.jl b/test/compiler/effects.jl index f809192d8d1ed..99e788c0cff12 100644 --- a/test/compiler/effects.jl +++ b/test/compiler/effects.jl @@ -760,21 +760,27 @@ end # arrayref # -------- -let effects = Base.infer_effects(Base.arrayref, (Vector{Any},Int)) - @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects) - @test Core.Compiler.is_effect_free(effects) - @test !Core.Compiler.is_nothrow(effects) - @test Core.Compiler.is_terminates(effects) +for tt = Any[(Bool,Vector{Any},Int), + (Bool,Matrix{Any},Int,Int)] + @testset let effects = Base.infer_effects(Base.arrayref, tt) + @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects) + @test Core.Compiler.is_effect_free(effects) + @test !Core.Compiler.is_nothrow(effects) + @test Core.Compiler.is_terminates(effects) + end end # arrayset # -------- -let effects = Base.infer_effects(Base.arrayset, (Vector{Any},Any,Int)) - @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects) - @test Core.Compiler.is_effect_free_if_inaccessiblememonly(effects) - @test !Core.Compiler.is_nothrow(effects) - @test 
Core.Compiler.is_terminates(effects) +for tt = Any[(Bool,Vector{Any},Any,Int), + (Bool,Matrix{Any},Any,Int,Int)] + @testset let effects = Base.infer_effects(Base.arrayset, tt) + @test Core.Compiler.is_consistent_if_inaccessiblememonly(effects) + @test Core.Compiler.is_effect_free_if_inaccessiblememonly(effects) + @test !Core.Compiler.is_nothrow(effects) + @test Core.Compiler.is_terminates(effects) + end end # nothrow for arrayset @test Base.infer_effects((Vector{Int},Int,Int)) do a, v, i From 0d89d8bb219eb36ed0e4b4675c8c457d81784cbe Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Tue, 13 Jun 2023 19:50:10 +0900 Subject: [PATCH 156/290] Test: support multiple assignments for `@testset let` (#50151) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nested `ContextTestset` is supported, so we can stack it when there are multiple assignments in a given `let` block. ```julia julia> @testset let logi = log(im), op = !iszero @test imag(logi) == π/2 @test op(real(logi)) end Test Failed at none:3 Expression: !(iszero(real(logi))) Context: logi = 0.0 + 1.5707963267948966im op = !iszero ERROR: There was an error during testing ``` --- stdlib/Test/src/Test.jl | 62 ++++++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl index 11bb6229ec0a1..622c696b383a0 100644 --- a/stdlib/Test/src/Test.jl +++ b/stdlib/Test/src/Test.jl @@ -1351,11 +1351,11 @@ function _check_testset(testsettype, testsetname) end """ - @testset [CustomTestSet] [option=val ...] ["description"] begin ... end - @testset [CustomTestSet] [option=val ...] ["description \$v"] for v in (...) ... end - @testset [CustomTestSet] [option=val ...] ["description \$v, \$w"] for v in (...), w in (...) ... end - @testset [CustomTestSet] [option=val ...] ["description"] foo() - @testset let v = (...) ... 
end + @testset [CustomTestSet] [options...] ["description"] begin test_ex end + @testset [CustomTestSet] [options...] ["description \$v"] for v in itr test_ex end + @testset [CustomTestSet] [options...] ["description \$v, \$w"] for v in itrv, w in itrw test_ex end + @testset [CustomTestSet] [options...] ["description"] test_func() + @testset let v = v, w = w; test_ex; end # With begin/end or function call @@ -1380,7 +1380,7 @@ accepts three boolean options: This can also be set globally via the env var `JULIA_TEST_FAILFAST`. !!! compat "Julia 1.8" - `@testset foo()` requires at least Julia 1.8. + `@testset test_func()` requires at least Julia 1.8. !!! compat "Julia 1.9" `failfast` requires at least Julia 1.9. @@ -1436,6 +1436,9 @@ parent test set (with the context object appended to any failing tests.) !!! compat "Julia 1.9" `@testset let` requires at least Julia 1.9. +!!! compat "Julia 1.10" + Multiple `let` assignments are supported since Julia 1.10. + ## Examples ```jldoctest julia> @testset let logi = log(im) @@ -1446,6 +1449,17 @@ Test Failed at none:3 Expression: !(iszero(real(logi))) Context: logi = 0.0 + 1.5707963267948966im +ERROR: There was an error during testing + +julia> @testset let logi = log(im), op = !iszero + @test imag(logi) == π/2 + @test op(real(logi)) + end +Test Failed at none:3 + Expression: op(real(logi)) + Context: logi = 0.0 + 1.5707963267948966im + op = !iszero + ERROR: There was an error during testing ``` """ @@ -1477,7 +1491,7 @@ trigger_test_failure_break(@nospecialize(err)) = """ Generate the code for an `@testset` with a `let` argument.
""" -function testset_context(args, tests, source) +function testset_context(args, ex, source) desc, testsettype, options = parse_testset_args(args[1:end-1]) if desc !== nothing || testsettype !== nothing # Reserve this syntax if we ever want to allow this, but for now, @@ -1485,22 +1499,38 @@ function testset_context(args, tests, source) error("@testset with a `let` argument cannot be customized") end - assgn = tests.args[1] - if !isa(assgn, Expr) || assgn.head !== :(=) - error("`@testset let` must have exactly one assignment") + let_ex = ex.args[1] + + if Meta.isexpr(let_ex, :(=)) + contexts = Any[let_ex.args[1]] + elseif Meta.isexpr(let_ex, :block) + contexts = Any[] + for assign_ex in let_ex.args + if Meta.isexpr(assign_ex, :(=)) + push!(contexts, assign_ex.args[1]) + else + error("Malformed `let` expression is given") + end + end + else + error("Malformed `let` expression is given") end - assignee = assgn.args[1] + reverse!(contexts) + + test_ex = ex.args[2] - tests.args[2] = quote - $push_testset($(ContextTestSet)($(QuoteNode(assignee)), $assignee; $options...)) + ex.args[2] = quote + $(map(contexts) do context + :($push_testset($(ContextTestSet)($(QuoteNode(context)), $context; $options...))) + end...) try - $(tests.args[2]) + $(test_ex) finally - $pop_testset() + $(map(_->:($pop_testset()), contexts)...) 
end end - return esc(tests) + return esc(ex) end """ From 97f445eeb13d031231252933fd9da92b8d304308 Mon Sep 17 00:00:00 2001 From: Cody Tapscott <84105208+topolarity@users.noreply.github.com> Date: Tue, 13 Jun 2023 10:30:57 -0400 Subject: [PATCH 157/290] Update cli/loader_lib.c From Jameson: > apparently on Windows, the _Atomic x syntax means something slightly different and is not correctly implemented Co-authored-by: Jameson Nash --- cli/loader_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/loader_lib.c b/cli/loader_lib.c index 50519db29d811..12feed0c508a0 100644 --- a/cli/loader_lib.c +++ b/cli/loader_lib.c @@ -527,7 +527,7 @@ __attribute__((constructor)) void jl_load_libjulia_internal(void) { } void *fptr = lookup_symbol(RTLD_DEFAULT, "jl_get_pgcstack_static"); void *(*key)(void) = lookup_symbol(RTLD_DEFAULT, "jl_pgcstack_addr_static"); - _Atomic char *semaphore = lookup_symbol(RTLD_DEFAULT, "jl_pgcstack_static_semaphore"); + _Atomic(char) *semaphore = lookup_symbol(RTLD_DEFAULT, "jl_pgcstack_static_semaphore"); if (fptr != NULL && key != NULL && semaphore != NULL) { char already_used = 0; atomic_compare_exchange_strong(semaphore, &already_used, 1); From 320e00db00bb95ab5e7a32bf7e00a5346fecb911 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 13 Jun 2023 10:33:57 -0400 Subject: [PATCH 158/290] lowering: try to ignore special metadata nodes in tail position (#49963) A number of packages are relying on lowering removing or ignoring line number nodes that they place in incorrect position in the AST. Accommodate those packages accordingly. Fix #49920 --- src/julia-syntax.scm | 34 +++++++++++++++++++++++++--------- test/syntax.jl | 7 +++++++ 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index dd231ad13172f..cd11f46b1eb38 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -175,7 +175,7 @@ ,(if (equal? 
rett '(core Any)) body (let ((meta (take-while (lambda (x) (and (pair? x) - (memq (car x) '(line meta)))) + (memq (car x) '(lineinfo line meta)))) (cdr body))) (R (make-ssavalue))) `(,(car body) ,@meta @@ -3652,12 +3652,22 @@ f(x) = yt(x) (loop (cdr xs)) elt))))) +; try to ignore some metadata expressions for implicit return sometimes +(define (only-meta? blk) + (let loop ((xs blk)) + (if (null? xs) + #t + (let ((elt (car xs))) + (if (and (pair? elt) (memq (car elt) '(lineinfo line loopinfo))) + (loop (cdr xs)) + #f))))) + ;; return `body` with `stmts` inserted after any meta nodes (define (insert-after-meta body stmts) (if (null? stmts) body (let ((meta (take-while (lambda (x) (and (pair? x) - (memq (car x) '(line meta)))) + (memq (car x) '(lineinfo line meta)))) (cdr body)))) `(,(car body) ,@meta @@ -3689,7 +3699,7 @@ f(x) = yt(x) (lambda (x) (and (pair? x) (not (eq? (car x) 'lambda))))))) (define lambda-opt-ignored-exprs - (Set '(quote top core line inert local-def unnecessary copyast + (Set '(quote top core lineinfo line inert local-def unnecessary copyast meta inbounds boundscheck loopinfo decl aliasscope popaliasscope thunk with-static-parameters toplevel-only global globalref outerref const-if-global thismodule @@ -3919,7 +3929,7 @@ f(x) = yt(x) ((atom? e) e) (else (case (car e) - ((quote top core globalref outerref thismodule line break inert module toplevel null true false meta) e) + ((quote top core globalref outerref thismodule lineinfo line break inert module toplevel null true false meta) e) ((toplevel-only) ;; hack to avoid generating a (method x) expr for struct types (if (eq? (cadr e) 'struct) @@ -4553,12 +4563,14 @@ f(x) = yt(x) (file-diff (not (eq? fname last-fname))) ;; don't need a filename node for start of function (need-meta (and file-diff last-fname - (not (eq? e (lam:body lam)))))) + (not (eq? e (lam:body lam))))) + (emit-final-meta (lambda ()))) (if file-diff (set! 
filename fname)) (if need-meta (emit `(meta push_loc ,fname))) (let ((v (let loop ((xs (cdr e))) - (if (null? (cdr xs)) - (compile (car xs) break-labels value tail) + (if (only-meta? (cdr xs)) + (begin (set! emit-final-meta (lambda () (map (lambda (v) (compile v break-labels #f #f)) (cdr xs)))) + (compile (car xs) break-labels value tail)) (begin (compile (car xs) break-labels #f #f) (loop (cdr xs))))))) (if need-meta @@ -4574,6 +4586,7 @@ f(x) = yt(x) (let ((tmp (make-ssavalue))) (emit `(= ,tmp ,retv)) (set! retv tmp))) + (emit-final-meta) (emit '(meta pop_loc)) (emit `(return ,retv))) (emit '(meta pop_loc)))) @@ -4581,9 +4594,12 @@ f(x) = yt(x) (let ((tmp (make-ssavalue))) (emit `(= ,tmp ,v)) (set! v tmp) + (emit-final-meta) (emit `(meta pop_loc)))) (else - (emit `(meta pop_loc))))) + (emit-final-meta) + (emit `(meta pop_loc)))) + (emit-final-meta)) (if file-diff (set! filename last-fname)) v))) ((return) @@ -4829,7 +4845,7 @@ f(x) = yt(x) (cons (car e) args))) ;; metadata expressions - ((line meta inbounds loopinfo gc_preserve_end aliasscope popaliasscope inline noinline) + ((lineinfo line meta inbounds loopinfo gc_preserve_end aliasscope popaliasscope inline noinline) (let ((have-ret? (and (pair? code) (pair? (car code)) (eq? (caar code) 'return)))) (cond ((eq? (car e) 'line) (set! 
current-loc e) diff --git a/test/syntax.jl b/test/syntax.jl index f7a6d1c095c00..119f6d427a15a 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -3480,6 +3480,13 @@ end :(global (; x::$(GlobalRef(m, :S)), y::T) = $(GlobalRef(m, :a))) end +# issue #49920 +let line1 = (quote end).args[1], + line2 = (quote end).args[1], + line3 = (quote end).args[1] + @test 1 === eval(Meta.lower(Main, Expr(:block, line1, 1, line2, line3))) +end + # issue #49984 macro z49984(s); :(let a; $(esc(s)); end); end @test let a = 1; @z49984(a) === 1; end From 9d839f9794cbe4cc5eb654bcf8064f3f30452705 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 13 Jun 2023 13:25:12 -0400 Subject: [PATCH 159/290] Refactor irinterp refinement logic (#50155) This continues the refactoring begun by #49340 to have irinterp consume the IR_FLAG_REFINED flag. This essentially has the same effect as the extra_reprocess bitset that irinterp takes, so we can remove that. However, there is a related issue where we would like to inform irinterp that we have *already* refined the type of a particular statement (likely using information not available to the irinterp) and would like it to just propagate that if possible. So bring back that extra bitset with a new name and these new semantics to make that possible. While I was working on this, I also noticed that the control hook I added in #48199 wasn't quite working as advertised. I don't currently need it, so rather than trying to work through an API without a concrete consumer, just nuke that hook for now. I do still think it'll be required at some point, but we can always add it back. 
--- base/compiler/ssair/irinterp.jl | 40 +++++++++++++-------------------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/base/compiler/ssair/irinterp.jl b/base/compiler/ssair/irinterp.jl index 8d75ad3948ee2..fc5085af426a1 100644 --- a/base/compiler/ssair/irinterp.jl +++ b/base/compiler/ssair/irinterp.jl @@ -43,14 +43,6 @@ function abstract_eval_phi_stmt(interp::AbstractInterpreter, phi::PhiNode, ::Int return abstract_eval_phi(interp, phi, nothing, irsv) end -function propagate_control_effects!(interp::AbstractInterpreter, idx::Int, stmt::GotoIfNot, - irsv::IRInterpretationState, extra_reprocess::Union{Nothing,BitSet,BitSetBoundedMinPrioritySet}) - # Nothing to do for most abstract interpreters, but if the abstract - # interpreter has control-dependent lattice effects, it can override - # this method. - return false -end - function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, irsv::IRInterpretationState) si = StmtInfo(true) # TODO better job here? (; rt, effects, info) = abstract_call(interp, arginfo, si, irsv) @@ -102,8 +94,7 @@ function kill_terminator_edges!(irsv::IRInterpretationState, term_idx::Int, bb:: end function reprocess_instruction!(interp::AbstractInterpreter, idx::Int, bb::Union{Int,Nothing}, - @nospecialize(inst), @nospecialize(typ), irsv::IRInterpretationState, - extra_reprocess::Union{Nothing,BitSet,BitSetBoundedMinPrioritySet}) + @nospecialize(inst), @nospecialize(typ), irsv::IRInterpretationState) ir = irsv.ir if isa(inst, GotoIfNot) cond = inst.cond @@ -126,7 +117,7 @@ function reprocess_instruction!(interp::AbstractInterpreter, idx::Int, bb::Union end return true end - return propagate_control_effects!(interp, idx, inst, irsv, extra_reprocess) + return false end rt = nothing if isa(inst, Expr) @@ -204,9 +195,8 @@ function process_terminator!(ir::IRCode, @nospecialize(inst), idx::Int, bb::Int, end end -default_reprocess(::AbstractInterpreter, ::IRInterpretationState) = nothing function 
_ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState; - extra_reprocess::Union{Nothing,BitSet} = default_reprocess(interp, irsv)) + externally_refined::Union{Nothing,BitSet} = nothing) interp = switch_to_irinterp(interp) (; ir, tpdum, ssa_refined) = irsv @@ -227,12 +217,11 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR irsv.curridx = idx inst = ir.stmts[idx][:inst] typ = ir.stmts[idx][:type] + flag = ir.stmts[idx][:flag] any_refined = false - if extra_reprocess !== nothing - if idx in extra_reprocess - pop!(extra_reprocess, idx) - any_refined = true - end + if (flag & IR_FLAG_REFINED) != 0 + any_refined = true + ir.stmts[idx][:flag] &= ~IR_FLAG_REFINED end for ur in userefs(inst) val = ur[] @@ -251,8 +240,9 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR if typ === Bottom && (idx != lstmt || !is_terminator_or_phi) continue end - if any_refined && reprocess_instruction!(interp, - idx, bb, inst, typ, irsv, extra_reprocess) + if (any_refined && reprocess_instruction!(interp, + idx, bb, inst, typ, irsv)) || + (externally_refined !== nothing && idx in externally_refined) push!(ssa_refined, idx) inst = ir.stmts[idx][:inst] typ = ir.stmts[idx][:type] @@ -277,9 +267,6 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR # Slow path begin @label residual_scan stmt_ip = BitSetBoundedMinPrioritySet(length(ir.stmts)) - if extra_reprocess !== nothing - append!(stmt_ip, extra_reprocess) - end # Slow Path Phase 1.A: Complete use scanning while !isempty(bb_ip) @@ -289,6 +276,11 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR for idx = stmts irsv.curridx = idx inst = ir.stmts[idx][:inst] + flag = ir.stmts[idx][:flag] + if (flag & IR_FLAG_REFINED) != 0 + ir.stmts[idx][:flag] &= ~IR_FLAG_REFINED + push!(stmt_ip, idx) + end for ur in userefs(inst) val = ur[] if isa(val, Argument) @@ -335,7 +327,7 @@ function 
_ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR inst = ir.stmts[idx][:inst] typ = ir.stmts[idx][:type] if reprocess_instruction!(interp, - idx, nothing, inst, typ, irsv, stmt_ip) + idx, nothing, inst, typ, irsv) append!(stmt_ip, tpdum[idx]) end end From 82c89c680e8326da92412a082073e5e4044fd14f Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Tue, 13 Jun 2023 18:17:21 -0400 Subject: [PATCH 160/290] Lookup libraries in `libjulia-*` before `jl_exe_handle` We do not use `dlvsym` to separate the symbols between multiple copies of libjulia, instead preferring to resolve symbols directly against the appropriate internal library handle. During bootstrapping, many internal symbols (e.g. `jl_fl_parse`) are available in the global EXE namespace, so we need to adapt our search order to resolve symbols in internal libraries first. With this fix, no sysimage symbols are resolved to `jl_exe_handle` (which is generally broken in Julia-in-Julia scenarios): ``` $ cat objdump_after.txt | grep libjulia_internal_handle | wc 1131 14703 145899 $ cat objdump_after.txt | grep jl_exe_handle | wc 0 0 0 ``` versus before: ``` $ cat objdump_before.txt | grep libjulia_internal_handle | wc 577 7501 74433 $ cat objdump_before.txt | grep jl_exe_handle | wc 554 7202 63710 ``` --- src/dlload.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dlload.c b/src/dlload.c index 4e9e9c9ce48fc..ffa9a053d5f1c 100644 --- a/src/dlload.c +++ b/src/dlload.c @@ -436,12 +436,12 @@ JL_DLLEXPORT int jl_dlsym(void *handle, const char *symbol, void ** value, int t JL_DLLEXPORT const char *jl_dlfind(const char *f_name) { void * dummy; - if (jl_dlsym(jl_exe_handle, f_name, &dummy, 0)) - return JL_EXE_LIBNAME; if (jl_dlsym(jl_libjulia_internal_handle, f_name, &dummy, 0)) return JL_LIBJULIA_INTERNAL_DL_LIBNAME; if (jl_dlsym(jl_libjulia_handle, f_name, &dummy, 0)) return JL_LIBJULIA_DL_LIBNAME; + if (jl_dlsym(jl_exe_handle, f_name, &dummy, 0)) + return JL_EXE_LIBNAME; 
#ifdef _OS_WINDOWS_ if (jl_dlsym(jl_kernel32_handle, f_name, &dummy, 0)) return "kernel32"; From 03c4bc128753a0e34ad560e4cc2faa948e0d9e28 Mon Sep 17 00:00:00 2001 From: Diogo Netto <61364108+d-netto@users.noreply.github.com> Date: Tue, 13 Jun 2023 21:29:16 -0300 Subject: [PATCH 161/290] Promote objects more eagerly (#49644) Simplifies generational behaviour --- src/gc-debug.c | 8 --- src/gc-pages.c | 3 - src/gc.c | 148 +++++++++---------------------------------- src/gc.h | 8 +-- src/julia_internal.h | 1 - 5 files changed, 33 insertions(+), 135 deletions(-) diff --git a/src/gc-debug.c b/src/gc-debug.c index a5b779c8161b1..02addaa98e44c 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -580,14 +580,6 @@ JL_NO_ASAN static void gc_scrub_range(char *low, char *high) // Make sure the sweep rebuild the freelist pg->has_marked = 1; pg->has_young = 1; - // Find the age bit - char *page_begin = gc_page_data(tag) + GC_PAGE_OFFSET; - int obj_id = (((char*)tag) - page_begin) / osize; - uint32_t *ages = pg->ages + obj_id / 32; - // Force this to be a young object to save some memory - // (especially on 32bit where it's more likely to have pointer-like - // bit patterns) - *ages &= ~(1 << (obj_id % 32)); memset(tag, 0xff, osize); // set mark to GC_MARKED (young and marked) tag->bits.gc = GC_MARKED; diff --git a/src/gc-pages.c b/src/gc-pages.c index d579eb0cd4fbb..28daa9d67a9ed 100644 --- a/src/gc-pages.c +++ b/src/gc-pages.c @@ -281,9 +281,6 @@ void jl_gc_free_page(void *p) JL_NOTSAFEPOINT if ((memory_map.freemap1[info.pagetable_i32] & msk) == 0) memory_map.freemap1[info.pagetable_i32] |= msk; - free(info.meta->ages); - info.meta->ages = NULL; - // tell the OS we don't need these pages right now size_t decommit_size = GC_PAGE_SZ; if (GC_PAGE_SZ < jl_page_size) { diff --git a/src/gc.c b/src/gc.c index a9bb584cfcfba..00b0102f72653 100644 --- a/src/gc.c +++ b/src/gc.c @@ -690,7 +690,7 @@ static int mark_reset_age = 0; * * <-[(quick)sweep]- * | - * ----> GC_OLD <--[(quick)sweep && 
age>promotion]-- + * ----> GC_OLD <--[(quick)sweep]------------------- * | | | * | | GC_MARKED (in remset) | * | | ^ | | @@ -707,9 +707,9 @@ static int mark_reset_age = 0; * ========= above this line objects are old ========= | * | * ----[new]------> GC_CLEAN ------[mark]-----------> GC_MARKED - * | ^ | - * <-[(quick)sweep]--- | | - * --[(quick)sweep && age<=promotion]--- + * | + * <-[(quick)sweep]--- + * */ // A quick sweep is a sweep where `!sweep_full` @@ -723,19 +723,10 @@ static int mark_reset_age = 0; // When a write barrier triggers, the offending marked object is both queued, // so as not to trigger the barrier again, and put in the remset. - -#define PROMOTE_AGE 1 -// this cannot be increased as is without changing : -// - sweep_page which is specialized for 1bit age -// - the size of the age storage in jl_gc_pagemeta_t - - static int64_t scanned_bytes; // young bytes scanned while marking static int64_t perm_scanned_bytes; // old bytes scanned while marking int prev_sweep_full = 1; -#define inc_sat(v,s) v = (v) >= s ? s : (v)+1 - // Full collection heuristics static int64_t live_bytes = 0; static int64_t promoted_bytes = 0; @@ -839,9 +830,8 @@ STATIC_INLINE void gc_setmark_big(jl_ptls_t ptls, jl_taggedvalue_t *o, // We can't easily tell if the object is old or being promoted // from the gc bits but if the `age` is `0` then the object // must be already on a young list. 
- if (mark_reset_age && hdr->age) { + if (mark_reset_age) { // Reset the object as if it was just allocated - hdr->age = 0; gc_queue_big_marked(ptls, hdr, 1); } } @@ -868,10 +858,6 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o, ptls->gc_cache.scanned_bytes += page->osize; if (mark_reset_age) { page->has_young = 1; - char *page_begin = gc_page_data(o) + GC_PAGE_OFFSET; - int obj_id = (((char*)o) - page_begin) / page->osize; - uint32_t *ages = page->ages + obj_id / 32; - jl_atomic_fetch_and_relaxed((_Atomic(uint32_t)*)ages, ~(1 << (obj_id % 32))); } } objprofile_count(jl_typeof(jl_valueof(o)), @@ -908,7 +894,7 @@ STATIC_INLINE void gc_setmark_buf_(jl_ptls_t ptls, void *o, uint8_t mark_mode, s if (__likely(gc_try_setmark_tag(buf, mark_mode)) && !gc_verifying) { if (minsz <= GC_MAX_SZCLASS) { jl_gc_pagemeta_t *page = page_metadata(buf); - if (page) { + if (page != NULL) { gc_setmark_pool_(ptls, buf, bits, page); return; } @@ -922,37 +908,6 @@ void gc_setmark_buf(jl_ptls_t ptls, void *o, uint8_t mark_mode, size_t minsz) JL gc_setmark_buf_(ptls, o, mark_mode, minsz); } -void jl_gc_force_mark_old(jl_ptls_t ptls, jl_value_t *v) JL_NOTSAFEPOINT -{ - jl_taggedvalue_t *o = jl_astaggedvalue(v); - jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(v); - size_t dtsz = jl_datatype_size(dt); - if (o->bits.gc == GC_OLD_MARKED) - return; - o->bits.gc = GC_OLD_MARKED; - if (dt == jl_simplevector_type) { - size_t l = jl_svec_len(v); - dtsz = l * sizeof(void*) + sizeof(jl_svec_t); - } - else if (dt->name == jl_array_typename) { - jl_array_t *a = (jl_array_t*)v; - if (!a->flags.pooled) - dtsz = GC_MAX_SZCLASS + 1; - } - else if (dt == jl_module_type) { - dtsz = sizeof(jl_module_t); - } - else if (dt == jl_task_type) { - dtsz = sizeof(jl_task_t); - } - else if (dt == jl_symbol_type) { - return; - } - gc_setmark(ptls, o, GC_OLD_MARKED, dtsz); - if (dt->layout->npointers != 0) - jl_gc_queue_root(v); -} - STATIC_INLINE void maybe_collect(jl_ptls_t ptls) { if 
(jl_atomic_load_relaxed(&ptls->gc_num.allocd) >= 0 || jl_gc_debug_check_other()) { @@ -1048,7 +1003,6 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) memset(v, 0xee, allocsz); #endif v->sz = allocsz; - v->age = 0; gc_big_object_link(v, &ptls->heap.big_objects); return jl_valueof(&v->header); } @@ -1079,16 +1033,8 @@ static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT int old_bits = bits; if (gc_marked(bits)) { pv = &v->next; - int age = v->age; - if (age >= PROMOTE_AGE || bits == GC_OLD_MARKED) { - if (sweep_full || bits == GC_MARKED) { - bits = GC_OLD; - } - } - else { - inc_sat(age, PROMOTE_AGE); - v->age = age; - bits = GC_CLEAN; + if (sweep_full || bits == GC_MARKED) { + bits = GC_OLD; } v->bits.gc = bits; } @@ -1267,12 +1213,11 @@ static void sweep_malloced_arrays(void) JL_NOTSAFEPOINT } // pool allocation -STATIC_INLINE jl_taggedvalue_t *reset_page(jl_ptls_t ptls2, const jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_taggedvalue_t *fl) JL_NOTSAFEPOINT +STATIC_INLINE jl_taggedvalue_t *gc_reset_page(jl_ptls_t ptls2, const jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_taggedvalue_t *fl) JL_NOTSAFEPOINT { assert(GC_PAGE_OFFSET >= sizeof(void*)); pg->nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / p->osize; pg->pool_n = p - ptls2->heap.norm_pools; - memset(pg->ages, 0, GC_PAGE_SZ / 8 / p->osize + 1); jl_taggedvalue_t *beg = (jl_taggedvalue_t*)(pg->data + GC_PAGE_OFFSET); jl_taggedvalue_t *next = (jl_taggedvalue_t*)pg->data; if (fl == NULL) { @@ -1293,22 +1238,21 @@ STATIC_INLINE jl_taggedvalue_t *reset_page(jl_ptls_t ptls2, const jl_gc_pool_t * } pg->has_young = 0; pg->has_marked = 0; - pg->fl_begin_offset = -1; - pg->fl_end_offset = -1; + pg->fl_begin_offset = UINT16_MAX; + pg->fl_end_offset = UINT16_MAX; return beg; } // Add a new page to the pool. Discards any pages in `p->newpages` before. 
-static NOINLINE jl_taggedvalue_t *add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT +static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT { // Do not pass in `ptls` as argument. This slows down the fast path // in pool_alloc significantly jl_ptls_t ptls = jl_current_task->ptls; jl_gc_pagemeta_t *pg = jl_gc_alloc_page(); pg->osize = p->osize; - pg->ages = (uint32_t*)malloc_s(LLT_ALIGN(GC_PAGE_SZ / 8 / p->osize + 1, sizeof(uint32_t))); pg->thread_n = ptls->tid; - jl_taggedvalue_t *fl = reset_page(ptls, p, pg, NULL); + jl_taggedvalue_t *fl = gc_reset_page(ptls, p, pg, NULL); p->newpages = fl; return fl; } @@ -1363,8 +1307,9 @@ STATIC_INLINE jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset v = *(jl_taggedvalue_t**)cur_page; } // Not an else!! - if (v == NULL) - v = add_page(p); + if (v == NULL) { + v = gc_add_page(p); + } next = (jl_taggedvalue_t*)((char*)v + osize); } p->newpages = next; @@ -1406,9 +1351,8 @@ int64_t lazy_freed_pages = 0; static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_taggedvalue_t **pfl, int sweep_full, int osize) JL_NOTSAFEPOINT { char *data = pg->data; - uint32_t *ages = pg->ages; jl_taggedvalue_t *v = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET); - char *lim = (char*)v + GC_PAGE_SZ - GC_PAGE_OFFSET - osize; + char *lim = data + GC_PAGE_SZ - osize; size_t old_nfree = pg->nfree; size_t nfree; @@ -1422,9 +1366,9 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t // on quick sweeps, keep a few pages empty but allocated for performance if (!sweep_full && lazy_freed_pages <= default_collect_interval / GC_PAGE_SZ) { jl_ptls_t ptls2 = gc_all_tls_states[pg->thread_n]; - jl_taggedvalue_t *begin = reset_page(ptls2, p, pg, p->newpages); + jl_taggedvalue_t *begin = gc_reset_page(ptls2, p, pg, p->newpages); p->newpages = begin; - begin->next = (jl_taggedvalue_t*)0; + begin->next = NULL; lazy_freed_pages++; } else { @@ -1457,47 +1401,24 @@ static jl_taggedvalue_t 
**sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t int16_t prev_nold = 0; int pg_nfree = 0; jl_taggedvalue_t **pfl_begin = NULL; - uint32_t msk = 1; // mask for the age bit in the current age byte - uint32_t age = *ages; while ((char*)v <= lim) { - if (!msk) { - msk = 1; - *ages = age; - ages++; - age = *ages; - } int bits = v->bits.gc; if (!gc_marked(bits)) { *pfl = v; pfl = &v->next; - pfl_begin = pfl_begin ? pfl_begin : pfl; + pfl_begin = (pfl_begin != NULL) ? pfl_begin : pfl; pg_nfree++; - age &= ~msk; } else { // marked young or old - if (age & msk || bits == GC_OLD_MARKED) { // old enough - // `!age && bits == GC_OLD_MARKED` is possible for - // non-first-class objects like array buffers - // (they may get promoted by jl_gc_wb_buf for example, - // or explicitly by jl_gc_force_mark_old) - if (sweep_full || bits == GC_MARKED) { - bits = v->bits.gc = GC_OLD; // promote - } - prev_nold++; - } - else { - assert(bits == GC_MARKED); - bits = v->bits.gc = GC_CLEAN; // unmark - has_young = 1; + if (sweep_full || bits == GC_MARKED) { // old enough + bits = v->bits.gc = GC_OLD; // promote } + prev_nold++; has_marked |= gc_marked(bits); - age |= msk; freedall = 0; } v = (jl_taggedvalue_t*)((char*)v + osize); - msk <<= 1; } - *ages = age; assert(!freedall); pg->has_marked = has_marked; pg->has_young = has_young; @@ -1506,8 +1427,8 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t pg->fl_end_offset = (char*)pfl - data; } else { - pg->fl_begin_offset = -1; - pg->fl_end_offset = -1; + pg->fl_begin_offset = UINT16_MAX; + pg->fl_end_offset = UINT16_MAX; } pg->nfree = pg_nfree; @@ -1621,7 +1542,7 @@ static void gc_sweep_other(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_NOTSAFEPOINT { - assert(pg->fl_begin_offset != (uint16_t)-1); + assert(pg->fl_begin_offset != UINT16_MAX); char *cur_pg = gc_page_data(last); // Fast path for page that has no allocation 
jl_taggedvalue_t *fl_beg = (jl_taggedvalue_t*)(cur_pg + pg->fl_begin_offset); @@ -1662,7 +1583,7 @@ static void gc_sweep_pool(int sweep_full) for (int i = 0; i < JL_GC_N_POOLS; i++) { jl_gc_pool_t *p = &ptls2->heap.norm_pools[i]; jl_taggedvalue_t *last = p->freelist; - if (last) { + if (last != NULL) { jl_gc_pagemeta_t *pg = jl_assume(page_metadata(last)); gc_pool_sync_nfree(pg, last); pg->has_young = 1; @@ -1671,7 +1592,7 @@ static void gc_sweep_pool(int sweep_full) pfl[t_i * JL_GC_N_POOLS + i] = &p->freelist; last = p->newpages; - if (last) { + if (last != NULL) { char *last_p = (char*)last; jl_gc_pagemeta_t *pg = jl_assume(page_metadata(last_p - 1)); assert(last_p - gc_page_data(last_p - 1) >= GC_PAGE_OFFSET); @@ -2933,7 +2854,7 @@ void gc_mark_loop_barrier(void) void gc_mark_clean_reclaim_sets(void) { - // Clean up `reclaim-sets` and reset `top/bottom` of queues + // Clean up `reclaim-sets` for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; arraylist_t *reclaim_set2 = &ptls2->mark_queue.reclaim_set; @@ -3940,7 +3861,6 @@ jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz) // old pointer. bigval_t *newbig = (bigval_t*)gc_managed_realloc_(ptls, hdr, allocsz, oldsz, 1, s, 0); newbig->sz = allocsz; - newbig->age = 0; gc_big_object_link(newbig, &ptls->heap.big_objects); jl_value_t *snew = jl_valueof(&newbig->header); *(size_t*)snew = sz; @@ -4111,7 +4031,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) { p = (char *) p - 1; jl_gc_pagemeta_t *meta = page_metadata(p); - if (meta && meta->ages) { + if (meta) { char *page = gc_page_data(p); // offset within page. size_t off = (char *)p - page; @@ -4146,7 +4066,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) char *data = gc_page_data(newpages); if (data != meta->data) { // Pages on newpages form a linked list where only the - // first one is allocated from (see reset_page()). + // first one is allocated from (see gc_reset_page()). 
// All other pages are empty. return NULL; } @@ -4174,7 +4094,6 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) // entries and 1 for live objects. The above subcases arise // because allocating a cell will not update the age bit, so we // need extra logic for pages that have been allocated from. - unsigned obj_id = (off - off2) / osize; // We now distinguish between the second and third subcase. // Freelist entries are consumed in ascending order. Anything // before the freelist pointer was either live during the last @@ -4182,11 +4101,6 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) if (gc_page_data(cell) == gc_page_data(pool->freelist) && (char *)cell < (char *)pool->freelist) goto valid_object; - // We know now that the age bit reflects liveness status during - // the last sweep and that the cell has not been reused since. - if (!(meta->ages[obj_id / 32] & (1 << (obj_id % 32)))) { - return NULL; - } // Not a freelist entry, therefore a valid object. valid_object: // We have to treat objects with type `jl_buff_tag` differently, diff --git a/src/gc.h b/src/gc.h index f75ec26bc9017..47aab660c0981 100644 --- a/src/gc.h +++ b/src/gc.h @@ -117,10 +117,7 @@ typedef struct _jl_gc_chunk_t { JL_EXTENSION typedef struct _bigval_t { struct _bigval_t *next; struct _bigval_t **prev; // pointer to the next field of the prev entry - union { - size_t sz; - uintptr_t age : 2; - }; + size_t sz; #ifdef _P64 // Add padding so that the value is 64-byte aligned // (8 pointers of 8 bytes each) - (4 other pointers in struct) void *_padding[8 - 4]; @@ -173,12 +170,11 @@ typedef struct { // number of free objects in this page. // invalid if pool that owns this page is allocating objects from this page. 
uint16_t nfree; - uint16_t osize; // size of each object in this page + uint16_t osize; // size of each object in this page uint16_t fl_begin_offset; // offset of first free object in this page uint16_t fl_end_offset; // offset of last free object in this page uint16_t thread_n; // thread id of the heap that owns this page char *data; - uint32_t *ages; } jl_gc_pagemeta_t; // Page layout: diff --git a/src/julia_internal.h b/src/julia_internal.h index 2a8c2f54fe116..87a35b80516c2 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -342,7 +342,6 @@ void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) JL_NOTSAFEPOINT; void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset) JL_NOTSAFEPOINT; -void jl_gc_force_mark_old(jl_ptls_t ptls, jl_value_t *v); void gc_sweep_sysimg(void); From 8a1b6422245ec22cfa49d3b125c5bd3136e92852 Mon Sep 17 00:00:00 2001 From: Tanay Sharma Date: Wed, 14 Jun 2023 06:01:55 +0530 Subject: [PATCH 162/290] Add method to rationalize `Rational` (#43427) * Add Method to Rationalize Rational and Integer Co-authored-by: Jeff Bezanson Co-authored-by: Oscar Smith --- base/rational.jl | 21 ++++++++++++++++----- test/rational.jl | 12 ++++++++++++ 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/base/rational.jl b/base/rational.jl index 4ffae9043d10a..baca2397c42ff 100644 --- a/base/rational.jl +++ b/base/rational.jl @@ -173,10 +173,11 @@ julia> typeof(numerator(a)) BigInt ``` """ -function rationalize(::Type{T}, x::AbstractFloat, tol::Real) where T<:Integer +function rationalize(::Type{T}, x::Union{AbstractFloat, Rational}, tol::Real) where T<:Integer if tol < 0 throw(ArgumentError("negative tolerance $tol")) end + T<:Unsigned && x < 0 && __throw_negate_unsigned() isnan(x) && return T(x)//one(T) isinf(x) && return unsafe_rational(x < 0 ? 
-one(T) : one(T), zero(T)) @@ -188,7 +189,6 @@ function rationalize(::Type{T}, x::AbstractFloat, tol::Real) where T<:Integer a = trunc(x) r = x-a y = one(x) - tolx = oftype(x, tol) nt, t, tt = tolx, zero(tolx), tolx ia = np = nq = zero(T) @@ -233,10 +233,21 @@ function rationalize(::Type{T}, x::AbstractFloat, tol::Real) where T<:Integer return p // q end end -rationalize(::Type{T}, x::AbstractFloat; tol::Real = eps(x)) where {T<:Integer} = rationalize(T, x, tol)::Rational{T} +rationalize(::Type{T}, x::AbstractFloat; tol::Real = eps(x)) where {T<:Integer} = rationalize(T, x, tol) rationalize(x::AbstractFloat; kvs...) = rationalize(Int, x; kvs...) -rationalize(::Type{T}, x::Complex; kvs...) where {T<:Integer} = Complex(rationalize(T, x.re, kvs...)::Rational{T}, rationalize(T, x.im, kvs...)::Rational{T}) -rationalize(x::Complex; kvs...) = Complex(rationalize(Int, x.re, kvs...), rationalize(Int, x.im, kvs...)) +rationalize(::Type{T}, x::Complex; kvs...) where {T<:Integer} = Complex(rationalize(T, x.re; kvs...), rationalize(T, x.im; kvs...)) +rationalize(x::Complex; kvs...) = Complex(rationalize(Int, x.re; kvs...), rationalize(Int, x.im; kvs...)) +rationalize(::Type{T}, x::Rational; tol::Real = 0) where {T<:Integer} = rationalize(T, x, tol) +rationalize(x::Rational; kvs...) = x +rationalize(x::Integer; kvs...) = Rational(x) +function rationalize(::Type{T}, x::Integer; kvs...) 
where {T<:Integer} + if Base.hastypemax(T) # BigInt doesn't + x < typemin(T) && return unsafe_rational(-one(T), zero(T)) + x > typemax(T) && return unsafe_rational(one(T), zero(T)) + end + return Rational{T}(x) +end + """ numerator(x) diff --git a/test/rational.jl b/test/rational.jl index a1af6eda64516..0a2501c066052 100644 --- a/test/rational.jl +++ b/test/rational.jl @@ -33,6 +33,11 @@ using Test @test @inferred(rationalize(Int, 3.0, 0.0)) === 3//1 @test @inferred(rationalize(Int, 3.0, 0)) === 3//1 + @test @inferred(rationalize(Int, 33//100; tol=0.1)) === 1//3 # because tol + @test @inferred(rationalize(Int, 3; tol=0.0)) === 3//1 + @test @inferred(rationalize(Int8, 1000//333)) === Rational{Int8}(3//1) + @test @inferred(rationalize(Int8, 1000//3)) === Rational{Int8}(1//0) + @test @inferred(rationalize(Int8, 1000)) === Rational{Int8}(1//0) @test_throws OverflowError rationalize(UInt, -2.0) @test_throws ArgumentError rationalize(Int, big(3.0), -1.) # issue 26823 @@ -727,3 +732,10 @@ end @test rationalize(1.192 + 2.233im) == 149//125 + 2233//1000*im @test rationalize(Int8, 1.192 + 2.233im) == 118//99 + 67//30*im end +@testset "rationalize(Complex) with tol" begin + # test: rationalize(x::Complex; kvs...) + precise_next = 7205759403792795//72057594037927936 + @assert Float64(precise_next) == nextfloat(0.1) + @test rationalize(nextfloat(0.1) * im; tol=0) == precise_next * im + @test rationalize(0.1im; tol=eps(0.1)) == rationalize(0.1im) +end From 1af6648168126bb491f014d83be121c305d527a3 Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Tue, 13 Jun 2023 21:31:42 -0400 Subject: [PATCH 163/290] Use `-shared-libasan` when linking w/ ASAN enabled. This flag is needed to ensure that `libclang_rt.asan-*.so` appears explicitly in the DT_NEEDED entries of libjulia-*. Without this entry, e.g. `dlsym(libjulia_internal_handle)` can end up finding symbols directly in libc.so.6, effectively bypassing the ASAN interceptors. 
--- Make.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Make.inc b/Make.inc index 301ee934320f6..ac06dbfcc464f 100644 --- a/Make.inc +++ b/Make.inc @@ -723,7 +723,7 @@ endif # OS Linux or FreeBSD endif # SANITIZE_MEMORY=1 ifeq ($(SANITIZE_ADDRESS),1) SANITIZE_OPTS += -fsanitize=address -SANITIZE_LDFLAGS += -fsanitize=address +SANITIZE_LDFLAGS += -fsanitize=address -shared-libasan -Wl,-rpath=$(dir $(shell $(CC) --print-file-name libclang_rt.asan-x86_64.so)) endif ifeq ($(SANITIZE_THREAD),1) SANITIZE_OPTS += -fsanitize=thread From df09f67cfd08e89ceeb5559b8f71e177cacdba9c Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Wed, 14 Jun 2023 05:06:07 -0400 Subject: [PATCH 164/290] irverify: Enforce invariant that PhiNodes are at the beginning of a BB (#50158) We have an invariant that all PhiNodes are at the beginning of a BasicBlock (only possibly interrupted by a `nothing`) and we rely on this in various places for correctness. However, we did not actually verify this invariant. --- base/compiler/ssair/verify.jl | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl index bf06d6bb3e523..8df42bd499631 100644 --- a/base/compiler/ssair/verify.jl +++ b/base/compiler/ssair/verify.jl @@ -187,20 +187,30 @@ function verify_ir(ir::IRCode, print::Bool=true, end end end + lastbb = 0 + is_phinode_block = false for (bb, idx) in bbidxiter(ir) + if bb != lastbb + is_phinode_block = true + lastbb = bb + end # We allow invalid IR in dead code to avoid passes having to detect when # they're generating dead code. 
bb_unreachable(domtree, bb) && continue stmt = ir.stmts[idx][:inst] stmt === nothing && continue if isa(stmt, PhiNode) + if !is_phinode_block + @verify_error "φ node $idx is not at the beginning of the basic block $bb" + error("") + end @assert length(stmt.edges) == length(stmt.values) for i = 1:length(stmt.edges) edge = stmt.edges[i] for j = (i+1):length(stmt.edges) edge′ = stmt.edges[j] if edge == edge′ - # TODO: Move `unique` to Core.Compiler. For now we assume the predecessor list is + # TODO: Move `unique` to Core.Compiler. For now we assume the predecessor list is always unique. @verify_error "Edge list φ node $idx in bb $bb not unique (double edge?)" error("") end @@ -233,7 +243,14 @@ function verify_ir(ir::IRCode, print::Bool=true, end check_op(ir, domtree, val, Int(edge), last(ir.cfg.blocks[stmt.edges[i]].stmts)+1, idx, print, false, i, allow_frontend_forms) end - elseif isa(stmt, PhiCNode) + continue + elseif stmt === nothing + # Nothing to do + continue + end + + is_phinode_block = false + if isa(stmt, PhiCNode) for i = 1:length(stmt.values) val = stmt.values[i] if !isa(val, SSAValue) From 834aad4ab409f4ba65cbed2963b9ab6fa2770354 Mon Sep 17 00:00:00 2001 From: Ashley Milsted Date: Wed, 14 Jun 2023 07:15:41 -0700 Subject: [PATCH 165/290] Add (broken) tests for allocation in 5-arg mul!() (#49210) --- stdlib/LinearAlgebra/test/matmul.jl | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/stdlib/LinearAlgebra/test/matmul.jl b/stdlib/LinearAlgebra/test/matmul.jl index 2d99856a2667b..cc24e2bd6a1bf 100644 --- a/stdlib/LinearAlgebra/test/matmul.jl +++ b/stdlib/LinearAlgebra/test/matmul.jl @@ -979,4 +979,17 @@ end end end +@testset "Issue #46865: mul!() with non-const alpha, beta" begin + f!(C,A,B,alphas,betas) = mul!(C, A, B, alphas[1], betas[1]) + alphas = [1.0] + betas = [0.5] + for d in [2,3,4] # test native small-matrix cases as well as BLAS + A = rand(d,d) + B = copy(A) + C = copy(A) + f!(C, A, B, alphas, betas) + @test_broken (@allocated f!(C, 
A, B, alphas, betas)) == 0 + end +end + end # module TestMatmul From 0fb86dd5c54519602244a0a6ee0eab20629289da Mon Sep 17 00:00:00 2001 From: Oscar Smith Date: Wed, 14 Jun 2023 14:36:36 -0400 Subject: [PATCH 166/290] Fix `rationalize(::Complex)` test on 32 bit systems (#50163) * Fix `rationalize(::Complex)` test on 32 bit systems the test was relying on `Int===Int64` --- test/rational.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/rational.jl b/test/rational.jl index 0a2501c066052..4b29618bd15e0 100644 --- a/test/rational.jl +++ b/test/rational.jl @@ -736,6 +736,6 @@ end # test: rationalize(x::Complex; kvs...) precise_next = 7205759403792795//72057594037927936 @assert Float64(precise_next) == nextfloat(0.1) - @test rationalize(nextfloat(0.1) * im; tol=0) == precise_next * im + @test rationalize(Int64, nextfloat(0.1) * im; tol=0) == precise_next * im @test rationalize(0.1im; tol=eps(0.1)) == rationalize(0.1im) end From abbe045609145711f9c4edc40c65294f5fde5ef0 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Wed, 14 Jun 2023 22:14:58 +0200 Subject: [PATCH 167/290] FAQ: Clarify use of PROGRAM_FILE for importable scripts (#50139) In issue #45852, the proposed `@is_script` macro as a shorthand for `abspath(PROGRAM_FILE) == @__FILE__` was rejected, because it was agreed that the pattern of using the same file as a script and an importable library is questionable, so there is no reason to make it too idiomatic. For this reason, mention in the FAQ that while checking `PROGRAM_FILE` can be used to check if a file is run as a script, it is not recommended to have files double as both scripts and importable libraries. 
--- doc/src/manual/faq.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/src/manual/faq.md b/doc/src/manual/faq.md index e3960ee1a4690..85b31bf20a99f 100644 --- a/doc/src/manual/faq.md +++ b/doc/src/manual/faq.md @@ -94,6 +94,9 @@ When a file is run as the main script using `julia file.jl` one might want to ac functionality like command line argument handling. A way to determine that a file is run in this fashion is to check if `abspath(PROGRAM_FILE) == @__FILE__` is `true`. +However, it is recommended to not write files that double as a script and as an importable library. +If one needs functionality both available as a library and a script, it is better to write is as a library, then import the functionality into a distinct script. + ### [How do I catch CTRL-C in a script?](@id catch-ctrl-c) Running a Julia script using `julia file.jl` does not throw From 9e43494bdc017690c6d1242c11838c71f434457c Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Wed, 14 Jun 2023 22:37:12 +0000 Subject: [PATCH 168/290] Compile JITLink for every platform (#50099) --- src/codegen.cpp | 2 -- src/jitlayers.cpp | 46 ++++++++++++++++++++++++---------------------- src/jitlayers.h | 3 --- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index 3992631c8ac76..26304c7350c5c 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -9076,7 +9076,6 @@ extern "C" void jl_init_llvm(void) } #endif -#ifndef JL_USE_JITLINK #ifdef JL_USE_INTEL_JITEVENTS if (jl_using_intel_jitevents) jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createIntelJITEventListener()); @@ -9092,7 +9091,6 @@ extern "C" void jl_init_llvm(void) jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createPerfJITEventListener()); #endif #endif -#endif #endif cl::PrintOptionValues(); diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 925638af5fa53..1468b4f55f5a7 100644 --- a/src/jitlayers.cpp +++ 
b/src/jitlayers.cpp @@ -39,16 +39,13 @@ using namespace llvm; #include "julia_assert.h" #include "processor.h" -#ifdef JL_USE_JITLINK # include # include # include # if JL_LLVM_VERSION >= 150000 # include # endif -#else # include -#endif #define DEBUG_TYPE "julia_jitlayers" @@ -694,8 +691,6 @@ void jl_register_jit_object(const object::ObjectFile &debugObj, std::function getLoadAddress, std::function lookupWriteAddress) JL_NOTSAFEPOINT; -#ifdef JL_USE_JITLINK - namespace { using namespace llvm::orc; @@ -871,6 +866,8 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin { } graph_size += secsize; } + (void) code_size; + (void) data_size; this->total_size.fetch_add(graph_size, std::memory_order_relaxed); jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, graph_size); jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, code_size); @@ -880,6 +877,17 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin { } }; +// replace with [[maybe_unused]] when we get to C++17 +#ifdef _COMPILER_GCC_ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +#endif + +#ifdef _COMPILER_CLANG_ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-function" +#endif + // TODO: Port our memory management optimisations to JITLink instead of using the // default InProcessMemoryManager. 
std::unique_ptr createJITLinkMemoryManager() { @@ -889,33 +897,28 @@ std::unique_ptr createJITLinkMemoryManager() { return cantFail(orc::MapperJITLinkMemoryManager::CreateWithMapper()); #endif } -} - -# ifdef LLVM_SHLIB -# define EHFRAME_RANGE(name) orc::ExecutorAddrRange name -# define UNPACK_EHFRAME_RANGE(name) \ - name.Start.toPtr(), \ - static_cast(name.size()) +#ifdef _COMPILER_CLANG_ +#pragma clang diagnostic pop +#endif +#ifdef _COMPILER_GCC_ +#pragma GCC diagnostic pop +#endif +} class JLEHFrameRegistrar final : public jitlink::EHFrameRegistrar { public: - Error registerEHFrames(EHFRAME_RANGE(EHFrameSection)) override { - register_eh_frames( - UNPACK_EHFRAME_RANGE(EHFrameSection)); + Error registerEHFrames(orc::ExecutorAddrRange EHFrameSection) override { + register_eh_frames(EHFrameSection.Start.toPtr(), static_cast(EHFrameSection.size())); return Error::success(); } - Error deregisterEHFrames(EHFRAME_RANGE(EHFrameSection)) override { - deregister_eh_frames( - UNPACK_EHFRAME_RANGE(EHFrameSection)); + Error deregisterEHFrames(orc::ExecutorAddrRange EHFrameSection) override { + deregister_eh_frames(EHFrameSection.Start.toPtr(), static_cast(EHFrameSection.size())); return Error::success(); } }; -# endif - -#else // !JL_USE_JITLINK RTDyldMemoryManager* createRTDyldMemoryManager(void); @@ -1015,7 +1018,6 @@ void registerRTDyldJITObject(const object::ObjectFile &Object, #endif ); } -#endif namespace { static std::unique_ptr createTargetMachine() JL_NOTSAFEPOINT { TargetOptions options = TargetOptions(); diff --git a/src/jitlayers.h b/src/jitlayers.h index 344cba8c0f453..4e53bbed6e766 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -61,12 +61,9 @@ # endif #endif -#ifdef JL_USE_JITLINK # include -#else # include # include -#endif using namespace llvm; From a595274188200de83a2b0c227e86a50f33515675 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Jos=C3=A9=20Gonz=C3=A1lez=20Oneto?= <80299581+jotas6@users.noreply.github.com> Date: Wed, 14 Jun 2023 22:27:15 -0500 
Subject: [PATCH 169/290] Fixed small typo in `map`'s docstring (#50173) --- base/abstractarray.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 83c97f4932df5..20566265b0c76 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -3269,7 +3269,7 @@ mapany(f, itr) = Any[f(x) for x in itr] map(f, c...) -> collection Transform collection `c` by applying `f` to each element. For multiple collection arguments, -apply `f` elementwise, and stop when when any of them is exhausted. +apply `f` elementwise, and stop when any of them is exhausted. See also [`map!`](@ref), [`foreach`](@ref), [`mapreduce`](@ref), [`mapslices`](@ref), [`zip`](@ref), [`Iterators.map`](@ref). From 5db2c27fb227f17bab8f7eaf697c497439eee1c7 Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Thu, 15 Jun 2023 03:47:49 +0000 Subject: [PATCH 170/290] Add marker passes to various pipeline stages (#50111) --- doc/src/devdocs/llvm.md | 32 ++- src/llvm-demote-float16.cpp | 8 +- src/passes.h | 43 ++++ src/pipeline.cpp | 79 ++++++-- test/llvmpasses/pipeline-prints.ll | 314 +++++++++++++++++++++++++++++ 5 files changed, 458 insertions(+), 18 deletions(-) create mode 100644 test/llvmpasses/pipeline-prints.ll diff --git a/doc/src/devdocs/llvm.md b/doc/src/devdocs/llvm.md index 93b241d703714..4e5e90d7cdbc6 100644 --- a/doc/src/devdocs/llvm.md +++ b/doc/src/devdocs/llvm.md @@ -82,9 +82,39 @@ Here are example settings using `bash` syntax: * `export JULIA_LLVM_ARGS=-debug-only=loop-vectorize` dumps LLVM `DEBUG(...)` diagnostics for loop vectorizer. If you get warnings about "Unknown command line argument", rebuild LLVM with `LLVM_ASSERTIONS = 1`. - * `export JULIA_LLVM_ARGS=-help` shows a list of available options. + * `export JULIA_LLVM_ARGS=-help` shows a list of available options. `export JULIA_LLVM_ARGS=-help-hidden` shows even more. 
* `export JULIA_LLVM_ARGS="-fatal-warnings -print-options"` is an example how to use multiple options. +### Useful `JULIA_LLVM_ARGS` parameters + * `-print-after=PASS`: prints the IR after any execution of `PASS`, useful for checking changes done by a pass. + * `-print-before=PASS`: prints the IR before any execution of `PASS`, useful for checking the input to a pass. + * `-print-changed`: prints the IR whenever a pass changes the IR, useful for narrowing down which passes are causing problems. + * `-print-(before|after)=MARKER-PASS`: the Julia pipeline ships with a number of marker passes in the pipeline, which can be used to identify where problems or optimizations are occurring. A marker pass is defined as a pass which appears once in the pipeline and performs no transformations on the IR, and is only useful for targeting print-before/print-after. Currently, the following marker passes exist in the pipeline: + * BeforeOptimization + * BeforeEarlySimplification + * AfterEarlySimplification + * BeforeEarlyOptimization + * AfterEarlyOptimization + * BeforeLoopOptimization + * BeforeLICM + * AfterLICM + * BeforeLoopSimplification + * AfterLoopSimplification + * AfterLoopOptimization + * BeforeScalarOptimization + * AfterScalarOptimization + * BeforeVectorization + * AfterVectorization + * BeforeIntrinsicLowering + * AfterIntrinsicLowering + * BeforeCleanup + * AfterCleanup + * AfterOptimization + * `-time-passes`: prints the time spent in each pass, useful for identifying which passes are taking a long time. 
+ * `-print-module-scope`: used in conjunction with `-print-(before|after)`, gets the entire module rather than the IR unit received by the pass + * `-debug`: prints out a lot of debugging information throughout LLVM + * `-debug-only=NAME`, prints out debugging statements from files with `DEBUG_TYPE` defined to `NAME`, useful for getting additional context about a problem + ## Debugging LLVM transformations in isolation On occasion, it can be useful to debug LLVM's transformations in isolation from diff --git a/src/llvm-demote-float16.cpp b/src/llvm-demote-float16.cpp index 4875abb390e21..41ccdca327f8c 100644 --- a/src/llvm-demote-float16.cpp +++ b/src/llvm-demote-float16.cpp @@ -51,8 +51,12 @@ namespace { static bool have_fp16(Function &caller, const Triple &TT) { Attribute FSAttr = caller.getFnAttribute("target-features"); - StringRef FS = - FSAttr.isValid() ? FSAttr.getValueAsString() : jl_ExecutionEngine->getTargetFeatureString(); + StringRef FS = ""; + if (FSAttr.isValid()) + FS = FSAttr.getValueAsString(); + else if (jl_ExecutionEngine) + FS = jl_ExecutionEngine->getTargetFeatureString(); + // else probably called from opt, just do nothing if (TT.isAArch64()) { if (FS.find("+fp16fml") != llvm::StringRef::npos || FS.find("+fullfp16") != llvm::StringRef::npos){ return true; diff --git a/src/passes.h b/src/passes.h index d01fe007d667a..2bb33d6eec60d 100644 --- a/src/passes.h +++ b/src/passes.h @@ -103,4 +103,47 @@ struct JuliaLICMPass : PassInfoMixin { LoopStandardAnalysisResults &AR, LPMUpdater &U) JL_NOTSAFEPOINT; }; +#define MODULE_MARKER_PASS(NAME) \ + struct NAME##MarkerPass : PassInfoMixin { \ + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) JL_NOTSAFEPOINT { return PreservedAnalyses::all(); } \ + static bool isRequired() { return true; } \ + }; + +#define FUNCTION_MARKER_PASS(NAME) \ + struct NAME##MarkerPass : PassInfoMixin { \ + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) JL_NOTSAFEPOINT { return 
PreservedAnalyses::all(); } \ + static bool isRequired() { return true; } \ + }; + +#define LOOP_MARKER_PASS(NAME) \ + struct NAME##MarkerPass : PassInfoMixin { \ + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, \ + LoopStandardAnalysisResults &AR, LPMUpdater &U) JL_NOTSAFEPOINT { \ + return PreservedAnalyses::all(); \ + } \ + static bool isRequired() { return true; } \ + }; + +// These are useful for debugging with --print-before/after +MODULE_MARKER_PASS(BeforeOptimization) +MODULE_MARKER_PASS(BeforeEarlySimplification) +MODULE_MARKER_PASS(AfterEarlySimplification) +MODULE_MARKER_PASS(BeforeEarlyOptimization) +MODULE_MARKER_PASS(AfterEarlyOptimization) +FUNCTION_MARKER_PASS(BeforeLoopOptimization) +LOOP_MARKER_PASS(BeforeLICM) +LOOP_MARKER_PASS(AfterLICM) +LOOP_MARKER_PASS(BeforeLoopSimplification) +LOOP_MARKER_PASS(AfterLoopSimplification) +FUNCTION_MARKER_PASS(AfterLoopOptimization) +FUNCTION_MARKER_PASS(BeforeScalarOptimization) +FUNCTION_MARKER_PASS(AfterScalarOptimization) +FUNCTION_MARKER_PASS(BeforeVectorization) +FUNCTION_MARKER_PASS(AfterVectorization) +MODULE_MARKER_PASS(BeforeIntrinsicLowering) +MODULE_MARKER_PASS(AfterIntrinsicLowering) +MODULE_MARKER_PASS(BeforeCleanup) +MODULE_MARKER_PASS(AfterCleanup) +MODULE_MARKER_PASS(AfterOptimization) + #endif diff --git a/src/pipeline.cpp b/src/pipeline.cpp index e6b3b284a3768..6afcac5ea8b7c 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -327,6 +327,7 @@ namespace { #define JULIA_PASS(ADD_PASS) if (!options.llvm_only) { ADD_PASS; } else do { } while (0) static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + MPM.addPass(BeforeEarlySimplificationMarkerPass()); #ifdef JL_DEBUG_BUILD addVerificationPasses(MPM, options.llvm_only); #endif @@ -349,9 +350,11 @@ static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder 
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } invokeEarlySimplificationCallbacks(MPM, PB, O); + MPM.addPass(AfterEarlySimplificationMarkerPass()); } static void buildEarlyOptimizerPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + MPM.addPass(BeforeEarlyOptimizationMarkerPass()); invokeOptimizerEarlyCallbacks(MPM, PB, O); { CGSCCPassManager CGPM; @@ -387,9 +390,11 @@ static void buildEarlyOptimizerPipeline(ModulePassManager &MPM, PassBuilder *PB, invokePeepholeEPCallbacks(FPM, PB, O); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } + MPM.addPass(AfterEarlyOptimizationMarkerPass()); } static void buildLoopOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + FPM.addPass(BeforeLoopOptimizationMarkerPass()); { LoopPassManager LPM; if (O.getSpeedupLevel() >= 2) { @@ -401,11 +406,13 @@ static void buildLoopOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB } if (O.getSpeedupLevel() >= 2) { LoopPassManager LPM; + LPM.addPass(BeforeLICMMarkerPass()); LPM.addPass(LICMPass(LICMOptions())); LPM.addPass(JuliaLICMPass()); LPM.addPass(SimpleLoopUnswitchPass(/*NonTrivial*/true, true)); LPM.addPass(LICMPass(LICMOptions())); LPM.addPass(JuliaLICMPass()); + LPM.addPass(AfterLICMMarkerPass()); //LICM needs MemorySSA now, so we must use it FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */true)); } @@ -414,6 +421,7 @@ static void buildLoopOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB } { LoopPassManager LPM; + LPM.addPass(BeforeLoopSimplificationMarkerPass()); if (O.getSpeedupLevel() >= 2) { LPM.addPass(LoopInstSimplifyPass()); LPM.addPass(LoopIdiomRecognizePass()); @@ -424,12 +432,15 @@ static void buildLoopOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB LPM.addPass(LoopFullUnrollPass()); } 
invokeLoopOptimizerEndCallbacks(LPM, PB, O); + LPM.addPass(AfterLoopSimplificationMarkerPass()); //We don't know if the loop end callbacks support MSSA FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false)); } + FPM.addPass(AfterLoopOptimizationMarkerPass()); } static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + FPM.addPass(BeforeScalarOptimizationMarkerPass()); if (O.getSpeedupLevel() >= 2) { JULIA_PASS(FPM.addPass(AllocOptPass())); FPM.addPass(SROAPass()); @@ -460,9 +471,11 @@ static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder * FPM.addPass(LoopDistributePass()); } invokeScalarOptimizerCallbacks(FPM, PB, O); + FPM.addPass(AfterScalarOptimizationMarkerPass()); } static void buildVectorPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + FPM.addPass(BeforeVectorizationMarkerPass()); //TODO look into loop vectorize options FPM.addPass(InjectTLIMappings()); FPM.addPass(LoopVectorizePass()); @@ -477,9 +490,11 @@ static void buildVectorPipeline(FunctionPassManager &FPM, PassBuilder *PB, Optim // This unroll will unroll vectorized loops // as well as loops that we tried but failed to vectorize FPM.addPass(LoopUnrollPass(LoopUnrollOptions(O.getSpeedupLevel(), /*OnlyWhenForced = */ false, /*ForgetSCEV = */false))); + FPM.addPass(AfterVectorizationMarkerPass()); } static void buildIntrinsicLoweringPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + MPM.addPass(BeforeIntrinsicLoweringMarkerPass()); if (options.lower_intrinsics) { //TODO barrier pass? 
{ @@ -510,9 +525,11 @@ static void buildIntrinsicLoweringPipeline(ModulePassManager &MPM, PassBuilder * } else { JULIA_PASS(MPM.addPass(RemoveNIPass())); } + MPM.addPass(AfterIntrinsicLoweringMarkerPass()); } static void buildCleanupPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + MPM.addPass(BeforeCleanupMarkerPass()); if (O.getSpeedupLevel() >= 2) { FunctionPassManager FPM; JULIA_PASS(FPM.addPass(CombineMulAddPass())); @@ -530,9 +547,11 @@ static void buildCleanupPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimi } MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } + MPM.addPass(AfterCleanupMarkerPass()); } static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { + MPM.addPass(BeforeOptimizationMarkerPass()); buildEarlySimplificationPipeline(MPM, PB, O, options); MPM.addPass(AlwaysInlinerPass()); buildEarlyOptimizerPipeline(MPM, PB, O, options); @@ -549,40 +568,41 @@ static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationL } buildIntrinsicLoweringPipeline(MPM, PB, O, options); buildCleanupPipeline(MPM, PB, O, options); + MPM.addPass(AfterOptimizationMarkerPass()); } #undef JULIA_PASS namespace { - auto createPIC(StandardInstrumentations &SI) JL_NOTSAFEPOINT { - auto PIC = std::make_unique(); + + void adjustPIC(PassInstrumentationCallbacks &PIC) JL_NOTSAFEPOINT { //Borrowed from LLVM PassBuilder.cpp:386 #define MODULE_PASS(NAME, CLASS, CREATE_PASS) \ -PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ -PIC->addClassToPassName(CLASS, NAME); +PIC.addClassToPassName(CLASS, NAME); #define MODULE_ANALYSIS(NAME, CREATE_PASS) \ -PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); 
+PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) \ -PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ -PIC->addClassToPassName(CLASS, NAME); +PIC.addClassToPassName(CLASS, NAME); #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ -PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define LOOPNEST_PASS(NAME, CREATE_PASS) \ -PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define LOOP_PASS(NAME, CLASS, CREATE_PASS) \ -PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ -PIC->addClassToPassName(CLASS, NAME); +PIC.addClassToPassName(CLASS, NAME); #define LOOP_ANALYSIS(NAME, CREATE_PASS) \ -PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define CGSCC_PASS(NAME, CLASS, CREATE_PASS) \ -PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ -PIC->addClassToPassName(CLASS, NAME); +PIC.addClassToPassName(CLASS, NAME); #define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ -PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #include "llvm-julia-passes.inc" @@ -599,7 +619,32 @@ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #undef CGSCC_PASS #undef CGSCC_PASS_WITH_PARAMS #undef CGSCC_ANALYSIS + // Marker passes are set separately so that we don't export them by accident + 
PIC.addClassToPassName("BeforeOptimizationMarkerPass", "BeforeOptimization"); + PIC.addClassToPassName("BeforeEarlySimplificationMarkerPass", "BeforeEarlySimplification"); + PIC.addClassToPassName("AfterEarlySimplificationMarkerPass", "AfterEarlySimplification"); + PIC.addClassToPassName("BeforeEarlyOptimizationMarkerPass", "BeforeEarlyOptimization"); + PIC.addClassToPassName("AfterEarlyOptimizationMarkerPass", "AfterEarlyOptimization"); + PIC.addClassToPassName("BeforeLoopOptimizationMarkerPass", "BeforeLoopOptimization"); + PIC.addClassToPassName("BeforeLICMMarkerPass", "BeforeLICM"); + PIC.addClassToPassName("AfterLICMMarkerPass", "AfterLICM"); + PIC.addClassToPassName("BeforeLoopSimplificationMarkerPass", "BeforeLoopSimplification"); + PIC.addClassToPassName("AfterLoopSimplificationMarkerPass", "AfterLoopSimplification"); + PIC.addClassToPassName("AfterLoopOptimizationMarkerPass", "AfterLoopOptimization"); + PIC.addClassToPassName("BeforeScalarOptimizationMarkerPass", "BeforeScalarOptimization"); + PIC.addClassToPassName("AfterScalarOptimizationMarkerPass", "AfterScalarOptimization"); + PIC.addClassToPassName("BeforeVectorizationMarkerPass", "BeforeVectorization"); + PIC.addClassToPassName("AfterVectorizationMarkerPass", "AfterVectorization"); + PIC.addClassToPassName("BeforeIntrinsicLoweringMarkerPass", "BeforeIntrinsicLowering"); + PIC.addClassToPassName("AfterIntrinsicLoweringMarkerPass", "AfterIntrinsicLowering"); + PIC.addClassToPassName("BeforeCleanupMarkerPass", "BeforeCleanup"); + PIC.addClassToPassName("AfterCleanupMarkerPass", "AfterCleanup"); + PIC.addClassToPassName("AfterOptimizationMarkerPass", "AfterOptimization"); + } + auto createPIC(StandardInstrumentations &SI) JL_NOTSAFEPOINT { + auto PIC = std::make_unique(); + adjustPIC(*PIC); SI.registerCallbacks(*PIC); return PIC; } @@ -744,6 +789,10 @@ static llvm::Optional> parseJu // forward the callbacks to the respective passes. 
LLVM seems to prefer this, // and when we add the full pass builder having them directly will be helpful. void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { + auto PIC = PB.getPassInstrumentationCallbacks(); + if (PIC) { + adjustPIC(*PIC); + } PB.registerPipelineParsingCallback( [](StringRef Name, FunctionPassManager &PM, ArrayRef InnerPipeline) { diff --git a/test/llvmpasses/pipeline-prints.ll b/test/llvmpasses/pipeline-prints.ll new file mode 100644 index 0000000000000..0ea25aa9fb0f0 --- /dev/null +++ b/test/llvmpasses/pipeline-prints.ll @@ -0,0 +1,314 @@ +; COM: This is a newpm-only test, no legacypm command +; COM: we run all the prefixes even though some don't have tests because we want to make sure they don't crash +; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREOPTIMIZATION +; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYOPTIMIZATION +; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYOPTIMIZATION +; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPOPTIMIZATION +; RUN: opt -enable-new-pm=1 
--load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELICM +; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLICM +; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPOPTIMIZATION +; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORESCALAROPTIMIZATION +; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERSCALAROPTIMIZATION +; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREVECTORIZATION +; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERVECTORIZATION +; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREINTRINSICLOWERING +; RUN: opt -enable-new-pm=1 
--load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERINTRINSICLOWERING +; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORECLEANUP +; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERCLEANUP +; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEROPTIMIZATION + +; ModuleID = 'f' +source_filename = "f" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" +target triple = "x86_64-unknown-linux-gnu" + +define i64 @julia_f_199({} addrspace(10)* noundef nonnull align 16 dereferenceable(40) %0) #0 !dbg !4 { +top: + %x = alloca {} addrspace(10)*, align 8 + %1 = call {}*** @julia.get_pgcstack() + store {} addrspace(10)* null, {} addrspace(10)** %x, align 8 + %2 = bitcast {}*** %1 to {}** + %current_task = getelementptr inbounds {}*, {}** %2, i64 -14 + %3 = bitcast {}** %current_task to i64* + %world_age = getelementptr inbounds i64, i64* %3, i64 15 + store {} addrspace(10)* %0, {} addrspace(10)** %x, align 8 + %4 = bitcast {}*** %1 to {}** + %current_task1 = getelementptr inbounds {}*, {}** %4, i64 -14 + %ptls_field = getelementptr inbounds {}*, {}** %current_task1, i64 16 + %ptls_load = load {}*, {}** %ptls_field, align 8, !tbaa !8 + %ptls = bitcast {}* %ptls_load to {}** + %5 = bitcast {}** %ptls to i64** + %6 = getelementptr inbounds i64*, i64** %5, i64 2 + %safepoint = load i64*, i64** %6, align 8, !tbaa !12, !invariant.load !7 + fence syncscope("singlethread") seq_cst + call void @julia.safepoint(i64* %safepoint), !dbg !14 + fence 
syncscope("singlethread") seq_cst + %7 = load {} addrspace(10)*, {} addrspace(10)** %x, align 8, !dbg !15, !nonnull !7, !dereferenceable !23, !align !24 + %8 = addrspacecast {} addrspace(10)* %7 to {} addrspace(11)*, !dbg !15 + %9 = bitcast {} addrspace(11)* %8 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !15 + %10 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %9, i32 0, i32 1, !dbg !15 + %11 = load i64, i64 addrspace(11)* %10, align 8, !dbg !15, !tbaa !12, !range !25, !invariant.load !7, !alias.scope !26, !noalias !29 + %12 = icmp sle i64 0, %11, !dbg !34 + %13 = icmp ult i64 0, %11, !dbg !42 + %14 = and i1 %12, %13, !dbg !43 + %15 = zext i1 %14 to i8, !dbg !18 + %16 = trunc i8 %15 to i1, !dbg !18 + %17 = xor i1 %16, true, !dbg !18 + br i1 %17, label %L12, label %L9, !dbg !18 + +L9: ; preds = %top + %18 = load {} addrspace(10)*, {} addrspace(10)** %x, align 8, !dbg !46, !nonnull !7, !dereferenceable !23, !align !24 + %19 = addrspacecast {} addrspace(10)* %18 to {} addrspace(11)*, !dbg !46 + %20 = bitcast {} addrspace(11)* %19 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !46 + %21 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %20, i32 0, i32 0, !dbg !46 + %22 = load i8 addrspace(13)*, i8 addrspace(13)* addrspace(11)* %21, align 8, !dbg !46, !tbaa !12, !invariant.load !7, !alias.scope !26, !noalias !29, !nonnull !7 + %23 = bitcast i8 addrspace(13)* %22 to i64 addrspace(13)*, !dbg !46 + %24 = getelementptr inbounds i64, i64 addrspace(13)* %23, i64 0, !dbg !46 + %25 = load i64, i64 addrspace(13)* %24, align 8, !dbg !46, !tbaa !48, !alias.scope !51, !noalias !52 + br label %L13, !dbg !18 + +L12: ; preds = %top + br label %L13, !dbg !18 + +L13: ; preds = %L12, %L9 + %value_phi = phi i8 [ 0, %L9 ], [ 1, %L12 ] + %value_phi2 = phi i64 [ %25, %L9 ], [ undef, %L12 ] + 
%value_phi3 = phi i64 [ 2, %L9 ], [ undef, %L12 ] + br label %L17, !dbg !21 + +L17: ; preds = %L13 + %26 = trunc i8 %value_phi to i1, !dbg !22 + %27 = xor i1 %26, true, !dbg !22 + %28 = zext i1 %27 to i8, !dbg !22 + %29 = trunc i8 %28 to i1, !dbg !22 + %30 = xor i1 %29, true, !dbg !22 + br i1 %30, label %L17.L41_crit_edge, label %L17.L19_crit_edge, !dbg !22 + +L17.L41_crit_edge: ; preds = %L17 + br label %L41, !dbg !53 + +L17.L19_crit_edge: ; preds = %L17 + br label %L19, !dbg !18 + +L19: ; preds = %L17.L19_crit_edge, %L40 + %value_phi4 = phi i64 [ %value_phi2, %L17.L19_crit_edge ], [ %value_phi7, %L40 ] + %value_phi5 = phi i64 [ %value_phi3, %L17.L19_crit_edge ], [ %value_phi8, %L40 ] + %value_phi6 = phi i64 [ 0, %L17.L19_crit_edge ], [ %31, %L40 ] + %31 = add i64 %value_phi6, %value_phi4, !dbg !55 + %32 = sub i64 %value_phi5, 1, !dbg !58 + %33 = load {} addrspace(10)*, {} addrspace(10)** %x, align 8, !dbg !61, !nonnull !7, !dereferenceable !23, !align !24 + %34 = addrspacecast {} addrspace(10)* %33 to {} addrspace(11)*, !dbg !61 + %35 = bitcast {} addrspace(11)* %34 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !61 + %36 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %35, i32 0, i32 1, !dbg !61 + %37 = load i64, i64 addrspace(11)* %36, align 8, !dbg !61, !tbaa !12, !range !25, !invariant.load !7, !alias.scope !26, !noalias !29 + %38 = icmp sle i64 0, %37, !dbg !62 + %39 = icmp ult i64 %32, %37, !dbg !65 + %40 = and i1 %38, %39, !dbg !66 + %41 = zext i1 %40 to i8, !dbg !53 + %42 = trunc i8 %41 to i1, !dbg !53 + %43 = xor i1 %42, true, !dbg !53 + br i1 %43, label %L34, label %L31, !dbg !53 + +L31: ; preds = %L19 + %44 = load {} addrspace(10)*, {} addrspace(10)** %x, align 8, !dbg !67, !nonnull !7, !dereferenceable !23, !align !24 + %45 = sub i64 %value_phi5, 1, !dbg !67 + %46 = mul i64 %45, 1, !dbg !67 + %47 = add i64 0, %46, !dbg !67 + %48 = addrspacecast {} 
addrspace(10)* %44 to {} addrspace(11)*, !dbg !67 + %49 = bitcast {} addrspace(11)* %48 to { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)*, !dbg !67 + %50 = getelementptr inbounds { i8 addrspace(13)*, i64, i16, i16, i32 }, { i8 addrspace(13)*, i64, i16, i16, i32 } addrspace(11)* %49, i32 0, i32 0, !dbg !67 + %51 = load i8 addrspace(13)*, i8 addrspace(13)* addrspace(11)* %50, align 8, !dbg !67, !tbaa !12, !invariant.load !7, !alias.scope !26, !noalias !29, !nonnull !7 + %52 = bitcast i8 addrspace(13)* %51 to i64 addrspace(13)*, !dbg !67 + %53 = getelementptr inbounds i64, i64 addrspace(13)* %52, i64 %47, !dbg !67 + %54 = load i64, i64 addrspace(13)* %53, align 8, !dbg !67, !tbaa !48, !alias.scope !51, !noalias !52 + %55 = add i64 %value_phi5, 1, !dbg !68 + br label %L35, !dbg !53 + +L34: ; preds = %L19 + br label %L35, !dbg !53 + +L35: ; preds = %L34, %L31 + %value_phi7 = phi i64 [ %54, %L31 ], [ undef, %L34 ] + %value_phi8 = phi i64 [ %55, %L31 ], [ undef, %L34 ] + %value_phi9 = phi i8 [ 0, %L31 ], [ 1, %L34 ] + %56 = trunc i8 %value_phi9 to i1, !dbg !54 + %57 = xor i1 %56, true, !dbg !54 + %58 = zext i1 %57 to i8, !dbg !54 + %59 = trunc i8 %58 to i1, !dbg !54 + %60 = xor i1 %59, true, !dbg !54 + br i1 %60, label %L35.L41_crit_edge, label %L40, !dbg !54 + +L35.L41_crit_edge: ; preds = %L35 + br label %L41, !dbg !53 + +L40: ; preds = %L35 + br label %L19, !dbg !18 + +L41: ; preds = %L17.L41_crit_edge, %L35.L41_crit_edge + %value_phi10 = phi i64 [ %31, %L35.L41_crit_edge ], [ 0, %L17.L41_crit_edge ] + ret i64 %value_phi10, !dbg !69 +} + +; Function Attrs: noinline optnone +define nonnull {} addrspace(10)* @jfptr_f_200({} addrspace(10)* %0, {} addrspace(10)** noalias nocapture noundef readonly %1, i32 %2) #1 { +top: + %3 = call {}*** @julia.get_pgcstack() + %4 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %1, i32 0 + %5 = load {} addrspace(10)*, {} addrspace(10)** %4, align 8, !tbaa !12, !invariant.load !7, !alias.scope !26, !noalias !29, 
!nonnull !7, !dereferenceable !23, !align !24 + %6 = call i64 @julia_f_199({} addrspace(10)* %5) + %7 = call nonnull {} addrspace(10)* @ijl_box_int64(i64 signext %6) + ret {} addrspace(10)* %7 +} + +declare {}*** @julia.get_pgcstack() + +declare nonnull {} addrspace(10)* @ijl_box_int64(i64 signext) + +; Function Attrs: inaccessiblemem_or_argmemonly +declare void @julia.safepoint(i64*) #2 + +attributes #0 = { "frame-pointer"="all" "probe-stack"="inline-asm" } +attributes #1 = { noinline optnone "frame-pointer"="all" "probe-stack"="inline-asm" } +attributes #2 = { inaccessiblemem_or_argmemonly } + +!llvm.module.flags = !{!0, !1} +!llvm.dbg.cu = !{!2} + +!0 = !{i32 2, !"Dwarf Version", i32 4} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, nameTableKind: GNU) +!3 = !DIFile(filename: "julia", directory: ".") +!4 = distinct !DISubprogram(name: "f", linkageName: "julia_f_199", scope: null, file: !5, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!5 = !DIFile(filename: "REPL[2]", directory: ".") +!6 = !DISubroutineType(types: !7) +!7 = !{} +!8 = !{!9, !9, i64 0} +!9 = !{!"jtbaa_gcframe", !10, i64 0} +!10 = !{!"jtbaa", !11, i64 0} +!11 = !{!"jtbaa"} +!12 = !{!13, !13, i64 0, i64 1} +!13 = !{!"jtbaa_const", !10, i64 0} +!14 = !DILocation(line: 1, scope: !4) +!15 = !DILocation(line: 10, scope: !16, inlinedAt: !18) +!16 = distinct !DISubprogram(name: "length;", linkageName: "length", scope: !17, file: !17, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!17 = !DIFile(filename: "essentials.jl", directory: ".") +!18 = !DILocation(line: 943, scope: !19, inlinedAt: !21) +!19 = distinct !DISubprogram(name: "iterate;", linkageName: "iterate", scope: !20, file: !20, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, 
retainedNodes: !7) +!20 = !DIFile(filename: "array.jl", directory: ".") +!21 = !DILocation(line: 943, scope: !19, inlinedAt: !22) +!22 = !DILocation(line: 3, scope: !4) +!23 = !{i64 40} +!24 = !{i64 16} +!25 = !{i64 0, i64 9223372036854775807} +!26 = !{!27} +!27 = !{!"jnoalias_const", !28} +!28 = !{!"jnoalias"} +!29 = !{!30, !31, !32, !33} +!30 = !{!"jnoalias_gcframe", !28} +!31 = !{!"jnoalias_stack", !28} +!32 = !{!"jnoalias_data", !28} +!33 = !{!"jnoalias_typemd", !28} +!34 = !DILocation(line: 514, scope: !35, inlinedAt: !37) +!35 = distinct !DISubprogram(name: "<=;", linkageName: "<=", scope: !36, file: !36, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!36 = !DIFile(filename: "int.jl", directory: ".") +!37 = !DILocation(line: 423, scope: !38, inlinedAt: !40) +!38 = distinct !DISubprogram(name: ">=;", linkageName: ">=", scope: !39, file: !39, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!39 = !DIFile(filename: "operators.jl", directory: ".") +!40 = !DILocation(line: 520, scope: !41, inlinedAt: !18) +!41 = distinct !DISubprogram(name: "<;", linkageName: "<", scope: !36, file: !36, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!42 = !DILocation(line: 513, scope: !41, inlinedAt: !40) +!43 = !DILocation(line: 38, scope: !44, inlinedAt: !40) +!44 = distinct !DISubprogram(name: "&;", linkageName: "&", scope: !45, file: !45, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!45 = !DIFile(filename: "bool.jl", directory: ".") +!46 = !DILocation(line: 13, scope: !47, inlinedAt: !18) +!47 = distinct !DISubprogram(name: "getindex;", linkageName: "getindex", scope: !17, file: !17, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!48 = !{!49, !49, i64 0} +!49 = !{!"jtbaa_arraybuf", !50, i64 0} +!50 = !{!"jtbaa_data", !10, i64 0} +!51 = !{!32} +!52 = !{!30, !31, !33, !27} 
+!53 = !DILocation(line: 943, scope: !19, inlinedAt: !54) +!54 = !DILocation(line: 5, scope: !4) +!55 = !DILocation(line: 87, scope: !56, inlinedAt: !57) +!56 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !36, file: !36, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!57 = !DILocation(line: 4, scope: !4) +!58 = !DILocation(line: 86, scope: !59, inlinedAt: !60) +!59 = distinct !DISubprogram(name: "-;", linkageName: "-", scope: !36, file: !36, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !7) +!60 = !DILocation(line: 1068, scope: !59, inlinedAt: !53) +!61 = !DILocation(line: 10, scope: !16, inlinedAt: !53) +!62 = !DILocation(line: 514, scope: !35, inlinedAt: !63) +!63 = !DILocation(line: 423, scope: !38, inlinedAt: !64) +!64 = !DILocation(line: 520, scope: !41, inlinedAt: !53) +!65 = !DILocation(line: 513, scope: !41, inlinedAt: !64) +!66 = !DILocation(line: 38, scope: !44, inlinedAt: !64) +!67 = !DILocation(line: 13, scope: !47, inlinedAt: !53) +!68 = !DILocation(line: 87, scope: !56, inlinedAt: !53) +!69 = !DILocation(line: 6, scope: !4) + +; BEFOREEARLYSIMPLIFICATION: IR Dump Before BeforeEarlySimplification +; AFTEREARLYSIMPLIFICATION: IR Dump Before AfterEarlySimplification +; BEFOREEARLYOPTIMIZATION: IR Dump Before BeforeEarlyOptimization +; AFTEREARLYOPTIMIZATION: IR Dump Before AfterEarlyOptimization +; BEFORELOOPOPTIMIZATION: IR Dump Before BeforeLoopOptimization +; BEFORELICM: IR Dump Before BeforeLICM +; AFTERLICM: IR Dump Before AfterLICM +; BEFORELOOPSIMPLIFICATION: IR Dump Before BeforeLoopSimplification +; AFTERLOOPSIMPLIFICATION: IR Dump Before AfterLoopSimplification +; AFTERLOOPOPTIMIZATION: IR Dump Before AfterLoopOptimization +; BEFORESCALAROPTIMIZATION: IR Dump Before BeforeScalarOptimization +; AFTERSCALAROPTIMIZATION: IR Dump Before AfterScalarOptimization +; BEFOREVECTORIZATION: IR Dump Before BeforeVectorization +; AFTERVECTORIZATION: IR Dump 
Before AfterVectorization +; BEFOREINTRINSICLOWERING: IR Dump Before BeforeIntrinsicLowering +; AFTERINTRINSICLOWERING: IR Dump Before AfterIntrinsicLowering +; BEFORECLEANUP: IR Dump Before BeforeCleanup +; AFTERCLEANUP: IR Dump Before AfterCleanup +; AFTEROPTIMIZATION: IR Dump Before AfterOptimization + +; COM: simplifycfg should have killed this block +; BEFOREOPTIMIZATION: L17.L41_crit_edge: ; preds = %L17 +; BEFOREOPTIMIZATION-NEXT: br label %L41, !dbg !53 + +; BEFOREEARLYSIMPLIFICATION: L17.L41_crit_edge: ; preds = %L17 +; BEFOREEARLYSIMPLIFICATION-NEXT: br label %L41, !dbg !53 + +; AFTEREARLYSIMPLIFICATION-NOT: L17.L41_crit_edge: ; preds = %L17 +; AFTEREARLYSIMPLIFICATION-NOT: br label %L41, !dbg !53 + +; BEFOREEARLYOPTIMIZATION-NOT: L17.L41_crit_edge: ; preds = %L17 +; BEFOREEARLYOPTIMIZATION-NOT: br label %L41, !dbg !53 + + +; COM: InstSimplify/InstCombine should kill this zext-trunc pair +; AFTEREARLYSIMPLIFICATION: [[ZEXT:%.*]] = zext i1 {{%.*}} to i8 +; AFTEREARLYSIMPLIFICATION-NEXT: trunc i8 [[ZEXT]] to i1 + +; BEFOREEARLYOPTIMIZATION: [[ZEXT:%.*]] = zext i1 {{%.*}} to i8 +; BEFOREEARLYOPTIMIZATION-NEXT: trunc i8 [[ZEXT]] to i1 + +; AFTEREARLYOPTIMIZATION-NOT: zext i1 {{%.*}} to i8 +; AFTEREARLYOPTIMIZATION-NOT: trunc i8 {{%.*}} to i1 + +; BEFORELOOPOPTIMIZATION-NOT: zext i1 {{%.*}} to i8 +; BEFORELOOPOPTIMIZATION-NOT: trunc i8 {{%.*}} to i1 + +; COM: Loop simplification makes the exit condition obvious +; AFTERLOOPSIMPLIFICATION: L35.lr.ph: +; AFTERLOOPSIMPLIFICATION-NEXT: add nuw nsw + +; COM: Scalar optimization removes the previous add from the preheader +; AFTERSCALAROPTIMIZATION: L35.preheader: +; AFTERSCALAROPTIMIZATION-NOT: add nuw nsw +; AFTERSCALAROPTIMIZATION-NEXT: br label %L35 + +; COM: Vectorization does stuff +; AFTERVECTORIZATION: vector.body +; AFTERVECTORIZATION: llvm.vector.reduce.add + +; COM: Intrinsics are lowered and cleaned up by the time optimization is finished +; AFTEROPTIMIZATION-NOT: call void @julia.safepoint +; 
AFTEROPTIMIZATION: load volatile i64{{.*}}%safepoint \ No newline at end of file From 0f269668c468ff6a2876221cdb21dfd78defb68c Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Thu, 15 Jun 2023 02:07:56 -0400 Subject: [PATCH 171/290] Make sure Core.Compiler can throw kwarg mismatch errors (#50174) The _new_NamedTuple helper was in a Base-only branch, causing ``` julia> Core.eval(Core.Compiler, quote f(;a=1) = a end) f (generic function with 1 method) julia> Core.Compiler.f(;b=2) ERROR: UndefVarError: `_new_NamedTuple` not defined Stacktrace: [1] macro expansion @ Core.Compiler ./namedtuple.jl:0 [inlined] [2] structdiff(a::@NamedTuple{b::Int64}, b::Type{NamedTuple{(:a,)}}) @ Core.Compiler ./namedtuple.jl:421 [3] top-level scope @ REPL[2]:1 ``` After this change, we have the expected ``` julia> Core.eval(Core.Compiler, quote f(;a=1) = a end) f (generic function with 1 method) julia> Core.Compiler.f(;b=2) ERROR: MethodError: no method matching f(; b::Int64) Closest candidates are: f(; a) got unsupported keyword argument "b" @ Core REPL[13]:1 Stacktrace: [1] kwerr(kw::@NamedTuple{b::Int64}, args::Function) @ Core.Compiler ./error.jl:165 [2] top-level scope @ REPL[14]:1 ``` --- base/namedtuple.jl | 12 ++++++------ test/compiler/AbstractInterpreter.jl | 5 +++++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/base/namedtuple.jl b/base/namedtuple.jl index 5f6bdefbefd75..e489508bc55ea 100644 --- a/base/namedtuple.jl +++ b/base/namedtuple.jl @@ -133,12 +133,6 @@ function NamedTuple{names, T}(nt::NamedTuple) where {names, T <: Tuple} end end -# Like NamedTuple{names, T} as a constructor, but omits the additional -# `convert` call, when the types are known to match the fields -@eval function _new_NamedTuple(T::Type{NamedTuple{NTN, NTT}} where {NTN, NTT}, args::Tuple) - $(Expr(:splatnew, :T, :args)) -end - function NamedTuple{names}(nt::NamedTuple) where {names} if @generated idx = Int[ fieldindex(nt, names[n]) for n in 1:length(names) ] @@ -161,6 +155,12 @@ 
NamedTuple{names, Union{}}(itr::Tuple) where {names} = throw(MethodError(NamedTu end # if Base +# Like NamedTuple{names, T} as a constructor, but omits the additional +# `convert` call, when the types are known to match the fields +@eval function _new_NamedTuple(T::Type{NamedTuple{NTN, NTT}} where {NTN, NTT}, args::Tuple) + $(Expr(:splatnew, :T, :args)) +end + length(t::NamedTuple) = nfields(t) iterate(t::NamedTuple, iter=1) = iter > nfields(t) ? nothing : (getfield(t, iter), iter + 1) rest(t::NamedTuple) = t diff --git a/test/compiler/AbstractInterpreter.jl b/test/compiler/AbstractInterpreter.jl index 0e94d42fa8866..2cac29e76098b 100644 --- a/test/compiler/AbstractInterpreter.jl +++ b/test/compiler/AbstractInterpreter.jl @@ -348,3 +348,8 @@ let NoinlineModule = Module() @test count(iscall((src, inlined_usually)), src.code) == 0 end end + +# Make sure that Core.Compiler has enough NamedTuple infrastructure +# to properly give error messages for basic kwargs... +Core.eval(Core.Compiler, quote f(;a=1) = a end) +@test_throws MethodError Core.Compiler.f(;b=2) From 0b87d95dea7ed3f4678ee440f5a4f881d7d5081e Mon Sep 17 00:00:00 2001 From: Steve Kelly Date: Thu, 15 Jun 2023 18:44:49 +0100 Subject: [PATCH 172/290] Use in-place operations where appropriate (#50119) This changes some uses of reverse/sort/filter to the in-place versions where appropriate. Should provide some minor memory savings. 
Co-authored-by: Lilith Orion Hafner Co-authored-by: Jakob Nybo Nissen --- base/binaryplatforms.jl | 10 +++++----- base/loading.jl | 2 +- base/threadingconstructs.jl | 2 +- stdlib/REPL/src/docview.jl | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/base/binaryplatforms.jl b/base/binaryplatforms.jl index a4935d060b74a..f96887d554af0 100644 --- a/base/binaryplatforms.jl +++ b/base/binaryplatforms.jl @@ -186,7 +186,7 @@ end function Base.show(io::IO, p::Platform) str = string(platform_name(p), " ", arch(p)) # Add on all the other tags not covered by os/arch: - other_tags = sort(collect(filter(kv -> kv[1] ∉ ("os", "arch"), tags(p)))) + other_tags = sort!(filter!(kv -> kv[1] ∉ ("os", "arch"), collect(tags(p)))) if !isempty(other_tags) str = string(str, " {", join([string(k, "=", v) for (k, v) in other_tags], ", "), "}") end @@ -835,7 +835,7 @@ Inspects the current Julia process to determine the libgfortran version this Jul linked against (if any). """ function detect_libgfortran_version() - libgfortran_paths = filter(x -> occursin("libgfortran", x), Libdl.dllist()) + libgfortran_paths = filter!(x -> occursin("libgfortran", x), Libdl.dllist()) if isempty(libgfortran_paths) # One day, I hope to not be linking against libgfortran in base Julia return nothing @@ -865,7 +865,7 @@ it is linked against (if any). `max_minor_version` is the latest version in the 3.4 series of GLIBCXX where the search is performed. """ function detect_libstdcxx_version(max_minor_version::Int=30) - libstdcxx_paths = filter(x -> occursin("libstdc++", x), Libdl.dllist()) + libstdcxx_paths = filter!(x -> occursin("libstdc++", x), Libdl.dllist()) if isempty(libstdcxx_paths) # This can happen if we were built by clang, so we don't link against # libstdc++ at all. @@ -897,7 +897,7 @@ between Julia and LLVM; they must match. """ function detect_cxxstring_abi() # First, if we're not linked against libstdc++, then early-exit because this doesn't matter. 
- libstdcxx_paths = filter(x -> occursin("libstdc++", x), Libdl.dllist()) + libstdcxx_paths = filter!(x -> occursin("libstdc++", x), Libdl.dllist()) if isempty(libstdcxx_paths) # We were probably built by `clang`; we don't link against `libstdc++`` at all. return nothing @@ -1080,7 +1080,7 @@ function select_platform(download_info::Dict, platform::AbstractPlatform = HostP # We prefer these better matches, and secondarily reverse-sort by triplet so # as to generally choose the latest release (e.g. a `libgfortran5` tarball # over a `libgfortran3` tarball). - ps = sort(ps, lt = (a, b) -> begin + sort!(ps, lt = (a, b) -> begin loss_a = match_loss(a, platform) loss_b = match_loss(b, platform) if loss_a != loss_b diff --git a/base/loading.jl b/base/loading.jl index db2a09752a2f1..b345293e3bafc 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -2718,7 +2718,7 @@ end function get_preferences(uuid::Union{UUID,Nothing} = nothing) merged_prefs = Dict{String,Any}() - for env in reverse(load_path()) + for env in reverse!(load_path()) project_toml = env_project_file(env) if !isa(project_toml, String) continue diff --git a/base/threadingconstructs.jl b/base/threadingconstructs.jl index a59c0046dd07b..14368042c33cb 100644 --- a/base/threadingconstructs.jl +++ b/base/threadingconstructs.jl @@ -141,7 +141,7 @@ function threading_run(fun, static) Base._wait(tasks[i]) end ccall(:jl_exit_threaded_region, Cvoid, ()) - failed_tasks = filter(istaskfailed, tasks) + failed_tasks = filter!(istaskfailed, tasks) if !isempty(failed_tasks) throw(CompositeException(map(TaskFailedException, failed_tasks))) end diff --git a/stdlib/REPL/src/docview.jl b/stdlib/REPL/src/docview.jl index db28c84b07cb6..b9797dee910c2 100644 --- a/stdlib/REPL/src/docview.jl +++ b/stdlib/REPL/src/docview.jl @@ -743,7 +743,7 @@ function doc_completions(name, mod::Module=Main) idxs = findall(!isnothing, ms) # avoid messing up the order while inserting - for i in reverse(idxs) + for i in reverse!(idxs) c = 
only((ms[i]::AbstractMatch).captures) insert!(res, i, "$(c)\"\"") end From 9d1ac97e82fbe2245215d71052201124aa7e5c94 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Thu, 15 Jun 2023 14:14:45 -0400 Subject: [PATCH 173/290] Allow unquoted symbols for threadpool in `Threads.@spawn` (#50182) Co-authored-by: Julian Samaroo --- base/threadingconstructs.jl | 32 +++++++++++++++++++++----------- test/threadpool_use.jl | 6 ++++++ 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/base/threadingconstructs.jl b/base/threadingconstructs.jl index 14368042c33cb..0854048e6b96c 100644 --- a/base/threadingconstructs.jl +++ b/base/threadingconstructs.jl @@ -59,11 +59,23 @@ function _nthreads_in_pool(tpid::Int8) end function _tpid_to_sym(tpid::Int8) - return tpid == 0 ? :interactive : :default + if tpid == 0 + return :interactive + elseif tpid == 1 + return :default + else + throw(ArgumentError("Unrecognized threadpool id $tpid")) + end end function _sym_to_tpid(tp::Symbol) - return tp === :interactive ? Int8(0) : Int8(1) + if tp === :interactive + return Int8(0) + elseif tp === :default + return Int8(1) + else + throw(ArgumentError("Unrecognized threadpool name `$(repr(tp))`")) + end end """ @@ -386,20 +398,18 @@ Hello from 4 ``` """ macro spawn(args...) - tp = :default + tp = QuoteNode(:default) na = length(args) if na == 2 ttype, ex = args if ttype isa QuoteNode ttype = ttype.value - elseif ttype isa Symbol - # TODO: allow unquoted symbols - ttype = nothing - end - if ttype === :interactive || ttype === :default - tp = ttype + if ttype !== :interactive && ttype !== :default + throw(ArgumentError("unsupported threadpool in @spawn: $ttype")) + end + tp = QuoteNode(ttype) else - throw(ArgumentError("unsupported threadpool in @spawn: $ttype")) + tp = ttype end elseif na == 1 ex = args[1] @@ -415,7 +425,7 @@ macro spawn(args...) let $(letargs...) 
local task = Task($thunk) task.sticky = false - _spawn_set_thrpool(task, $(QuoteNode(tp))) + _spawn_set_thrpool(task, $(esc(tp))) if $(Expr(:islocal, var)) put!($var, task) end diff --git a/test/threadpool_use.jl b/test/threadpool_use.jl index e5ea5f95cf4ff..7523991fdf6a7 100644 --- a/test/threadpool_use.jl +++ b/test/threadpool_use.jl @@ -9,5 +9,11 @@ using Base.Threads @test fetch(Threads.@spawn Threads.threadpool()) === :default @test fetch(Threads.@spawn :default Threads.threadpool()) === :default @test fetch(Threads.@spawn :interactive Threads.threadpool()) === :interactive +tp = :default +@test fetch(Threads.@spawn tp Threads.threadpool()) === :default +tp = :interactive +@test fetch(Threads.@spawn tp Threads.threadpool()) === :interactive +tp = :foo +@test_throws ArgumentError Threads.@spawn tp Threads.threadpool() @test Threads.threadpooltids(:interactive) == [1] @test Threads.threadpooltids(:default) == [2] From 0aa93e36b92be5551ded9c90e20d835f144efd85 Mon Sep 17 00:00:00 2001 From: Igor Proskurin <105472369+sciprosk@users.noreply.github.com> Date: Thu, 15 Jun 2023 13:56:20 -0700 Subject: [PATCH 174/290] fix missing punctuation in docs (#50126) * fix missing punctuation * Update types.md --------- Co-authored-by: Viral B. Shah --- doc/src/manual/types.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/src/manual/types.md b/doc/src/manual/types.md index 430a006c67788..3510dfe7a7042 100644 --- a/doc/src/manual/types.md +++ b/doc/src/manual/types.md @@ -57,9 +57,9 @@ kinds of programming, however, become clearer, simpler, faster and more robust w The `::` operator can be used to attach type annotations to expressions and variables in programs. There are two primary reasons to do this: -1. As an assertion to help confirm that your program works the way you expect, +1. As an assertion to help confirm that your program works the way you expect, and 2. 
To provide extra type information to the compiler, which can then improve performance in some - cases + cases. When appended to an expression computing a value, the `::` operator is read as "is an instance of". It can be used anywhere to assert that the value of the expression on the left is an instance From 432f300eea6bd65d4575fe9ae0969e6ebbc208c7 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Thu, 15 Jun 2023 18:41:38 -0400 Subject: [PATCH 175/290] Remove xsaves from Zen cpuspec (#50186) --- src/processor_x86.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/processor_x86.cpp b/src/processor_x86.cpp index 30a6ff9b3dede..e129b1239c7df 100644 --- a/src/processor_x86.cpp +++ b/src/processor_x86.cpp @@ -227,8 +227,11 @@ constexpr auto bdver2 = bdver1 | get_feature_masks(f16c, bmi, tbm, fma); constexpr auto bdver3 = bdver2 | get_feature_masks(xsaveopt, fsgsbase); constexpr auto bdver4 = bdver3 | get_feature_masks(avx2, bmi2, mwaitx, movbe, rdrnd); +// technically xsaves is part of znver1, znver2, and znver3 +// Disabled due to Erratum 1386 +// See: https://github.com/JuliaLang/julia/issues/50102 constexpr auto znver1 = haswell | get_feature_masks(adx, aes, clflushopt, clzero, mwaitx, prfchw, - rdseed, sha, sse4a, xsavec, xsaves); + rdseed, sha, sse4a, xsavec); constexpr auto znver2 = znver1 | get_feature_masks(clwb, rdpid, wbnoinvd); constexpr auto znver3 = znver2 | get_feature_masks(shstk, pku, vaes, vpclmulqdq); From 71c51156b1090176afff781ec7457db2e568f2f9 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Fri, 16 Jun 2023 08:47:59 +0200 Subject: [PATCH 176/290] Export jl_method_set_source. (#49236) This makes it possible to experiment with an OpaqueClosure-like API, which uses Method objects, in the GPU stack. 
--- src/method.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/method.c b/src/method.c index ac381994ef23d..06a05361a927d 100644 --- a/src/method.c +++ b/src/method.c @@ -665,7 +665,7 @@ jl_method_instance_t *jl_get_specialized(jl_method_t *m, jl_value_t *types, jl_s return new_linfo; } -static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src) +JL_DLLEXPORT void jl_method_set_source(jl_method_t *m, jl_code_info_t *src) { uint8_t j; uint8_t called = 0; From 631d187b741ae0f391ba1bd067c32382f3122473 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Fri, 16 Jun 2023 10:33:32 -0300 Subject: [PATCH 177/290] codegen: pass the pgcstack as an argument to specsig calls (#50093) The safepoint at function entry made it so that every function call did a relatively expensive load from the PTLS, we can instead pass the PTLS as an argument to functions making it significantly cheaper. Also use the swift calling conventions, that together with the `swiftself` attribute makes it so it's very likely the argument is kept in a register between calls. 
Fixes: https://github.com/JuliaLang/julia/issues/50068 --- base/reflection.jl | 3 ++ src/codegen.cpp | 62 +++++++++++++++++++------ src/julia.h | 1 + src/llvm-ptls.cpp | 13 ++++++ stdlib/InteractiveUtils/src/codeview.jl | 2 +- test/compiler/codegen.jl | 2 +- test/llvmpasses/fastmath.jl | 2 +- test/llvmpasses/llvmcall.jl | 2 +- test/llvmpasses/loopinfo.jl | 34 +++++++------- test/llvmpasses/pipeline-o0.jl | 1 - 10 files changed, 87 insertions(+), 35 deletions(-) diff --git a/base/reflection.jl b/base/reflection.jl index bcfc39d2bd3a8..96b7a832cc575 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -1194,6 +1194,7 @@ struct CodegenParams gnu_pubnames::Cint debug_info_kind::Cint safepoint_on_entry::Cint + gcstack_arg::Cint lookup::Ptr{Cvoid} @@ -1203,6 +1204,7 @@ struct CodegenParams prefer_specsig::Bool=false, gnu_pubnames=true, debug_info_kind::Cint = default_debug_info_kind(), safepoint_on_entry::Bool=true, + gcstack_arg::Bool=true, lookup::Ptr{Cvoid}=unsafe_load(cglobal(:jl_rettype_inferred_addr, Ptr{Cvoid})), generic_context = nothing) return new( @@ -1210,6 +1212,7 @@ struct CodegenParams Cint(prefer_specsig), Cint(gnu_pubnames), debug_info_kind, Cint(safepoint_on_entry), + Cint(gcstack_arg), lookup, generic_context) end end diff --git a/src/codegen.cpp b/src/codegen.cpp index 26304c7350c5c..37281ed3038ec 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1296,6 +1296,7 @@ extern "C" { #endif (int) DICompileUnit::DebugEmissionKind::FullDebug, 1, + 1, jl_rettype_inferred_addr, NULL }; } @@ -1719,7 +1720,7 @@ jl_aliasinfo_t jl_aliasinfo_t::fromTBAA(jl_codectx_t &ctx, MDNode *tbaa) { } static Type *julia_type_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed = NULL); -static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure); +static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, 
jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg); static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval = -1); static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t *s, jl_binding_t **pbnd, bool assign); @@ -4107,7 +4108,8 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos { ++EmittedSpecfunCalls; // emit specialized call site - jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure); + bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); + jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure, gcstack_arg); FunctionType *cft = returninfo.decl.getFunctionType(); *cc = returninfo.cc; *return_roots = returninfo.return_roots; @@ -4141,7 +4143,10 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos argvals[idx] = return_roots; idx++; } - + if (gcstack_arg) { + argvals[idx] = ctx.pgcstack; + idx++; + } for (size_t i = 0; i < nargs; i++) { jl_value_t *jt = jl_nth_slot_type(specTypes, i); // n.b.: specTypes is required to be a datatype by construction for specsig @@ -4205,6 +4210,8 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos } CallInst *call = ctx.builder.CreateCall(cft, TheCallee, argvals); call->setAttributes(returninfo.attrs); + if (gcstack_arg) + call->setCallingConv(CallingConv::Swift); jl_cgval_t retval; switch (returninfo.cc) { @@ -5273,7 +5280,7 @@ static std::pair get_oc_function(jl_codectx_t &ctx, jl_met specF = closure_m.getModuleUnlocked()->getFunction(closure_decls.specFunctionObject); if (specF) { jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, NULL, - closure_decls.specFunctionObject, sigtype, rettype, true); + closure_decls.specFunctionObject, sigtype, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg)); 
specF = cast(returninfo.decl.getCallee()); } } @@ -5786,13 +5793,15 @@ static void emit_cfunc_invalidate( DebugLoc noDbg; ctx.builder.SetCurrentDebugLocation(noDbg); allocate_gc_frame(ctx, b0); - Function::arg_iterator AI = gf_thunk->arg_begin(); SmallVector myargs(nargs); if (cc == jl_returninfo_t::SRet || cc == jl_returninfo_t::Union) ++AI; if (return_roots) ++AI; + if (JL_FEAT_TEST(ctx,gcstack_arg)){ + ++AI; // gcstack_arg + } for (size_t i = 0; i < nargs; i++) { jl_value_t *jt = jl_nth_slot_type(calltype, i); // n.b. specTypes is required to be a datatype by construction for specsig @@ -6258,8 +6267,9 @@ static Function* gen_cfun_wrapper( bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure; assert(calltype == 3); // emit a specsig call + bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); StringRef protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)callptr, codeinst); - jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, protoname, lam->specTypes, astrt, is_opaque_closure); + jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, protoname, lam->specTypes, astrt, is_opaque_closure, gcstack_arg); FunctionType *cft = returninfo.decl.getFunctionType(); jlfunc_sret = (returninfo.cc == jl_returninfo_t::SRet); @@ -6286,6 +6296,8 @@ static Function* gen_cfun_wrapper( AllocaInst *return_roots = emit_static_alloca(ctx, get_returnroots_type(ctx, returninfo.return_roots)); args.push_back(return_roots); } + if (gcstack_arg) + args.push_back(ctx.pgcstack); for (size_t i = 0; i < nargs + 1; i++) { // figure out how to repack the arguments jl_cgval_t &inputarg = inputargs[i]; @@ -6332,11 +6344,15 @@ static Function* gen_cfun_wrapper( emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, lam->specTypes, codeinst->rettype, is_opaque_closure, nargs + 1, ctx.emission_context); theFptr = ctx.builder.CreateSelect(age_ok, theFptr, gf_thunk); } + 
assert(cast(theFptr->getType())->isOpaqueOrPointeeTypeMatches(returninfo.decl.getFunctionType())); CallInst *call = ctx.builder.CreateCall( returninfo.decl.getFunctionType(), theFptr, ArrayRef(args)); call->setAttributes(returninfo.attrs); + if (gcstack_arg) + call->setCallingConv(CallingConv::Swift); + switch (returninfo.cc) { case jl_returninfo_t::Boxed: retval = mark_julia_type(ctx, call, true, astrt); @@ -6710,7 +6726,11 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret args[idx] = return_roots; idx++; } - + bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); + if (gcstack_arg) { + args[idx] = ctx.pgcstack; + idx++; + } bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure; for (size_t i = 0; i < jl_nparams(lam->specTypes) && idx < nfargs; ++i) { jl_value_t *ty = ((i == 0) && is_opaque_closure) ? (jl_value_t*)jl_any_type : @@ -6748,7 +6768,8 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret } CallInst *call = ctx.builder.CreateCall(f.decl, args); call->setAttributes(f.attrs); - + if (gcstack_arg) + call->setCallingConv(CallingConv::Swift); jl_cgval_t retval; if (retarg != -1) { Value *theArg; @@ -6790,7 +6811,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret return w; } -static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure) +static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg) { jl_returninfo_t props = {}; SmallVector fsig; @@ -6875,6 +6896,14 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value fsig.push_back(get_returnroots_type(ctx, props.return_roots)->getPointerTo(0)); } + if (gcstack_arg){ + AttrBuilder param(ctx.builder.getContext()); + 
param.addAttribute(Attribute::SwiftSelf); + param.addAttribute(Attribute::NonNull); + attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param)); + fsig.push_back(PointerType::get(JuliaType::get_ppjlvalue_ty(ctx.builder.getContext()), 0)); + } + for (size_t i = 0; i < jl_nparams(sig); i++) { jl_value_t *jt = jl_tparam(sig, i); bool isboxed = false; @@ -6936,7 +6965,8 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value else fval = emit_bitcast(ctx, fval, ftype->getPointerTo()); } - + if (gcstack_arg && isa(fval)) + cast(fval)->setCallingConv(CallingConv::Swift); props.decl = FunctionCallee(ftype, fval); props.attrs = attributes; return props; @@ -7163,7 +7193,8 @@ static jl_llvm_functions_t Function *f = NULL; bool has_sret = false; if (specsig) { // assumes !va and !needsparams - returninfo = get_specsig_function(ctx, M, NULL, declarations.specFunctionObject, lam->specTypes, jlrettype, ctx.is_opaque_closure); + returninfo = get_specsig_function(ctx, M, NULL, declarations.specFunctionObject, lam->specTypes, + jlrettype, ctx.is_opaque_closure, JL_FEAT_TEST(ctx,gcstack_arg)); f = cast(returninfo.decl.getCallee()); has_sret = (returninfo.cc == jl_returninfo_t::SRet || returninfo.cc == jl_returninfo_t::Union); jl_init_function(f, ctx.emission_context.TargetTriple); @@ -7348,7 +7379,6 @@ static jl_llvm_functions_t ctx.spvals_ptr = &*AI++; } } - // step 6. 
set up GC frame allocate_gc_frame(ctx, b0); Value *last_age = NULL; @@ -7554,6 +7584,12 @@ static jl_llvm_functions_t param.addAlignmentAttr(Align(sizeof(jl_value_t*))); attrs.at(Arg->getArgNo()) = AttributeSet::get(Arg->getContext(), param); // function declaration attributes } + if (specsig && JL_FEAT_TEST(ctx, gcstack_arg)){ + Argument *Arg = &*AI; + ++AI; + AttrBuilder param(ctx.builder.getContext()); + attrs.at(Arg->getArgNo()) = AttributeSet::get(Arg->getContext(), param); + } for (i = 0; i < nreq; i++) { jl_sym_t *s = slot_symbol(ctx, i); jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i); @@ -8564,7 +8600,7 @@ static jl_llvm_functions_t jl_emit_oc_wrapper(orc::ThreadSafeModule &m, jl_codeg jl_llvm_functions_t declarations; declarations.functionObject = "jl_f_opaque_closure_call"; if (uses_specsig(mi->specTypes, false, true, rettype, true)) { - jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, funcName, mi->specTypes, rettype, 1); + jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, funcName, mi->specTypes, rettype, true, JL_FEAT_TEST(ctx,gcstack_arg)); Function *gf_thunk = cast(returninfo.decl.getCallee()); jl_init_function(gf_thunk, ctx.emission_context.TargetTriple); size_t nrealargs = jl_nparams(mi->specTypes); diff --git a/src/julia.h b/src/julia.h index 2140b0ad0ab90..694a8d81b06e9 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2344,6 +2344,7 @@ typedef struct { // limited, standalone int safepoint_on_entry; // Emit a safepoint on entry to each function + int gcstack_arg; // Pass the ptls value as an argument with swiftself // Cache access. Default: jl_rettype_inferred. 
jl_codeinstance_lookup_t lookup; diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp index 84f8d7121ff03..f69078433941f 100644 --- a/src/llvm-ptls.cpp +++ b/src/llvm-ptls.cpp @@ -314,6 +314,19 @@ bool LowerPTLS::run(bool *CFGModified) for (auto it = pgcstack_getter->user_begin(); it != pgcstack_getter->user_end();) { auto call = cast(*it); ++it; + auto f = call->getCaller(); + Value *pgcstack = NULL; + for (Function::arg_iterator arg = f->arg_begin(); arg != f->arg_end();++arg) { + if (arg->hasSwiftSelfAttr()){ + pgcstack = &*arg; + break; + } + } + if (pgcstack) { + call->replaceAllUsesWith(pgcstack); + call->eraseFromParent(); + continue; + } assert(call->getCalledOperand() == pgcstack_getter); fix_pgcstack_use(call, pgcstack_getter, or_new, CFGModified); } diff --git a/stdlib/InteractiveUtils/src/codeview.jl b/stdlib/InteractiveUtils/src/codeview.jl index 9ce5be9706bac..646028575d052 100644 --- a/stdlib/InteractiveUtils/src/codeview.jl +++ b/stdlib/InteractiveUtils/src/codeview.jl @@ -172,7 +172,7 @@ function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrappe raw::Bool, dump_module::Bool, syntax::Symbol, optimize::Bool, debuginfo::Symbol, binary::Bool) params = CodegenParams(debug_info_kind=Cint(0), - safepoint_on_entry=raw) + safepoint_on_entry=raw, gcstack_arg=raw) _dump_function(f, t, native, wrapper, raw, dump_module, syntax, optimize, debuginfo, binary, params) end diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl index c29f82bfd6008..e93ecd232498f 100644 --- a/test/compiler/codegen.jl +++ b/test/compiler/codegen.jl @@ -17,7 +17,7 @@ end # The tests below assume a certain format and safepoint_on_entry=true breaks that. 
function get_llvm(@nospecialize(f), @nospecialize(t), raw=true, dump_module=false, optimize=true) - params = Base.CodegenParams(safepoint_on_entry=false) + params = Base.CodegenParams(safepoint_on_entry=false, gcstack_arg = false) d = InteractiveUtils._dump_function(f, t, false, false, raw, dump_module, :att, optimize, :none, false, params) sprint(print, d) end diff --git a/test/llvmpasses/fastmath.jl b/test/llvmpasses/fastmath.jl index 76b048c19a2a0..7338d1c3ccc5a 100644 --- a/test/llvmpasses/fastmath.jl +++ b/test/llvmpasses/fastmath.jl @@ -14,7 +14,7 @@ include(joinpath("..", "testhelpers", "llvmpasses.jl")) import Base.FastMath -# CHECK: call fast float @llvm.sqrt.f32(float %0) +# CHECK: call fast float @llvm.sqrt.f32(float %{{[0-9]+}}) emit(FastMath.sqrt_fast, Float32) diff --git a/test/llvmpasses/llvmcall.jl b/test/llvmpasses/llvmcall.jl index 687abe0a8cd46..a55201c3e3bc3 100644 --- a/test/llvmpasses/llvmcall.jl +++ b/test/llvmpasses/llvmcall.jl @@ -28,5 +28,5 @@ emit(foo, Core.LLVMPtr{Float32, 3}) # CHECK: call { i32, i32 } @foo({ i32, i32 } %{{[0-9]+}}) emit(foo, Foo) -# CHECK: define <2 x half> @julia_bar_{{[0-9]+}}([2 x half] +# CHECK: define {{(swiftcc )?}}<2 x half> @julia_bar_{{[0-9]+}}( emit(bar, NTuple{2, Float16}) diff --git a/test/llvmpasses/loopinfo.jl b/test/llvmpasses/loopinfo.jl index c970e07f8a125..18661ea6fde67 100644 --- a/test/llvmpasses/loopinfo.jl +++ b/test/llvmpasses/loopinfo.jl @@ -64,10 +64,10 @@ end # CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO3:![0-9]+]] # LOWER-NOT: call void @julia.loopinfo_marker() # LOWER: br {{.*}}, !llvm.loop [[LOOPID3:![0-9]+]] -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL-NOT: call void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration # FINAL: br end end @@ 
-90,17 +90,17 @@ end # CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO4:![0-9]+]] # LOWER-NOT: call void @julia.loopinfo_marker() # LOWER: br {{.*}}, !llvm.loop [[LOOPID4:![0-9]+]] -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL: call void @j_iteration -# FINAL-NOT: call void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration end end @@ -111,8 +111,8 @@ end 1 <= j <= I && continue @show (i,j) iteration(i) -# FINAL: call void @j_iteration -# FINAL-NOT: call void @j_iteration +# FINAL: call {{(swiftcc )?}}void @j_iteration +# FINAL-NOT: call {{(swiftcc )?}}void @j_iteration end $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.disable"),))) end diff --git a/test/llvmpasses/pipeline-o0.jl b/test/llvmpasses/pipeline-o0.jl index 1b5d1df3c9f36..1075d126c59ca 100644 --- a/test/llvmpasses/pipeline-o0.jl +++ b/test/llvmpasses/pipeline-o0.jl @@ -9,7 +9,6 @@ include(joinpath("..", "testhelpers", "llvmpasses.jl")) # CHECK-LABEL: @julia_simple # CHECK-NOT: julia.get_pgcstack -# CHECK: asm # CHECK-NOT: julia.gc_alloc_obj # CHECK: ijl_gc_pool_alloc # COM: we want something vaguely along the lines of asm load from the fs register -> allocate bytes From c5b0a6cbf54e0a70ecf537984bd6063a1135b04d Mon Sep 17 00:00:00 2001 
From: Ian Butterworth Date: Fri, 16 Jun 2023 16:32:46 -0400 Subject: [PATCH 178/290] pidlock cache file precompilation (#49052) --- base/loading.jl | 42 +++++++++++++++++++++++-- stdlib/FileWatching/docs/src/index.md | 1 + stdlib/FileWatching/src/FileWatching.jl | 10 ++++-- stdlib/FileWatching/src/pidfile.jl | 32 +++++++++++++++++-- stdlib/FileWatching/test/pidfile.jl | 8 ++--- 5 files changed, 82 insertions(+), 11 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index b345293e3bafc..f5c7aa28395ef 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1902,8 +1902,17 @@ function _require(pkg::PkgId, env=nothing) @goto load_from_cache end # spawn off a new incremental pre-compile task for recursive `require` calls - cachefile = compilecache(pkg, path) - if isa(cachefile, Exception) + cachefile_or_module = maybe_cachefile_lock(pkg, path) do + # double-check now that we have lock + m = _require_search_from_serialized(pkg, path, UInt128(0)) + m isa Module && return m + compilecache(pkg, path) + end + cachefile_or_module isa Module && return cachefile_or_module::Module + cachefile = cachefile_or_module + if isnothing(cachefile) # maybe_cachefile_lock returns nothing if it had to wait for another process + @goto load_from_cache # the new cachefile will have the newest mtime so will come first in the search + elseif isa(cachefile, Exception) if precompilableerror(cachefile) verbosity = isinteractive() ? CoreLogging.Info : CoreLogging.Debug @logmsg verbosity "Skipping precompilation since __precompile__(false). Importing $pkg." 
@@ -2805,6 +2814,35 @@ function show(io::IO, cf::CacheFlags) print(io, ", opt_level = ", cf.opt_level) end +# Set by FileWatching.__init__() +global mkpidlock_hook +global trymkpidlock_hook +global parse_pidfile_hook + +# allows processes to wait if another process is precompiling a given source already +function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String) + if @isdefined(mkpidlock_hook) && @isdefined(trymkpidlock_hook) && @isdefined(parse_pidfile_hook) + pidfile = string(srcpath, ".pidlock") + cachefile = invokelatest(trymkpidlock_hook, f, pidfile) + if cachefile === false + pid, hostname, age = invokelatest(parse_pidfile_hook, pidfile) + verbosity = isinteractive() ? CoreLogging.Info : CoreLogging.Debug + if isempty(hostname) || hostname == gethostname() + @logmsg verbosity "Waiting for another process (pid: $pid) to finish precompiling $pkg" + else + @logmsg verbosity "Waiting for another machine (hostname: $hostname, pid: $pid) to finish precompiling $pkg" + end + # wait until the lock is available, but don't actually acquire it + # returning nothing indicates a process waited for another + return invokelatest(mkpidlock_hook, Returns(nothing), pidfile) + end + return cachefile + else + # for packages loaded before FileWatching.__init__() + f() + end +end + # returns true if it "cachefile.ji" is stale relative to "modpath.jl" and build_id for modkey # otherwise returns the list of dependencies to also check @constprop :none function stale_cachefile(modpath::String, cachefile::String; ignore_loaded::Bool = false) diff --git a/stdlib/FileWatching/docs/src/index.md b/stdlib/FileWatching/docs/src/index.md index 6c332511f578f..a420d49232345 100644 --- a/stdlib/FileWatching/docs/src/index.md +++ b/stdlib/FileWatching/docs/src/index.md @@ -20,6 +20,7 @@ A simple utility tool for creating advisory pidfiles (lock files). 
```@docs mkpidlock +trymkpidlock close(lock::LockMonitor) ``` diff --git a/stdlib/FileWatching/src/FileWatching.jl b/stdlib/FileWatching/src/FileWatching.jl index 17ae24460db6b..2a654547ae6e3 100644 --- a/stdlib/FileWatching/src/FileWatching.jl +++ b/stdlib/FileWatching/src/FileWatching.jl @@ -18,7 +18,8 @@ export PollingFileWatcher, FDWatcher, # pidfile: - mkpidlock + mkpidlock, + trymkpidlock import Base: @handle_as, wait, close, eventloop, notify_error, IOError, _sizeof_uv_poll, _sizeof_uv_fs_poll, _sizeof_uv_fs_event, _uv_hook_close, uv_error, _UVError, @@ -462,6 +463,11 @@ function __init__() global uv_jl_fspollcb = @cfunction(uv_fspollcb, Cvoid, (Ptr{Cvoid}, Cint, Ptr{Cvoid}, Ptr{Cvoid})) global uv_jl_fseventscb_file = @cfunction(uv_fseventscb_file, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32)) global uv_jl_fseventscb_folder = @cfunction(uv_fseventscb_folder, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32)) + + Base.mkpidlock_hook = mkpidlock + Base.trymkpidlock_hook = trymkpidlock + Base.parse_pidfile_hook = Pidfile.parse_pidfile + nothing end @@ -885,6 +891,6 @@ function poll_file(s::AbstractString, interval_seconds::Real=5.007, timeout_s::R end include("pidfile.jl") -import .Pidfile: mkpidlock +import .Pidfile: mkpidlock, trymkpidlock end diff --git a/stdlib/FileWatching/src/pidfile.jl b/stdlib/FileWatching/src/pidfile.jl index b78f7ef070018..6d40414e20db2 100644 --- a/stdlib/FileWatching/src/pidfile.jl +++ b/stdlib/FileWatching/src/pidfile.jl @@ -1,7 +1,7 @@ module Pidfile -export mkpidlock +export mkpidlock, trymkpidlock using Base: IOError, UV_EEXIST, UV_ESRCH, @@ -41,6 +41,16 @@ Optional keyword arguments: """ function mkpidlock end +""" + trymkpidlock([f::Function], at::String, [pid::Cint, proc::Process]; kwopts...) + +Like `mkpidlock` except returns `false` instead of waiting if the file is already locked. + +!!! compat "Julia 1.10" + This function requires at least Julia 1.10. 
+ +""" +function trymkpidlock end # mutable only because we want to add a finalizer mutable struct LockMonitor @@ -95,6 +105,18 @@ function mkpidlock(at::String, proc::Process; kwopts...) return lock end +function trymkpidlock(args...; kwargs...) + try + mkpidlock(args...; kwargs..., wait=false) + catch ex + if ex isa PidlockedError + return false + else + rethrow() + end + end +end + """ Base.touch(::Pidfile.LockMonitor) @@ -192,8 +214,12 @@ function tryopen_exclusive(path::String, mode::Integer = 0o444) return nothing end +struct PidlockedError <: Exception + msg::AbstractString +end + """ - open_exclusive(path::String; mode, poll_interval, stale_age) :: File + open_exclusive(path::String; mode, poll_interval, wait, stale_age) :: File Create a new a file for read-write advisory-exclusive access. If `wait` is `false` then error out if the lock files exist @@ -218,7 +244,7 @@ function open_exclusive(path::String; file = tryopen_exclusive(path, mode) end if file === nothing - error("Failed to get pidfile lock for $(repr(path)).") + throw(PidlockedError("Failed to get pidfile lock for $(repr(path)).")) else return file end diff --git a/stdlib/FileWatching/test/pidfile.jl b/stdlib/FileWatching/test/pidfile.jl index 94621f6af78e3..c2cb0c88a1b1e 100644 --- a/stdlib/FileWatching/test/pidfile.jl +++ b/stdlib/FileWatching/test/pidfile.jl @@ -180,14 +180,14 @@ end Base.errormonitor(rmtask) t1 = time() - @test_throws ErrorException open_exclusive("pidfile", wait=false) + @test_throws Pidfile.PidlockedError open_exclusive("pidfile", wait=false) @test time()-t1 ≈ 0 atol=1 sleep(1) @test !deleted t1 = time() - @test_throws ErrorException open_exclusive("pidfile", wait=false) + @test_throws Pidfile.PidlockedError open_exclusive("pidfile", wait=false) @test time()-t1 ≈ 0 atol=1 wait(rmtask) @@ -246,7 +246,7 @@ end Base.errormonitor(waittask) # mkpidlock with no waiting - t = @elapsed @test_throws ErrorException mkpidlock("pidfile", wait=false) + t = @elapsed @test_throws 
Pidfile.PidlockedError mkpidlock("pidfile", wait=false) @test t ≈ 0 atol=1 t = @elapsed lockf1 = mkpidlock(joinpath(dir, "pidfile")) @@ -354,7 +354,7 @@ end @test lockf.update === nothing sleep(1) - t = @elapsed @test_throws ErrorException mkpidlock("pidfile-2", wait=false, stale_age=1, poll_interval=1, refresh=0) + t = @elapsed @test_throws Pidfile.PidlockedError mkpidlock("pidfile-2", wait=false, stale_age=1, poll_interval=1, refresh=0) @test t ≈ 0 atol=1 sleep(5) From ba251e8d551dfa0b4d8a86932e06b4d1162d8b9f Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Fri, 16 Jun 2023 15:49:24 -0500 Subject: [PATCH 179/290] Fix sorting bugs (esp `MissingOptimization`) that come up when using SortingAlgorithms.TimSort (#50171) --- base/sort.jl | 28 ++++++++++++++-------------- test/sorting.jl | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 14 deletions(-) diff --git a/base/sort.jl b/base/sort.jl index 99f2ed3e1aeb8..90f8755d3b1a4 100644 --- a/base/sort.jl +++ b/base/sort.jl @@ -44,6 +44,7 @@ export # not exported by Base SMALL_ALGORITHM, SMALL_THRESHOLD +abstract type Algorithm end ## functions requiring only ordering ## @@ -436,7 +437,7 @@ for (sym, exp, type) in [ (:mn, :(throw(ArgumentError("mn is needed but has not been computed"))), :(eltype(v))), (:mx, :(throw(ArgumentError("mx is needed but has not been computed"))), :(eltype(v))), (:scratch, nothing, :(Union{Nothing, Vector})), # could have different eltype - (:allow_legacy_dispatch, true, Bool)] + (:legacy_dispatch_entry, nothing, Union{Nothing, Algorithm})] usym = Symbol(:_, sym) @eval function $usym(v, o, kw) # using missing instead of nothing because scratch could === nothing. @@ -499,8 +500,6 @@ internal or recursive calls. """ function _sort! 
end -abstract type Algorithm end - """ MissingOptimization(next) <: Algorithm @@ -524,12 +523,12 @@ struct WithoutMissingVector{T, U} <: AbstractVector{T} new{nonmissingtype(eltype(data)), typeof(data)}(data) end end -Base.@propagate_inbounds function Base.getindex(v::WithoutMissingVector, i) +Base.@propagate_inbounds function Base.getindex(v::WithoutMissingVector, i::Integer) out = v.data[i] @assert !(out isa Missing) out::eltype(v) end -Base.@propagate_inbounds function Base.setindex!(v::WithoutMissingVector, x, i) +Base.@propagate_inbounds function Base.setindex!(v::WithoutMissingVector, x, i::Integer) v.data[i] = x v end @@ -590,8 +589,9 @@ function _sort!(v::AbstractVector, a::MissingOptimization, o::Ordering, kw) # we can assume v is equal to eachindex(o.data) which allows a copying partition # without allocations. lo_i, hi_i = lo, hi - for i in eachindex(o.data) # equal to copy(v) - x = o.data[i] + cv = eachindex(o.data) # equal to copy(v) + for i in lo:hi + x = o.data[cv[i]] if ismissing(x) == (o.order == Reverse) # should x go at the beginning/end? v[lo_i] = i lo_i += 1 @@ -2149,25 +2149,25 @@ end # Support 3-, 5-, and 6-argument versions of sort! 
for calling into the internals in the old way sort!(v::AbstractVector, a::Algorithm, o::Ordering) = sort!(v, firstindex(v), lastindex(v), a, o) function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering) - _sort!(v, a, o, (; lo, hi, allow_legacy_dispatch=false)) + _sort!(v, a, o, (; lo, hi, legacy_dispatch_entry=a)) v end sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering, _) = sort!(v, lo, hi, a, o) function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::Algorithm, o::Ordering, scratch::Vector) - _sort!(v, a, o, (; lo, hi, scratch, allow_legacy_dispatch=false)) + _sort!(v, a, o, (; lo, hi, scratch, legacy_dispatch_entry=a)) v end # Support dispatch on custom algorithms in the old way # sort!(::AbstractVector, ::Integer, ::Integer, ::MyCustomAlgorithm, ::Ordering) = ... function _sort!(v::AbstractVector, a::Algorithm, o::Ordering, kw) - @getkw lo hi scratch allow_legacy_dispatch - if allow_legacy_dispatch + @getkw lo hi scratch legacy_dispatch_entry + if legacy_dispatch_entry === a + # This error prevents infinite recursion for unknown algorithms + throw(ArgumentError("Base.Sort._sort!(::$(typeof(v)), ::$(typeof(a)), ::$(typeof(o)), ::Any) is not defined")) + else sort!(v, lo, hi, a, o) scratch - else - # This error prevents infinite recursion for unknown algorithms - throw(ArgumentError("Base.Sort._sort!(::$(typeof(v)), ::$(typeof(a)), ::$(typeof(o))) is not defined")) end end diff --git a/test/sorting.jl b/test/sorting.jl index cf98182307088..147a70a5db7d9 100644 --- a/test/sorting.jl +++ b/test/sorting.jl @@ -1025,6 +1025,46 @@ Base.similar(A::MyArray49392, ::Type{T}, dims::Dims{N}) where {T, N} = MyArray49 @test all(sort!(y, dims=2) .== sort!(x,dims=2)) end +@testset "MissingOptimization fastpath for Perm ordering when lo:hi ≠ eachindex(v)" begin + v = [rand() < .5 ? 
missing : rand() for _ in 1:100] + ix = collect(1:100) + sort!(ix, 1, 10, Base.Sort.DEFAULT_STABLE, Base.Order.Perm(Base.Order.Forward, v)) + @test issorted(v[ix[1:10]]) +end + +struct NonScalarIndexingOfWithoutMissingVectorAlg <: Base.Sort.Algorithm end +function Base.Sort._sort!(v::AbstractVector, ::NonScalarIndexingOfWithoutMissingVectorAlg, o::Base.Order.Ordering, kw) + Base.Sort.@getkw lo hi + first_half = v[lo:lo+(hi-lo)÷2] + second_half = v[lo+(hi-lo)÷2+1:hi] + whole = v[lo:hi] + all(vcat(first_half, second_half) .=== whole) || error() + out = Base.Sort._sort!(whole, Base.Sort.DEFAULT_STABLE, o, (;kw..., lo=1, hi=length(whole))) + v[lo:hi] .= whole + out +end + +@testset "Non-scaler indexing of WithoutMissingVector" begin + @testset "Unit test" begin + wmv = Base.Sort.WithoutMissingVector(Union{Missing, Int}[1, 7, 2, 9]) + @test wmv[[1, 3]] == [1, 2] + @test wmv[1:3] == [1, 7, 2] + end + @testset "End to end" begin + alg = Base.Sort.InitialOptimizations(NonScalarIndexingOfWithoutMissingVectorAlg()) + @test issorted(sort(rand(100); alg)) + @test issorted(sort([rand() < .5 ? missing : randstring() for _ in 1:100]; alg)) + end +end + +struct DispatchLoopTestAlg <: Base.Sort.Algorithm end +function Base.sort!(v::AbstractVector, lo::Integer, hi::Integer, ::DispatchLoopTestAlg, order::Base.Order.Ordering) + sort!(view(v, lo:hi); order) +end +@testset "Support dispatch from the old style to the new style and back" begin + @test issorted(sort!(rand(100), Base.Sort.InitialOptimizations(DispatchLoopTestAlg()), Base.Order.Forward)) +end + # This testset is at the end of the file because it is slow. 
@testset "searchsorted" begin numTypes = [ Int8, Int16, Int32, Int64, Int128, From 1983b8d734bfbd9a95a51d49771ae520ab001749 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= Date: Mon, 12 Jun 2023 20:46:41 +0100 Subject: [PATCH 180/290] Make `deps/llvm.mk` `USECCACHE`-friendly --- deps/llvm.mk | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/deps/llvm.mk b/deps/llvm.mk index 83b9a66ec608e..d40cc44834643 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -73,6 +73,15 @@ LLVM_CPPFLAGS := LLVM_LDFLAGS := LLVM_CMAKE := +ifeq ($(USECCACHE), 1) +# When USECCACHE is set to 1 we can't use `CC` and `CXX` as compilers because they include +# `ccache` at the beginning and CMake would think that's the actual compiler, but we can use +# `CC_ARG`/`CXX_ARG` in their place. +LLVM_CMAKE += -DCMAKE_C_COMPILER_LAUNCHER=ccache +LLVM_CMAKE += -DCMAKE_C_COMPILER="$(CC_ARG)" +LLVM_CMAKE += -DCMAKE_CXX_COMPILER_LAUNCHER=ccache +LLVM_CMAKE += -DCMAKE_CXX_COMPILER="$(CXX_ARG)" +endif LLVM_CMAKE += -DLLVM_ENABLE_PROJECTS="$(LLVM_ENABLE_PROJECTS)" LLVM_CMAKE += -DLLVM_EXTERNAL_PROJECTS="$(LLVM_EXTERNAL_PROJECTS)" LLVM_CMAKE += -DLLVM_ENABLE_RUNTIMES="$(LLVM_ENABLE_RUNTIMES)" From b1aad4d9edcebdeffba49a5601bf31f0d970c4f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= Date: Fri, 16 Jun 2023 22:47:50 +0100 Subject: [PATCH 181/290] Deal with `USECCACHE` in `CMAKE_COMMON` --- deps/llvm.mk | 9 --------- deps/tools/common.mk | 24 ++++++++++++++++++------ 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/deps/llvm.mk b/deps/llvm.mk index d40cc44834643..83b9a66ec608e 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -73,15 +73,6 @@ LLVM_CPPFLAGS := LLVM_LDFLAGS := LLVM_CMAKE := -ifeq ($(USECCACHE), 1) -# When USECCACHE is set to 1 we can't use `CC` and `CXX` as compilers because they include -# `ccache` at the beginning and CMake would think that's the actual compiler, but we can use -# `CC_ARG`/`CXX_ARG` in their place. 
-LLVM_CMAKE += -DCMAKE_C_COMPILER_LAUNCHER=ccache -LLVM_CMAKE += -DCMAKE_C_COMPILER="$(CC_ARG)" -LLVM_CMAKE += -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -LLVM_CMAKE += -DCMAKE_CXX_COMPILER="$(CXX_ARG)" -endif LLVM_CMAKE += -DLLVM_ENABLE_PROJECTS="$(LLVM_ENABLE_PROJECTS)" LLVM_CMAKE += -DLLVM_EXTERNAL_PROJECTS="$(LLVM_EXTERNAL_PROJECTS)" LLVM_CMAKE += -DLLVM_ENABLE_RUNTIMES="$(LLVM_ENABLE_RUNTIMES)" diff --git a/deps/tools/common.mk b/deps/tools/common.mk index c19886114c14e..be87e5585e67d 100644 --- a/deps/tools/common.mk +++ b/deps/tools/common.mk @@ -15,9 +15,6 @@ CONFIGURE_COMMON += LDFLAGS="$(LDFLAGS) $(RPATH_ESCAPED_ORIGIN) $(SANITIZE_LDFLA endif CONFIGURE_COMMON += F77="$(FC)" CC="$(CC) $(SANITIZE_OPTS)" CXX="$(CXX) $(SANITIZE_OPTS)" LD="$(LD)" -CMAKE_CC_ARG := $(CC_ARG) -CMAKE_CXX_ARG := $(CXX_ARG) - CMAKE_COMMON := -DCMAKE_INSTALL_PREFIX:PATH=$(build_prefix) -DCMAKE_PREFIX_PATH=$(build_prefix) CMAKE_COMMON += -DLIB_INSTALL_DIR=$(build_shlibdir) ifeq ($(OS), Darwin) @@ -27,12 +24,27 @@ endif ifneq ($(VERBOSE), 0) CMAKE_COMMON += -DCMAKE_VERBOSE_MAKEFILE=ON endif -# The call to which here is to work around https://cmake.org/Bug/view.php?id=14366 -CMAKE_COMMON += -DCMAKE_C_COMPILER="$$(which $(CC_BASE))" + +# The calls to `which` are to work around https://cmake.org/Bug/view.php?id=14366 +ifeq ($(USECCACHE), 1) +# `ccache` must be used as compiler launcher, not compiler itself. 
+CMAKE_COMMON += -DCMAKE_C_COMPILER_LAUNCHER=ccache +CMAKE_COMMON += -DCMAKE_CXX_COMPILER_LAUNCHER=ccache +CMAKE_CC := "$$(which $(shell echo $(CC_ARG) | cut -d' ' -f1))" +CMAKE_CXX := "$$(which $(shell echo $(CXX_ARG) | cut -d' ' -f1))" +CMAKE_CC_ARG := $(shell echo $(CC_ARG) | cut -d' ' -f2-) +CMAKE_CXX_ARG := $(shell echo $(CXX_ARG) | cut -d' ' -f2-) +else +CMAKE_CC := "$$(which $(CC_BASE))" +CMAKE_CXX := "$$(which $(CXX_BASE))" +CMAKE_CC_ARG := $(CC_ARG) +CMAKE_CXX_ARG := $(CXX_ARG) +endif +CMAKE_COMMON += -DCMAKE_C_COMPILER=$(CMAKE_CC) ifneq ($(strip $(CMAKE_CC_ARG)),) CMAKE_COMMON += -DCMAKE_C_COMPILER_ARG1="$(CMAKE_CC_ARG) $(SANITIZE_OPTS)" endif -CMAKE_COMMON += -DCMAKE_CXX_COMPILER="$(CXX_BASE)" +CMAKE_COMMON += -DCMAKE_CXX_COMPILER=$(CMAKE_CXX) ifneq ($(strip $(CMAKE_CXX_ARG)),) CMAKE_COMMON += -DCMAKE_CXX_COMPILER_ARG1="$(CMAKE_CXX_ARG) $(SANITIZE_OPTS)" endif From f0881ef625712727d44ac9645b6cb66660d6cb70 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Fri, 16 Jun 2023 22:38:00 +0000 Subject: [PATCH 182/290] Pass through world age for kwargs MethodError Fixes #50200 --- base/errorshow.jl | 2 +- test/errorshow.jl | 27 ++++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/base/errorshow.jl b/base/errorshow.jl index 176cae4b5251a..4d32743f1af52 100644 --- a/base/errorshow.jl +++ b/base/errorshow.jl @@ -243,7 +243,7 @@ function showerror(io::IO, ex::MethodError) ft = typeof(f) arg_types_param = arg_types_param[3:end] kwargs = pairs(ex.args[1]) - ex = MethodError(f, ex.args[3:end::Int]) + ex = MethodError(f, ex.args[3:end::Int], ex.world) end name = ft.name.mt.name if f === Base.convert && length(arg_types_param) == 2 && !is_arg_types diff --git a/test/errorshow.jl b/test/errorshow.jl index 9be3e675cede3..404b8e6843a83 100644 --- a/test/errorshow.jl +++ b/test/errorshow.jl @@ -578,7 +578,7 @@ let end end -@testset "show for manually thrown MethodError" begin +@testset "show for MethodError with world age issue" begin 
global f21006 f21006() = nothing @@ -620,6 +620,31 @@ end end end +# Issue #50200 +using Base.Experimental: @opaque +@testset "show for MethodError with world age issue (kwarg)" begin + test_no_error(f) = @test f() === nothing + function test_worldage_error(f) + ex = try; f(); error("Should not have been reached") catch ex; ex; end + @test occursin("The applicable method may be too new", sprint(Base.showerror, ex)) + end + + global callback50200 + + # First the no-kwargs version + callback50200 = (args...)->nothing + f = @opaque ()->callback50200() + test_no_error(f) + callback50200 = (args...)->nothing + test_worldage_error(f) + + callback50200 = (args...; kwargs...)->nothing + f = @opaque ()->callback50200(;a=1) + test_no_error(f) + callback50200 = (args...; kwargs...)->nothing + test_worldage_error(f) +end + # Custom hints struct HasNoOne end function recommend_oneunit(io, ex, arg_types, kwargs) From 18dd7a2d282a36cd672ec48b59bc7b0a959028c2 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Fri, 16 Jun 2023 22:50:28 +0000 Subject: [PATCH 183/290] Don't color Any... red if the method actually matched Fixes the drive-by observation I made in #50200. --- base/errorshow.jl | 6 +++++- test/errorshow.jl | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/base/errorshow.jl b/base/errorshow.jl index 4d32743f1af52..24bd3a37d5298 100644 --- a/base/errorshow.jl +++ b/base/errorshow.jl @@ -490,7 +490,11 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=() if !((min(length(t_i), length(sig)) == 0) && k==1) print(iob, ", ") end - if get(io, :color, false)::Bool + if k == 1 && Base.isvarargtype(sigtype) + # There wasn't actually a mismatch - the method match failed for + # some other reason, e.g. world age. Just print the sigstr. + print(iob, sigstr...) + elseif get(io, :color, false)::Bool let sigstr=sigstr Base.with_output_color(Base.error_color(), iob) do iob print(iob, "::", sigstr...) 
diff --git a/test/errorshow.jl b/test/errorshow.jl index 404b8e6843a83..28ae3fd32365a 100644 --- a/test/errorshow.jl +++ b/test/errorshow.jl @@ -627,6 +627,7 @@ using Base.Experimental: @opaque function test_worldage_error(f) ex = try; f(); error("Should not have been reached") catch ex; ex; end @test occursin("The applicable method may be too new", sprint(Base.showerror, ex)) + @test !occursin("!Matched::", sprint(Base.showerror, ex)) end global callback50200 From fd1cec2de1a34d49782f3a1b8628bfa6f45a0500 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Sat, 17 Jun 2023 01:03:37 -0400 Subject: [PATCH 184/290] Improve effects for Base.fieldindex (#50199) Split out the error path into a function with separate effects assumptions, so that constant propagation on `err` can conclude that the `err=false` case does not throw. Fixes #50198. --- base/reflection.jl | 12 +++++++++++- test/compiler/effects.jl | 5 +++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/base/reflection.jl b/base/reflection.jl index 96b7a832cc575..5268beddeb8eb 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -820,9 +820,19 @@ julia> Base.fieldindex(Foo, :z, false) ``` """ function fieldindex(T::DataType, name::Symbol, err::Bool=true) + return err ? 
_fieldindex_maythrow(T, name) : _fieldindex_nothrow(T, name) +end + +function _fieldindex_maythrow(T::DataType, name::Symbol) @_foldable_meta @noinline - return Int(ccall(:jl_field_index, Cint, (Any, Any, Cint), T, name, err)+1) + return Int(ccall(:jl_field_index, Cint, (Any, Any, Cint), T, name, true)+1) +end + +function _fieldindex_nothrow(T::DataType, name::Symbol) + @_total_meta + @noinline + return Int(ccall(:jl_field_index, Cint, (Any, Any, Cint), T, name, false)+1) end function fieldindex(t::UnionAll, name::Symbol, err::Bool=true) diff --git a/test/compiler/effects.jl b/test/compiler/effects.jl index 99e788c0cff12..65719f4a5f27d 100644 --- a/test/compiler/effects.jl +++ b/test/compiler/effects.jl @@ -988,3 +988,8 @@ isassigned_effects(s) = isassigned(Ref(s)) @test fully_eliminated(; retval=true) do isassigned_effects(:foo) end + +# Effects of Base.hasfield (#50198) +hf50198(s) = hasfield(typeof((;x=1, y=2)), s) +f50198() = (hf50198(Ref(:x)[]); nothing) +@test fully_eliminated(f50198) From b6902aeddc3bfba887c734d87012d56d49e2d710 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 25 May 2023 07:10:58 +1000 Subject: [PATCH 185/290] Make incomplete_tag extensible This allows `incomplete_tag` to work when Expr(:incomplete) holds a Meta.ParseError as its child rather than a plain string, as it will when JuliaSyntax is enabled. 
--- base/client.jl | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/base/client.jl b/base/client.jl index dd529dad5281e..6e30c9991e45e 100644 --- a/base/client.jl +++ b/base/client.jl @@ -202,10 +202,7 @@ parse_input_line(s::AbstractString) = parse_input_line(String(s)) # detect the reason which caused an :incomplete expression # from the error message # NOTE: the error messages are defined in src/julia-parser.scm -incomplete_tag(ex) = :none -function incomplete_tag(ex::Expr) - Meta.isexpr(ex, :incomplete) || return :none - msg = ex.args[1] +function fl_incomplete_tag(msg::AbstractString) occursin("string", msg) && return :string occursin("comment", msg) && return :comment occursin("requires end", msg) && return :block @@ -214,6 +211,20 @@ function incomplete_tag(ex::Expr) return :other end +incomplete_tag(ex) = :none +function incomplete_tag(ex::Expr) + if ex.head !== :incomplete + return :none + elseif isempty(ex.args) + return :other + elseif ex.args[1] isa String + return fl_incomplete_tag(ex.args[1]) + else + return incomplete_tag(ex.args[1]) + end +end +incomplete_tag(exc::Meta.ParseError) = incomplete_tag(exc.detail) + function exec_options(opts) quiet = (opts.quiet != 0) startup = (opts.startupfile != 2) From 9e7bb1290999b1170a4977785743220b26a81c92 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 25 May 2023 07:11:01 +1000 Subject: [PATCH 186/290] Add `Meta.ParseError` detail field Here we add a `detail` field to `Meta.ParseError`, but retain the `msg::String` field for compatibility. `showerror(::ParseError)` defers to the `detail` field if it's present. This allows us to still throw `Meta.ParseError` from `Meta.parse` for compatibility, but allow more expressivity when `detail` is set to an exception type like `JuliaSyntax.ParseError`. 
--- base/errorshow.jl | 7 +++++++ base/meta.jl | 3 +++ 2 files changed, 10 insertions(+) diff --git a/base/errorshow.jl b/base/errorshow.jl index 176cae4b5251a..ca583cfe071b3 100644 --- a/base/errorshow.jl +++ b/base/errorshow.jl @@ -35,6 +35,13 @@ show_index(io::IO, x::LogicalIndex) = summary(io, x.mask) show_index(io::IO, x::OneTo) = print(io, "1:", x.stop) show_index(io::IO, x::Colon) = print(io, ':') +function showerror(io::IO, ex::Meta.ParseError) + if isnothing(ex.detail) + print(io, "ParseError(", repr(ex.msg), ")") + else + showerror(io, ex.detail) + end +end function showerror(io::IO, ex::BoundsError) print(io, "BoundsError") diff --git a/base/meta.jl b/base/meta.jl index b0e0dc371b26c..5dba11ac442eb 100644 --- a/base/meta.jl +++ b/base/meta.jl @@ -187,8 +187,11 @@ expression. """ struct ParseError <: Exception msg::String + detail::Any end +ParseError(msg::AbstractString) = ParseError(msg, nothing) + function _parse_string(text::AbstractString, filename::AbstractString, lineno::Integer, index::Integer, options) if index < 1 || index > ncodeunits(text) + 1 From 1a6cd971df3752f7f90b69f0c61104a3a19ea5ad Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 25 May 2023 09:13:33 +1000 Subject: [PATCH 187/290] Only wrap Strings in Meta.ParseError String errors come from the flisp parser as Expr(:error). But other than that, allow the parser library to choose its own error type. --- base/meta.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/base/meta.jl b/base/meta.jl index 5dba11ac442eb..5d1cfe9c4a1a6 100644 --- a/base/meta.jl +++ b/base/meta.jl @@ -236,7 +236,11 @@ function parse(str::AbstractString, pos::Integer; greedy::Bool=true, raise::Bool depwarn::Bool=true) ex, pos = _parse_string(str, "none", 1, pos, greedy ? 
:statement : :atom) if raise && isa(ex,Expr) && ex.head === :error - throw(ParseError(ex.args[1])) + err = ex.args[1] + if err isa String + err = ParseError(err) # For flisp parser + end + throw(err) end return ex, pos end From 8caba95bb02656b7774c00c5e419d08845ad9b70 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 27 May 2023 06:33:00 +1000 Subject: [PATCH 188/290] Show top level location for any `exc` in `eval(Expr(:error, exc))` Previously this only worked when `exc` was a `String`. --- src/toplevel.c | 21 +++++++++++++++------ test/backtrace.jl | 9 ++++++++- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/toplevel.c b/src/toplevel.c index cf0104879a7b0..51ff93488426f 100644 --- a/src/toplevel.c +++ b/src/toplevel.c @@ -656,19 +656,28 @@ static void check_macro_rename(jl_sym_t *from, jl_sym_t *to, const char *keyword jl_errorf("cannot rename non-macro \"%s\" to macro \"%s\" in \"%s\"", n1, n2, keyword); } -// Format msg and eval `throw(ErrorException(msg)))` in module `m`. -// Used in `jl_toplevel_eval_flex` instead of `jl_errorf` so that the error +// Eval `throw(ErrorException(msg)))` in module `m`. +// Used in `jl_toplevel_eval_flex` instead of `jl_throw` so that the error // location in julia code gets into the backtrace. -static void jl_eval_errorf(jl_module_t *m, const char* fmt, ...) +static void jl_eval_throw(jl_module_t *m, jl_value_t *exc) { jl_value_t *throw_ex = (jl_value_t*)jl_exprn(jl_call_sym, 2); JL_GC_PUSH1(&throw_ex); jl_exprargset(throw_ex, 0, jl_builtin_throw); + jl_exprargset(throw_ex, 1, exc); + jl_toplevel_eval_flex(m, throw_ex, 0, 0); + JL_GC_POP(); +} + +// Format error message and call jl_eval +static void jl_eval_errorf(jl_module_t *m, const char* fmt, ...) 
+{ va_list args; va_start(args, fmt); - jl_exprargset(throw_ex, 1, jl_vexceptionf(jl_errorexception_type, fmt, args)); + jl_value_t *exc = jl_vexceptionf(jl_errorexception_type, fmt, args); va_end(args); - jl_toplevel_eval_flex(m, throw_ex, 0, 0); + JL_GC_PUSH1(&exc); + jl_eval_throw(m, exc); JL_GC_POP(); } @@ -875,7 +884,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int jl_eval_errorf(m, "malformed \"%s\" expression", jl_symbol_name(head)); if (jl_is_string(jl_exprarg(ex, 0))) jl_eval_errorf(m, "syntax: %s", jl_string_data(jl_exprarg(ex, 0))); - jl_throw(jl_exprarg(ex, 0)); + jl_eval_throw(m, jl_exprarg(ex, 0)); } else if (jl_is_symbol(ex)) { JL_GC_POP(); diff --git a/test/backtrace.jl b/test/backtrace.jl index 38019880da35d..50a50100488c4 100644 --- a/test/backtrace.jl +++ b/test/backtrace.jl @@ -195,6 +195,13 @@ let bt, found = false end # Syntax error locations appear in backtraces +let trace = try + eval(Expr(:error, 1)) + catch + stacktrace(catch_backtrace()) + end + @test trace[1].func === Symbol("top-level scope") +end let trace = try include_string(@__MODULE__, """ @@ -221,7 +228,7 @@ let trace = try end @test trace[1].func === Symbol("top-level scope") @test trace[1].file === :a_filename - @test trace[1].line == 2 + @test trace[1].line == 3 end # issue #45171 From 964f0d63259aebb0598c678a01af2a02d332f557 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 28 May 2023 17:12:42 +1000 Subject: [PATCH 189/290] Pass Int rather than UInt as lengths to core parser hook This is more consistent with the way we're likely to call it from the Julia side via Meta.parse().
--- src/ast.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ast.c b/src/ast.c index bd1ffee5b76b1..06727b453d6a3 100644 --- a/src/ast.c +++ b/src/ast.c @@ -1348,8 +1348,8 @@ jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t *filename, jl_svecset(args[1], 0, jl_box_uint8pointer((uint8_t*)text)); jl_svecset(args[1], 1, jl_box_long(text_len)); args[2] = filename; - args[3] = jl_box_ulong(lineno); - args[4] = jl_box_ulong(offset); + args[3] = jl_box_long(lineno); + args[4] = jl_box_long(offset); args[5] = options; jl_task_t *ct = jl_current_task; size_t last_age = ct->world_age; From 2d68286d40aa01bc6c5132d560a4a496316a3e2f Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sun, 28 May 2023 07:23:10 +1000 Subject: [PATCH 190/290] Fix string escaping in REPL completion of paths REPL completion of paths within strings need to be escaped according to the usual escaping rules, and delimited by the starting " rather than whitespace. This differs from completion of paths within cmd backticks which need to be escaped according to shell escaping rules. Separate these cases and fix string escaping. This was found because JuliaSyntax emits an Expr(:error) rather than Expr(:incomplete) for paths inside strings with invalid escape sequences before whitespace. 
--- stdlib/REPL/src/REPLCompletions.jl | 58 +++++++++++++++++------ stdlib/REPL/test/replcompletions.jl | 71 ++++++++++++++++++++++------- 2 files changed, 98 insertions(+), 31 deletions(-) diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl index 6ec7074f105fd..20d26953eb22b 100644 --- a/stdlib/REPL/src/REPLCompletions.jl +++ b/stdlib/REPL/src/REPLCompletions.jl @@ -232,7 +232,10 @@ function complete_keyword(s::Union{String,SubString{String}}) Completion[KeywordCompletion(kw) for kw in sorted_keywords[r]] end -function complete_path(path::AbstractString, pos::Int; use_envpath=false, shell_escape=false) +function complete_path(path::AbstractString, pos::Int; + use_envpath=false, shell_escape=false, + string_escape=false) + @assert !(shell_escape && string_escape) if Base.Sys.isunix() && occursin(r"^~(?:/|$)", path) # if the path is just "~", don't consider the expanded username as a prefix if path == "~" @@ -259,9 +262,9 @@ function complete_path(path::AbstractString, pos::Int; use_envpath=false, shell_ matches = Set{String}() for file in files if startswith(file, prefix) - id = try isdir(joinpath(dir, file)) catch; false end - # joinpath is not used because windows needs to complete with double-backslash - push!(matches, id ? file * (@static Sys.iswindows() ? "\\\\" : "/") : file) + p = joinpath(dir, file) + is_dir = try isdir(p) catch; false end + push!(matches, is_dir ? joinpath(file, "") : file) end end @@ -307,8 +310,14 @@ function complete_path(path::AbstractString, pos::Int; use_envpath=false, shell_ end end - matchList = Completion[PathCompletion(shell_escape ? replace(s, r"\s" => s"\\\0") : s) for s in matches] - startpos = pos - lastindex(prefix) + 1 - count(isequal(' '), prefix) + function do_escape(s) + return shell_escape ? replace(s, r"(\s|\\)" => s"\\\0") : + string_escape ? 
escape_string(s, ('\"','$')) : + s + end + + matchList = Completion[PathCompletion(do_escape(s)) for s in matches] + startpos = pos - lastindex(do_escape(prefix)) + 1 # The pos - lastindex(prefix) + 1 is correct due to `lastindex(prefix)-lastindex(prefix)==0`, # hence we need to add one to get the first index. This is also correct when considering # pos, because pos is the `lastindex` a larger string which `endswith(path)==true`. @@ -767,7 +776,7 @@ end function close_path_completion(str, startpos, r, paths, pos) length(paths) == 1 || return false # Only close if there's a single choice... _path = str[startpos:prevind(str, first(r))] * (paths[1]::PathCompletion).path - path = expanduser(replace(_path, r"\\ " => " ")) + path = expanduser(unescape_string(replace(_path, "\\\$"=>"\$", "\\\""=>"\""))) # ...except if it's a directory... try isdir(path) @@ -1039,23 +1048,44 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif dotpos = something(findprev(isequal('.'), string, first(varrange)-1), 0) return complete_identifiers!(Completion[], ffunc, context_module, string, string[startpos:pos], pos, dotpos, startpos) - # otherwise... - elseif inc_tag in [:cmd, :string] + elseif inc_tag === :cmd m = match(r"[\t\n\r\"`><=*?|]| (?!\\)", reverse(partial)) startpos = nextind(partial, reverseind(partial, m.offset)) r = startpos:pos + # This expansion with "\\ "=>' ' replacement and shell_escape=true + # assumes the path isn't further quoted within the cmd backticks. 
expanded = complete_expanduser(replace(string[r], r"\\ " => " "), r) expanded[3] && return expanded # If user expansion available, return it - paths, r, success = complete_path(replace(string[r], r"\\ " => " "), pos) + paths, r, success = complete_path(replace(string[r], r"\\ " => " "), pos, + shell_escape=true) + + return sort!(paths, by=p->p.path), r, success + elseif inc_tag === :string + # Find first non-escaped quote + m = match(r"\"(?!\\)", reverse(partial)) + startpos = nextind(partial, reverseind(partial, m.offset)) + r = startpos:pos + + expanded = complete_expanduser(string[r], r) + expanded[3] && return expanded # If user expansion available, return it - if inc_tag === :string && close_path_completion(string, startpos, r, paths, pos) - paths[1] = PathCompletion((paths[1]::PathCompletion).path * "\"") + path_prefix = try + unescape_string(replace(string[r], "\\\$"=>"\$", "\\\""=>"\"")) + catch + nothing end + if !isnothing(path_prefix) + paths, r, success = complete_path(path_prefix, pos, string_escape=true) - #Latex symbols can be completed for strings - (success || inc_tag === :cmd) && return sort!(paths, by=p->p.path), r, success + if close_path_completion(string, startpos, r, paths, pos) + paths[1] = PathCompletion((paths[1]::PathCompletion).path * "\"") + end + + # Fallthrough allowed so that Latex symbols can be completed in strings + success && return sort!(paths, by=p->p.path), r, success + end end ok, ret = bslash_completions(string, pos) diff --git a/stdlib/REPL/test/replcompletions.jl b/stdlib/REPL/test/replcompletions.jl index b0d1ff4b5237a..b2199e10bef55 100644 --- a/stdlib/REPL/test/replcompletions.jl +++ b/stdlib/REPL/test/replcompletions.jl @@ -1177,7 +1177,7 @@ let current_dir, forbidden catch e e isa Base.IOError && occursin("ELOOP", e.msg) end - c, r = test_complete("\"$(joinpath(path, "selfsym"))") + c, r = test_complete("\""*escape_string(joinpath(path, "selfsym"))) @test c == ["selfsymlink"] end end @@ -1207,26 +1207,62 @@ end 
mktempdir() do path space_folder = randstring() * " α" dir = joinpath(path, space_folder) - dir_space = replace(space_folder, " " => "\\ ") - mkdir(dir) cd(path) do - open(joinpath(space_folder, "space .file"),"w") do f - s = Sys.iswindows() ? "rm $dir_space\\\\space" : "cd $dir_space/space" - c, r = test_scomplete(s) - @test r == lastindex(s)-4:lastindex(s) - @test "space\\ .file" in c + touch(joinpath(space_folder, "space .file")) + + dir_space = replace(space_folder, " " => "\\ ") + s = Sys.iswindows() ? "cd $dir_space\\\\space" : "cd $dir_space/space" + c, r = test_scomplete(s) + @test s[r] == "space" + @test "space\\ .file" in c + # Also use shell escape rules within cmd backticks + s = "`$s" + c, r = test_scomplete(s) + @test s[r] == "space" + @test "space\\ .file" in c + + # escape string according to Julia escaping rules + julia_esc(str) = escape_string(str, ('\"','$')) + + # For normal strings the string should be properly escaped according to + # the usual rules for Julia strings. + s = "cd(\"" * julia_esc(joinpath(path, space_folder, "space")) + c, r = test_complete(s) + @test s[r] == "space" + @test "space .file\"" in c + + # '$' is the only character which can appear in a windows filename and + # which needs to be escaped in Julia strings (on unix we could do this + # test with all sorts of special chars) + touch(joinpath(space_folder, "needs_escape\$.file")) + escpath = julia_esc(joinpath(path, space_folder, "needs_escape\$")) + s = "cd(\"$escpath" + c, r = test_complete(s) + @test s[r] == "needs_escape\\\$" + @test "needs_escape\\\$.file\"" in c - s = Sys.iswindows() ? 
"cd(\"β $dir_space\\\\space" : "cd(\"β $dir_space/space" + if !Sys.iswindows() + touch(joinpath(space_folder, "needs_escape2\n\".file")) + escpath = julia_esc(joinpath(path, space_folder, "needs_escape2\n\"")) + s = "cd(\"$escpath" c, r = test_complete(s) - @test r == lastindex(s)-4:lastindex(s) - @test "space .file\"" in c + @test s[r] == "needs_escape2\\n\\\"" + @test "needs_escape2\\n\\\".file\"" in c + + touch(joinpath(space_folder, "needs_escape3\\.file")) + escpath = julia_esc(joinpath(path, space_folder, "needs_escape3\\")) + s = "cd(\"$escpath" + c, r = test_complete(s) + @test s[r] == "needs_escape3\\\\" + @test "needs_escape3\\\\.file\"" in c end + # Test for issue #10324 - s = "cd(\"$dir_space" + s = "cd(\"$space_folder" c, r = test_complete(s) - @test r == 5:15 - @test s[r] == dir_space + @test r == 5:14 + @test s[r] == space_folder #Test for #18479 for c in "'`@\$;&" @@ -1240,8 +1276,9 @@ mktempdir() do path @test c[1] == test_dir*(Sys.iswindows() ? "\\\\" : "/") @test res end - c, r, res = test_complete("\""*test_dir) - @test c[1] == test_dir*(Sys.iswindows() ? "\\\\" : "/") + escdir = julia_esc(test_dir) + c, r, res = test_complete("\""*escdir) + @test c[1] == escdir*(Sys.iswindows() ? "\\\\" : "/") @test res finally rm(joinpath(path, test_dir), recursive=true) @@ -1285,7 +1322,7 @@ if Sys.iswindows() @test r == length(s)-1:length(s) @test file in c - s = "cd(\"..\\" + s = "cd(\"..\\\\" c,r = test_complete(s) @test r == length(s)+1:length(s) @test temp_name * "\\\\" in c From 60bf0b684d49076227115e721c52a47d02878e49 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 25 May 2023 07:10:29 +1000 Subject: [PATCH 191/290] Enable JuliaSyntax.jl as the default parser * Vendor JuliaSyntax into Base via deps directory * Install JuliaSyntax as the Julia parser unless the environment variable JULIA_USE_NEW_PARSER=0 is set. * Add a function to set the Core._parse binding. Required because we'd like to set the binding during Base.__init__. 
This can be done with `Core.eval` but that doesn't work well in incremental compilation mode. Also accommodate JuliaSyntax within tests: * When JuliaSyntax is enabled, ignore error messages in parser tests which are tested separately upstream - error messages are inherently expressed a bit differently when they go alongside full source location info. * Accommodate a small number of incompatibilities where in JuliaSyntax - `import .Mod.x as (a.b)` is a syntax not lowering error - `f(2x for x=1:10, y` is `Expr(:incomplete)` not `Expr(:error)` - `incomplete_tag` is more precise for `:block` vs `:other` - `global const` without an assignment is a syntax error, in keeping with plain `const` without assignment being a syntax error (not lowering error). * Adjust a few tests to be more precise about testing lowering vs the parser. * Make Meta.parse doctest compatible with JuliaSyntax errors --- Makefile | 4 + NEWS.md | 3 + base/.gitignore | 1 + base/Base.jl | 7 + base/boot.jl | 4 +- base/compiler/compiler.jl | 2 +- base/meta.jl | 16 +- contrib/generate_precompile.jl | 1 - deps/JuliaSyntax.mk | 16 + deps/JuliaSyntax.version | 4 + deps/Makefile | 7 +- .../md5 | 1 + .../sha512 | 1 + test/cmdlineargs.jl | 2 +- test/show.jl | 2 +- test/strings/basic.jl | 7 - test/syntax.jl | 419 +++++++++--------- 17 files changed, 272 insertions(+), 225 deletions(-) create mode 100644 deps/JuliaSyntax.mk create mode 100644 deps/JuliaSyntax.version create mode 100644 deps/checksums/JuliaSyntax-ec51994833d78f8c5525bc1647f448dfadc370c1.tar.gz/md5 create mode 100644 deps/checksums/JuliaSyntax-ec51994833d78f8c5525bc1647f448dfadc370c1.tar.gz/sha512 diff --git a/Makefile b/Makefile index 046f18492bc3e..eb6e54ae70b34 100644 --- a/Makefile +++ b/Makefile @@ -365,6 +365,10 @@ endif # Remove various files which should not be installed -rm -f $(DESTDIR)$(datarootdir)/julia/base/version_git.sh -rm -f $(DESTDIR)$(datarootdir)/julia/test/Makefile + -rm -f 
$(DESTDIR)$(datarootdir)/julia/base/*/source-extracted + -rm -f $(DESTDIR)$(datarootdir)/julia/base/*/build-configured + -rm -f $(DESTDIR)$(datarootdir)/julia/base/*/build-compiled + -rm -f $(DESTDIR)$(datarootdir)/julia/base/*/build-checked -rm -f $(DESTDIR)$(datarootdir)/julia/stdlib/$(VERSDIR)/*/source-extracted -rm -f $(DESTDIR)$(datarootdir)/julia/stdlib/$(VERSDIR)/*/build-configured -rm -f $(DESTDIR)$(datarootdir)/julia/stdlib/$(VERSDIR)/*/build-compiled diff --git a/NEWS.md b/NEWS.md index 6c60b56b7a028..d73373d95d26e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,9 @@ Julia v1.10 Release Notes New language features --------------------- +* JuliaSyntax.jl is now used as the default parser, providing better diagnostics and faster + parsing. Set environment variable `JULIA_USE_NEW_PARSER` to `0` to switch back to the old + parser if necessary (and if you find this necessary, please file an issue) ([#46372]). * `⥺` (U+297A, `\leftarrowsubset`) and `⥷` (U+2977, `\leftarrowless`) may now be used as binary operators with arrow precedence. 
([#45962]) diff --git a/base/.gitignore b/base/.gitignore index e572b8ea229d0..0fab5b41fda08 100644 --- a/base/.gitignore +++ b/base/.gitignore @@ -8,3 +8,4 @@ /version_git.jl /version_git.jl.phony /userimg.jl +/JuliaSyntax diff --git a/base/Base.jl b/base/Base.jl index 1a677bf508977..1fc20293aa384 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -489,6 +489,10 @@ a_method_to_overwrite_in_test() = inferencebarrier(1) include(mod::Module, _path::AbstractString) = _include(identity, mod, _path) include(mapexpr::Function, mod::Module, _path::AbstractString) = _include(mapexpr, mod, _path) +# External libraries vendored into Base +Core.println("JuliaSyntax/src/JuliaSyntax.jl") +include(@__MODULE__, "JuliaSyntax/src/JuliaSyntax.jl") + end_base_include = time_ns() const _sysimage_modules = PkgId[] @@ -600,6 +604,9 @@ function __init__() _require_world_age[] = get_world_counter() # Prevent spawned Julia process from getting stuck waiting on Tracy to connect. delete!(ENV, "JULIA_WAIT_FOR_TRACY") + if get_bool_env("JULIA_USE_NEW_PARSER", true) === true + JuliaSyntax.enable_in_core!() + end nothing end diff --git a/base/boot.jl b/base/boot.jl index ec25fa2bc0b6d..6698d4360cc7d 100644 --- a/base/boot.jl +++ b/base/boot.jl @@ -825,9 +825,11 @@ Integer(x::Union{Float16, Float32, Float64}) = Int(x) # `_parse` must return an `svec` containing an `Expr` and the new offset as an # `Int`. # -# The internal jl_parse which will call into Core._parse if not `nothing`. +# The internal jl_parse will call into Core._parse if not `nothing`. _parse = nothing +_setparser!(parser) = setglobal!(Core, :_parse, parser) + # support for deprecated uses of internal _apply function _apply(x...) = Core._apply_iterate(Main.Base.iterate, x...) 
diff --git a/base/compiler/compiler.jl b/base/compiler/compiler.jl index 58f77078ddb5e..04b0791d9a79e 100644 --- a/base/compiler/compiler.jl +++ b/base/compiler/compiler.jl @@ -171,7 +171,7 @@ include("compiler/bootstrap.jl") ccall(:jl_set_typeinf_func, Cvoid, (Any,), typeinf_ext_toplevel) include("compiler/parsing.jl") -Core.eval(Core, :(_parse = Compiler.fl_parse)) +Core._setparser!(fl_parse) end # baremodule Compiler )) diff --git a/base/meta.jl b/base/meta.jl index 5d1cfe9c4a1a6..ba2a5eeb6858b 100644 --- a/base/meta.jl +++ b/base/meta.jl @@ -254,20 +254,22 @@ syntax errors will raise an error; otherwise, `parse` will return an expression raise an error upon evaluation. If `depwarn` is `false`, deprecation warnings will be suppressed. -```jldoctest +```jldoctest; filter=r"(?<=Expr\\(:error).*|(?<=Expr\\(:incomplete).*" julia> Meta.parse("x = 3") :(x = 3) -julia> Meta.parse("x = ") -:($(Expr(:incomplete, "incomplete: premature end of input"))) - julia> Meta.parse("1.0.2") -ERROR: Base.Meta.ParseError("invalid numeric constant \\\"1.0.\\\"") -Stacktrace: +ERROR: ParseError: +# Error @ none:1:1 +1.0.2 +└──┘ ── invalid numeric constant [...] 
julia> Meta.parse("1.0.2"; raise = false) -:($(Expr(:error, "invalid numeric constant \"1.0.\""))) +:(\$(Expr(:error, "invalid numeric constant \"1.0.\""))) + +julia> Meta.parse("x = ") +:(\$(Expr(:incomplete, "incomplete: premature end of input"))) ``` """ function parse(str::AbstractString; raise::Bool=true, depwarn::Bool=true) diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl index 8fa40e4920eea..7312726fe2eaa 100644 --- a/contrib/generate_precompile.jl +++ b/contrib/generate_precompile.jl @@ -153,7 +153,6 @@ if Artifacts !== nothing """ end - Pkg = get(Base.loaded_modules, Base.PkgId(Base.UUID("44cfe95a-1eb2-52ea-b672-e2afdf69b78f"), "Pkg"), nothing) diff --git a/deps/JuliaSyntax.mk b/deps/JuliaSyntax.mk new file mode 100644 index 0000000000000..e9cc0c942dbe0 --- /dev/null +++ b/deps/JuliaSyntax.mk @@ -0,0 +1,16 @@ +$(eval $(call git-external,JuliaSyntax,JULIASYNTAX,,,$(BUILDDIR))) + +$(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/build-compiled: $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/source-extracted + @# no build steps + echo 1 > $@ + +$(eval $(call symlink_install,JuliaSyntax,$$(JULIASYNTAX_SRC_DIR),$$(JULIAHOME)/base)) + +clean-JuliaSyntax: + -rm -f $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/build-compiled +get-JuliaSyntax: $(JULIASYNTAX_SRC_FILE) +extract-JuliaSyntax: $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/source-extracted +configure-JuliaSyntax: extract-JuliaSyntax +compile-JuliaSyntax: $(BUILDDIR)/$(JULIASYNTAX_SRC_DIR)/build-compiled +fastcheck-JuliaSyntax: check-JuliaSyntax +check-JuliaSyntax: compile-JuliaSyntax diff --git a/deps/JuliaSyntax.version b/deps/JuliaSyntax.version new file mode 100644 index 0000000000000..2bd765e6f4535 --- /dev/null +++ b/deps/JuliaSyntax.version @@ -0,0 +1,4 @@ +JULIASYNTAX_BRANCH = main +JULIASYNTAX_SHA1 = ec51994833d78f8c5525bc1647f448dfadc370c1 +JULIASYNTAX_GIT_URL := https://github.com/JuliaLang/JuliaSyntax.jl.git +JULIASYNTAX_TAR_URL = https://api.github.com/repos/JuliaLang/JuliaSyntax.jl/tarball/$1 diff --git 
a/deps/Makefile b/deps/Makefile index 62bb85e72c492..ac899b634a3fa 100644 --- a/deps/Makefile +++ b/deps/Makefile @@ -36,7 +36,7 @@ BUILDDIR := $(BUILDDIR)$(MAYBE_HOST) # prevent installing libs into usr/lib64 on opensuse unexport CONFIG_SITE -DEP_LIBS := +DEP_LIBS := JuliaSyntax ifeq ($(USE_SYSTEM_LIBBLASTRAMPOLINE), 0) DEP_LIBS += blastrampoline @@ -188,7 +188,7 @@ DEP_LIBS_STAGED := $(DEP_LIBS) DEP_LIBS_STAGED_ALL := llvm llvm-tools clang llvmunwind unwind libuv pcre \ openlibm dsfmt blastrampoline openblas lapack gmp mpfr patchelf utf8proc \ objconv mbedtls libssh2 nghttp2 curl libgit2 libwhich zlib p7zip csl \ - libsuitesparse lld libtracyclient ittapi + libsuitesparse lld libtracyclient ittapi JuliaSyntax DEP_LIBS_ALL := $(DEP_LIBS_STAGED_ALL) ifneq ($(USE_BINARYBUILDER_OPENBLAS),0) @@ -248,4 +248,7 @@ include $(SRCDIR)/libwhich.mk include $(SRCDIR)/p7zip.mk include $(SRCDIR)/libtracyclient.mk +# vendored Julia libs +include $(SRCDIR)/JuliaSyntax.mk + include $(SRCDIR)/tools/uninstallers.mk diff --git a/deps/checksums/JuliaSyntax-ec51994833d78f8c5525bc1647f448dfadc370c1.tar.gz/md5 b/deps/checksums/JuliaSyntax-ec51994833d78f8c5525bc1647f448dfadc370c1.tar.gz/md5 new file mode 100644 index 0000000000000..e1f51dd3d711a --- /dev/null +++ b/deps/checksums/JuliaSyntax-ec51994833d78f8c5525bc1647f448dfadc370c1.tar.gz/md5 @@ -0,0 +1 @@ +b1d1ccb00e422eb8b70b2120d7083bf3 diff --git a/deps/checksums/JuliaSyntax-ec51994833d78f8c5525bc1647f448dfadc370c1.tar.gz/sha512 b/deps/checksums/JuliaSyntax-ec51994833d78f8c5525bc1647f448dfadc370c1.tar.gz/sha512 new file mode 100644 index 0000000000000..2ac2b9ed7c903 --- /dev/null +++ b/deps/checksums/JuliaSyntax-ec51994833d78f8c5525bc1647f448dfadc370c1.tar.gz/sha512 @@ -0,0 +1 @@ +e6df6dc2b5d2a5618da0d553eed793e1192147175d84d51f725c0ea8f7b6be92fbeb37de9abee2b2f548b0f0736f836ec7e3e20e93c12f77e1a2b2058bbfd6db diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl index 9c8c0ac553c24..13a68be2927de 100644 --- a/test/cmdlineargs.jl +++ 
b/test/cmdlineargs.jl @@ -924,7 +924,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` close(in) close(err.in) txt = readline(err) - @test startswith(txt, "ERROR: syntax: incomplete") + @test startswith(txt, r"ERROR: (syntax: incomplete|ParseError:)") end # Issue #29855 diff --git a/test/show.jl b/test/show.jl index f2c553b3ff49a..25c5a49372054 100644 --- a/test/show.jl +++ b/test/show.jl @@ -633,7 +633,7 @@ end @test_repr "::@m(x, y) + z" @test_repr "[@m(x) y z]" @test_repr "[@m(x) y; z]" -@test_repr "let @m(x), y=z; end" +test_repr("let @m(x), y=z; end", true) @test repr(:(@m x y)) == ":(#= $(@__FILE__):$(@__LINE__) =# @m x y)" @test string(:(@m x y)) == "#= $(@__FILE__):$(@__LINE__) =# @m x y" diff --git a/test/strings/basic.jl b/test/strings/basic.jl index 7151a4d4fd60a..13f2f5197187a 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -250,8 +250,6 @@ end @test string(sym) == string(Char(0xdcdb)) @test String(sym) == string(Char(0xdcdb)) @test Meta.lower(Main, sym) === sym - @test Meta.parse(string(Char(0xe0080)," = 1"), 1, raise=false)[1] == - Expr(:error, "invalid character \"\Ue0080\" near column 1") end @testset "Symbol and gensym" begin @@ -761,11 +759,6 @@ function getData(dic) end @test getData(Dict()) == ",,,,,,,,,,,,,,,,,," -@testset "unrecognized escapes in string/char literals" begin - @test_throws Meta.ParseError Meta.parse("\"\\.\"") - @test_throws Meta.ParseError Meta.parse("\'\\.\'") -end - @testset "thisind" begin let strs = Any["∀α>β:α+1>β", s"∀α>β:α+1>β", SubString("123∀α>β:α+1>β123", 4, 18), diff --git a/test/syntax.jl b/test/syntax.jl index 119f6d427a15a..4d1b167693adb 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -5,22 +5,29 @@ using Random using Base: remove_linenums! 
-import Base.Meta.ParseError - -function parseall(str) - pos = firstindex(str) - exs = [] - while pos <= lastindex(str) - ex, pos = Meta.parse(str, pos) - push!(exs, ex) - end - if length(exs) == 0 - throw(ParseError("end of input")) - elseif length(exs) == 1 - return exs[1] +using_JuliaSyntax = parentmodule(Core._parse) != Core.Compiler + +macro test_parseerror(str, msg) + if using_JuliaSyntax + # Diagnostics are tested separately in JuliaSyntax + ex = :(@test_throws Meta.ParseError Meta.parse($(esc(str)))) else - return Expr(:block, exs...) + ex = :(@test_throws Meta.ParseError($(esc(msg))) Meta.parse($(esc(str)))) end + ex.args[2] = __source__ + return ex +end + +macro test_parseerror(str) + ex = :(@test_throws Meta.ParseError Meta.parse($(esc(str)))) + ex.args[2] = __source__ + return ex +end + +function parseall_nolines(str) + ex = Meta.parseall(str) + filter!(e->!(e isa LineNumberNode), ex.args) + return ex end # issue #9684 @@ -60,19 +67,19 @@ macro test999_str(args...); args; end @test test999"foo"123 == ("foo", 123) # issue #5997 -@test_throws ParseError Meta.parse(": x") -@test_throws ParseError Meta.parse("""begin +@test_parseerror ": x" +@test_parseerror """begin : - x""") -@test_throws ParseError Meta.parse("d[: 2]") + x""" +@test_parseerror "d[: 2]" # issue #6770 -@test_throws ParseError Meta.parse("x.3") +@test_parseerror "x.3" # issue #8763 -@test_throws ParseError Meta.parse("sqrt(16)2") -@test_throws ParseError Meta.parse("x' y") -@test_throws ParseError Meta.parse("x 'y") +@test_parseerror "sqrt(16)2" +@test_parseerror "x' y" +@test_parseerror "x 'y" @test Meta.parse("x'y") == Expr(:call, :*, Expr(Symbol("'"), :x), :y) # issue #18851 @@ -84,22 +91,22 @@ macro test999_str(args...); args; end @test Meta.parse("-2(m)") == Expr(:call, :*, -2, :m) # issue #8301 -@test_throws ParseError Meta.parse("&*s") +@test_parseerror "&*s" # issue #10677 -@test_throws ParseError Meta.parse("/1") -@test_throws ParseError Meta.parse("/pi") +@test_parseerror "/1" 
+@test_parseerror "/pi" @test Meta.parse("- = 2") == Expr(:(=), :(-), 2) @test Meta.parse("/ = 2") == Expr(:(=), :(/), 2) -@test_throws ParseError Meta.parse("< : 2") -@test_throws ParseError Meta.parse("+ : 2") -@test_throws ParseError Meta.parse("< :2") +@test_parseerror "< : 2" +@test_parseerror "+ : 2" +@test_parseerror "< :2" @test Meta.parse("+ :2") == Expr(:call, :(+), QuoteNode(2)) # issue #10900 -@test_throws ParseError Meta.parse("+=") -@test_throws ParseError Meta.parse(".") -@test_throws ParseError Meta.parse("...") +@test_parseerror "+=" +@test_parseerror "." +@test_parseerror "..." # issue #10901 @test Meta.parse("/([1], 1)[1]") == :(([1] / 1)[1]) @@ -152,35 +159,35 @@ macro test999_str(args...); args; end Expr(:., Expr(:$, :c), Expr(:$, :d)))) # fix pr #11338 and test for #11497 -@test parseall("using \$\na") == Expr(:block, Expr(:using, Expr(:., :$)), :a) -@test parseall("using \$,\na") == Expr(:using, Expr(:., :$), Expr(:., :a)) -@test parseall("using &\na") == Expr(:block, Expr(:using, Expr(:., :&)), :a) +@test parseall_nolines("using \$\na") == Expr(:toplevel, Expr(:using, Expr(:., :$)), :a) +@test parseall_nolines("using \$,\na") == Expr(:toplevel, Expr(:using, Expr(:., :$), Expr(:., :a))) +@test parseall_nolines("using &\na") == Expr(:toplevel, Expr(:using, Expr(:., :&)), :a) -@test parseall("a = &\nb") == Expr(:block, Expr(:(=), :a, :&), :b) -@test parseall("a = \$\nb") == Expr(:block, Expr(:(=), :a, :$), :b) -@test parseall(":(a = &\nb)") == Expr(:quote, Expr(:(=), :a, Expr(:&, :b))) -@test parseall(":(a = \$\nb)") == Expr(:quote, Expr(:(=), :a, Expr(:$, :b))) +@test parseall_nolines("a = &\nb") == Expr(:toplevel, Expr(:(=), :a, :&), :b) +@test parseall_nolines("a = \$\nb") == Expr(:toplevel, Expr(:(=), :a, :$), :b) +@test parseall_nolines(":(a = &\nb)") == Expr(:toplevel, Expr(:quote, Expr(:(=), :a, Expr(:&, :b)))) +@test parseall_nolines(":(a = \$\nb)") == Expr(:toplevel, Expr(:quote, Expr(:(=), :a, Expr(:$, :b)))) # issue 12027 - short 
macro name parsing vs _str suffix -@test parseall(""" - macro f(args...) end; @f "macro argument" +@test parseall_nolines(""" + macro f(args...) end\n@f "macro argument" """) == Expr(:toplevel, Expr(:macro, Expr(:call, :f, Expr(:..., :args)), Expr(:block, LineNumberNode(1, :none), LineNumberNode(1, :none))), - Expr(:macrocall, Symbol("@f"), LineNumberNode(1, :none), "macro argument")) + Expr(:macrocall, Symbol("@f"), LineNumberNode(2, :none), "macro argument")) # blocks vs. tuples @test Meta.parse("()") == Expr(:tuple) @test Meta.parse("(;)") == Expr(:tuple, Expr(:parameters)) @test Meta.parse("(;;)") == Expr(:block) @test Meta.parse("(;;;;)") == Expr(:block) -@test_throws ParseError Meta.parse("(,)") -@test_throws ParseError Meta.parse("(;,)") -@test_throws ParseError Meta.parse("(,;)") +@test_parseerror "(,)" +@test_parseerror "(;,)" +@test_parseerror "(,;)" # TODO: would be nice to make these errors, but needed to parse e.g. `(x;y,)->x` -#@test_throws ParseError Meta.parse("(1;2,)") -#@test_throws ParseError Meta.parse("(1;2,;)") -#@test_throws ParseError Meta.parse("(1;2,;3)") +#@test_parseerror "(1;2,)" +#@test_parseerror "(1;2,;)" +#@test_parseerror "(1;2,;3)" @test Meta.parse("(x;)") == Expr(:block, :x) @test Meta.parse("(;x)") == Expr(:tuple, Expr(:parameters, :x)) @test Meta.parse("(;x,)") == Expr(:tuple, Expr(:parameters, :x)) @@ -197,7 +204,7 @@ macro test999_str(args...); args; end @test Meta.parse("(x,a;y=1)") == Expr(:tuple, Expr(:parameters, Expr(:kw, :y, 1)), :x, :a) @test Meta.parse("(x,a;y=1,z=2)") == Expr(:tuple, Expr(:parameters, Expr(:kw,:y,1), Expr(:kw,:z,2)), :x, :a) @test Meta.parse("(a=1, b=2)") == Expr(:tuple, Expr(:(=), :a, 1), Expr(:(=), :b, 2)) -@test_throws ParseError Meta.parse("(1 2)") # issue #15248 +@test_parseerror "(1 2)" # issue #15248 @test Meta.parse("f(x;)") == Expr(:call, :f, Expr(:parameters), :x) @@ -268,13 +275,16 @@ end @test_throws BoundsError Meta.parse("x = 1", 7) # issue #14683 -@test_throws ParseError 
Meta.parse("'\\A\"'") +@test_parseerror "'\\A\"'" @test Meta.parse("'\"'") == Meta.parse("'\\\"'") == '"' == "\""[1] == '\42' # issue #24558 @test '\u2200' == "\u2200"[1] -@test_throws ParseError Meta.parse("f(2x for x=1:10, y") +if !using_JuliaSyntax + # This should be Expr(:incomplete) + @test_parseerror "f(2x for x=1:10, y" +end # issue #15223 call0(f) = f() @@ -310,11 +320,6 @@ let p = 15 @test 2p+1 == 31 # not a hex float literal end -macro test_parseerror(str, msg) - ex = :(@test_throws ParseError($(esc(msg))) Meta.parse($(esc(str)))) - ex.args[2] = __source__ - return ex -end @test_parseerror("0x", "invalid numeric constant \"0x\"") @test_parseerror("0b", "invalid numeric constant \"0b\"") @test_parseerror("0o", "invalid numeric constant \"0o\"") @@ -322,9 +327,8 @@ end @test_parseerror("0x1.0p", "invalid numeric constant \"0x1.0\"") # issue #15798 -@test Meta.lower(Main, Base.parse_input_line(""" - try = "No" - """)) == Expr(:error, "unexpected \"=\"") +# lowering preserves Expr(:error) +@test Meta.lower(Main, Expr(:error, "no")) == Expr(:error, "no") # issue #19861 make sure macro-expansion happens in the newest world for top-level expression @test eval(Base.parse_input_line(""" @@ -368,9 +372,9 @@ add_method_to_glob_fn!() @test f15844(Int64(1)) == 3 # issue #15661 -@test_throws ParseError Meta.parse("function catch() end") -@test_throws ParseError Meta.parse("function end() end") -@test_throws ParseError Meta.parse("function finally() end") +@test_parseerror "function catch() end" +@test_parseerror "function end() end" +@test_parseerror "function finally() end" # PR #16170 @test Meta.lower(Main, Meta.parse("true(x) = x")) == Expr(:error, "invalid function name \"true\"") @@ -421,18 +425,18 @@ end :y)) # test that pre 0.5 deprecated syntax is a parse error -@test_throws ParseError Meta.parse("Int [1,2,3]") -@test_throws ParseError Meta.parse("Int [x for x in 1:10]") -@test_throws ParseError Meta.parse("foo (x) = x") -@test_throws ParseError Meta.parse("foo 
{T<:Int}(x::T) = x") +@test_parseerror "Int [1,2,3]" +@test_parseerror "Int [x for x in 1:10]" +@test_parseerror "foo (x) = x" +@test_parseerror "foo {T<:Int}(x::T) = x" -@test_throws ParseError Meta.parse("Foo .bar") +@test_parseerror "Foo .bar" -@test_throws ParseError Meta.parse("import x .y") -@test_throws ParseError Meta.parse("using x .y") +@test_parseerror "import x .y" +@test_parseerror "using x .y" -@test_throws ParseError Meta.parse("--x") -@test_throws ParseError Meta.parse("stagedfunction foo(x); end") +@test_parseerror "--x" +@test_parseerror "stagedfunction foo(x); end" @test Meta.parse("A=>B") == Expr(:call, :(=>), :A, :B) @@ -448,7 +452,7 @@ end @test Meta.parse("[a,;c]") == Expr(:vect, Expr(:parameters, :c), :a) @test Meta.parse("a[b,c;d]") == Expr(:ref, :a, Expr(:parameters, :d), :b, :c) @test Meta.parse("a[b,;d]") == Expr(:ref, :a, Expr(:parameters, :d), :b) -@test_throws ParseError Meta.parse("[a,;,b]") +@test_parseerror "[a,;,b]" @test Meta.parse("{a,b;c}") == Expr(:braces, Expr(:parameters, :c), :a, :b) @test Meta.parse("{a,;c}") == Expr(:braces, Expr(:parameters, :c), :a) @test Meta.parse("a{b,c;d}") == Expr(:curly, :a, Expr(:parameters, :d), :b, :c) @@ -534,10 +538,13 @@ for (str, tag) in Dict("" => :none, "\"" => :string, "#=" => :comment, "'" => :c "let;" => :block, "for i=1;" => :block, "function f();" => :block, "f() do x;" => :block, "module X;" => :block, "mutable struct X;" => :block, "struct X;" => :block, "(" => :other, "[" => :other, - "begin" => :other, "quote" => :other, - "let" => :other, "for" => :other, "function" => :other, + "for" => :other, "function" => :other, "f() do" => :other, "module" => :other, "mutable struct" => :other, - "struct" => :other) + "struct" => :other, + "quote" => using_JuliaSyntax ? :block : :other, + "let" => using_JuliaSyntax ? :block : :other, + "begin" => using_JuliaSyntax ? 
:block : :other, + ) @test Base.incomplete_tag(Meta.parse(str, raise=false)) == tag end @@ -622,7 +629,7 @@ end # issue 10046 for op in ["+", "-", "\$", "|", ".+", ".-", "*", ".*"] - @test_throws ParseError Meta.parse("$op in [+, -]") + @test_parseerror "$op in [+, -]" end # issue #17701 @@ -634,7 +641,7 @@ end # PR #15592 let str = "[1] [2]" - @test_throws ParseError Meta.parse(str) + @test_parseerror str end # issue 15896 and PR 15913 @@ -997,14 +1004,14 @@ end @test Test21604.X(1.0) === Test21604.X(1.0) # issue #20575 -@test_throws ParseError Meta.parse("\"a\"x") -@test_throws ParseError Meta.parse("\"a\"begin end") -@test_throws ParseError Meta.parse("\"a\"begin end\"b\"") +@test_parseerror "\"a\"x" +@test_parseerror "\"a\"begin end" +@test_parseerror "\"a\"begin end\"b\"" # issue #16427 -@test_throws ParseError Meta.parse("for i=1:1 end(3)") -@test_throws ParseError Meta.parse("begin end(3)") -@test_throws ParseError Meta.parse("while false end(3)") +@test_parseerror "for i=1:1 end(3)" +@test_parseerror "begin end(3)" +@test_parseerror "while false end(3)" # comment 298107224 on pull #21607 module Test21607 @@ -1065,7 +1072,7 @@ end === (3, String) @test Meta.parse("3 +⁽¹⁾ 4") == Expr(:call, :+⁽¹⁾, 3, 4) @test Meta.parse("3 +₍₀₎ 4") == Expr(:call, :+₍₀₎, 3, 4) for bad in ('=', '$', ':', "||", "&&", "->", "<:") - @test_throws ParseError Meta.parse("3 $(bad)⁽¹⁾ 4") + @test_parseerror "3 $(bad)⁽¹⁾ 4" end @test Base.operator_precedence(:+̂) == Base.operator_precedence(:+) @@ -1080,20 +1087,20 @@ end Expr(:tuple, :x, :y), Expr(:tuple, 1, 2))) -@test_throws ParseError Meta.parse("[2for i=1:10]") -@test_throws ParseError Meta.parse("[1 for i in 1:2for j in 2]") -@test_throws ParseError Meta.parse("(1 for i in 1:2for j in 2)") +@test_parseerror "[2for i=1:10]" +@test_parseerror "[1 for i in 1:2for j in 2]" +@test_parseerror "(1 for i in 1:2for j in 2)" # issue #20441 -@test_throws ParseError Meta.parse("[x.2]") -@test_throws ParseError Meta.parse("x.2") 
+@test_parseerror "[x.2]" +@test_parseerror "x.2" @test Meta.parse("[x;.2]") == Expr(:vcat, :x, 0.2) # issue #22840 @test Meta.parse("[:a :b]") == Expr(:hcat, QuoteNode(:a), QuoteNode(:b)) # issue #22868 -@test_throws ParseError Meta.parse("x@time 2") -@test_throws ParseError Meta.parse("@ time") +@test_parseerror "x@time 2" +@test_parseerror "@ time" # issue #7479 @test Meta.lower(Main, Meta.parse("(true &&& false)")) == Expr(:error, "invalid syntax &false") @@ -1102,9 +1109,9 @@ end @test Meta.lower(Main, :(&(1, 2))) == Expr(:error, "invalid syntax &(1, 2)") # if an indexing expression becomes a cat expression, `end` is not special -@test_throws ParseError Meta.parse("a[end end]") -@test_throws ParseError Meta.parse("a[end;end]") -#@test_throws ParseError Meta.parse("a[end;]") # this is difficult to fix +@test_parseerror "a[end end]" +@test_parseerror "a[end;end]" +#@test_parseerror "a[end;]" # this is difficult to fix let a = rand(8), i = 3 @test a[[1:i-1; i+1:end]] == a[[1,2,4,5,6,7,8]] end @@ -1115,12 +1122,12 @@ end end for i = 1:5] == fill(nothing, 5) # issue #18912 -@test_throws ParseError Meta.parse("(::)") +@test_parseerror "(::)" @test Meta.parse(":(::)") == QuoteNode(Symbol("::")) -@test_throws ParseError Meta.parse("f(::) = ::") +@test_parseerror "f(::) = ::" @test Meta.parse("(::A)") == Expr(Symbol("::"), :A) -@test_throws ParseError Meta.parse("(::, 1)") -@test_throws ParseError Meta.parse("(1, ::)") +@test_parseerror "(::, 1)" +@test_parseerror "(1, ::)" # issue #18650 let ex = Meta.parse("maximum(@elapsed sleep(1) for k = 1:10)") @@ -1192,10 +1199,10 @@ M24289.@m24289 # parsing numbers with _ and . @test Meta.parse("1_2.3_4") == 12.34 -@test_throws ParseError Meta.parse("1._") -@test_throws ParseError Meta.parse("1._5") -@test_throws ParseError Meta.parse("1e.3") -@test_throws ParseError Meta.parse("1e3.") +@test_parseerror "1._" +@test_parseerror "1._5" +@test_parseerror "1e.3" +@test_parseerror "1e3." 
@test Meta.parse("2e_1") == Expr(:call, :*, 2, :e_1) # issue #17705 @test Meta.parse("2e3_") == Expr(:call, :*, 2e3, :_) @@ -1261,8 +1268,10 @@ end @test raw"x \\\ y" == "x \\\\\\ y" end -@test_throws ParseError("expected \"}\" or separator in arguments to \"{ }\"; got \"V)\"") Meta.parse("f(x::V) where {V) = x") -@test_throws ParseError("expected \"]\" or separator in arguments to \"[ ]\"; got \"1)\"") Meta.parse("[1)") +@test_parseerror("f(x::V) where {V) = x", + "expected \"}\" or separator in arguments to \"{ }\"; got \"V)\"") +@test_parseerror("[1)", + "expected \"]\" or separator in arguments to \"[ ]\"; got \"1)\"") # issue #9972 @test Meta.lower(@__MODULE__, :(f(;3))) == Expr(:error, "invalid keyword argument syntax \"3\"") @@ -1310,7 +1319,7 @@ let getindex = 0, setindex! = 1, colon = 2, vcat = 3, hcat = 4, hvcat = 5 end # issue #25020 -@test_throws ParseError Meta.parse("using Colors()") +@test_parseerror "using Colors()" let ex = Meta.parse("md\"x\" f(x) = x", 1)[1] # custom string literal is not a docstring @@ -1364,18 +1373,18 @@ end @test Meta.parse("-(x;;;)^2") == Expr(:call, :-, Expr(:call, :^, Expr(:block, :x), 2)) @test Meta.parse("+((1,2))") == Expr(:call, :+, Expr(:tuple, 1, 2)) -@test_throws ParseError("space before \"(\" not allowed in \"+ (\" at none:1") Meta.parse("1 -+ (a=1, b=2)") +@test_parseerror "1 -+ (a=1, b=2)" "space before \"(\" not allowed in \"+ (\" at none:1" # issue #29781 -@test_throws ParseError("space before \"(\" not allowed in \"sin. (\" at none:1") Meta.parse("sin. (1)") +@test_parseerror "sin. (1)" "space before \"(\" not allowed in \"sin. 
(\" at none:1" # Parser errors for disallowed space contain line numbers -@test_throws ParseError("space before \"[\" not allowed in \"f() [\" at none:2") Meta.parse("\nf() [i]") -@test_throws ParseError("space before \"(\" not allowed in \"f() (\" at none:2") Meta.parse("\nf() (i)") -@test_throws ParseError("space before \".\" not allowed in \"f() .\" at none:2") Meta.parse("\nf() .i") -@test_throws ParseError("space before \"{\" not allowed in \"f() {\" at none:2") Meta.parse("\nf() {i}") -@test_throws ParseError("space before \"m\" not allowed in \"@ m\" at none:2") Meta.parse("\n@ m") -@test_throws ParseError("space before \".\" not allowed in \"a .\" at none:2") Meta.parse("\nusing a .b") -@test_throws ParseError("space before \".\" not allowed in \"a .\" at none:2") Meta.parse("\nusing a .b") -@test_throws ParseError("space before \"(\" not allowed in \"+ (\" at none:2") Meta.parse("\n+ (x, y)") +@test_parseerror "\nf() [i]" "space before \"[\" not allowed in \"f() [\" at none:2" +@test_parseerror "\nf() (i)" "space before \"(\" not allowed in \"f() (\" at none:2" +@test_parseerror "\nf() .i" "space before \".\" not allowed in \"f() .\" at none:2" +@test_parseerror "\nf() {i}" "space before \"{\" not allowed in \"f() {\" at none:2" +@test_parseerror "\n@ m" "space before \"m\" not allowed in \"@ m\" at none:2" +@test_parseerror "\nusing a .b" "space before \".\" not allowed in \"a .\" at none:2" +@test_parseerror "\nusing a .b" "space before \".\" not allowed in \"a .\" at none:2" +@test_parseerror "\n+ (x, y)" "space before \"(\" not allowed in \"+ (\" at none:2" @test Meta.parse("1 -+(a=1, b=2)") == Expr(:call, :-, 1, Expr(:call, :+, Expr(:kw, :a, 1), Expr(:kw, :b, 2))) @@ -1397,7 +1406,7 @@ end @test Meta.parse("-√2") == Expr(:call, :-, Expr(:call, :√, 2)) @test Meta.parse("√3x^2") == Expr(:call, :*, Expr(:call, :√, 3), Expr(:call, :^, :x, 2)) @test Meta.parse("-3x^2") == Expr(:call, :*, -3, Expr(:call, :^, :x, 2)) -@test_throws ParseError 
Meta.parse("2!3") +@test_parseerror "2!3" # issue #27914 @test Meta.parse("2f(x)") == Expr(:call, :*, 2, Expr(:call, :f, :x)) @@ -1407,7 +1416,7 @@ end @test Meta.parse("2(x)") == Expr(:call, :*, 2, :x) @test Meta.parse("2(x)y") == Expr(:call, :*, 2, :x, :y) -@test_throws ParseError Meta.parse("a.: b") +@test_parseerror "a.: b" @test Meta.parse("a.:end") == Expr(:., :a, QuoteNode(:end)) @test Meta.parse("a.:catch") == Expr(:., :a, QuoteNode(:catch)) @test Meta.parse("a.end") == Expr(:., :a, QuoteNode(:end)) @@ -1423,7 +1432,7 @@ let len = 10 end # Module name cannot be a reserved word. -@test_throws ParseError Meta.parse("module module end") +@test_parseerror "module module end" @test Meta.lower(@__MODULE__, :(global true)) == Expr(:error, "invalid syntax in \"global\" declaration") @test Meta.lower(@__MODULE__, :(let ccall end)) == Expr(:error, "invalid identifier name \"ccall\"") @@ -1440,7 +1449,7 @@ end # issue #27690 # previously, this was allowed since it thought `end` was being used for indexing. # however the quote should disable that context. -@test_throws ParseError Meta.parse("Any[:(end)]") +@test_parseerror "Any[:(end)]" # issue #17781 let ex = Meta.lower(@__MODULE__, Meta.parse(" @@ -1671,20 +1680,20 @@ let x = @macroexpand @foo28244(var"let") end # #16356 -@test_throws ParseError Meta.parse("0xapi") +@test_parseerror "0xapi" # #22523 #22712 -@test_throws ParseError Meta.parse("a?b:c") -@test_throws ParseError Meta.parse("a ?b:c") -@test_throws ParseError Meta.parse("a ? b:c") -@test_throws ParseError Meta.parse("a ? b :c") -@test_throws ParseError Meta.parse("?") +@test_parseerror "a?b:c" +@test_parseerror "a ?b:c" +@test_parseerror "a ? b:c" +@test_parseerror "a ? b :c" +@test_parseerror "?" 
# #13079 @test Meta.parse("1<<2*3") == :((1<<2)*3) # #19987 -@test_throws ParseError Meta.parse("try ; catch f() ; end") +@test_parseerror "try ; catch f() ; end" # #23076 @test :([1,2;]) == Expr(:vect, Expr(:parameters), 1, 2) @@ -1721,8 +1730,8 @@ end @test Meta.lower(@__MODULE__, :(f(x) = (y = x + 1; ccall((:a, y), Cvoid, ())))) == Expr(:error, "ccall function name and library expression cannot reference local variables") -@test_throws ParseError Meta.parse("x.'") -@test_throws ParseError Meta.parse("0.+1") +@test_parseerror "x.'" +@test_parseerror "0.+1" # #24221 @test Meta.isexpr(Meta.lower(@__MODULE__, :(a=_)), :error) @@ -1816,7 +1825,7 @@ end @test Meta.parse("1⁝2") == Expr(:call, :⁝, 1, 2) @test Meta.parse("1..2") == Expr(:call, :.., 1, 2) # we don't parse chains of these since the associativity and meaning aren't clear -@test_throws ParseError Meta.parse("1..2..3") +@test_parseerror "1..2..3" # issue #30048 @test Meta.isexpr(Meta.lower(@__MODULE__, :(for a in b @@ -1990,9 +1999,9 @@ end @test Meta.parse("var\"#\"") === Symbol("#") @test Meta.parse("var\"true\"") === Symbol("true") @test Meta.parse("var\"false\"") === Symbol("false") -@test_throws ParseError Meta.parse("var\"#\"x") # Reject string macro-like suffix -@test_throws ParseError Meta.parse("var \"#\"") -@test_throws ParseError Meta.parse("var\"for\" i = 1:10; end") +@test_parseerror "var\"#\"x" # Reject string macro-like suffix +@test_parseerror "var \"#\"" +@test_parseerror "var\"for\" i = 1:10; end" # A few cases which would be ugly to deal with if var"#" were a string macro: @test Meta.parse("var\"#\".var\"a-b\"") == Expr(:., Symbol("#"), QuoteNode(Symbol("a-b"))) @test Meta.parse("export var\"#\"") == Expr(:export, Symbol("#")) @@ -2217,7 +2226,7 @@ end end # line break in : expression disallowed -@test_throws Meta.ParseError Meta.parse("[1 :\n2] == [1:2]") +@test_parseerror "[1 :\n2] == [1:2]" # added ⟂ to operator precedence (#24404) @test Meta.parse("a ⟂ b ⟂ c") == Expr(:comparison, :a, 
:⟂, :b, :⟂, :c) @@ -2238,7 +2247,8 @@ end end # only allow certain characters after interpolated vars (#25231) -@test Meta.parse("\"\$x෴ \"",raise=false) == Expr(:error, "interpolated variable \$x ends with invalid character \"෴\"; use \"\$(x)\" instead.") +@test_parseerror("\"\$x෴ \"", + "interpolated variable \$x ends with invalid character \"෴\"; use \"\$(x)\" instead.") @test Base.incomplete_tag(Meta.parse("\"\$foo", raise=false)) === :string @testset "issue #30341" begin @@ -2277,14 +2287,11 @@ end err = Expr( :error, - "\":\" in \"$imprt\" syntax can only be used when importing a single module. " * - "Split imports into multiple lines." ) - ex = Meta.parse("$imprt A, B: x, y", raise=false) - @test ex == err - - ex = Meta.parse("$imprt A: x, B: y", raise=false) - @test ex == err + @test_parseerror("$imprt A, B: x, y", + "\":\" in \"$imprt\" syntax can only be used when importing a single module. Split imports into multiple lines.") + @test_parseerror("$imprt A: x, B: y", + "\":\" in \"$imprt\" syntax can only be used when importing a single module. 
Split imports into multiple lines.") end end @@ -2304,24 +2311,31 @@ let exc = try eval(:(f(x,x)=1)) catch e ; e ; end @test !occursin("incorrect_file", exc.msg) end -# issue #34967 -@test_throws LoadError("string", 2, ErrorException("syntax: invalid UTF-8 sequence")) include_string(@__MODULE__, - "x34967 = 1\n# Halloa\xf5b\nx34967 = 2") -@test x34967 == 1 -@test_throws LoadError("string", 1, ErrorException("syntax: invalid UTF-8 sequence")) include_string(@__MODULE__, - "x\xf5 = 3\n# Halloa\xf5b\nx34967 = 4") -@test_throws LoadError("string", 3, ErrorException("syntax: invalid UTF-8 sequence")) include_string(@__MODULE__, - """ - # line 1 - # line 2 - # Hello\xf5b - x34967 = 6 - """) - -@test Meta.parse("aa\u200b_", raise=false) == - Expr(:error, "invisible character \\u200b near column 3") -@test Meta.parse("aa\UE0080", raise=false) == - Expr(:error, "invalid character \"\Ue0080\" near column 3") +@testset "issue #34967" begin + @test_parseerror "#\xf5b\nx" "invalid UTF-8 sequence" + + # Test line UTF-8 errors with line numbers + let ex = Meta.parseall("x\n#\xf5b\ny") + @test Meta.isexpr(ex, :toplevel, 4) && Meta.isexpr(last(ex.args), :error) + @test ex.args[3] == LineNumberNode(2,:none) + end + let ex = Meta.parseall("x\xf5\n#\xf5b\ny") + @test Meta.isexpr(ex, :toplevel, 2) && Meta.isexpr(last(ex.args), :error) + @test ex.args[1] == LineNumberNode(1,:none) + end + let ex = Meta.parseall("#line1\n#line2\n#\xf5b\ny") + @test Meta.isexpr(ex, :toplevel, 2) && Meta.isexpr(last(ex.args), :error) + @test ex.args[1] == LineNumberNode(3,:none) + end +end + +@test_parseerror "aa\u200b_" "invisible character \\u200b near column 3" +@test_parseerror "aa\UE0080" "invalid character \"\Ue0080\" near column 3" + +@testset "unrecognized escapes in string/char literals" begin + @test_parseerror "\"\\.\"" + @test_parseerror "\'\\.\'" +end # issue #31238 a31238, b31238 = let x @@ -2390,8 +2404,8 @@ end @test x == 6 # issue #36196 -@test_throws ParseError("\"for\" at none:1 expected 
\"end\", got \")\"") Meta.parse("(for i=1; println())") -@test_throws ParseError("\"try\" at none:1 expected \"end\", got \")\"") Meta.parse("(try i=1; println())") +@test_parseerror "(for i=1; println())" "\"for\" at none:1 expected \"end\", got \")\"" +@test_parseerror "(try i=1; println())" "\"try\" at none:1 expected \"end\", got \")\"" # issue #36272 macro m36272() @@ -2438,10 +2452,10 @@ end let (-->) = (+) @test (40 --> 2) == 42 end -@test_throws ParseError("invalid operator \"<---\"") Meta.parse("1<---2") -@test_throws ParseError("invalid operator \".<---\"") Meta.parse("1 .<--- 2") -@test_throws ParseError("invalid operator \"--\"") Meta.parse("a---b") -@test_throws ParseError("invalid operator \".--\"") Meta.parse("a.---b") +@test_parseerror("1<---2", "invalid operator \"<---\"") +@test_parseerror("1 .<--- 2", "invalid operator \".<---\"") +@test_parseerror("a---b", "invalid operator \"--\"") +@test_parseerror("a.---b", "invalid operator \".--\"") # issue #37228 # NOTE: the `if` needs to be at the top level @@ -2476,15 +2490,14 @@ end @test :(if true 'a' else 1 end) == Expr(:if, true, quote 'a' end, quote 1 end) # issue #37664 -@test_throws ParseError("extra token \"b\" after end of expression") Meta.parse("a b") -@test_throws ParseError("extra token \"b\" after end of expression") Meta.parse("a#==#b") -@test_throws ParseError("extra token \"b\" after end of expression") Meta.parse("a #==#b") -@test_throws ParseError("extra token \"b\" after end of expression") Meta.parse("a#==# b") - -@test_throws ParseError("extra token \"2\" after end of expression") Meta.parse("1 2") -@test_throws ParseError("extra token \"2\" after end of expression") Meta.parse("1#==#2") -@test_throws ParseError("extra token \"2\" after end of expression") Meta.parse("1 #==#2") -@test_throws ParseError("extra token \"2\" after end of expression") Meta.parse("1#==# 2") +@test_parseerror("a b", "extra token \"b\" after end of expression") +@test_parseerror("a#==#b", "extra token \"b\" 
after end of expression") +@test_parseerror("a #==#b", "extra token \"b\" after end of expression") +@test_parseerror("a#==# b", "extra token \"b\" after end of expression") +@test_parseerror("1 2", "extra token \"2\" after end of expression") +@test_parseerror("1#==#2", "extra token \"2\" after end of expression") +@test_parseerror("1 #==#2", "extra token \"2\" after end of expression") +@test_parseerror("1#==# 2", "extra token \"2\" after end of expression") @test size([1#==#2#==#3]) == size([1 2 3]) @test size([1#==#2#==#3]) == size([1 2 3]) # tabs @@ -2507,9 +2520,7 @@ end Meta.parse("if#==#x0#==#y+1#==#else#==#z#==#end") @test Meta.parse("function(x) x end") == Meta.parse("function(x)#==#x#==#end") @test Meta.parse("a ? b : c") == Meta.parse("a#==#?#==#b#==#:#==#c") -@test_throws ParseError("space before \"(\" not allowed in \"f (\" at none:1") begin - Meta.parse("f#==#(x)=x") -end +@test_parseerror("f#==#(x)=x", "space before \"(\" not allowed in \"f (\" at none:1") @test Meta.parse("try f() catch e g() finally h() end") == Meta.parse("try#==#f()#==#catch#==#e#==#g()#==#finally#==#h()#==#end") @test Meta.parse("@m a b") == Meta.parse("@m#==#a#==#b") @@ -2541,11 +2552,11 @@ end @test B37890(1.0, 2.0f0) isa B37890{Int, Int8} # import ... 
as -@test_throws ParseError("invalid syntax \"using A as ...\"") Meta.parse("using A as B") -@test_throws ParseError("invalid syntax \"using A.b as ...\"") Meta.parse("using A.b as B") -@test_throws ParseError("invalid syntax \"using A.b as ...\"") Meta.parse("using X, A.b as B") -@test_throws ParseError("invalid syntax \"import A as B:\"") Meta.parse("import A as B: c") -@test_throws ParseError("invalid syntax \"import A.b as B:\"") Meta.parse("import A.b as B: c") +@test_parseerror("using A as B", "invalid syntax \"using A as ...\"") +@test_parseerror("using A.b as B", "invalid syntax \"using A.b as ...\"") +@test_parseerror("using X, A.b as B", "invalid syntax \"using A.b as ...\"") +@test_parseerror("import A as B: c", "invalid syntax \"import A as B:\"") +@test_parseerror("import A.b as B: c", "invalid syntax \"import A.b as B:\"") module TestImportAs using Test @@ -2584,7 +2595,9 @@ import .Mod2.y as y2 @test y2 == 2 @test !@isdefined(y) -@test_throws ErrorException eval(:(import .Mod.x as (a.b))) +# Test that eval rejects the invalid syntax `import .Mod.x as (a.b)` +@test_throws ErrorException eval( + Expr(:import, Expr(:as, Expr(:., :., :Mod, :x), Expr(:., :a, QuoteNode(:b))))) import .Mod.maybe_undef as mu @test_throws UndefVarError mu @@ -2662,10 +2675,10 @@ end @test Meta.isexpr(Meta.parse(""" f(i for i in 1:3)""").args[2], :generator) - @test_throws Meta.ParseError Meta.parse(""" + @test_parseerror """ for i in 1:3 - end""") + end""" end # PR #37973 @@ -2820,7 +2833,7 @@ end Expr(:nrow, 1, Expr(:row, 0, 9, 3), Expr(:row, 4, 5, 4))) @test :([1 ; 2 ;; 3 ; 4]) == Expr(:ncat, 2, Expr(:nrow, 1, 1, 2), Expr(:nrow, 1, 3, 4)) - @test_throws ParseError Meta.parse("[1 2 ;; 3 4]") # cannot mix spaces and ;; except as line break + @test_parseerror "[1 2 ;; 3 4]" # cannot mix spaces and ;; except as line break @test :([1 2 ;; 3 4]) == :([1 2 3 4]) @test :([1 2 ;; @@ -2830,8 +2843,8 @@ end @test Meta.parse("[1;\n\n]") == :([1;]) @test Meta.parse("[1\n;]") == :([1;]) 
# semicolons following a linebreak are fine @test Meta.parse("[1\n;;; 2]") == :([1;;; 2]) - @test_throws ParseError Meta.parse("[1;\n;2]") # semicolons cannot straddle a line break - @test_throws ParseError Meta.parse("[1; ;2]") # semicolons cannot be separated by a space + @test_parseerror "[1;\n;2]" # semicolons cannot straddle a line break + @test_parseerror "[1; ;2]" # semicolons cannot be separated by a space end # issue #25652 @@ -3104,10 +3117,10 @@ end @test fails(error) @test !fails(() -> 1 + 2) - @test_throws ParseError Meta.parse("try foo() else bar() end") - @test_throws ParseError Meta.parse("try foo() else bar() catch; baz() end") - @test_throws ParseError Meta.parse("try foo() catch; baz() finally foobar() else bar() end") - @test_throws ParseError Meta.parse("try foo() finally foobar() else bar() catch; baz() end") + @test_parseerror "try foo() else bar() end" + @test_parseerror "try foo() else bar() catch; baz() end" + @test_parseerror "try foo() catch; baz() finally foobar() else bar() end" + @test_parseerror "try foo() finally foobar() else bar() catch; baz() end" err = try try @@ -3172,23 +3185,23 @@ end @test x == 1 end -@test_throws ParseError Meta.parse(""" +@test_parseerror """ function checkUserAccess(u::User) if u.accessLevel != "user\u202e \u2066# users are not allowed\u2069\u2066" return true end return false end -""") +""" -@test_throws ParseError Meta.parse(""" +@test_parseerror """ function checkUserAccess(u::User) #=\u202e \u2066if (u.isAdmin)\u2069 \u2066 begin admins only =# return true #= end admin only \u202e \u2066end\u2069 \u2066=# return false end -""") +""" @testset "empty nd arrays" begin @test :([]) == Expr(:vect) @@ -3219,9 +3232,9 @@ end ;; ]) == Expr(:ncat, 2) - @test_throws ParseError Meta.parse("[; ;]") - @test_throws ParseError Meta.parse("[;; ;]") - @test_throws ParseError Meta.parse("[;\n;]") + @test_parseerror "[; ;]" + @test_parseerror "[;; ;]" + @test_parseerror "[;\n;]" end @test Meta.parseatom("@foo", 1; 
filename="foo", lineno=7) == (Expr(:macrocall, :var"@foo", LineNumberNode(7, :foo)), 5) @@ -3415,14 +3428,12 @@ f45162(f) = f(x=1) @test first(methods(f45162)).called != 0 # issue #45024 -@test_throws ParseError("expected assignment after \"const\"") Meta.parse("const x") -@test_throws ParseError("expected assignment after \"const\"") Meta.parse("const x::Int") +@test_parseerror "const x" "expected assignment after \"const\"" +@test_parseerror "const x::Int" "expected assignment after \"const\"" # these cases have always been caught during lowering, since (const (global x)) is not # ambiguous with the lowered form (const x), but that could probably be changed. -@test Meta.lower(@__MODULE__, :(global const x)) == Expr(:error, "expected assignment after \"const\"") -@test Meta.lower(@__MODULE__, :(global const x::Int)) == Expr(:error, "expected assignment after \"const\"") -@test Meta.lower(@__MODULE__, :(const global x)) == Expr(:error, "expected assignment after \"const\"") -@test Meta.lower(@__MODULE__, :(const global x::Int)) == Expr(:error, "expected assignment after \"const\"") +@test Meta.lower(@__MODULE__, Expr(:const, Expr(:global, :x))) == Expr(:error, "expected assignment after \"const\"") +@test Meta.lower(@__MODULE__, Expr(:const, Expr(:global, Expr(:(::), :x, :Int)))) == Expr(:error, "expected assignment after \"const\"") @testset "issue 25072" begin @test '\xc0\x80' == reinterpret(Char, 0xc0800000) From 65523e459daa455500202196abedc64bce77bef3 Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Sat, 17 Jun 2023 20:46:45 +0000 Subject: [PATCH 192/290] Add some JIT docs (#50168) --- doc/src/devdocs/jit.md | 78 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 doc/src/devdocs/jit.md diff --git a/doc/src/devdocs/jit.md b/doc/src/devdocs/jit.md new file mode 100644 index 0000000000000..f33b968ad3948 --- /dev/null +++ b/doc/src/devdocs/jit.md @@ -0,0 +1,78 @@ +# JIT 
Design and Implementation + +This document explains the design and implementation of Julia's JIT, after codegen has finished and unoptimized LLVM IR has been produced. The JIT is responsible for optimizing and compiling this IR to machine code, and for linking it into the current process and making the code available for execution. + +## Introduction + +The JIT is responsible for managing compilation resources, looking up previously compiled code, and compiling new code. It is primarily built on LLVM's [On-Request-Compilation](https://llvm.org/docs/ORCv2.html) (ORCv2) technology, which provides support for a number of useful features such as concurrent compilation, lazy compilation, and the ability to compile code in a separate process. Though LLVM provides a basic JIT compiler in the form of LLJIT, Julia uses many ORCv2 APIs directly to create its own custom JIT compiler. + +## Overview + +![Diagram of the compiler flow](./img/compiler_diagram.png) + +Codegen produces an LLVM module containing IR for one or more Julia functions from the original Julia SSA IR produced by type inference (labeled as translate on the compiler diagram above). It also produces a mapping of code-instance to LLVM function name. However, though some optimizations have been applied by the Julia-based compiler on Julia IR, the LLVM IR produced by codegen still contains many opportunities for optimization. Thus, the first step the JIT takes is to run a target-independent optimization pipeline[^tdp] on the LLVM module. Then, the JIT runs a target-dependent optimization pipeline, which includes target-specific optimizations and code generation, and outputs an object file. Finally, the JIT links the resulting object file into the current process and makes the code available for execution. All of this is controlled by code in `src/jitlayers.cpp`. 
+ +[^tdp]: This is not a totally target-independent pipeline, as transformations such as vectorization rely upon target information such as vector register width and cost modeling. Additionally, codegen itself makes a few target-dependent assumptions, and the optimization pipeline will take advantage of that knowledge. + +Currently, only one thread at a time is permitted to enter the optimize-compile-link pipeline, due to restrictions imposed by one of our linkers (RuntimeDyld). However, the JIT is designed to support concurrent optimization and compilation, and the linker restriction is expected to be lifted in the future when RuntimeDyld has been fully superseded on all platforms. + +## Optimization Pipeline + +The optimization pipeline is based off LLVM's new pass manager, but the pipeline is customized for Julia's needs. The pipeline is defined in `src/pipeline.cpp`, and broadly proceeds through a number of stages as detailed below. + +1. Early Simplification + 1. These passes are mainly used to simplify the IR and canonicalize patterns so that later passes can identify those patterns more easily. Additionally, various intrinsic calls such as branch prediction hints and annotations are lowered into other metadata or other IR features. [`SimplifyCFG`](https://llvm.org/docs/Passes.html#simplifycfg-simplify-the-cfg) (simplify control flow graph), [`DCE`](https://llvm.org/docs/Passes.html#dce-dead-code-elimination) (dead code elimination), and [`SROA`](https://llvm.org/docs/Passes.html#sroa-scalar-replacement-of-aggregates) (scalar replacement of aggregates) are some of the key players here. +2. Early Optimization + 1. These passes are typically cheap and are primarily focused around reducing the number of instructions in the IR and propagating knowledge to other instructions.
For example, [`EarlyCSE`](https://en.wikipedia.org/wiki/Common_subexpression_elimination) is used to perform common subexpression elimination, and [`InstCombine`](https://llvm.org/docs/Passes.html#instcombine-combine-redundant-instructions) and [`InstSimplify`](https://llvm.org/doxygen/classllvm_1_1InstSimplifyPass.html#details) perform a number of small peephole optimizations to make operations less expensive. +3. Loop Optimization + 1. These passes canonicalize and simplify loops. Loops are often hot code, which makes loop optimization extremely important for performance. Key players here include [`LoopRotate`](https://llvm.org/docs/Passes.html#loop-rotate-rotate-loops), [`LICM`](https://llvm.org/docs/Passes.html#licm-loop-invariant-code-motion), and [`LoopFullUnroll`](https://llvm.org/docs/Passes.html#loop-unroll-unroll-loops). Some bounds check elimination also happens here, as a result of the [`IRCE`](https://llvm.org/doxygen/InductiveRangeCheckElimination_8cpp_source.html) pass which can prove certain bounds are never exceeded. +4. Scalar Optimization + 1. The scalar optimization pipeline contains a number of more expensive, but more powerful passes such as [`GVN`](https://llvm.org/docs/Passes.html#gvn-global-value-numbering) (global value numbering), [`SCCP`](https://llvm.org/docs/Passes.html#sccp-sparse-conditional-constant-propagation) (sparse conditional constant propagation), and another round of bounds check elimination. These passes are expensive, but they can often remove large amounts of code and make vectorization much more successful and effective. Several other simplification and optimization passes intersperse the more expensive ones to reduce the amount of work they have to do. +5. Vectorization + 1. [Automatic vectorization](https://en.wikipedia.org/wiki/Automatic_vectorization) is an extremely powerful transformation for CPU-intensive code. 
Briefly, vectorization allows execution of a [single instruction on multiple data](https://en.wikipedia.org/wiki/Single_instruction,_multiple_data) (SIMD), e.g. performing 8 addition operations at the same time. However, proving code to be both capable of vectorization and profitable to vectorize is difficult, and this relies heavily on the prior optimization passes to massage the IR into a state where vectorization is worth it. +6. Intrinsic Lowering + 1. Julia inserts a number of custom intrinsics, for reasons such as object allocation, garbage collection, and exception handling. These intrinsics were originally placed to make optimization opportunities more obvious, but they are now lowered into LLVM IR to enable the IR to be emitted as machine code. +7. Cleanup + 1. These passes are last-chance optimizations, and perform small optimizations such as fused multiply-add propagation and division-remainder simplification. Additionally, targets that do not support half-precision floating point numbers will have their half-precision instructions lowered into single-precision instructions here, and passes are added to provide sanitizer support. + +## Target-Dependent Optimization and Code Generation + +LLVM provides target-dependent optimization and machine code generation in the same pipeline, located in the TargetMachine for a given platform. These passes include instruction selection, instruction scheduling, register allocation, and machine code emission. The LLVM documentation provides a good overview of the process, and the LLVM source code is the best place to look for details on the pipeline and passes. + +## Linking + +Currently, Julia is transitioning between two linkers: the older RuntimeDyld linker, and the newer [JITLink](https://llvm.org/docs/JITLink.html) linker. 
JITLink contains a number of features that RuntimeDyld does not have, such as concurrent and reentrant linking, but currently lacks good support for profiling integrations and does not yet support all of the platforms that RuntimeDyld supports. Over time, JITLink is expected to replace RuntimeDyld entirely. Further details on JITLink can be found in the LLVM documentation. + +## Execution + +Once the code has been linked into the current process, it is available for execution. This fact is made known to the generating codeinst by updating the `invoke`, `specsigflags`, and `specptr` fields appropriately. Codeinsts support upgrading `invoke`, `specsigflags`, and `specptr` fields, so long as every combination of these fields that exists at any given point in time is valid to be called. This allows the JIT to update these fields without invalidating existing codeinsts, supporting a potential future concurrent JIT. Specifically, the following states may be valid: +1. `invoke` is NULL, `specsigflags` is 0b00, `specptr` is NULL + 1. This is the initial state of a codeinst, and indicates that the codeinst has not yet been compiled. +2. `invoke` is non-null, `specsigflags` is 0b00, `specptr` is NULL + 1. This indicates that the codeinst was not compiled with any specialization, and that the codeinst should be invoked directly. Note that in this instance, `invoke` does not read either the `specsigflags` or `specptr` fields, and therefore they may be modified without invalidating the `invoke` pointer. +3. `invoke` is non-null, `specsigflags` is 0b10, `specptr` is non-null + 1. This indicates that the codeinst was compiled, but a specialized function signature was deemed unnecessary by codegen. +4. `invoke` is non-null, `specsigflags` is 0b11, `specptr` is non-null + 1. This indicates that the codeinst was compiled, and a specialized function signature was deemed necessary by codegen. The `specptr` field contains a pointer to the specialized function signature. 
The `invoke` pointer is permitted to read both `specsigflags` and `specptr` fields. + +In addition, there are a number of different transitional states that occur during the update process. To account for these potential situations, the following write and read patterns should be used when dealing with these codeinst fields. + +1. When writing `invoke`, `specsigflags`, and `specptr`: + 1. Perform an atomic compare-exchange operation on `specptr` assuming the old value was NULL. This compare-exchange operation should have at least acquire-release ordering, to provide ordering guarantees of the remaining memory operations in the write. + 2. If `specptr` was non-null, cease the write operation and wait for bit 0b10 of `specsigflags` to be written. + 3. Write the new low bit of `specsigflags` to its final value. This may be a relaxed write. + 4. Write the new `invoke` pointer to its final value. This must have at least a release memory ordering to synchronize with reads of `invoke`. + 5. Set the second bit of `specsigflags` to 1. This must have at least a release memory ordering to synchronize with reads of `specsigflags`. This step completes the write operation and announces to all other threads that all fields have been set. +2. When reading all of `invoke`, `specsigflags`, and `specptr`: + 1. Read the `invoke` field with at least an acquire memory ordering. This load will be referred to as `initial_invoke`. + 2. If `initial_invoke` is NULL, the codeinst is not yet executable. `invoke` is NULL, `specsigflags` may be treated as 0b00, `specptr` may be treated as NULL. + 3. Read the `specptr` field with at least an acquire memory ordering. + 4. If `specptr` is NULL, then the `initial_invoke` pointer must not be relying on `specptr` to guarantee correct execution. Therefore, `invoke` is non-null, `specsigflags` may be treated as 0b00, `specptr` may be treated as NULL. + 5.
If `specptr` is non-null, then `initial_invoke` might not be the final `invoke` field that uses `specptr`. This can occur if `specptr` has been written, but `invoke` has not yet been written. Therefore, spin on the second bit of `specsigflags` until it is set to 1 with at least acquire memory ordering. + 6. Re-read the `invoke` field with at least an acquire memory ordering. This load will be referred to as `final_invoke`. + 7. Read the `specsigflags` field with any memory ordering. + 8. `invoke` is `final_invoke`, `specsigflags` is the value read in step 7, `specptr` is the value read in step 3. +3. When updating a `specptr` to a different but equivalent function pointer: + 1. Perform a release store of the new function pointer to `specptr`. Races here must be benign, as the old function pointer is required to still be valid, and any new ones are also required to be valid as well. Once a pointer has been written to `specptr`, it must always be callable whether or not it is later overwritten. + +Although these write, read, and update steps are complicated, they ensure that the JIT can update codeinsts without invalidating existing codeinsts, and that the JIT can update codeinsts without invalidating existing `invoke` pointers. This allows the JIT to potentially reoptimize functions at higher optimization levels in the future, and also will allow the JIT to support concurrent compilation of functions in the future. 
From 41e8d7f8e273f2e05187a80c03b2720ac785233e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= Date: Sun, 18 Jun 2023 17:42:04 +0100 Subject: [PATCH 193/290] [build] Allow automatically applying Julia's patches to LLVM source --- deps/llvm.mk | 15 +++++++++++++++ deps/llvm.version | 16 ++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/deps/llvm.mk b/deps/llvm.mk index 83b9a66ec608e..37f8f9d6ea7e7 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -239,6 +239,21 @@ endif # declare that all patches must be applied before running ./configure $(LLVM_BUILDDIR_withtype)/build-configured: | $(LLVM_PATCH_PREV) +# Apply Julia's specific patches if requested, e.g. if not using Julia's fork of LLVM. +ifeq ($(LLVM_APPLY_JULIA_PATCHES), 1) +# Download Julia's patchset. +$(BUILDDIR)/julia-patches.patch: + $(JLDOWNLOAD) $@ $(LLVM_JULIA_DIFF_GITHUB_REPO)/compare/$(LLVM_BASE_REF)...$(LLVM_JULIA_REF).diff + +# Apply the patch. +$(SRCCACHE)/$(LLVM_SRC_DIR)/julia-patches.patch-applied: $(BUILDDIR)/julia-patches.patch $(SRCCACHE)/$(LLVM_SRC_DIR)/source-extracted + cd $(SRCCACHE)/$(LLVM_SRC_DIR) && patch -p1 < $(realpath $<) + echo 1 > $@ + +# Require application of Julia's patchset before configuring LLVM. 
+$(LLVM_BUILDDIR_withtype)/build-configured: | $(SRCCACHE)/$(LLVM_SRC_DIR)/julia-patches.patch-applied +endif + $(LLVM_BUILDDIR_withtype)/build-configured: $(SRCCACHE)/$(LLVM_SRC_DIR)/source-extracted mkdir -p $(dir $@) cd $(dir $@) && \ diff --git a/deps/llvm.version b/deps/llvm.version index e35db3bd6aed2..4e7969994141e 100644 --- a/deps/llvm.version +++ b/deps/llvm.version @@ -1,7 +1,23 @@ +# -*- makefile -*- + ## jll artifact LLVM_JLL_NAME := libLLVM LLVM_ASSERT_JLL_VER := 15.0.7+5 ## source build +# Version number of LLVM LLVM_VER := 15.0.7 +# Git branch name in `LLVM_GIT_URL` repository LLVM_BRANCH=julia-15.0.7-5 +# Git ref in `LLVM_GIT_URL` repository LLVM_SHA1=julia-15.0.7-5 + +## Following options are used to automatically fetch patchset from Julia's fork. This is +## useful if you want to build an external LLVM while still applying Julia's patches. +# Set to 1 if you want to automatically apply Julia's patches to a different fork of LLVM. +LLVM_APPLY_JULIA_PATCHES := 0 +# GitHub repository to use for fetching the Julia patches to apply to LLVM source code. +LLVM_JULIA_DIFF_GITHUB_REPO := https://github.com/llvm/llvm-project +# Base GitHub ref for generating the diff. +LLVM_BASE_REF := llvm:llvmorg-15.0.7 +# Julia fork's GitHub ref for generating the diff. 
+LLVM_JULIA_REF := JuliaLang:julia-15.0.7-5 From 90521dd63913aafc08eabbe2f4f8e0806cfeecb9 Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Mon, 19 Jun 2023 03:54:21 +0000 Subject: [PATCH 194/290] Emphasize the default base of multiversioning in the documentation, and discourage the implicit default base (#50211) --- doc/src/devdocs/sysimg.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/src/devdocs/sysimg.md b/doc/src/devdocs/sysimg.md index 6706e30ce97b1..40fcd3fa602f8 100644 --- a/doc/src/devdocs/sysimg.md +++ b/doc/src/devdocs/sysimg.md @@ -42,6 +42,9 @@ All features supported by LLVM are supported and a feature can be disabled with (`+` prefix is also allowed and ignored to be consistent with LLVM syntax). Additionally, a few special features are supported to control the function cloning behavior. +!!! note + It is good practice to specify either `clone_all` or `base(<n>)` for every target apart from the first one. This makes it explicit which targets have all functions cloned, and which targets are based on other targets. If this is not done, the default behavior is to not clone every function, and to use the first target's function definition as the fallback when not cloning a function. + 1.
`clone_all` By default, only functions that are the most likely to benefit from From f555b4b67d44986dc471f08fd66db40805f2f22d Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Mon, 19 Jun 2023 12:03:23 -0400 Subject: [PATCH 195/290] Tweak cache pidlocking (#50214) --- base/loading.jl | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index f5c7aa28395ef..b9742ec045b19 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -2819,11 +2819,20 @@ global mkpidlock_hook global trymkpidlock_hook global parse_pidfile_hook -# allows processes to wait if another process is precompiling a given source already -function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String) +# The preferences hash is only known after precompilation so just assume no preferences +# meaning that if all other conditions are equal, the same package cannot be precompiled +# with different preferences at the same time. +compilecache_pidfile_path(pkg::PkgId) = compilecache_path(pkg, UInt64(0)) * ".pidfile" + +# Allows processes to wait if another process is precompiling a given source already. +# The lock file is deleted and precompilation will proceed after `stale_age` seconds if +# - the locking process no longer exists +# - the lock is held by another host, since processes cannot be checked remotely +# or after `stale_age * 25` seconds if it does still exist. +function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String; stale_age=60) if @isdefined(mkpidlock_hook) && @isdefined(trymkpidlock_hook) && @isdefined(parse_pidfile_hook) - pidfile = string(srcpath, ".pidlock") - cachefile = invokelatest(trymkpidlock_hook, f, pidfile) + pidfile = compilecache_pidfile_path(pkg) + cachefile = invokelatest(trymkpidlock_hook, f, pidfile; stale_age) if cachefile === false pid, hostname, age = invokelatest(parse_pidfile_hook, pidfile) verbosity = isinteractive() ? 
CoreLogging.Info : CoreLogging.Debug @@ -2834,7 +2843,7 @@ function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String) end # wait until the lock is available, but don't actually acquire it # returning nothing indicates a process waited for another - return invokelatest(mkpidlock_hook, Returns(nothing), pidfile) + return invokelatest(mkpidlock_hook, Returns(nothing), pidfile; stale_age) end return cachefile else From da9cc1a39276451f7934b360fa4e68c936643482 Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Mon, 19 Jun 2023 16:45:41 +0000 Subject: [PATCH 196/290] Expose the newpm pipeline to a C api (#50210) --- src/codegen-stubs.c | 3 +++ src/jl_exported_funcs.inc | 1 + src/pipeline.cpp | 31 +++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+) diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c index 0853a090183dd..f2c8c705bd3dc 100644 --- a/src/codegen-stubs.c +++ b/src/codegen-stubs.c @@ -109,6 +109,9 @@ JL_DLLEXPORT uint64_t jl_getUnwindInfo_fallback(uint64_t dwAddr) JL_DLLEXPORT void jl_add_optimization_passes_fallback(void *PM, int opt_level, int lower_intrinsics) UNAVAILABLE +JL_DLLEXPORT void jl_build_newpm_pipeline_fallback(void *MPM, void *PB, int Speedup, int Size, + int lower_intrinsics, int dump_native, int external_use, int llvm_only) UNAVAILABLE + JL_DLLEXPORT void LLVMExtraAddLowerSimdLoopPass_fallback(void *PM) UNAVAILABLE JL_DLLEXPORT void LLVMExtraAddFinalLowerGCPass_fallback(void *PM) UNAVAILABLE diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 898656c142480..33b431fe12a76 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -558,6 +558,7 @@ YY(jl_getUnwindInfo) \ YY(jl_get_libllvm) \ YY(jl_add_optimization_passes) \ + YY(jl_build_newpm_pipeline) \ YY(LLVMExtraAddLowerSimdLoopPass) \ YY(LLVMExtraAddFinalLowerGCPass) \ YY(LLVMExtraAddPropagateJuliaAddrspaces) \ diff --git a/src/pipeline.cpp b/src/pipeline.cpp index 
6afcac5ea8b7c..06d5aa2d3bfa8 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -571,6 +571,37 @@ static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationL MPM.addPass(AfterOptimizationMarkerPass()); } +extern "C" JL_DLLEXPORT_CODEGEN void jl_build_newpm_pipeline_impl(void *MPM, void *PB, int Speedup, int Size, + int lower_intrinsics, int dump_native, int external_use, int llvm_only) JL_NOTSAFEPOINT +{ + OptimizationLevel O; + switch (Size) { + case 1: + O = OptimizationLevel::Os; + break; + default: + O = OptimizationLevel::Oz; + break; + case 0: + switch (Speedup) { + case 0: + O = OptimizationLevel::O0; + break; + case 1: + O = OptimizationLevel::O1; + break; + case 2: + O = OptimizationLevel::O2; + break; + default: + O = OptimizationLevel::O3; + break; + } + } + buildPipeline(*reinterpret_cast(MPM), reinterpret_cast(PB), O, + OptimizationOptions{!!lower_intrinsics, !!dump_native, !!external_use, !!llvm_only}); +} + #undef JULIA_PASS namespace { From 690a5f67c13fd23c7b48e60c31bfa565c0eee861 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Mon, 19 Jun 2023 15:23:17 -0400 Subject: [PATCH 197/290] Thread through lattice in a couple more places (#50216) Now that the lattice code has stabilized, we should probably consider getting rid of the fallback methods to catch all the remaining places where we're relying on it (and to prevent any new ones from being introduced), but for now, just fix two cases I happened to run into. 
--- base/compiler/abstractinterpretation.jl | 2 +- base/compiler/typelimits.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 5fe0014ef3e60..3aa2366b48aa3 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -1588,7 +1588,7 @@ function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si:: call = abstract_call(interp, ArgInfo(nothing, ct), si, sv, max_methods) seen += 1 push!(retinfos, ApplyCallInfo(call.info, arginfo)) - res = tmerge(res, call.rt) + res = tmerge(typeinf_lattice(interp), res, call.rt) effects = merge_effects(effects, call.effects) if bail_out_apply(interp, InferenceLoopState(ct, res, effects), sv) add_remark!(interp, sv, "_apply_iterate inference reached maximally imprecise information. Bailing on.") diff --git a/base/compiler/typelimits.jl b/base/compiler/typelimits.jl index 957796f6f5c49..11b09aeacead5 100644 --- a/base/compiler/typelimits.jl +++ b/base/compiler/typelimits.jl @@ -385,7 +385,7 @@ function tmerge(lattice::OptimizerLattice, @nospecialize(typea), @nospecialize(t # type-lattice for MaybeUndef wrapper if isa(typea, MaybeUndef) || isa(typeb, MaybeUndef) - return MaybeUndef(tmerge( + return MaybeUndef(tmerge(widenlattice(lattice), isa(typea, MaybeUndef) ? typea.typ : typea, isa(typeb, MaybeUndef) ? 
typeb.typ : typeb)) end From 0da46e25c865a390b5c2de20c2d40afb41fcac0a Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Tue, 20 Jun 2023 02:23:18 +0000 Subject: [PATCH 198/290] Name LLVM variables from codegen (#50094) --- src/aotcompile.cpp | 11 +++ src/ccall.cpp | 57 +++++++++++++- src/cgutils.cpp | 145 ++++++++++++++++++++++++++++++------ src/codegen.cpp | 138 ++++++++++++++++++++++++++++------ src/intrinsics.cpp | 110 +++++++++++++++++++-------- src/jitlayers.cpp | 4 + src/jitlayers.h | 1 + test/cmdlineargs.jl | 51 +++++++------ test/llvmpasses/llvmcall.jl | 101 +++++++++++++++++++++++-- 9 files changed, 506 insertions(+), 112 deletions(-) diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index cf6378b4f926b..b1bef232e0915 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -305,6 +305,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm jl_codegen_params_t params(ctxt, std::move(target_info.first), std::move(target_info.second)); params.params = cgparams; params.imaging = imaging; + params.debug_level = jl_options.debug_level; params.external_linkage = _external_linkage; size_t compile_for[] = { jl_typeinf_world, _world }; for (int worlds = 0; worlds < 2; worlds++) { @@ -2082,6 +2083,16 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, siz jl_codegen_params_t output(*ctx, std::move(target_info.first), std::move(target_info.second)); output.world = world; output.params = ¶ms; + output.imaging = imaging_default(); + // This would be nice, but currently it causes some assembly regressions that make printed output + // differ very significantly from the actual non-imaging mode code. 
+ // // Force imaging mode for names of pointers + // output.imaging = true; + // This would also be nice, but it seems to cause OOMs on the windows32 builder + // // Force at least medium debug info for introspection + // No debug info = no variable names, + // max debug info = llvm.dbg.declare/value intrinsics which clutter IR output + output.debug_level = jl_options.debug_level; auto decls = jl_emit_code(m, mi, src, jlrettype, output); JL_UNLOCK(&jl_codegen_lock); // Might GC diff --git a/src/ccall.cpp b/src/ccall.cpp index 90f7417c03524..47496a3a91ba6 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -135,6 +135,7 @@ static Value *runtime_sym_lookup( BasicBlock *ccall_bb = BasicBlock::Create(irbuilder.getContext(), "ccall"); Constant *initnul = ConstantPointerNull::get(T_pvoidfunc); LoadInst *llvmf_orig = irbuilder.CreateAlignedLoad(T_pvoidfunc, llvmgv, Align(sizeof(void*))); + setName(emission_context, llvmf_orig, f_name + StringRef(".cached")); // This in principle needs a consume ordering so that load from // this pointer sees a valid value. However, this is not supported by // LLVM (or agreed on in the C/C++ standard FWIW) and should be @@ -143,8 +144,10 @@ static Value *runtime_sym_lookup( // invalid load from the `cglobal` but doesn't depend on the `cglobal` // value for this to happen. 
llvmf_orig->setAtomic(AtomicOrdering::Unordered); + auto nonnull = irbuilder.CreateICmpNE(llvmf_orig, initnul); + setName(emission_context, nonnull, "is_cached"); irbuilder.CreateCondBr( - irbuilder.CreateICmpNE(llvmf_orig, initnul), + nonnull, ccall_bb, dlsym_lookup); @@ -170,6 +173,7 @@ static Value *runtime_sym_lookup( llvmf = irbuilder.CreateCall(prepare_call_in(jl_builderModule(irbuilder), jldlsym_func), { libname, nameval, libptrgv }); } + setName(emission_context, llvmf, f_name + StringRef(".found")); StoreInst *store = irbuilder.CreateAlignedStore(llvmf, llvmgv, Align(sizeof(void*))); store->setAtomic(AtomicOrdering::Release); irbuilder.CreateBr(ccall_bb); @@ -179,6 +183,7 @@ static Value *runtime_sym_lookup( PHINode *p = irbuilder.CreatePHI(T_pvoidfunc, 2); p->addIncoming(llvmf_orig, enter_bb); p->addIncoming(llvmf, llvmf->getParent()); + setName(emission_context, p, f_name); return irbuilder.CreateBitCast(p, funcptype); } @@ -320,6 +325,7 @@ static Value *emit_plt( } GlobalVariable *got = prepare_global_in(jl_Module, sharedgot); LoadInst *got_val = ctx.builder.CreateAlignedLoad(got->getValueType(), got, Align(sizeof(void*))); + setName(ctx.emission_context, got_val, f_name); // See comment in `runtime_sym_lookup` above. This in principle needs a // consume ordering too. 
This is even less likely to cause issues though // since the only thing we do to this loaded pointer is to call it @@ -442,16 +448,20 @@ static Value *llvm_type_rewrite( unsigned align = std::max(DL.getPrefTypeAlignment(target_type), DL.getPrefTypeAlignment(from_type)); if (DL.getTypeAllocSize(target_type) >= DL.getTypeAllocSize(from_type)) { to = emit_static_alloca(ctx, target_type); + setName(ctx.emission_context, to, "type_rewrite_buffer"); cast(to)->setAlignment(Align(align)); from = emit_bitcast(ctx, to, from_type->getPointerTo()); } else { from = emit_static_alloca(ctx, from_type); + setName(ctx.emission_context, from, "type_rewrite_buffer"); cast(from)->setAlignment(Align(align)); to = emit_bitcast(ctx, from, target_type->getPointerTo()); } ctx.builder.CreateAlignedStore(v, from, Align(align)); - return ctx.builder.CreateAlignedLoad(target_type, to, Align(align)); + auto pun = ctx.builder.CreateAlignedLoad(target_type, to, Align(align)); + setName(ctx.emission_context, pun, "type_rewrite"); + return pun; } // --- argument passing and scratch space utilities --- @@ -508,6 +518,7 @@ static void typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_val Value *istype = ctx.builder.CreateICmpNE( ctx.builder.CreateCall(prepare_call(jlisa_func), { vx, boxed(ctx, jlto_runtime) }), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)); + setName(ctx.emission_context, istype, "istype"); BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f); BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(), "pass", ctx.f); ctx.builder.CreateCondBr(istype, passBB, failBB); @@ -545,6 +556,7 @@ static Value *julia_to_native( // pass the address of an alloca'd thing, not a box // since those are immutable. 
Value *slot = emit_static_alloca(ctx, to); + setName(ctx.emission_context, slot, "native_convert_buffer"); if (!jvinfo.ispointer()) { jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, jvinfo.tbaa); ai.decorateInst(ctx.builder.CreateStore(emit_unbox(ctx, to, jvinfo, jlto), slot)); @@ -1009,6 +1021,7 @@ static Value *box_ccall_result(jl_codectx_t &ctx, Value *result, Value *runtime_ unsigned nb = DL.getTypeStoreSize(result->getType()); MDNode *tbaa = jl_is_mutable(rt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut; Value *strct = emit_allocobj(ctx, nb, runtime_dt); + setName(ctx.emission_context, strct, "ccall_result_box"); init_bits_value(ctx, strct, result, tbaa); return strct; } @@ -1253,6 +1266,7 @@ static bool verify_ref_type(jl_codectx_t &ctx, jl_value_t* ref, jl_unionall_t *u Value *notany = ctx.builder.CreateICmpNE( boxed(ctx, runtime_sp), track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)jl_any_type))); + setName(ctx.emission_context, notany, "any_type.not"); error_unless(ctx, notany, make_errmsg(fname, n, rt_err_msg_notany)); always_error = false; } @@ -1587,7 +1601,9 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) Value *ptask_i16 = emit_bitcast(ctx, get_current_task(ctx), getInt16PtrTy(ctx.builder.getContext())); const int tid_offset = offsetof(jl_task_t, tid); Value *ptid = ctx.builder.CreateInBoundsGEP(getInt16Ty(ctx.builder.getContext()), ptask_i16, ConstantInt::get(ctx.types().T_size, tid_offset / sizeof(int16_t))); + setName(ctx.emission_context, ptid, "thread_id_ptr"); LoadInst *tid = ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), ptid, Align(sizeof(int16_t))); + setName(ctx.emission_context, tid, "thread_id"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); ai.decorateInst(tid); return mark_or_box_ccall_result(ctx, tid, retboxed, rt, unionall, static_rt); @@ -1601,15 +1617,19 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t 
nargs) Value *ptls_i32 = emit_bitcast(ctx, get_current_ptls(ctx), getInt32PtrTy(ctx.builder.getContext())); const int finh_offset = offsetof(jl_tls_states_t, finalizers_inhibited); Value *pfinh = ctx.builder.CreateInBoundsGEP(getInt32Ty(ctx.builder.getContext()), ptls_i32, ConstantInt::get(ctx.types().T_size, finh_offset / 4)); + setName(ctx.emission_context, pfinh, "finalizers_inhibited_ptr"); LoadInst *finh = ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), pfinh, Align(sizeof(int32_t))); + setName(ctx.emission_context, finh, "finalizers_inhibited"); Value *newval; if (is_libjulia_func(jl_gc_disable_finalizers_internal)) { newval = ctx.builder.CreateAdd(finh, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 1)); + setName(ctx.emission_context, newval, "finalizers_inhibited_inc"); } else { newval = ctx.builder.CreateSelect(ctx.builder.CreateICmpEQ(finh, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0), ctx.builder.CreateSub(finh, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 1))); + setName(ctx.emission_context, newval, "finalizers_inhibited_dec"); } ctx.builder.CreateStore(newval, pfinh); return ghostValue(ctx, jl_nothing_type); @@ -1630,6 +1650,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) Value *ptls_pv = emit_bitcast(ctx, get_current_ptls(ctx), ctx.types().T_ppjlvalue); const int nt_offset = offsetof(jl_tls_states_t, next_task); Value *pnt = ctx.builder.CreateInBoundsGEP(ctx.types().T_pjlvalue, ptls_pv, ConstantInt::get(ctx.types().T_size, nt_offset / sizeof(void*))); + setName(ctx.emission_context, pnt, "next_task_ptr"); ctx.builder.CreateStore(emit_pointer_from_objref(ctx, boxed(ctx, argv[0])), pnt); return ghostValue(ctx, jl_nothing_type); } @@ -1640,8 +1661,11 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) JL_GC_POP(); ctx.builder.CreateCall(prepare_call(gcroot_flush_func)); 
Value *pdefer_sig = emit_defer_signal(ctx); + setName(ctx.emission_context, pdefer_sig, "defer_signal_ptr"); Value *defer_sig = ctx.builder.CreateLoad(ctx.types().T_sigatomic, pdefer_sig); + setName(ctx.emission_context, defer_sig, "defer_signal"); defer_sig = ctx.builder.CreateAdd(defer_sig, ConstantInt::get(ctx.types().T_sigatomic, 1)); + setName(ctx.emission_context, defer_sig, "defer_signal_inc"); ctx.builder.CreateStore(defer_sig, pdefer_sig); emit_signal_fence(ctx); return ghostValue(ctx, jl_nothing_type); @@ -1653,7 +1677,9 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) JL_GC_POP(); ctx.builder.CreateCall(prepare_call(gcroot_flush_func)); Value *pdefer_sig = emit_defer_signal(ctx); + setName(ctx.emission_context, pdefer_sig, "defer_signal_ptr"); Value *defer_sig = ctx.builder.CreateLoad(ctx.types().T_sigatomic, pdefer_sig); + setName(ctx.emission_context, defer_sig, "defer_signal"); emit_signal_fence(ctx); error_unless(ctx, ctx.builder.CreateICmpNE(defer_sig, ConstantInt::get(ctx.types().T_sigatomic, 0)), @@ -1661,19 +1687,23 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) defer_sig = ctx.builder.CreateSub( defer_sig, ConstantInt::get(ctx.types().T_sigatomic, 1)); + setName(ctx.emission_context, defer_sig, "defer_signal_dec"); ctx.builder.CreateStore(defer_sig, pdefer_sig); BasicBlock *checkBB = BasicBlock::Create(ctx.builder.getContext(), "check", ctx.f); BasicBlock *contBB = BasicBlock::Create(ctx.builder.getContext(), "cont"); + auto not_deferred = ctx.builder.CreateICmpEQ(defer_sig, ConstantInt::get(ctx.types().T_sigatomic, 0)); + setName(ctx.emission_context, not_deferred, "deferred.not"); ctx.builder.CreateCondBr( - ctx.builder.CreateICmpEQ(defer_sig, ConstantInt::get(ctx.types().T_sigatomic, 0)), + not_deferred, checkBB, contBB); ctx.builder.SetInsertPoint(checkBB); - ctx.builder.CreateLoad( + auto signal_page_load = ctx.builder.CreateLoad( ctx.types().T_size, 
ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_size, get_current_signal_page_from_ptls(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const), -1), true); + setName(ctx.emission_context, signal_page_load, "signal_page_load"); ctx.builder.CreateBr(contBB); ctx.f->getBasicBlockList().push_back(contBB); ctx.builder.SetInsertPoint(contBB); @@ -1690,7 +1720,9 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) } else { auto ptr = emit_bitcast(ctx, boxed(ctx, svecv), ctx.types().T_size->getPointerTo()); + setName(ctx.emission_context, ptr, "svec_len_ptr"); len = ctx.builder.CreateAlignedLoad(ctx.types().T_size, ptr, ctx.types().alignof_ptr); + setName(ctx.emission_context, len, "svec_len"); // Only mark with TBAA if we are sure about the type. // This could otherwise be in a dead branch if (svecv.typ == (jl_value_t*)jl_simplevector_type) { @@ -1713,11 +1745,15 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) const jl_cgval_t &idxv = argv[1]; Value *idx = emit_unbox(ctx, ctx.types().T_size, idxv, (jl_value_t*)jl_long_type); idx = ctx.builder.CreateAdd(idx, ConstantInt::get(ctx.types().T_size, 1)); + setName(ctx.emission_context, idx, "svec_idx"); auto ptr = emit_bitcast(ctx, boxed(ctx, svecv), ctx.types().T_pprjlvalue); + setName(ctx.emission_context, ptr, "svec_data_ptr"); Value *slot_addr = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, decay_derived(ctx, ptr), idx); + setName(ctx.emission_context, slot_addr, "svec_slot_addr"); LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, slot_addr, Align(sizeof(void*))); + setName(ctx.emission_context, load, "svec_slot"); load->setAtomic(AtomicOrdering::Unordered); // Only mark with TBAA if we are sure about the type. 
// This could otherwise be in a dead branch @@ -1754,9 +1790,12 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) if (stride != 1) idx = ctx.builder.CreateMul(idx, ConstantInt::get(ctx.types().T_size, stride)); idx = ctx.builder.CreateAdd(idx, ConstantInt::get(ctx.types().T_size, ((jl_datatype_t*)ety)->layout->first_ptr)); + setName(ctx.emission_context, idx, "array_idx"); } Value *slot_addr = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, arrayptr, idx); + setName(ctx.emission_context, slot_addr, "array_slot_addr"); LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, slot_addr, Align(sizeof(void*))); + setName(ctx.emission_context, load, "array_slot"); load->setAtomic(AtomicOrdering::Unordered); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_ptrarraybuf); ai.decorateInst(load); @@ -1776,6 +1815,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) // a null pointer. auto strp = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, obj, 1); strp = ctx.builder.CreatePtrToInt(strp, ctx.types().T_size); + setName(ctx.emission_context, strp, "string_ptr"); JL_GC_POP(); return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt); } @@ -1790,6 +1830,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) auto strp = ctx.builder.CreateConstInBoundsGEP1_32( ctx.types().T_prjlvalue, obj, (sizeof(jl_sym_t) + sizeof(void*) - 1) / sizeof(void*)); strp = ctx.builder.CreatePtrToInt(strp, ctx.types().T_size); + setName(ctx.emission_context, strp, "symbol_name"); JL_GC_POP(); return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt); } @@ -1860,7 +1901,9 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) const int hash_offset = offsetof(jl_sym_t, hash); Value *ph1 = emit_bitcast(ctx, decay_derived(ctx, boxed(ctx, val)), ctx.types().T_size->getPointerTo()); 
Value *ph2 = ctx.builder.CreateInBoundsGEP(ctx.types().T_size, ph1, ConstantInt::get(ctx.types().T_size, hash_offset / ctx.types().sizeof_ptr)); + setName(ctx.emission_context, ph2, "object_id_ptr"); LoadInst *hashval = ctx.builder.CreateAlignedLoad(ctx.types().T_size, ph2, ctx.types().alignof_ptr); + setName(ctx.emission_context, hashval, "object_id"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); ai.decorateInst(hashval); return mark_or_box_ccall_result(ctx, hashval, retboxed, rt, unionall, static_rt); @@ -1879,6 +1922,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) T_pint8_derived) }; Value *ret = ctx.builder.CreateCall(prepare_call(jl_object_id__func), makeArrayRef(args)); + setName(ctx.emission_context, ret, "object_id"); JL_GC_POP(); return mark_or_box_ccall_result(ctx, ret, retboxed, rt, unionall, static_rt); } @@ -1967,6 +2011,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( assert(!retboxed && jl_is_datatype(rt) && "sret return type invalid"); if (jl_is_pointerfree(rt)) { result = emit_static_alloca(ctx, lrt); + setName(ctx.emission_context, result, "ccall_sret"); sretty = lrt; argvals[0] = ctx.builder.CreateBitCast(result, fargt_sig.at(0)); } @@ -1975,6 +2020,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( // and has incorrect write barriers. 
// instead this code path should behave like `unsafe_load` result = emit_allocobj(ctx, (jl_datatype_t*)rt); + setName(ctx.emission_context, result, "ccall_sret_box"); sretty = ctx.types().T_jlvalue; sretboxed = true; gc_uses.push_back(result); @@ -2123,6 +2169,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( return mark_julia_slot(result, rt, NULL, ctx.tbaa().tbaa_stack); ++SRetCCalls; result = ctx.builder.CreateLoad(sretty, result); + setName(ctx.emission_context, result, "returned"); } } else { @@ -2137,6 +2184,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( assert(jl_is_datatype(rt)); if (static_rt) { Value *strct = emit_allocobj(ctx, (jl_datatype_t*)rt); + setName(ctx.emission_context, strct, "ccall_ret_box"); MDNode *tbaa = jl_is_mutable(rt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut; int boxalign = julia_alignment(rt); // copy the data from the return value to the new struct @@ -2147,6 +2195,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( // ARM and AArch64 can use a LLVM type larger than the julia type. // When this happens, cast through memory. 
auto slot = emit_static_alloca(ctx, resultTy); + setName(ctx.emission_context, slot, "type_pun_slot"); slot->setAlignment(Align(boxalign)); ctx.builder.CreateAlignedStore(result, slot, Align(boxalign)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); diff --git a/src/cgutils.cpp b/src/cgutils.cpp index b400e510e0cde..8442ba99bb411 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -119,10 +119,12 @@ static Value *stringConstPtr( GlobalVariable *gv = get_pointer_to_constant(emission_context, Data, "_j_str", *M); Value *zero = ConstantInt::get(Type::getInt32Ty(irbuilder.getContext()), 0); Value *Args[] = { zero, zero }; - return irbuilder.CreateInBoundsGEP(gv->getValueType(), + auto gep = irbuilder.CreateInBoundsGEP(gv->getValueType(), // Addrspacecast in case globals are in non-0 AS irbuilder.CreateAddrSpaceCast(gv, gv->getValueType()->getPointerTo(0)), Args); + setName(emission_context, gep, "string_const_ptr"); + return gep; } @@ -515,9 +517,11 @@ static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p) return literal_static_pointer_val(p, ctx.types().T_pjlvalue); Value *pgv = literal_pointer_val_slot(ctx, p); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - return ai.decorateInst(maybe_mark_load_dereferenceable( + auto load = ai.decorateInst(maybe_mark_load_dereferenceable( ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*))), false, jl_typeof(p))); + setName(ctx.emission_context, load, pgv->getName()); + return load; } // Returns ctx.types().T_pjlvalue @@ -532,9 +536,11 @@ static Value *literal_pointer_val(jl_codectx_t &ctx, jl_binding_t *p) jl_globalref_t *gr = p->globalref; Value *pgv = gr ? 
julia_pgv(ctx, "jl_bnd#", gr->name, gr->mod, p) : julia_pgv(ctx, "jl_bnd#", p); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - return ai.decorateInst(maybe_mark_load_dereferenceable( + auto load = ai.decorateInst(maybe_mark_load_dereferenceable( ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*))), false, sizeof(jl_binding_t), alignof(jl_binding_t))); + setName(ctx.emission_context, load, pgv->getName()); + return load; } // bitcast a value, but preserve its address space when dealing with pointer types @@ -573,7 +579,9 @@ static Value *julia_binding_gv(jl_codectx_t &ctx, jl_binding_t *b) jl_globalref_t *gr = b->globalref; Value *pgv = gr ? julia_pgv(ctx, "*", gr->name, gr->mod, b) : julia_pgv(ctx, "*jl_bnd#", b); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - return ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*)))); + auto load = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*)))); + setName(ctx.emission_context, load, pgv->getName()); + return load; } else { return literal_static_pointer_val(b, ctx.types().T_pjlvalue); @@ -976,6 +984,10 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const src = emit_bitcast(ctx, src, dstty); } if (directel) { + if (isa(src) && !src->hasName()) + setName(ctx.emission_context, src, "memcpy_refined_src"); + if (isa(dst) && !dst->hasName()) + setName(ctx.emission_context, dst, "memcpy_refined_dst"); auto val = src_ai.decorateInst(ctx.builder.CreateAlignedLoad(directel, src, Align(align), is_volatile)); dst_ai.decorateInst(ctx.builder.CreateAlignedStore(val, dst, Align(align), is_volatile)); ++SkippedMemcpys; @@ -1035,6 +1047,7 @@ static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, Value *idx, MDN ctx.types().T_prjlvalue, emit_bitcast(ctx, maybe_decay_tracked(ctx, v), ctx.types().T_pprjlvalue), idx); + 
setName(ctx.emission_context, vptr, "arraysize_ptr"); LoadInst *load = ctx.builder.CreateLoad(type, emit_bitcast(ctx, vptr, PointerType::get(type, 0))); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); ai.decorateInst(load); @@ -1045,7 +1058,9 @@ static Value *emit_tagfrom(jl_codectx_t &ctx, jl_datatype_t *dt) { if (dt->smalltag) return ConstantInt::get(ctx.types().T_size, dt->smalltag << 4); - return ctx.builder.CreatePtrToInt(literal_pointer_val(ctx, (jl_value_t*)dt), ctx.types().T_size); + auto tag = ctx.builder.CreatePtrToInt(literal_pointer_val(ctx, (jl_value_t*)dt), ctx.types().T_size); + setName(ctx.emission_context, tag, jl_symbol_name(dt->name->name)); + return tag; } // Returns justtag ? ctx.types.T_size : ctx.types().T_prjlvalue @@ -1110,6 +1125,7 @@ static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull else ptr = ConstantExpr::getAddrSpaceCast(literal_static_pointer_val((jl_value_t*)jt, ctx.types().T_pjlvalue), expr_type); datatype_or_p = ctx.builder.CreateSelect(cmp, ptr, datatype_or_p); + setName(ctx.emission_context, datatype_or_p, "typetag_ptr"); }, p.typ, counter); @@ -1117,6 +1133,7 @@ static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); if (ctx.emission_context.imaging) { Value *datatype = ai.decorateInst(ctx.builder.CreateAlignedLoad(expr_type, datatype_or_p, Align(sizeof(void*)))); + setName(ctx.emission_context, datatype, "typetag"); return justtag ? 
datatype : track_pjlvalue(ctx, datatype); } return datatype_or_p; @@ -1124,6 +1141,7 @@ static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull Value *res; if (!allunboxed) { Value *isnull = ctx.builder.CreateIsNull(datatype_or_p); + setName(ctx.emission_context, isnull, "typetag_isnull"); BasicBlock *boxBB = BasicBlock::Create(ctx.builder.getContext(), "boxed", ctx.f); BasicBlock *unboxBB = BasicBlock::Create(ctx.builder.getContext(), "unboxed", ctx.f); BasicBlock *mergeBB = BasicBlock::Create(ctx.builder.getContext(), "merge", ctx.f); @@ -1141,6 +1159,7 @@ static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull phi->addIncoming(boxTy, boxBB); phi->addIncoming(unboxTy, unboxBB); res = phi; + setName(ctx.emission_context, res, "typetag"); } else { res = emit_unboxty(); @@ -1155,15 +1174,19 @@ static Value *emit_datatype_types(jl_codectx_t &ctx, Value *dt) Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), ctx.types().T_ppjlvalue); Value *Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_t, types) / sizeof(void*)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - return ai.decorateInst(ctx.builder.CreateAlignedLoad( + auto types = ai.decorateInst(ctx.builder.CreateAlignedLoad( ctx.types().T_pjlvalue, ctx.builder.CreateInBoundsGEP(ctx.types().T_pjlvalue, Ptr, Idx), Align(sizeof(void*)))); + setName(ctx.emission_context, types, "datatype_types"); + return types; } static Value *emit_datatype_nfields(jl_codectx_t &ctx, Value *dt) { Value *type_svec = emit_bitcast(ctx, emit_datatype_types(ctx, dt), ctx.types().T_size->getPointerTo()); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - return ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_size, type_svec, Align(sizeof(void*)))); + auto nfields = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_size, type_svec, Align(sizeof(void*)))); + setName(ctx.emission_context, nfields, 
"datatype_nfields"); + return nfields; } static Value *emit_datatype_size(jl_codectx_t &ctx, Value *dt) @@ -1175,7 +1198,9 @@ static Value *emit_datatype_size(jl_codectx_t &ctx, Value *dt) Ptr = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32PtrTy(ctx.builder.getContext()), Ptr, Align(sizeof(int32_t*)))); Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_layout_t, size) / sizeof(int32_t)); Ptr = ctx.builder.CreateInBoundsGEP(getInt32Ty(ctx.builder.getContext()), Ptr, Idx); - return ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), Ptr, Align(sizeof(int32_t)))); + auto Size = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), Ptr, Align(sizeof(int32_t)))); + setName(ctx.emission_context, Size, "datatype_size"); + return Size; } /* this is valid code, it's simply unused @@ -1250,6 +1275,7 @@ static Value *emit_datatype_isprimitivetype(jl_codectx_t &ctx, Value *typ) isprimitive = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), isprimitive, Align(1))); isprimitive = ctx.builder.CreateLShr(isprimitive, 7); isprimitive = ctx.builder.CreateTrunc(isprimitive, getInt1Ty(ctx.builder.getContext())); + setName(ctx.emission_context, isprimitive, "datatype_isprimitive"); return isprimitive; } @@ -1261,7 +1287,9 @@ static Value *emit_datatype_name(jl_codectx_t &ctx, Value *dt) emit_bitcast(ctx, maybe_decay_tracked(ctx, dt), ctx.types().T_ppjlvalue), ConstantInt::get(ctx.types().T_size, n)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - return ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, vptr, Align(sizeof(void*)))); + auto name = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, vptr, Align(sizeof(void*)))); + setName(ctx.emission_context, name, "datatype_name"); + return name; } // --- generating various error checks --- @@ -1370,6 +1398,7 @@ static Value 
*emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, Value *defval, PHINode *phi = ctx.builder.CreatePHI(defval->getType(), 2); phi->addIncoming(defval, currBB); phi->addIncoming(res, passBB); + setName(ctx.emission_context, phi, "guard_res"); return phi; } @@ -1414,7 +1443,7 @@ static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull, bool just assert(v != NULL && !isa(v) && "expected a conditionally boxed value"); Value *nonnull = maybenull ? null_pointer_cmp(ctx, v) : ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1); Function *typeof = prepare_call(jl_typeof_func); - return emit_guarded_test(ctx, nonnull, Constant::getNullValue(justtag ? ctx.types().T_size : typeof->getReturnType()), [&] { + auto val = emit_guarded_test(ctx, nonnull, Constant::getNullValue(justtag ? ctx.types().T_size : typeof->getReturnType()), [&] { // e.g. emit_typeof(ctx, v) Value *typetag = ctx.builder.CreateCall(typeof, {v}); if (notag) @@ -1435,6 +1464,8 @@ static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull, bool just return ai.decorateInst(small); }); }); + setName(ctx.emission_context, val, "typeof"); + return val; } static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v, bool is_promotable=false); @@ -1493,11 +1524,14 @@ static Value *emit_exactly_isa(jl_codectx_t &ctx, const jl_cgval_t &arg, jl_data if (tindex > 0) { // optimize more when we know that this is a split union-type where tindex = 0 is invalid Value *xtindex = ctx.builder.CreateAnd(arg.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f)); - return ctx.builder.CreateICmpEQ(xtindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), tindex)); + auto isa = ctx.builder.CreateICmpEQ(xtindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), tindex)); + setName(ctx.emission_context, isa, "exactly_isa"); + return isa; } else if (arg.Vboxed) { // test for (arg.TIndex == 0x80 && typeof(arg.V) == type) Value *isboxed = ctx.builder.CreateICmpEQ(arg.TIndex, 
ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)); + setName(ctx.emission_context, isboxed, "isboxed"); BasicBlock *currBB = ctx.builder.GetInsertBlock(); BasicBlock *isaBB = BasicBlock::Create(ctx.builder.getContext(), "isa", ctx.f); BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_isa", ctx.f); @@ -1510,13 +1544,16 @@ static Value *emit_exactly_isa(jl_codectx_t &ctx, const jl_cgval_t &arg, jl_data PHINode *istype = ctx.builder.CreatePHI(getInt1Ty(ctx.builder.getContext()), 2); istype->addIncoming(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0), currBB); istype->addIncoming(istype_boxed, isaBB); + setName(ctx.emission_context, istype, "exactly_isa"); return istype; } else { // handle the case where we know that `arg` is unboxed (but of unknown type), but that concrete type `type` cannot be unboxed return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0); } } - return ctx.builder.CreateICmpEQ(emit_typeof(ctx, arg, false, true), emit_tagfrom(ctx, dt)); + auto isa = ctx.builder.CreateICmpEQ(emit_typeof(ctx, arg, false, true), emit_tagfrom(ctx, dt)); + setName(ctx.emission_context, isa, "exactly_isa"); + return isa; } static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, @@ -1586,6 +1623,7 @@ static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, ctx.builder.CreateOr( ctx.builder.CreateICmpEQ(typ, emit_tagfrom(ctx, jl_unionall_type)), ctx.builder.CreateICmpEQ(typ, emit_tagfrom(ctx, jl_typeofbottom_type)))); + setName(ctx.emission_context, val, "is_kind"); return std::make_pair(val, false); } // intersection with Type needs to be handled specially @@ -1686,6 +1724,7 @@ static Value *emit_isconcrete(jl_codectx_t &ctx, Value *typ) isconcrete = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), isconcrete, Align(1))); isconcrete = ctx.builder.CreateLShr(isconcrete, 1); isconcrete = ctx.builder.CreateTrunc(isconcrete, getInt1Ty(ctx.builder.getContext())); + 
setName(ctx.emission_context, isconcrete, "isconcrete"); return isconcrete; } @@ -1719,6 +1758,7 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v if (bounds_check_enabled(ctx, boundscheck)) { ++EmittedBoundschecks; Value *ok = ctx.builder.CreateICmpULT(im1, len); + setName(ctx.emission_context, ok, "boundscheck"); BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f); BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(), "pass"); ctx.builder.CreateCondBr(ok, passBB, failBB); @@ -1737,6 +1777,7 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v else if (!ainfo.ispointer()) { // CreateAlloca is OK here since we are on an error branch Value *tempSpace = ctx.builder.CreateAlloca(a->getType()); + setName(ctx.emission_context, tempSpace, "errorbox"); ctx.builder.CreateStore(a, tempSpace); a = tempSpace; } @@ -1828,12 +1869,15 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j // note that nb == jl_Module->getDataLayout().getTypeAllocSize(elty) or getTypeStoreSize, depending on whether it is a struct or primitive type AllocaInst *intcast = NULL; if (Order == AtomicOrdering::NotAtomic) { - if (!isboxed && !aliasscope && elty->isAggregateType() && !CountTrackedPointers(elty).count) + if (!isboxed && !aliasscope && elty->isAggregateType() && !CountTrackedPointers(elty).count) { intcast = emit_static_alloca(ctx, elty); + setName(ctx.emission_context, intcast, "aggregate_load_box"); + } } else { if (!isboxed && !elty->isIntOrPtrTy()) { intcast = emit_static_alloca(ctx, elty); + setName(ctx.emission_context, intcast, "atomic_load_box"); elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb); } } @@ -1949,8 +1993,10 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, unsigned nb = isboxed ? 
sizeof(void*) : jl_datatype_size(jltype); AllocaInst *intcast = nullptr; if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy()) { - if (!issetfield) + if (!issetfield) { intcast = emit_static_alloca(ctx, elty); + setName(ctx.emission_context, intcast, "atomic_store_box"); + } elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb); } Type *realelty = elty; @@ -1988,6 +2034,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, if (issetfield || (Order == AtomicOrdering::NotAtomic && isswapfield)) { if (isswapfield) { auto *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment)); + setName(ctx.emission_context, load, "swapfield_load"); if (isboxed) load->setOrdering(AtomicOrdering::Unordered); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); @@ -2011,6 +2058,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, else if (isswapfield && isStrongerThanMonotonic(Order)) { assert(Order != AtomicOrdering::NotAtomic && r); auto *store = ctx.builder.CreateAtomicRMW(AtomicRMWInst::Xchg, ptr, r, Align(alignment), Order); + setName(ctx.emission_context, store, "swapfield_atomicrmw"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); ai.noalias = MDNode::concatenate(aliasscope, ai.noalias); ai.decorateInst(store); @@ -2035,6 +2083,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, ctx.builder.CreateCondBr(SameType, BB, SkipBB); ctx.builder.SetInsertPoint(SkipBB); LoadInst *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment)); + setName(ctx.emission_context, load, "atomic_replacefield_initial"); load->setOrdering(FailOrder == AtomicOrdering::NotAtomic && isboxed ? 
AtomicOrdering::Monotonic : FailOrder); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); ai.noalias = MDNode::concatenate(aliasscope, ai.noalias); @@ -2065,6 +2114,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, else { // swap or modify LoadInst *Current = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment)); Current->setOrdering(Order == AtomicOrdering::NotAtomic && !isboxed ? Order : AtomicOrdering::Monotonic); + setName(ctx.emission_context, Current, "atomic_initial"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); ai.noalias = MDNode::concatenate(aliasscope, ai.noalias); Compare = ai.decorateInst(Current); @@ -2267,8 +2317,10 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, // Returns ctx.types().T_pjlvalue static Value *julia_bool(jl_codectx_t &ctx, Value *cond) { - return ctx.builder.CreateSelect(cond, literal_pointer_val(ctx, jl_true), + auto boolean = ctx.builder.CreateSelect(cond, literal_pointer_val(ctx, jl_true), literal_pointer_val(ctx, jl_false)); + setName(ctx.emission_context, boolean, "bool"); + return boolean; } // --- accessing the representations of built-in data types --- @@ -2354,6 +2406,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx, ctx.builder.CreateExtractValue(strct.V, makeArrayRef(i)), fld); } + setName(ctx.emission_context, fld, "getfield"); jl_value_t *jft = issame ? 
jl_svecref(types, 0) : (jl_value_t*)jl_any_type; if (isboxed && maybe_null) null_pointer_check(ctx, fld); @@ -2387,7 +2440,9 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx, ctx.types().T_prjlvalue, emit_bitcast(ctx, data_pointer(ctx, strct), ctx.types().T_pprjlvalue), idx0()); + setName(ctx.emission_context, fldptr, "getfield_ptr"); LoadInst *fld = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, fldptr, Align(sizeof(void*))); + setName(ctx.emission_context, fld, "getfield"); fld->setOrdering(AtomicOrdering::Unordered); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, strct.tbaa); ai.decorateInst(fld); @@ -2439,6 +2494,7 @@ static jl_cgval_t emit_unionload(jl_codectx_t &ctx, Value *addr, Value *ptindex, // move value to an immutable stack slot (excluding tindex) Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (fsz + al - 1) / al); AllocaInst *lv = emit_static_alloca(ctx, AT); + setName(ctx.emission_context, lv, "immutable_union"); if (al > 1) lv->setAlignment(Align(al)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); @@ -2509,7 +2565,9 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st addr = ctx.builder.CreateConstInBoundsGEP2_32(lt, staddr, 0, idx); } if (jl_field_isptr(jt, idx)) { + setName(ctx.emission_context, addr, "getfield_addr"); LoadInst *Load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, maybe_bitcast(ctx, addr, ctx.types().T_pprjlvalue), Align(sizeof(void*))); + setName(ctx.emission_context, Load, "getfield"); Load->setOrdering(order <= jl_memory_order_notatomic ? 
AtomicOrdering::Unordered : get_llvm_atomic_order(order)); maybe_mark_load_dereferenceable(Load, maybe_null, jl_field_type(jt, idx)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); @@ -2572,6 +2630,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st IntegerType *ET = cast(T->getStructElementType(st_idx)); unsigned align = (ET->getBitWidth() + 7) / 8; lv = emit_static_alloca(ctx, ET); + setName(ctx.emission_context, lv, "union_split"); lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz + align - 1) / align)); // emit all of the align-sized words unsigned i = 0; @@ -2715,6 +2774,7 @@ static Value *emit_arraysize(jl_codectx_t &ctx, const jl_cgval_t &tinfo, Value * t, ctx.builder.CreateAdd(dim, ConstantInt::get(dim->getType(), o)), tbaa, ctx.types().T_size); + setName(ctx.emission_context, load, "arraysize"); MDBuilder MDB(ctx.builder.getContext()); auto rng = MDB.createRange(Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, arraytype_maxsize(tinfo.typ))); load->setMetadata(LLVMContext::MD_range, rng); @@ -2750,7 +2810,9 @@ static Value *emit_arraylen_prim(jl_codectx_t &ctx, const jl_cgval_t &tinfo) Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlarray, emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray), 1); //index (not offset) of length field in ctx.types().T_pjlarray + setName(ctx.emission_context, addr, "arraylen_ptr"); LoadInst *len = ctx.builder.CreateAlignedLoad(ctx.types().T_size, addr, ctx.types().alignof_ptr); + setName(ctx.emission_context, len, "arraylen"); len->setOrdering(AtomicOrdering::NotAtomic); MDBuilder MDB(ctx.builder.getContext()); auto rng = MDB.createRange(Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, arraytype_maxsize(tinfo.typ))); @@ -2769,6 +2831,7 @@ static Value *emit_arrayptr_internal(jl_codectx_t &ctx, const jl_cgval_t &tinfo, ++EmittedArrayptr; Value *addr = 
ctx.builder.CreateStructGEP(ctx.types().T_jlarray, emit_bitcast(ctx, t, ctx.types().T_pjlarray), 0); + setName(ctx.emission_context, addr, "arrayptr_ptr"); // Normally allocated array of 0 dimension always have a inline pointer. // However, we can't rely on that here since arrays can also be constructed from C pointers. PointerType *PT = cast(addr->getType()); @@ -2787,6 +2850,7 @@ static Value *emit_arrayptr_internal(jl_codectx_t &ctx, const jl_cgval_t &tinfo, } LoadInst *LI = ctx.builder.CreateAlignedLoad(LoadT, addr, Align(sizeof(char *))); + setName(ctx.emission_context, LI, "arrayptr"); LI->setOrdering(AtomicOrdering::NotAtomic); LI->setMetadata(LLVMContext::MD_nonnull, MDNode::get(ctx.builder.getContext(), None)); jl_aliasinfo_t aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, arraytype_constshape(tinfo.typ) ? ctx.tbaa().tbaa_const : ctx.tbaa().tbaa_arrayptr); @@ -2827,8 +2891,11 @@ static Value *emit_arrayflags(jl_codectx_t &ctx, const jl_cgval_t &tinfo) ctx.types().T_jlarray, emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray), arrayflag_field); + setName(ctx.emission_context, addr, "arrayflags_ptr"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_arrayflags); - return ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), addr, Align(sizeof(int16_t)))); + auto flags = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), addr, Align(sizeof(int16_t)))); + setName(ctx.emission_context, flags, "arrayflags"); + return flags; } static Value *emit_arrayndims(jl_codectx_t &ctx, const jl_cgval_t &ary) @@ -2838,6 +2905,7 @@ static Value *emit_arrayndims(jl_codectx_t &ctx, const jl_cgval_t &ary) cast(flags)->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(ctx.builder.getContext(), None)); flags = ctx.builder.CreateLShr(flags, 2); flags = ctx.builder.CreateAnd(flags, 0x1FF); // (1<<9) - 1 + setName(ctx.emission_context, flags, "arrayndims"); return flags; } @@ -2849,8 +2917,11 
@@ static Value *emit_arrayelsize(jl_codectx_t &ctx, const jl_cgval_t &tinfo) Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlarray, emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray), elsize_field); + setName(ctx.emission_context, addr, "arrayelsize_ptr"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); - return ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), addr, Align(sizeof(int16_t)))); + auto elsize = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), addr, Align(sizeof(int16_t)))); + setName(ctx.emission_context, elsize, "arrayelsize"); + return elsize; } static Value *emit_arrayoffset(jl_codectx_t &ctx, const jl_cgval_t &tinfo, int nd) @@ -2865,8 +2936,11 @@ static Value *emit_arrayoffset(jl_codectx_t &ctx, const jl_cgval_t &tinfo, int n ctx.types().T_jlarray, emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray), offset_field); + setName(ctx.emission_context, addr, "arrayoffset_ptr"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_arrayoffset); - return ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), addr, Align(sizeof(int32_t)))); + auto offset = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), addr, Align(sizeof(int32_t)))); + setName(ctx.emission_context, offset, "arrayoffset"); + return offset; } // Returns the size of the array represented by `tinfo` for the given dimension `dim` if @@ -2909,12 +2983,15 @@ static Value *emit_array_nd_index( if (bc) { BasicBlock *okBB = BasicBlock::Create(ctx.builder.getContext(), "ib"); // if !(i < d) goto error - ctx.builder.CreateCondBr(ctx.builder.CreateICmpULT(ii, d), okBB, failBB); + auto bc = ctx.builder.CreateICmpULT(ii, d); + setName(ctx.emission_context, bc, "inbounds"); + ctx.builder.CreateCondBr(bc, okBB, failBB); ctx.f->getBasicBlockList().push_back(okBB); ctx.builder.SetInsertPoint(okBB); 
} #endif stride = ctx.builder.CreateMul(stride, d); + setName(ctx.emission_context, stride, "stride"); } } #if CHECK_BOUNDS==1 @@ -2924,20 +3001,26 @@ static Value *emit_array_nd_index( if (nidxs == 1) { // Linear indexing: Check against the entire linear span of the array Value *alen = emit_arraylen(ctx, ainfo); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpULT(i, alen), endBB, failBB); + auto bc = ctx.builder.CreateICmpULT(i, alen); + setName(ctx.emission_context, bc, "inbounds"); + ctx.builder.CreateCondBr(bc, endBB, failBB); } else if (nidxs >= (size_t)nd){ // No dimensions were omitted; just check the last remaining index assert(nd >= 0); Value *last_index = ii; Value *last_dimension = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, nidxs, nd); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpULT(last_index, last_dimension), endBB, failBB); + auto bc = ctx.builder.CreateICmpULT(last_index, last_dimension); + setName(ctx.emission_context, bc, "inbounds"); + ctx.builder.CreateCondBr(bc, endBB, failBB); } else { // There were fewer indices than dimensions; check the last remaining index BasicBlock *checktrailingdimsBB = BasicBlock::Create(ctx.builder.getContext(), "dimsib"); assert(nd >= 0); Value *last_index = ii; Value *last_dimension = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, nidxs, nd); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpULT(last_index, last_dimension), checktrailingdimsBB, failBB); + auto bc = ctx.builder.CreateICmpULT(last_index, last_dimension); + setName(ctx.emission_context, bc, "inbounds"); + ctx.builder.CreateCondBr(bc, checktrailingdimsBB, failBB); ctx.f->getBasicBlockList().push_back(checktrailingdimsBB); ctx.builder.SetInsertPoint(checktrailingdimsBB); // And then also make sure that all dimensions that weren't explicitly @@ -2945,18 +3028,23 @@ static Value *emit_array_nd_index( for (size_t k = nidxs+1; k < (size_t)nd; k++) { BasicBlock *dimsokBB = BasicBlock::Create(ctx.builder.getContext(), "dimsok"); Value *dim = 
emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, k, nd); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpEQ(dim, ConstantInt::get(ctx.types().T_size, 1)), dimsokBB, failBB); + auto bc = ctx.builder.CreateICmpEQ(dim, ConstantInt::get(ctx.types().T_size, 1)); + setName(ctx.emission_context, bc, "inbounds"); + ctx.builder.CreateCondBr(bc, dimsokBB, failBB); ctx.f->getBasicBlockList().push_back(dimsokBB); ctx.builder.SetInsertPoint(dimsokBB); } Value *dim = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, nd, nd); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpEQ(dim, ConstantInt::get(ctx.types().T_size, 1)), endBB, failBB); + auto bc2 = ctx.builder.CreateICmpEQ(dim, ConstantInt::get(ctx.types().T_size, 1)); + setName(ctx.emission_context, bc2, "inbounds"); + ctx.builder.CreateCondBr(bc2, endBB, failBB); } ctx.f->getBasicBlockList().push_back(failBB); ctx.builder.SetInsertPoint(failBB); // CreateAlloca is OK here since we are on an error branch Value *tmp = ctx.builder.CreateAlloca(ctx.types().T_size, ConstantInt::get(ctx.types().T_size, nidxs)); + setName(ctx.emission_context, tmp, "errorbox"); for (size_t k = 0; k < nidxs; k++) { ctx.builder.CreateAlignedStore(idxs[k], ctx.builder.CreateInBoundsGEP(ctx.types().T_size, tmp, ConstantInt::get(ctx.types().T_size, k)), ctx.types().alignof_ptr); } @@ -3174,6 +3262,7 @@ static Value *compute_box_tindex(jl_codectx_t &ctx, Value *datatype_tag, jl_valu }, ut, counter); + setName(ctx.emission_context, tindex, "tindex"); return tindex; } @@ -3224,6 +3313,7 @@ static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, // try to pick an Integer type size such that SROA will emit reasonable code Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * min_align), (nbytes + min_align - 1) / min_align); AllocaInst *lv = emit_static_alloca(ctx, AT); + setName(ctx.emission_context, lv, "unionalloca"); if (align > 1) lv->setAlignment(Align(align)); return lv; @@ -3280,6 +3370,7 @@ static Value 
*box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallB box = _boxed_special(ctx, vinfo_r, t); if (!box) { box = emit_allocobj(ctx, jt); + setName(ctx.emission_context, box, "unionbox"); init_bits_cgval(ctx, box, vinfo_r, jl_is_mutable(jt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut); } } @@ -3408,6 +3499,7 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab box = emit_allocobj(ctx, (jl_datatype_t*)jt); Value *decayed = decay_derived(ctx, box); AllocaInst *originalAlloca = cast(vinfo.V); + box->takeName(originalAlloca); decayed = maybe_bitcast(ctx, decayed, PointerType::getWithSamePointeeType(originalAlloca->getType(), AddressSpace::Derived)); // Warning: Very illegal IR here temporarily originalAlloca->mutateType(decayed->getType()); @@ -3418,6 +3510,7 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab ctx.builder.restoreIP(IP); } else { box = emit_allocobj(ctx, (jl_datatype_t*)jt); + setName(ctx.emission_context, box, "box"); init_bits_cgval(ctx, box, vinfo, jl_is_mutable(jt) ? 
ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut); } } @@ -3528,6 +3621,7 @@ static void emit_cpointercheck(jl_codectx_t &ctx, const jl_cgval_t &x, const std Value *istype = ctx.builder.CreateICmpEQ(emit_datatype_name(ctx, t), literal_pointer_val(ctx, (jl_value_t*)jl_pointer_typename)); + setName(ctx.emission_context, istype, "istype"); BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f); BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(), "pass"); ctx.builder.CreateCondBr(istype, passBB, failBB); @@ -3769,6 +3863,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg } else { strct = emit_static_alloca(ctx, lt); + setName(ctx.emission_context, strct, "newstruct"); if (tracked.count) undef_derived_strct(ctx, strct, sty, ctx.tbaa().tbaa_stack); } @@ -3847,6 +3942,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg Type *ET = IntegerType::get(ctx.builder.getContext(), 8 * al); assert(lt->getStructElementType(llvm_idx) == ET); AllocaInst *lv = emit_static_alloca(ctx, ET); + setName(ctx.emission_context, lv, "unioninit"); lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz + al - 1) / al)); emit_unionmove(ctx, lv, ctx.tbaa().tbaa_stack, fval_info, nullptr); // emit all of the align-sized words @@ -3937,6 +4033,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg } } Value *strct = emit_allocobj(ctx, sty); + setName(ctx.emission_context, strct, "newstruct"); jl_cgval_t strctinfo = mark_julia_type(ctx, strct, true, ty); strct = decay_derived(ctx, strct); undef_derived_strct(ctx, strct, sty, strctinfo.tbaa); diff --git a/src/codegen.cpp b/src/codegen.cpp index 37281ed3038ec..137d3d78814af 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -167,6 +167,13 @@ typedef Instruction TerminatorInst; #undef DEBUG_TYPE //LLVM occasionally likes to set DEBUG_TYPE in a header... 
#define DEBUG_TYPE "julia_irgen_codegen" +void setName(jl_codegen_params_t ¶ms, Value *V, const Twine &Name) +{ + if (params.debug_level) { + V->setName(Name); + } +} + STATISTIC(EmittedAllocas, "Number of allocas emitted"); STATISTIC(EmittedIntToPtrs, "Number of inttoptrs emitted"); STATISTIC(ModulesCreated, "Number of LLVM Modules created"); @@ -1604,7 +1611,6 @@ class jl_codectx_t { Value *pgcstack = NULL; Instruction *topalloca = NULL; - bool debug_enabled = false; bool use_cache = false; bool external_linkage = false; const jl_cgparams_t *params = NULL; @@ -2024,6 +2030,7 @@ static void alloc_def_flag(jl_codectx_t &ctx, jl_varinfo_t& vi) assert((!vi.boxroot || vi.pTIndex) && "undef check is null pointer for boxed things"); if (vi.usedUndef) { vi.defFlag = emit_static_alloca(ctx, getInt1Ty(ctx.builder.getContext())); + setName(ctx.emission_context, vi.defFlag, "isdefined"); store_def_flag(ctx, vi, false); } } @@ -2122,6 +2129,7 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t & v.typ, counter); } + setName(ctx.emission_context, new_tindex, "tindex"); // some of the values are still unboxed if (!isa(new_tindex)) { @@ -2136,6 +2144,7 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t & wasboxed = ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)); new_tindex = ctx.builder.CreateOr(wasboxed, new_tindex); wasboxed = ctx.builder.CreateICmpNE(wasboxed, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); + setName(ctx.emission_context, wasboxed, "wasboxed"); BasicBlock *currBB = ctx.builder.GetInsertBlock(); @@ -2172,6 +2181,7 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t & }, typ, counter); + setName(ctx.emission_context, union_box_tindex, "union_box_tindex"); if (union_box_dt) { BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_union_isa", ctx.f); ctx.builder.CreateBr(postBB); @@ -2183,6 +2193,7 @@ 
static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t & tindex_phi->addIncoming(new_tindex, currBB); tindex_phi->addIncoming(union_box_tindex, post_union_isaBB); new_tindex = tindex_phi; + setName(ctx.emission_context, new_tindex, "tindex"); } } if (!skip_box.all()) { @@ -2341,6 +2352,21 @@ std::unique_ptr jl_create_llvm_module(StringRef name, LLVMContext &conte return m; } +static void jl_name_jlfunc_args(jl_codegen_params_t ¶ms, Function *F) { + assert(F->arg_size() == 3); + setName(params, F->getArg(0), "function"); + setName(params, F->getArg(1), "args"); + setName(params, F->getArg(2), "nargs"); +} + +static void jl_name_jlfuncparams_args(jl_codegen_params_t ¶ms, Function *F) { + assert(F->arg_size() == 4); + setName(params, F->getArg(0), "function"); + setName(params, F->getArg(1), "args"); + setName(params, F->getArg(2), "nargs"); + setName(params, F->getArg(3), "sparams"); +} + static void jl_init_function(Function *F, const Triple &TT) { // set any attributes that *must* be set on all functions @@ -2822,6 +2848,7 @@ static jl_cgval_t emit_globalref(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t * if (bnd->constp) return mark_julia_const(ctx, v); LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))); + setName(ctx.emission_context, v, jl_symbol_name(name)); v->setOrdering(order); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_binding); ai.decorateInst(v); @@ -2896,6 +2923,7 @@ static Value *emit_bitsunion_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, Value *tindex2 = arg2.TIndex; tindex2 = ctx.builder.CreateAnd(tindex2, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f)); Value *typeeq = ctx.builder.CreateICmpEQ(tindex, tindex2); + setName(ctx.emission_context, typeeq, "typematch"); tindex = ctx.builder.CreateSelect(typeeq, tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x00)); BasicBlock *defaultBB = BasicBlock::Create(ctx.builder.getContext(), 
"unionbits_is_boxed", ctx.f); SwitchInst *switchInst = ctx.builder.CreateSwitch(tindex, defaultBB); @@ -2927,6 +2955,7 @@ static Value *emit_bitsunion_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, ctx.builder.CreateCall(trap_func); ctx.builder.CreateUnreachable(); ctx.builder.SetInsertPoint(postBB); + setName(ctx.emission_context, phi, "unionbits_is"); return phi; } @@ -3410,14 +3439,16 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, } else { Value *idx_dyn = emit_unbox(ctx, ctx.types().T_size, idx, (jl_value_t*)jl_long_type); - error_unless(ctx, ctx.builder.CreateICmpSGT(idx_dyn, Constant::getNullValue(ctx.types().T_size)), - "arraysize: dimension out of range"); + auto positive = ctx.builder.CreateICmpSGT(idx_dyn, Constant::getNullValue(ctx.types().T_size)); + setName(ctx.emission_context, positive, "ispositive"); + error_unless(ctx, positive, "arraysize: dimension out of range"); BasicBlock *outBB = BasicBlock::Create(ctx.builder.getContext(), "outofrange", ctx.f); BasicBlock *inBB = BasicBlock::Create(ctx.builder.getContext(), "inrange"); BasicBlock *ansBB = BasicBlock::Create(ctx.builder.getContext(), "arraysize"); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpSLE(idx_dyn, - ConstantInt::get(ctx.types().T_size, ndims)), - inBB, outBB); + auto oor = ctx.builder.CreateICmpSLE(idx_dyn, + ConstantInt::get(ctx.types().T_size, ndims)); + setName(ctx.emission_context, oor, "sizeddim"); + ctx.builder.CreateCondBr(oor, inBB, outBB); ctx.builder.SetInsertPoint(outBB); Value *v_one = ConstantInt::get(ctx.types().T_size, 1); ctx.builder.CreateBr(ansBB); @@ -3431,6 +3462,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, PHINode *result = ctx.builder.CreatePHI(ctx.types().T_size, 2); result->addIncoming(v_one, outBB); result->addIncoming(v_sz, inBB); + setName(ctx.emission_context, result, "arraysize"); *ret = mark_julia_type(ctx, result, false, jl_long_type); return true; } @@ -3478,17 +3510,25 
@@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, data = emit_bitcast(ctx, data, AT->getPointerTo()); // isbits union selector bytes are stored after a->maxsize Value *ndims = (nd == -1 ? emit_arrayndims(ctx, ary) : ConstantInt::get(getInt16Ty(ctx.builder.getContext()), nd)); + setName(ctx.emission_context, ndims, "ndims"); Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(getInt16Ty(ctx.builder.getContext()), 1)); + setName(ctx.emission_context, is_vector, "isvec"); Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, ctx.types().T_size)); + setName(ctx.emission_context, selidx_v, "selidx_v"); Value *selidx_m = emit_arraylen(ctx, ary); Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m); + setName(ctx.emission_context, selidx, "selidx"); ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx); + setName(ctx.emission_context, ptindex, "ptindex"); data = ctx.builder.CreateInBoundsGEP(AT, data, idx); + setName(ctx.emission_context, data, "data"); } ptindex = emit_bitcast(ctx, ptindex, getInt8PtrTy(ctx.builder.getContext())); ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, offset); ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, idx); *ret = emit_unionload(ctx, data, ptindex, ety, elsz, al, ctx.tbaa().tbaa_arraybuf, true, union_max, ctx.tbaa().tbaa_arrayselbyte); + if (ret->V) + setName(ctx.emission_context, ret->V, "arrayref"); } else { MDNode *aliasscope = (f == jl_builtin_const_arrayref) ? 
ctx.noalias().aliasscope.current : nullptr; @@ -3499,6 +3539,8 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, aliasscope, isboxed, AtomicOrdering::NotAtomic); + if (ret->V) + setName(ctx.emission_context, ret->V, "arrayref"); } return true; } @@ -3545,6 +3587,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, // the owner of the data is ary itself except if ary->how == 3 flags = ctx.builder.CreateAnd(flags, 3); Value *is_owned = ctx.builder.CreateICmpEQ(flags, ConstantInt::get(getInt16Ty(ctx.builder.getContext()), 3)); + setName(ctx.emission_context, is_owned, "has_owner"); BasicBlock *curBB = ctx.builder.GetInsertBlock(); BasicBlock *ownedBB = BasicBlock::Create(ctx.builder.getContext(), "array_owned", ctx.f); BasicBlock *mergeBB = BasicBlock::Create(ctx.builder.getContext(), "merge_own", ctx.f); @@ -3558,6 +3601,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, emit_bitcast(ctx, decay_derived(ctx, aryv), ctx.types().T_pprjlvalue), jl_array_data_owner_offset(nd) / sizeof(jl_value_t*)), Align(sizeof(void*))); + setName(ctx.emission_context, own_ptr, "external_owner"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); ai.decorateInst(maybe_mark_load_dereferenceable(own_ptr, false, (jl_value_t*)jl_array_any_type)); } @@ -3571,6 +3615,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, data_owner = ctx.builder.CreatePHI(ctx.types().T_prjlvalue, 2); data_owner->addIncoming(aryv, curBB); data_owner->addIncoming(own_ptr, ownedBB); + setName(ctx.emission_context, data_owner, "data_owner"); } if (!isboxed && jl_is_uniontype(ety)) { Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (elsz + al - 1) / al); @@ -3587,15 +3632,21 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, else { Value *ndims = (nd == -1 ? 
emit_arrayndims(ctx, ary) : ConstantInt::get(getInt16Ty(ctx.builder.getContext()), nd)); Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(getInt16Ty(ctx.builder.getContext()), 1)); + setName(ctx.emission_context, is_vector, "is_vector"); Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, ctx.types().T_size)); + setName(ctx.emission_context, selidx_v, "selidx_v"); Value *selidx_m = emit_arraylen(ctx, ary); Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m); + setName(ctx.emission_context, selidx, "selidx"); ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx); + setName(ctx.emission_context, ptindex, "ptindex"); data = ctx.builder.CreateInBoundsGEP(AT, data, idx); + setName(ctx.emission_context, data, "data"); } ptindex = emit_bitcast(ctx, ptindex, getInt8PtrTy(ctx.builder.getContext())); ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, offset); ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, idx); + setName(ctx.emission_context, ptindex, "ptindex"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_arrayselbyte); ai.decorateInst(ctx.builder.CreateStore(tindex, ptindex)); if (elsz > 0 && (!jl_is_datatype(val.typ) || jl_datatype_size(val.typ) > 0)) { @@ -3694,6 +3745,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, idx = emit_bounds_check(ctx, va_ary, NULL, idx, valen, boundscheck); idx = ctx.builder.CreateAdd(idx, ConstantInt::get(ctx.types().T_size, ctx.nReqArgs)); Instruction *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, idx), Align(sizeof(void*))); + setName(ctx.emission_context, v, "getfield"); // if we know the result type of this load, we will mark that information here too jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_value); 
ai.decorateInst(maybe_mark_load_dereferenceable(v, false, rt)); @@ -3862,6 +3914,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, Value *fieldtyp_p = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, decay_derived(ctx, emit_bitcast(ctx, types_svec, ctx.types().T_pprjlvalue)), idx); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); Value *fieldtyp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, fieldtyp_p, Align(sizeof(void*)))); + setName(ctx.emission_context, fieldtyp, "fieldtype"); *ret = mark_julia_type(ctx, fieldtyp, true, (jl_value_t*)jl_type_type); return true; } @@ -3900,6 +3953,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, auto rng = MDB.createRange(Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, INTPTR_MAX)); cast(len)->setMetadata(LLVMContext::MD_range, rng); } + setName(ctx.emission_context, len, "sizeof"); *ret = mark_julia_type(ctx, len, false, jl_long_type); return true; } @@ -3914,6 +3968,8 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, elsize = ctx.builder.CreateZExt(emit_arrayelsize(ctx, obj), ctx.types().T_size); } *ret = mark_julia_type(ctx, ctx.builder.CreateMul(len, elsize), false, jl_long_type); + if (ret->V) + setName(ctx.emission_context, ret->V, "sizeof"); return true; } } @@ -4030,6 +4086,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, } } Value *isdef = ctx.builder.CreateIsNotNull(fldv); + setName(ctx.emission_context, isdef, "isdefined"); *ret = mark_julia_type(ctx, isdef, false, jl_bool_type); } else { @@ -4131,6 +4188,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos break; case jl_returninfo_t::Union: result = emit_static_alloca(ctx, ArrayType::get(getInt8Ty(ctx.builder.getContext()), returninfo.union_bytes)); + setName(ctx.emission_context, result, "sret_box"); if 
(returninfo.union_align > 1) result->setAlignment(Align(returninfo.union_align)); argvals[idx] = result; @@ -4207,6 +4265,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos } jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); TheCallee = ai.decorateInst(ctx.builder.CreateAlignedLoad(TheCallee->getType(), GV, Align(sizeof(void*)))); + setName(ctx.emission_context, TheCallee, namep); } CallInst *call = ctx.builder.CreateCall(cft, TheCallee, argvals); call->setAttributes(returninfo.attrs); @@ -4276,6 +4335,7 @@ static jl_cgval_t emit_call_specfun_boxed(jl_codectx_t &ctx, jl_value_t *jlretty } jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); theFptr = ai.decorateInst(ctx.builder.CreateAlignedLoad(pfunc, GV, Align(sizeof(void*)))); + setName(ctx.emission_context, theFptr, namep); } else { theFptr = jl_Module->getOrInsertFunction(specFunctionObject, ctx.types().T_jlfunc).getCallee(); @@ -4574,16 +4634,20 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t GlobalVariable *bindinggv = new GlobalVariable(*ctx.f->getParent(), ctx.types().T_pjlvalue, false, GlobalVariable::PrivateLinkage, initnul); LoadInst *cachedval = ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, bindinggv, Align(sizeof(void*))); + setName(ctx.emission_context, cachedval, jl_symbol_name(m->name) + StringRef(".") + jl_symbol_name(s) + ".cached"); cachedval->setOrdering(AtomicOrdering::Unordered); BasicBlock *have_val = BasicBlock::Create(ctx.builder.getContext(), "found"); BasicBlock *not_found = BasicBlock::Create(ctx.builder.getContext(), "notfound"); BasicBlock *currentbb = ctx.builder.GetInsertBlock(); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpNE(cachedval, initnul), have_val, not_found); + auto iscached = ctx.builder.CreateICmpNE(cachedval, initnul); + setName(ctx.emission_context, iscached, "iscached"); + ctx.builder.CreateCondBr(iscached, have_val, not_found); 
ctx.f->getBasicBlockList().push_back(not_found); ctx.builder.SetInsertPoint(not_found); Value *bval = ctx.builder.CreateCall(prepare_call(assign ? jlgetbindingwrorerror_func : jlgetbindingorerror_func), { literal_pointer_val(ctx, (jl_value_t*)m), literal_pointer_val(ctx, (jl_value_t*)s) }); + setName(ctx.emission_context, bval, jl_symbol_name(m->name) + StringRef(".") + jl_symbol_name(s) + ".found"); ctx.builder.CreateAlignedStore(bval, bindinggv, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release); ctx.builder.CreateBr(have_val); ctx.f->getBasicBlockList().push_back(have_val); @@ -4591,6 +4655,7 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t PHINode *p = ctx.builder.CreatePHI(ctx.types().T_pjlvalue, 2); p->addIncoming(cachedval, currentbb); p->addIncoming(bval, not_found); + setName(ctx.emission_context, p, jl_symbol_name(m->name) + StringRef(".") + jl_symbol_name(s)); return p; } if (assign) { @@ -4614,6 +4679,7 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, bool isvol, MDNode *tbaa) { LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))); + setName(ctx.emission_context, v, jl_symbol_name(name) + StringRef(".checked")); if (isvol) v->setVolatile(true); v->setOrdering(AtomicOrdering::Unordered); @@ -4640,6 +4706,7 @@ static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i) i + sizeof(jl_svec_t) / sizeof(jl_value_t*)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); Value *sp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*)))); + setName(ctx.emission_context, sp, "sparam"); Value *isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp, false, true), emit_tagfrom(ctx, jl_tvar_type)); jl_unionall_t *sparam = (jl_unionall_t*)ctx.linfo->def.method->sig; for (size_t j = 0; j < i; j++) { @@ 
-4745,9 +4812,11 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va else { // copy value to a non-mutable (non-volatile SSA) location AllocaInst *varslot = cast(vi.value.V); + setName(ctx.emission_context, varslot, jl_symbol_name(varname)); Type *T = varslot->getAllocatedType(); assert(!varslot->isArrayAllocation() && "variables not expected to be VLA"); AllocaInst *ssaslot = cast(varslot->clone()); + setName(ctx.emission_context, ssaslot, jl_symbol_name(varname) + StringRef(".ssa")); ssaslot->insertAfter(varslot); if (vi.isVolatile) { Value *unbox = ctx.builder.CreateAlignedLoad(ssaslot->getAllocatedType(), varslot, @@ -4800,8 +4869,10 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va isnull = box_isnull; } } - if (isnull) + if (isnull) { + setName(ctx.emission_context, isnull, jl_symbol_name(varname) + StringRef("_is_null")); undef_var_error_ifnot(ctx, isnull, varname); + } return v; } @@ -5270,6 +5341,7 @@ static std::pair get_oc_function(jl_codectx_t &ctx, jl_met Function::ExternalLinkage, fname, jl_Module); jl_init_function(F, ctx.emission_context.TargetTriple); + jl_name_jlfunc_args(ctx.emission_context, F); F->setAttributes(AttributeList::get(ctx.builder.getContext(), {get_func_attrs(ctx.builder.getContext()), F->getAttributes()})); } Function *specF = NULL; @@ -5742,6 +5814,7 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_cod GlobalVariable::InternalLinkage, name, M); jl_init_function(f, params.TargetTriple); + jl_name_jlfunc_args(params, f); //f->setAlwaysInline(); ctx.f = f; // for jl_Module BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", f); @@ -6230,6 +6303,7 @@ static Function* gen_cfun_wrapper( theFptr = Function::Create(ctx.types().T_jlfunc, GlobalVariable::ExternalLinkage, fname, jl_Module); jl_init_function(theFptr, ctx.emission_context.TargetTriple); + jl_name_jlfunc_args(ctx.emission_context, theFptr); addRetAttr(theFptr, 
Attribute::NonNull); } else { @@ -6284,9 +6358,11 @@ static Function* gen_cfun_wrapper( else { if (jlfunc_sret) { result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.attrs, 1, Attribute::StructRet).getValueAsType()); + setName(ctx.emission_context, result, "sret"); assert(cast(result->getType())->hasSameElementTypeAs(cast(cft->getParamType(0)))); } else { result = emit_static_alloca(ctx, get_unionbytes_type(ctx.builder.getContext(), returninfo.union_bytes)); + setName(ctx.emission_context, result, "result_union"); assert(cast(result->getType())->hasSameElementTypeAs(cast(cft->getParamType(0)))); } } @@ -6294,6 +6370,7 @@ static Function* gen_cfun_wrapper( } if (returninfo.return_roots) { AllocaInst *return_roots = emit_static_alloca(ctx, get_returnroots_type(ctx, returninfo.return_roots)); + setName(ctx.emission_context, return_roots, "return_roots"); args.push_back(return_roots); } if (gcstack_arg) @@ -6433,6 +6510,8 @@ static Function* gen_cfun_wrapper( GlobalVariable::ExternalLinkage, funcName, M); jl_init_function(cw_make, ctx.emission_context.TargetTriple); + cw_make->getArg(0)->setName("wrapper"); + cw_make->getArg(1)->setName("newval"); BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", cw_make); IRBuilder<> cwbuilder(b0); Function::arg_iterator AI = cw_make->arg_begin(); @@ -6596,6 +6675,7 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con if (outboxed) { assert(jl_datatype_size(output_type) == sizeof(void*) * 4); Value *strct = emit_allocobj(ctx, (jl_datatype_t*)output_type); + setName(ctx.emission_context, strct, "cfun_result"); Value *derived_strct = emit_bitcast(ctx, decay_derived(ctx, strct), ctx.types().T_size->getPointerTo()); MDNode *tbaa = best_tbaa(ctx.tbaa(), output_type); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); @@ -6674,6 +6754,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret ++GeneratedInvokeWrappers; Function *w = 
Function::Create(get_func_sig(M->getContext()), GlobalVariable::ExternalLinkage, funcName, M); jl_init_function(w, params.TargetTriple); + jl_name_jlfunc_args(params, w); w->setAttributes(AttributeList::get(M->getContext(), {get_func_attrs(M->getContext()), w->getAttributes()})); w->addFnAttr(Attribute::OptimizeNone); w->addFnAttr(Attribute::NoInline); @@ -6710,6 +6791,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret case jl_returninfo_t::SRet: assert(cast(ftype->getParamType(0))->isOpaqueOrPointeeTypeMatches(getAttributeAtIndex(f.attrs, 1, Attribute::StructRet).getValueAsType())); result = ctx.builder.CreateAlloca(getAttributeAtIndex(f.attrs, 1, Attribute::StructRet).getValueAsType()); + setName(ctx.emission_context, result, "sret"); args[idx] = result; idx++; break; @@ -6719,10 +6801,12 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret result->setAlignment(Align(f.union_align)); args[idx] = result; idx++; + setName(ctx.emission_context, result, "result_union"); break; } if (f.return_roots) { AllocaInst *return_roots = emit_static_alloca(ctx, ArrayType::get(ctx.types().T_prjlvalue, f.return_roots)); + setName(ctx.emission_context, return_roots, "return_roots"); args[idx] = return_roots; idx++; } @@ -7108,11 +7192,9 @@ static jl_llvm_functions_t // jl_printf(JL_STDERR, "\n*** compiling %s at %s:%d\n\n", // jl_symbol_name(ctx.name), ctx.file.str().c_str(), toplineno); - ctx.debug_enabled = true; + bool debug_enabled = ctx.emission_context.debug_level != 0; if (dbgFuncName.empty()) // Should never happen anymore? - ctx.debug_enabled = 0; - if (jl_options.debug_level == 0) - ctx.debug_enabled = 0; + debug_enabled = false; // step 2. process var-info lists to see what vars need boxing int n_ssavalues = jl_is_long(src->ssavaluetypes) ? 
jl_unbox_long(src->ssavaluetypes) : jl_array_len(src->ssavaluetypes); @@ -7236,6 +7318,11 @@ static jl_llvm_functions_t GlobalVariable::ExternalLinkage, declarations.specFunctionObject, M); jl_init_function(f, ctx.emission_context.TargetTriple); + if (needsparams) { + jl_name_jlfuncparams_args(ctx.emission_context, f); + } else { + jl_name_jlfunc_args(ctx.emission_context, f); + } f->setAttributes(AttributeList::get(ctx.builder.getContext(), {get_func_attrs(ctx.builder.getContext()), f->getAttributes()})); returninfo.decl = f; declarations.functionObject = needsparams ? "jl_fptr_sparam" : "jl_fptr_args"; @@ -7281,14 +7368,14 @@ static jl_llvm_functions_t tableKind = DICompileUnit::DebugNameTableKind::GNU; else tableKind = DICompileUnit::DebugNameTableKind::None; - DIBuilder dbuilder(*M, true, ctx.debug_enabled ? getOrCreateJuliaCU(*M, emissionKind, tableKind) : NULL); + DIBuilder dbuilder(*M, true, debug_enabled ? getOrCreateJuliaCU(*M, emissionKind, tableKind) : NULL); DIFile *topfile = NULL; DISubprogram *SP = NULL; DebugLoc noDbg, topdebugloc; - if (ctx.debug_enabled) { + if (debug_enabled) { topfile = dbuilder.createFile(ctx.file, "."); DISubroutineType *subrty; - if (jl_options.debug_level <= 1) + if (ctx.emission_context.debug_level <= 1) subrty = debuginfo.jl_di_func_null_sig; else if (!specsig) subrty = debuginfo.jl_di_func_sig; @@ -7309,7 +7396,7 @@ static jl_llvm_functions_t ); topdebugloc = DILocation::get(ctx.builder.getContext(), toplineno, 0, SP, NULL); f->setSubprogram(SP); - if (jl_options.debug_level >= 2) { + if (ctx.emission_context.debug_level >= 2) { const bool AlwaysPreserve = true; // Go over all arguments and local variables and initialize their debug information for (i = 0; i < nreq; i++) { @@ -7371,6 +7458,7 @@ static jl_llvm_functions_t fArg = &*AI++; argArray = &*AI++; pargArray = ctx.builder.CreateAlloca(argArray->getType()); + setName(ctx.emission_context, pargArray, "stackargs"); ctx.builder.CreateStore(argArray, pargArray, 
true/*volatile store to prevent removal of this alloca*/); argCount = &*AI++; ctx.argArray = argArray; @@ -7412,6 +7500,7 @@ static jl_llvm_functions_t lv->setName(jl_symbol_name(s)); varinfo.value = mark_julia_slot(lv, jt, NULL, ctx.tbaa().tbaa_stack); varinfo.pTIndex = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext())); + setName(ctx.emission_context, varinfo.pTIndex, "tindex"); } else if (allunbox) { // all ghost values just need a selector allocated @@ -7438,7 +7527,7 @@ static jl_llvm_functions_t } varinfo.value = mark_julia_slot(lv, jt, NULL, ctx.tbaa().tbaa_stack); alloc_def_flag(ctx, varinfo); - if (ctx.debug_enabled && varinfo.dinfo) { + if (debug_enabled && varinfo.dinfo) { assert((Metadata*)varinfo.dinfo->getType() != debuginfo.jl_pvalue_dillvmt); dbuilder.insertDeclare(lv, varinfo.dinfo, dbuilder.createExpression(), topdebugloc, @@ -7455,7 +7544,7 @@ static jl_llvm_functions_t StoreInst *SI = new StoreInst(Constant::getNullValue(ctx.types().T_prjlvalue), av, false, Align(sizeof(void*))); SI->insertAfter(ctx.topalloca); varinfo.boxroot = av; - if (ctx.debug_enabled && varinfo.dinfo) { + if (debug_enabled && varinfo.dinfo) { DIExpression *expr; if ((Metadata*)varinfo.dinfo->getType() == debuginfo.jl_pvalue_dillvmt) { expr = dbuilder.createExpression(); @@ -7656,7 +7745,7 @@ static jl_llvm_functions_t ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))), false, vi.value.typ)); theArg = mark_julia_type(ctx, load, true, vi.value.typ); - if (ctx.debug_enabled && vi.dinfo && !vi.boxroot && !vi.value.V) { + if (debug_enabled && vi.dinfo && !vi.boxroot && !vi.value.V) { SmallVector addr; addr.push_back(llvm::dwarf::DW_OP_deref); addr.push_back(llvm::dwarf::DW_OP_plus_uconst); @@ -7675,7 +7764,7 @@ static jl_llvm_functions_t assert(vi.value.V == NULL && "unexpected variable slot created for argument"); // keep track of original (possibly boxed) value to avoid re-boxing or moving vi.value = theArg; - if (specsig && 
theArg.V && ctx.debug_enabled && vi.dinfo) { + if (specsig && theArg.V && debug_enabled && vi.dinfo) { SmallVector addr; Value *parg; if (theArg.ispointer()) { @@ -7801,7 +7890,7 @@ static jl_llvm_functions_t else info.is_user_code = in_user_mod(module); info.is_tracked = in_tracked_path(info.file); - if (ctx.debug_enabled) { + if (debug_enabled) { StringRef fname; if (jl_is_method_instance(method)) method = ((jl_method_instance_t*)method)->def.value; @@ -8042,7 +8131,7 @@ static jl_llvm_functions_t while (cursor != -1) { int32_t debuginfoloc = ((int32_t*)jl_array_data(src->codelocs))[cursor]; if (debuginfoloc > 0) { - if (ctx.debug_enabled) + if (debug_enabled) ctx.builder.SetCurrentDebugLocation(linetable.at(debuginfoloc).loc); coverageVisitStmt(debuginfoloc); } @@ -8444,7 +8533,7 @@ static jl_llvm_functions_t for (auto &I : BB) { CallBase *call = dyn_cast(&I); if (call) { - if (ctx.debug_enabled && !I.getDebugLoc()) { + if (debug_enabled && !I.getDebugLoc()) { // LLVM Verifier: inlinable function call in a function with debug info must have a !dbg location // make sure that anything we attempt to call has some inlining info, just in case optimization messed up // (except if we know that it is an intrinsic used in our prologue, which should never have its own debug subprogram) @@ -8469,7 +8558,7 @@ static jl_llvm_functions_t in_prologue = false; } } - if (ctx.debug_enabled) + if (debug_enabled) dbuilder.finalize(); if (ctx.vaSlot > 0) { @@ -8665,6 +8754,7 @@ jl_llvm_functions_t jl_emit_codeinst( if (// keep code when keeping everything !(JL_DELETE_NON_INLINEABLE) || // aggressively keep code when debugging level >= 2 + // note that this uses the global jl_options.debug_level, not the local emission_ctx.debug_level jl_options.debug_level > 1) { // update the stored code if (inferred != (jl_value_t*)src) { diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index ee1ded5a51e44..09e04eb683af1 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -345,12 
+345,14 @@ static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed) else if (!ty->isIntOrPtrTy() && !ty->isFloatingPointTy()) { assert(DL.getTypeSizeInBits(ty) == DL.getTypeSizeInBits(to)); AllocaInst *cast = ctx.builder.CreateAlloca(ty); + setName(ctx.emission_context, cast, "coercion"); ctx.builder.CreateStore(unboxed, cast); unboxed = ctx.builder.CreateLoad(to, ctx.builder.CreateBitCast(cast, to->getPointerTo())); } else if (frompointer) { Type *INTT_to = INTT(to, DL); unboxed = ctx.builder.CreatePtrToInt(unboxed, INTT_to); + setName(ctx.emission_context, unboxed, "coercion"); if (INTT_to != to) unboxed = ctx.builder.CreateBitCast(unboxed, to); } @@ -359,6 +361,7 @@ static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed) if (to != INTT_to) unboxed = ctx.builder.CreateBitCast(unboxed, INTT_to); unboxed = emit_inttoptr(ctx, unboxed, to); + setName(ctx.emission_context, unboxed, "coercion"); } else { unboxed = ctx.builder.CreateBitCast(unboxed, to); @@ -394,6 +397,7 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va if (jt == (jl_value_t*)jl_bool_type || to->isIntegerTy(1)) { jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa); Instruction *unbox_load = ai.decorateInst(ctx.builder.CreateLoad(getInt8Ty(ctx.builder.getContext()), maybe_bitcast(ctx, p, getInt8PtrTy(ctx.builder.getContext())))); + setName(ctx.emission_context, unbox_load, "unbox"); if (jt == (jl_value_t*)jl_bool_type) unbox_load->setMetadata(LLVMContext::MD_range, MDNode::get(ctx.builder.getContext(), { ConstantAsMetadata::get(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)), @@ -421,12 +425,14 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va (to->isFloatingPointTy() || to->isIntegerTy() || to->isPointerTy()) && DL.getTypeSizeInBits(AllocType) == DL.getTypeSizeInBits(to)) { Instruction *load = ctx.builder.CreateAlignedLoad(AllocType, p, Align(alignment)); + 
setName(ctx.emission_context, load, "unbox"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa); return emit_unboxed_coercion(ctx, to, ai.decorateInst(load)); } } p = maybe_bitcast(ctx, p, ptype); Instruction *load = ctx.builder.CreateAlignedLoad(to, p, Align(alignment)); + setName(ctx.emission_context, load, "unbox"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa); return ai.decorateInst(load); } @@ -531,8 +537,10 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv) } else { Value *size = emit_datatype_size(ctx, typ); + auto sizecheck = ctx.builder.CreateICmpEQ(size, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), nb)); + setName(ctx.emission_context, sizecheck, "sizecheck"); error_unless(ctx, - ctx.builder.CreateICmpEQ(size, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), nb)), + sizecheck, "bitcast: argument size does not match size of target type"); } } @@ -555,20 +563,25 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv) storage_type, emit_bitcast(ctx, data_pointer(ctx, v), storage_type->getPointerTo()))); + setName(ctx.emission_context, vx, "bitcast"); } vxt = vx->getType(); if (vxt != llvmt) { - if (llvmt->isIntegerTy(1)) + if (llvmt->isIntegerTy(1)) { vx = ctx.builder.CreateTrunc(vx, llvmt); - else if (vxt->isIntegerTy(1) && llvmt->isIntegerTy(8)) + } else if (vxt->isIntegerTy(1) && llvmt->isIntegerTy(8)) { vx = ctx.builder.CreateZExt(vx, llvmt); - else if (vxt->isPointerTy() && !llvmt->isPointerTy()) + } else if (vxt->isPointerTy() && !llvmt->isPointerTy()) { vx = ctx.builder.CreatePtrToInt(vx, llvmt); - else if (!vxt->isPointerTy() && llvmt->isPointerTy()) + setName(ctx.emission_context, vx, "bitcast_coercion"); + } else if (!vxt->isPointerTy() && llvmt->isPointerTy()) { vx = emit_inttoptr(ctx, vx, llvmt); - else + setName(ctx.emission_context, vx, "bitcast_coercion"); + } else { vx = emit_bitcast(ctx, vx, llvmt); + setName(ctx.emission_context, vx, 
"bitcast_coercion"); + } } if (jl_is_concrete_type((jl_value_t*)bt)) { @@ -576,6 +589,7 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv) } else { Value *box = emit_allocobj(ctx, nb, bt_value_rt); + setName(ctx.emission_context, box, "bitcast_box"); init_bits_value(ctx, box, vx, ctx.tbaa().tbaa_immut); return mark_julia_type(ctx, box, true, bt->name->wrapper); } @@ -618,8 +632,10 @@ static jl_cgval_t generic_cast( // but if we start looking at more bits we need to actually do the // rounding first instead of carrying around incorrect low bits. Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType()); + setName(ctx.emission_context, jlfloattemp_var, "rounding_slot"); ctx.builder.CreateStore(from, jlfloattemp_var); from = ctx.builder.CreateLoad(from->getType(), jlfloattemp_var, /*force this to load from the stack*/true); + setName(ctx.emission_context, from, "rounded"); } } Value *ans = ctx.builder.CreateCast(Op, from, to); @@ -632,6 +648,7 @@ static jl_cgval_t generic_cast( Value *targ_rt = boxed(ctx, targ); emit_concretecheck(ctx, targ_rt, std::string(jl_intrinsic_name(f)) + ": target type not a leaf primitive type"); Value *box = emit_allocobj(ctx, nb, targ_rt); + setName(ctx.emission_context, box, "cast_box"); init_bits_value(ctx, box, ans, ctx.tbaa().tbaa_immut); return mark_julia_type(ctx, box, true, jlto->name->wrapper); } @@ -667,10 +684,13 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) Value *idx = emit_unbox(ctx, ctx.types().T_size, i, (jl_value_t*)jl_long_type); Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(ctx.types().T_size, 1)); + setName(ctx.emission_context, im1, "pointerref_idx"); if (ety == (jl_value_t*)jl_any_type) { Value *thePtr = emit_unbox(ctx, ctx.types().T_pprjlvalue, e, e.typ); + setName(ctx.emission_context, thePtr, "unbox_any_ptr"); LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, 
thePtr, im1), Align(align_nb)); + setName(ctx.emission_context, load, "any_unbox"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_data); ai.decorateInst(load); return mark_julia_type(ctx, load, true, ety); @@ -679,10 +699,13 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) assert(jl_is_datatype(ety)); uint64_t size = jl_datatype_size(ety); Value *strct = emit_allocobj(ctx, (jl_datatype_t*)ety); + setName(ctx.emission_context, strct, "pointerref_box"); im1 = ctx.builder.CreateMul(im1, ConstantInt::get(ctx.types().T_size, LLT_ALIGN(size, jl_datatype_align(ety)))); + setName(ctx.emission_context, im1, "pointerref_offset"); Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ); thePtr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, thePtr, getInt8PtrTy(ctx.builder.getContext())), im1); + setName(ctx.emission_context, thePtr, "pointerref_src"); MDNode *tbaa = best_tbaa(ctx.tbaa(), ety); emit_memcpy(ctx, strct, jl_aliasinfo_t::fromTBAA(ctx, tbaa), thePtr, jl_aliasinfo_t::fromTBAA(ctx, nullptr), size, 1); return mark_julia_type(ctx, strct, true, ety); @@ -693,7 +716,9 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) assert(!isboxed); if (!type_is_ghost(ptrty)) { Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ); - return typed_load(ctx, thePtr, im1, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, AtomicOrdering::NotAtomic, true, align_nb); + auto load = typed_load(ctx, thePtr, im1, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, AtomicOrdering::NotAtomic, true, align_nb); + setName(ctx.emission_context, load.V, "pointerref"); + return load; } else { return ghostValue(ctx, ety); @@ -736,14 +761,17 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv) Value *idx = emit_unbox(ctx, ctx.types().T_size, i, (jl_value_t*)jl_long_type); Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(ctx.types().T_size, 1)); 
+ setName(ctx.emission_context, im1, "pointerset_idx"); Value *thePtr; if (ety == (jl_value_t*)jl_any_type) { // unsafe_store to Ptr{Any} is allowed to implicitly drop GC roots. thePtr = emit_unbox(ctx, ctx.types().T_size->getPointerTo(), e, e.typ); - Instruction *store = ctx.builder.CreateAlignedStore( - ctx.builder.CreatePtrToInt(emit_pointer_from_objref(ctx, boxed(ctx, x)), ctx.types().T_size), - ctx.builder.CreateInBoundsGEP(ctx.types().T_size, thePtr, im1), Align(align_nb)); + auto gep = ctx.builder.CreateInBoundsGEP(ctx.types().T_size, thePtr, im1); + setName(ctx.emission_context, gep, "pointerset_ptr"); + auto val = ctx.builder.CreatePtrToInt(emit_pointer_from_objref(ctx, boxed(ctx, x)), ctx.types().T_size); + setName(ctx.emission_context, val, "pointerset_val"); + Instruction *store = ctx.builder.CreateAlignedStore(val, gep, Align(align_nb)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_data); ai.decorateInst(store); } @@ -752,7 +780,10 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv) uint64_t size = jl_datatype_size(ety); im1 = ctx.builder.CreateMul(im1, ConstantInt::get(ctx.types().T_size, LLT_ALIGN(size, jl_datatype_align(ety)))); - emit_memcpy(ctx, ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), thePtr, im1), jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, align_nb); + setName(ctx.emission_context, im1, "pointerset_offset"); + auto gep = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), thePtr, im1); + setName(ctx.emission_context, gep, "pointerset_ptr"); + emit_memcpy(ctx, gep, jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, align_nb); } else { bool isboxed; @@ -803,6 +834,7 @@ static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) if (ety == (jl_value_t*)jl_any_type) { Value *thePtr = emit_unbox(ctx, ctx.types().T_pprjlvalue, e, e.typ); LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, thePtr, Align(sizeof(jl_value_t*))); + 
setName(ctx.emission_context, load, "atomic_pointerref"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_data); ai.decorateInst(load); load->setOrdering(llvm_order); @@ -823,11 +855,13 @@ static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) if (!jl_isbits(ety)) { assert(jl_is_datatype(ety)); Value *strct = emit_allocobj(ctx, (jl_datatype_t*)ety); + setName(ctx.emission_context, strct, "atomic_pointerref_box"); Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ); Type *loadT = Type::getIntNTy(ctx.builder.getContext(), nb * 8); thePtr = emit_bitcast(ctx, thePtr, loadT->getPointerTo()); MDNode *tbaa = best_tbaa(ctx.tbaa(), ety); LoadInst *load = ctx.builder.CreateAlignedLoad(loadT, thePtr, Align(nb)); + setName(ctx.emission_context, load, "atomic_pointerref"); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); ai.decorateInst(load); load->setOrdering(llvm_order); @@ -842,7 +876,9 @@ static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) assert(!isboxed); if (!type_is_ghost(ptrty)) { Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ); - return typed_load(ctx, thePtr, nullptr, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, llvm_order, true, nb); + auto load = typed_load(ctx, thePtr, nullptr, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, llvm_order, true, nb); + setName(ctx.emission_context, load.V, "atomic_pointerref"); + return load; } else { if (order > jl_memory_order_monotonic) @@ -942,15 +978,18 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl static Value *emit_checked_srem_int(jl_codectx_t &ctx, Value *x, Value *den) { Type *t = den->getType(); + auto ndivby0 = ctx.builder.CreateICmpNE(den, ConstantInt::get(t, 0)); + setName(ctx.emission_context, ndivby0, "ndivby0"); raise_exception_unless(ctx, - ctx.builder.CreateICmpNE(den, ConstantInt::get(t, 0)), + ndivby0, literal_pointer_val(ctx, jl_diverror_exception)); 
BasicBlock *m1BB = BasicBlock::Create(ctx.builder.getContext(), "minus1", ctx.f); BasicBlock *okBB = BasicBlock::Create(ctx.builder.getContext(), "oksrem", ctx.f); BasicBlock *cont = BasicBlock::Create(ctx.builder.getContext(), "after_srem", ctx.f); PHINode *ret = PHINode::Create(t, 2); - ctx.builder.CreateCondBr(ctx.builder.CreateICmpEQ(den ,ConstantInt::get(t, -1, true)), - m1BB, okBB); + auto divbym1 = ctx.builder.CreateICmpEQ(den, ConstantInt::get(t, -1, true)); + setName(ctx.emission_context, divbym1, "divbym1"); + ctx.builder.CreateCondBr(divbym1, m1BB, okBB); ctx.builder.SetInsertPoint(m1BB); ctx.builder.CreateBr(cont); ctx.builder.SetInsertPoint(okBB); @@ -960,6 +999,7 @@ static Value *emit_checked_srem_int(jl_codectx_t &ctx, Value *x, Value *den) ret->addIncoming(// rem(typemin, -1) is undefined ConstantInt::get(t, 0), m1BB); ret->addIncoming(sremval, okBB); + setName(ctx.emission_context, ret, "checked_srem"); ctx.builder.Insert(ret); return ret; } @@ -995,6 +1035,7 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_cgval_t y, jl_value_t *rt_hint) { Value *isfalse = emit_condition(ctx, c, "ifelse"); + setName(ctx.emission_context, isfalse, "ifelse_cond"); jl_value_t *t1 = x.typ; jl_value_t *t2 = y.typ; // handle cases where the condition is irrelevant based on type info @@ -1067,6 +1108,7 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_ if (x_ptr->getType() != y_ptr->getType()) y_ptr = ctx.builder.CreateBitCast(y_ptr, x_ptr->getType()); ifelse_result = ctx.builder.CreateSelect(isfalse, y_ptr, x_ptr); + setName(ctx.emission_context, ifelse_result, "ifelse_result"); ifelse_tbaa = MDNode::getMostGenericTBAA(x.tbaa, y.tbaa); if (ifelse_tbaa == NULL) { // LLVM won't return a TBAA result for the root, but mark_julia_struct requires it: make it now @@ -1111,6 +1153,7 @@ static jl_cgval_t emit_ifelse(jl_codectx_t 
&ctx, jl_cgval_t c, jl_cgval_t x, jl_ ctx.builder.SetInsertPoint(post); ctx.builder.Insert(ret); tindex = ret; + setName(ctx.emission_context, tindex, "ifelse_tindex"); } jl_cgval_t ret = mark_julia_slot(ifelse_result, rt_hint, tindex, ifelse_tbaa); if (x_vboxed || y_vboxed) { @@ -1119,6 +1162,7 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_ if (!y_vboxed) y_vboxed = ConstantPointerNull::get(cast(x_vboxed->getType())); ret.Vboxed = ctx.builder.CreateSelect(isfalse, y_vboxed, x_vboxed); + setName(ctx.emission_context, ret.Vboxed, "ifelse_vboxed"); assert(ret.Vboxed->getType() == ctx.types().T_prjlvalue); } return ret; @@ -1126,6 +1170,7 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_ ifelse_result = ctx.builder.CreateSelect(isfalse, boxed(ctx, y), boxed(ctx, x)); + setName(ctx.emission_context, ifelse_result, "ifelse_result"); } jl_value_t *jt = (t1 == t2 ? t1 : rt_hint); return mark_julia_type(ctx, ifelse_result, isboxed, jt); @@ -1411,7 +1456,9 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg FunctionCallee intr = Intrinsic::getDeclaration(jl_Module, intr_id, makeArrayRef(t)); Value *res = ctx.builder.CreateCall(intr, {x, y}); Value *val = ctx.builder.CreateExtractValue(res, ArrayRef(0)); + setName(ctx.emission_context, val, "checked"); Value *obit = ctx.builder.CreateExtractValue(res, ArrayRef(1)); + setName(ctx.emission_context, obit, "overflow"); Value *obyte = ctx.builder.CreateZExt(obit, getInt8Ty(ctx.builder.getContext())); jl_value_t *params[2]; @@ -1429,30 +1476,31 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg case checked_sdiv_int: { Value *typemin = ctx.builder.CreateShl(ConstantInt::get(t, 1), t->getPrimitiveSizeInBits() - 1); - raise_exception_unless(ctx, - ctx.builder.CreateAnd( - ctx.builder.CreateICmpNE(y, ConstantInt::get(t, 0)), - ctx.builder.CreateOr( - ctx.builder.CreateICmpNE(y, ConstantInt::get(t, 
-1, true)), - ctx.builder.CreateICmpNE(x, typemin))), - literal_pointer_val(ctx, jl_diverror_exception)); + auto cond = ctx.builder.CreateAnd( + ctx.builder.CreateICmpNE(y, ConstantInt::get(t, 0)), + ctx.builder.CreateOr( + ctx.builder.CreateICmpNE(y, ConstantInt::get(t, -1, true)), + ctx.builder.CreateICmpNE(x, typemin))); + setName(ctx.emission_context, cond, "divisor_valid"); + raise_exception_unless(ctx, cond, literal_pointer_val(ctx, jl_diverror_exception)); return ctx.builder.CreateSDiv(x, y); } - case checked_udiv_int: - raise_exception_unless(ctx, - ctx.builder.CreateICmpNE(y, ConstantInt::get(t, 0)), - literal_pointer_val(ctx, jl_diverror_exception)); + case checked_udiv_int: { + auto cond = ctx.builder.CreateICmpNE(y, ConstantInt::get(t, 0)); + setName(ctx.emission_context, cond, "ndivby0"); + raise_exception_unless(ctx, cond, literal_pointer_val(ctx, jl_diverror_exception)); return ctx.builder.CreateUDiv(x, y); - + } case checked_srem_int: return emit_checked_srem_int(ctx, x, y); - case checked_urem_int: - raise_exception_unless(ctx, - ctx.builder.CreateICmpNE(y, ConstantInt::get(t, 0)), - literal_pointer_val(ctx, jl_diverror_exception)); + case checked_urem_int: { + auto cond = ctx.builder.CreateICmpNE(y, ConstantInt::get(t, 0)); + setName(ctx.emission_context, cond, "ndivby0"); + raise_exception_unless(ctx, cond, literal_pointer_val(ctx, jl_diverror_exception)); return ctx.builder.CreateURem(x, y); + } case eq_int: *newtyp = jl_bool_type; return ctx.builder.CreateICmpEQ(x, y); case ne_int: *newtyp = jl_bool_type; return ctx.builder.CreateICmpNE(x, y); diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 1468b4f55f5a7..88e4b0f97927f 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -195,6 +195,8 @@ static jl_callptr_t _jl_compile_codeinst( jl_codegen_params_t params(std::move(context), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context params.cache = true; params.world = world; + params.imaging 
= imaging_default(); + params.debug_level = jl_options.debug_level; jl_workqueue_t emitted; { orc::ThreadSafeModule result_m = @@ -358,6 +360,8 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void * return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple())); }); jl_codegen_params_t params(into->getContext(), std::move(target_info.first), std::move(target_info.second)); + params.imaging = imaging_default(); + params.debug_level = jl_options.debug_level; if (pparams == NULL) pparams = &params; assert(pparams->tsctx.getContext() == into->getContext().getContext()); diff --git a/src/jitlayers.h b/src/jitlayers.h index 4e53bbed6e766..3aa3998d3ac23 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -237,6 +237,7 @@ typedef struct _jl_codegen_params_t { bool cache = false; bool external_linkage = false; bool imaging; + int debug_level; _jl_codegen_params_t(orc::ThreadSafeContext ctx, DataLayout DL, Triple triple) : tsctx(std::move(ctx)), tsctx_lock(tsctx.getLock()), DL(std::move(DL)), TargetTriple(std::move(triple)), imaging(imaging_default()) {} diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl index 13a68be2927de..21567468ffe9e 100644 --- a/test/cmdlineargs.jl +++ b/test/cmdlineargs.jl @@ -519,29 +519,34 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` # -g @test readchomp(`$exename -E "Base.JLOptions().debug_level" -g`) == "2" - let code = writereadpipeline("code_llvm(stdout, +, (Int64, Int64), raw=true, dump_module=true)", `$exename -g0`) - @test code[2] - code = code[1] - @test occursin("llvm.module.flags", code) - @test !occursin("llvm.dbg.cu", code) - @test !occursin("int.jl", code) - @test !occursin("Int64", code) - end - let code = writereadpipeline("code_llvm(stdout, +, (Int64, Int64), raw=true, dump_module=true)", `$exename -g1`) - @test code[2] - code = code[1] - @test occursin("llvm.module.flags", code) - @test occursin("llvm.dbg.cu", code) - @test occursin("int.jl", code) - @test
!occursin("Int64", code) - end - let code = writereadpipeline("code_llvm(stdout, +, (Int64, Int64), raw=true, dump_module=true)", `$exename -g2`) - @test code[2] - code = code[1] - @test occursin("llvm.module.flags", code) - @test occursin("llvm.dbg.cu", code) - @test occursin("int.jl", code) - @test occursin("\"Int64\"", code) + # --print-before/--print-after with pass names is broken on Windows due to no-gnu-unique issues + if !Sys.iswindows() + withenv("JULIA_LLVM_ARGS" => "--print-before=FinalLowerGC") do + let code = readchomperrors(`$exename -g0 -E "@eval Int64(1)+Int64(1)"`) + @test code[1] + code = code[3] + @test occursin("llvm.module.flags", code) + @test !occursin("llvm.dbg.cu", code) + @test !occursin("int.jl", code) + @test !occursin("\"Int64\"", code) + end + let code = readchomperrors(`$exename -g1 -E "@eval Int64(1)+Int64(1)"`) + @test code[1] + code = code[3] + @test occursin("llvm.module.flags", code) + @test occursin("llvm.dbg.cu", code) + @test occursin("int.jl", code) + @test !occursin("\"Int64\"", code) + end + let code = readchomperrors(`$exename -g2 -E "@eval Int64(1)+Int64(1)"`) + @test code[1] + code = code[3] + @test occursin("llvm.module.flags", code) + @test occursin("llvm.dbg.cu", code) + @test occursin("int.jl", code) + @test occursin("\"Int64\"", code) + end + end end # --check-bounds diff --git a/test/llvmpasses/llvmcall.jl b/test/llvmpasses/llvmcall.jl index a55201c3e3bc3..d39f64d5f839c 100644 --- a/test/llvmpasses/llvmcall.jl +++ b/test/llvmpasses/llvmcall.jl @@ -13,20 +13,109 @@ end @generated foo(x)=:(ccall("extern foo", llvmcall, $x, ($x,), x)) bar(x) = ntuple(i -> VecElement{Float16}(x[i]), 2) -# CHECK: call half @foo(half %{{[0-9]+}}) +# CHECK: define +# CHECK-SAME: half @julia_foo +# CHECK-SAME: { +# CHECK-NOT: define +# CHECK: [[FOO_RET:%.*]] = call half @foo(half [[FOO_ARG:%.*]]) +# CHECK-NOT: define +# CHECK: ret half +# CHECK-NOT: define +# CHECK: } emit(foo, Float16) -# CHECK: call [2 x half] @foo([2 x half] 
%{{[0-9]+}}) +# COM: Make sure that we don't miss a function by accident (helps localize errors) +# CHECK-NOT: { +# CHECK-NOT: } +# CHECK: define +# CHECK-SAME: nonnull {} addrspace(10)* @jfptr +# CHECK-SAME: { + +# CHECK: define +# CHECK-SAME: [2 x half] @julia_foo +# CHECK-SAME: { +# CHECK-NOT: define +# CHECK: [[FOO_RET:%.*]] = call [2 x half] @foo([2 x half] [[FOO_ARG:%.*]]) +# CHECK-NOT: define +# CHECK: ret [2 x half] +# CHECK-NOT: define +# CHECK: } emit(foo, NTuple{2, Float16}) -# CHECK: call <2 x half> @foo(<2 x half> %{{[0-9]+}}) +# COM: Make sure that we don't miss a function by accident (helps localize errors) +# CHECK-NOT: { +# CHECK-NOT: } +# CHECK: define +# CHECK-SAME: nonnull {} addrspace(10)* @jfptr +# CHECK-SAME: { + +# CHECK: define +# CHECK-SAME: <2 x half> @julia_foo +# CHECK-SAME: { +# CHECK-NOT: define +# CHECK: [[FOO_RET:%.*]] call <2 x half> @foo(<2 x half> [[FOO_ARG:%.*]]) +# CHECK-NOT: define +# CHECK: ret <2 x half> +# CHECK-NOT: define +# CHECK: } emit(foo, NTuple{2, VecElement{Float16}}) -# CHECK: call i8 addrspace(3)* @foo(i8 addrspace(3)* %{{[0-9]+}}) +# COM: Make sure that we don't miss a function by accident (helps localize errors) +# CHECK-NOT: { +# CHECK-NOT: } +# CHECK: define +# CHECK-SAME: nonnull {} addrspace(10)* @jfptr +# CHECK-SAME: { + +# CHECK: define +# CHECK-SAME: i8 addrspace(3)* @julia_foo +# CHECK-SAME: { +# CHECK-NOT: define +# CHECK: [[FOO_RET:%.*]] call i8 addrspace(3)* @foo(i8 addrspace(3)* [[FOO_ARG:%.*]]) +# CHECK-NOT: define +# CHECK: ret i8 addrspace(3)* +# CHECK-NOT: define +# CHECK: } emit(foo, Core.LLVMPtr{Float32, 3}) -# CHECK: call { i32, i32 } @foo({ i32, i32 } %{{[0-9]+}}) +# COM: Make sure that we don't miss a function by accident (helps localize errors) +# CHECK-NOT: { +# CHECK-NOT: } +# CHECK: define +# CHECK-SAME: nonnull {} addrspace(10)* @jfptr +# CHECK-SAME: { + +# CHECK: define +# CHECK-SAME: [2 x i32] @julia_foo +# CHECK-SAME: { +# CHECK-NOT: define +# CHECK: [[FOO_RET:%.*]] call { i32, i32 
} @foo({ i32, i32 } [[FOO_ARG:%.*]]) +# CHECK-NOT: define +# CHECK: ret [2 x i32] +# CHECK-NOT: define +# CHECK: } emit(foo, Foo) -# CHECK: define {{(swiftcc )?}}<2 x half> @julia_bar_{{[0-9]+}}( +# COM: Make sure that we don't miss a function by accident (helps localize errors) +# CHECK-NOT: { +# CHECK-NOT: } +# CHECK: define +# CHECK-SAME: nonnull {} addrspace(10)* @jfptr +# CHECK-SAME: { + +# CHECK: define +# CHECK-SAME: <2 x half> @julia_bar +# CHECK-SAME: [2 x half] +# CHECK-SAME: { +# CHECK-NOT: define +# CHECK: ret <2 x half> +# CHECK-NOT: define +# CHECK: } emit(bar, NTuple{2, Float16}) + +# COM: Make sure that we don't miss a function by accident (helps localize errors) +# CHECK-NOT: { +# CHECK-NOT: } +# CHECK: define +# CHECK-SAME: nonnull {} addrspace(10)* @jfptr +# CHECK-SAME: { From a74e9f97d43984bc8ceae5b1a866c4e6b04a7124 Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Tue, 20 Jun 2023 02:05:04 -0400 Subject: [PATCH 199/290] Install `libclang_rt.asan-` properly --- Make.inc | 2 +- Makefile | 8 ++++++++ deps/Makefile | 7 ++++++- deps/sanitizers.mk | 29 +++++++++++++++++++++++++++++ src/flisp/Makefile | 7 +++++++ 5 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 deps/sanitizers.mk diff --git a/Make.inc b/Make.inc index ac06dbfcc464f..937f146b0150c 100644 --- a/Make.inc +++ b/Make.inc @@ -723,7 +723,7 @@ endif # OS Linux or FreeBSD endif # SANITIZE_MEMORY=1 ifeq ($(SANITIZE_ADDRESS),1) SANITIZE_OPTS += -fsanitize=address -SANITIZE_LDFLAGS += -fsanitize=address -shared-libasan -Wl,-rpath=$(dir $(shell $(CC) --print-file-name libclang_rt.asan-x86_64.so)) +SANITIZE_LDFLAGS += -fsanitize=address -shared-libasan endif ifeq ($(SANITIZE_THREAD),1) SANITIZE_OPTS += -fsanitize=thread diff --git a/Makefile b/Makefile index 046f18492bc3e..958dbae4bb9fb 100644 --- a/Makefile +++ b/Makefile @@ -237,6 +237,14 @@ JL_PRIVATE_LIBS-$(USE_SYSTEM_CSL) += libwinpthread else JL_PRIVATE_LIBS-$(USE_SYSTEM_CSL) += libpthread endif +ifeq ($(SANITIZE),1) 
+ifeq ($(USECLANG),1) +JL_PRIVATE_LIBS-1 += libclang_rt.asan +else +JL_PRIVATE_LIBS-1 += libasan +endif +endif + ifeq ($(WITH_TRACY),1) JL_PRIVATE_LIBS-0 += libTracyClient endif diff --git a/deps/Makefile b/deps/Makefile index 62bb85e72c492..8bbe16b4e5cf9 100644 --- a/deps/Makefile +++ b/deps/Makefile @@ -46,6 +46,10 @@ ifeq ($(USE_SYSTEM_CSL), 0) DEP_LIBS += csl endif +ifeq ($(SANITIZE), 1) +DEP_LIBS += sanitizers +endif + ifeq ($(USE_SYSTEM_LIBUV), 0) DEP_LIBS += libuv endif @@ -188,7 +192,7 @@ DEP_LIBS_STAGED := $(DEP_LIBS) DEP_LIBS_STAGED_ALL := llvm llvm-tools clang llvmunwind unwind libuv pcre \ openlibm dsfmt blastrampoline openblas lapack gmp mpfr patchelf utf8proc \ objconv mbedtls libssh2 nghttp2 curl libgit2 libwhich zlib p7zip csl \ - libsuitesparse lld libtracyclient ittapi + sanitizers libsuitesparse lld libtracyclient ittapi DEP_LIBS_ALL := $(DEP_LIBS_STAGED_ALL) ifneq ($(USE_BINARYBUILDER_OPENBLAS),0) @@ -223,6 +227,7 @@ distcleanall: $(addprefix distclean-, $(DEP_LIBS_ALL)) getall: $(addprefix get-, $(DEP_LIBS_ALL)) include $(SRCDIR)/csl.mk +include $(SRCDIR)/sanitizers.mk include $(SRCDIR)/ittapi.mk include $(SRCDIR)/llvm.mk include $(SRCDIR)/libuv.mk diff --git a/deps/sanitizers.mk b/deps/sanitizers.mk new file mode 100644 index 0000000000000..81db75a4ee63e --- /dev/null +++ b/deps/sanitizers.mk @@ -0,0 +1,29 @@ +# Interrogate the compiler about where it is keeping its sanitizer libraries +ifeq ($(USECLANG),1) +SANITIZER_LIB_PATH := $(shell LANG=C $(CC) -print-runtime-dir) +else +SANITIZER_LIB_PATH := $(dir $(shell LANG=C $(CC) -print-file-name=libasan.so)) +endif + +# Given a colon-separated list of paths in $(2), find the location of the library given in $(1) +define pathsearch +$(wildcard $(addsuffix /$(1),$(subst :, ,$(2)))) +endef + +define copy_sanitizer_lib +install-sanitizers: $$(addprefix $$(build_libdir)/, $$(notdir $$(call pathsearch,$(1),$$(SANITIZER_LIB_PATH)))) | $$(build_shlibdir) +$$(addprefix $$(build_shlibdir)/,$(2)): 
$$(addprefix $$(SANITIZER_LIB_PATH)/,$(2)) | $$(build_shlibdir) + -cp $$< $$@ +endef + +ifeq ($(USECLANG),1) + +## Clang libraries +$(eval $(call copy_sanitizer_lib,$(call versioned_libname,libclang_rt.asan-*),$(call versioned_libname,libclang_rt.asan-%))) + +endif + +get-sanitizers: +clean-sanitizers: + -rm -f $(build_shlibdir)/libclang_rt.asan*$(SHLIB_EXT)* +distclean-sanitizers: clean-sanitizers diff --git a/src/flisp/Makefile b/src/flisp/Makefile index c2bf30300b041..17292d301115b 100644 --- a/src/flisp/Makefile +++ b/src/flisp/Makefile @@ -95,6 +95,13 @@ $(BUILDDIR)/$(LIBTARGET).a: $(OBJS) | $(BUILDDIR) CCLD := $(CC) +# Override `-shared-libasan` from root Make.inc +ifeq ($(SANITIZE),1) +ifeq ($(SANITIZE_ADDRESS),1) +JLDFLAGS += -static-libsan +endif +endif + $(BUILDDIR)/$(EXENAME)-debug$(EXE): $(DOBJS) $(LIBFILES_debug) $(BUILDDIR)/$(LIBTARGET)-debug.a $(BUILDDIR)/flmain.dbg.obj | $(BUILDDIR)/flisp.boot @$(call PRINT_LINK, $(CCLD) $(DEBUGFLAGS) $(JLDFLAGS) $(DOBJS) $(BUILDDIR)/flmain.dbg.obj -o $@ $(BUILDDIR)/$(LIBTARGET)-debug.a $(LIBFILES_debug) $(LIBS) $(OSLIBS)) From d48e17ba0f245e1d8b9f703933869dd956fc63cb Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 20 Jun 2023 14:50:59 +0000 Subject: [PATCH 200/290] Don't reverse! load_path cache As noted in https://github.com/JuliaLang/julia/pull/50119#discussion_r1235329616. 
--- base/loading.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/loading.jl b/base/loading.jl index b9742ec045b19..ae0681608844e 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -2727,7 +2727,7 @@ end function get_preferences(uuid::Union{UUID,Nothing} = nothing) merged_prefs = Dict{String,Any}() - for env in reverse!(load_path()) + for env in reverse(load_path()) project_toml = env_project_file(env) if !isa(project_toml, String) continue From 9032926c0641e2a62fbe4ebc91ee81d763db7308 Mon Sep 17 00:00:00 2001 From: Elliot Saba Date: Tue, 20 Jun 2023 10:05:58 -0700 Subject: [PATCH 201/290] Throw precompilation error if dependency load failure during incremental precompilation In rare cases, if we fail to load a dependency during precompilation, we can fall-through to the "load locally" fallthrough in `_require()`. However, if this happens during incremental precompilation, this ends up emitting `.ji` files that have multiple modules embedded within, which can cause massive precompilation issues further down the chain, as dependencies which try to load our `.ji` file themselves get corrupted. This catches the error at the source, refusing to generate such a `.ji` file in the first place. 
--- base/loading.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/base/loading.jl b/base/loading.jl index ae0681608844e..56c40af68479f 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1919,7 +1919,7 @@ function _require(pkg::PkgId, env=nothing) else @warn "The call to compilecache failed to create a usable precompiled cache file for $pkg" exception=m end - # fall-through to loading the file locally + # fall-through to loading the file locally if not incremental else cachefile, ocachefile = cachefile::Tuple{String, Union{Nothing, String}} m = _tryrequire_from_serialized(pkg, cachefile, ocachefile) @@ -1929,6 +1929,10 @@ function _require(pkg::PkgId, env=nothing) return m end end + if JLOptions().incremental != 0 + # during incremental precompilation, this should be fail-fast + throw(PrecompilableError()) + end end end From 509864600afc18f2b07321dff5ed4eb9640d0381 Mon Sep 17 00:00:00 2001 From: Max Horn Date: Tue, 20 Jun 2023 23:47:24 +0200 Subject: [PATCH 202/290] LICENSE.md: update copyright years --- LICENSE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE.md b/LICENSE.md index fdf24e7603d73..d4125f4fba221 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2009-2022: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors +Copyright (c) 2009-2023: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the From 95749c378418134100951582c78d8553a5ad4211 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 20 Jun 2023 20:52:53 -0400 Subject: [PATCH 203/290] Avoid creating invalid PhiNodes in IR passes (#50235) As of #50158, irverify catches cases where PhiNodes show up in the middle of a basic block (which is illegal). 
Unfortunately, it turns out there were two cases in Base, where we created just such code: 1. When cfg_simplify! merged basic blocks, it didn't bother to delete (resp, replace by the one incoming edge) the PhiNodes in the basic block it was merging. 2. In irinterp we try to delete instructions that result in constants. This is not legal if the instruction is a PhiNode. The second of these is somewhat unfortunate, but any subsequent compaction will of course take care of it, so I don't think it's a huge issue to just disable the replacement. --- base/compiler/ssair/irinterp.jl | 2 +- base/compiler/ssair/passes.jl | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/base/compiler/ssair/irinterp.jl b/base/compiler/ssair/irinterp.jl index fc5085af426a1..1ba751f6d3fd4 100644 --- a/base/compiler/ssair/irinterp.jl +++ b/base/compiler/ssair/irinterp.jl @@ -154,7 +154,7 @@ function reprocess_instruction!(interp::AbstractInterpreter, idx::Int, bb::Union if rt !== nothing if isa(rt, Const) ir.stmts[idx][:type] = rt - if is_inlineable_constant(rt.val) && (ir.stmts[idx][:flag] & IR_FLAG_EFFECT_FREE) != 0 + if is_inlineable_constant(rt.val) && !isa(inst, PhiNode) && (ir.stmts[idx][:flag] & IR_FLAG_EFFECT_FREE) != 0 ir.stmts[idx][:inst] = quoted(rt.val) end return true diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 518a6512fc166..3b0159f5c3b85 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -2246,6 +2246,7 @@ function cfg_simplify!(ir::IRCode) result_idx = 1 for (idx, orig_bb) in enumerate(result_bbs) ms = orig_bb + bb_start = true while ms != 0 for i in bbs[ms].stmts node = ir.stmts[i] @@ -2304,7 +2305,14 @@ function cfg_simplify!(ir::IRCode) isassigned(renamed_values, old_index) && kill_current_use!(compact, renamed_values[old_index]) end end - compact.result[compact.result_idx][:inst] = PhiNode(edges, values) + if length(edges) == 0 || (length(edges) == 1 && !isassigned(values, 1)) + 
compact.result[compact.result_idx][:inst] = nothing + elseif length(edges) == 1 && !bb_start + compact.result[compact.result_idx][:inst] = values[1] + else + @assert bb_start + compact.result[compact.result_idx][:inst] = PhiNode(edges, values) + end else ri = process_node!(compact, compact.result_idx, node, i, i, ms, true) if ri == compact.result_idx @@ -2318,6 +2326,7 @@ function cfg_simplify!(ir::IRCode) compact.result_idx += 1 end ms = merged_succ[ms] + bb_start = false end end compact.idx = length(ir.stmts) From 94dd5cf2aa543255cc60d2a2d703637adad5477b Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Wed, 21 Jun 2023 20:26:36 +0900 Subject: [PATCH 204/290] compiler: add few more type annotations (#50242) --- base/compiler/ssair/ir.jl | 45 ++++++++++++++++++----------------- base/compiler/ssair/passes.jl | 12 +++++----- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index debad8bfb0d66..00ac44d22c0a2 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -23,14 +23,14 @@ function cfg_delete_edge!(cfg::CFG, from::Int, to::Int) preds = cfg.blocks[to].preds succs = cfg.blocks[from].succs # Assumes that blocks appear at most once in preds and succs - deleteat!(preds, findfirst(x->x === from, preds)::Int) - deleteat!(succs, findfirst(x->x === to, succs)::Int) + deleteat!(preds, findfirst(x::Int->x==from, preds)::Int) + deleteat!(succs, findfirst(x::Int->x==to, succs)::Int) nothing end function bb_ordering() - lt=(<=) - by=x->first(x.stmts) + lt = (<=) + by = x::BasicBlock -> first(x.stmts) ord(lt, by, nothing, Forward) end @@ -594,7 +594,7 @@ function CFGTransformState!(blocks::Vector{BasicBlock}, allow_cfg_transforms::Bo end end # Dead blocks get removed from the predecessor list - filter!(x->x !== -1, preds) + filter!(x::Int->x≠-1, preds) # Rename succs for j = 1:length(succs) succs[j] = bb_rename[succs[j]] @@ -637,7 +637,7 @@ 
mutable struct IncrementalCompact function IncrementalCompact(code::IRCode, cfg_transform::CFGTransformState) # Sort by position with attach after nodes after regular ones info = code.new_nodes.info - perm = sort!(collect(eachindex(info)); by=i->(2info[i].pos+info[i].attach_after, i)) + perm = sort!(collect(eachindex(info)); by=i::Int->(2info[i].pos+info[i].attach_after, i)) new_len = length(code.stmts) + length(info) result = InstructionStream(new_len) used_ssas = fill(0, new_len) @@ -656,7 +656,7 @@ mutable struct IncrementalCompact # For inlining function IncrementalCompact(parent::IncrementalCompact, code::IRCode, result_offset) info = code.new_nodes.info - perm = sort!(collect(eachindex(info)); by=i->(info[i].pos, i)) + perm = sort!(collect(eachindex(info)); by=i::Int->(info[i].pos, i)) new_len = length(code.stmts) + length(info) ssa_rename = Any[SSAValue(i) for i = 1:new_len] bb_rename = Vector{Int}() @@ -783,7 +783,7 @@ function dominates_ssa(compact::IncrementalCompact, domtree::DomTree, x::AnySSAV return dominates(domtree, xb, yb) end -function _count_added_node!(compact, @nospecialize(val)) +function _count_added_node!(compact::IncrementalCompact, @nospecialize(val)) if isa(val, SSAValue) compact.used_ssas[val.id] += 1 return false @@ -805,7 +805,7 @@ end function add_pending!(compact::IncrementalCompact, pos::Int, attach_after::Bool) node = add_inst!(compact.pending_nodes, pos, attach_after) - heappush!(compact.pending_perm, length(compact.pending_nodes), By(x -> compact.pending_nodes.info[x].pos)) + heappush!(compact.pending_perm, length(compact.pending_nodes), By(x::Int->compact.pending_nodes.info[x].pos)) return node end @@ -997,7 +997,7 @@ const __check_ssa_counts__ = fill(false) should_check_ssa_counts() = __check_ssa_counts__[] # specifically meant to be used with body1 = compact.result and body2 = compact.new_new_nodes, with nvals == length(compact.used_ssas) -function find_ssavalue_uses1(compact) +function 
find_ssavalue_uses1(compact::IncrementalCompact) body1, body2 = compact.result.inst, compact.new_new_nodes.stmts.inst nvals = length(compact.used_ssas) nvalsnew = length(compact.new_new_used_ssas) @@ -1166,8 +1166,8 @@ end # Used in inlining before we start compacting - Only works at the CFG level function kill_edge!(bbs::Vector{BasicBlock}, from::Int, to::Int, callback=nothing) preds, succs = bbs[to].preds, bbs[from].succs - deleteat!(preds, findfirst(x->x === from, preds)::Int) - deleteat!(succs, findfirst(x->x === to, succs)::Int) + deleteat!(preds, findfirst(x::Int->x==from, preds)::Int) + deleteat!(succs, findfirst(x::Int->x==to, succs)::Int) if length(preds) == 0 for succ in copy(bbs[to].succs) kill_edge!(bbs, to, succ, callback) @@ -1190,12 +1190,12 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: (; bb_rename_pred, bb_rename_succ, result_bbs) = compact.cfg_transform preds = result_bbs[bb_rename_succ[to]].preds succs = result_bbs[bb_rename_pred[from]].succs - deleteat!(preds, findfirst(x->x === bb_rename_pred[from], preds)::Int) - deleteat!(succs, findfirst(x->x === bb_rename_succ[to], succs)::Int) + deleteat!(preds, findfirst(x::Int->x==bb_rename_pred[from], preds)::Int) + deleteat!(succs, findfirst(x::Int->x==bb_rename_succ[to], succs)::Int) # Check if the block is now dead if length(preds) == 0 for succ in copy(result_bbs[bb_rename_succ[to]].succs) - kill_edge!(compact, active_bb, to, findfirst(x->x === succ, bb_rename_pred)::Int) + kill_edge!(compact, active_bb, to, findfirst(x::Int->x==succ, bb_rename_pred)::Int) end if to < active_bb # Kill all statements in the block @@ -1222,7 +1222,7 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: stmt = compact.result[idx][:inst] stmt === nothing && continue isa(stmt, PhiNode) || break - i = findfirst(x-> x == bb_rename_pred[from], stmt.edges) + i = findfirst(x::Int32->x==bb_rename_pred[from], stmt.edges) if i !== nothing deleteat!(stmt.edges, 
i) deleteat!(stmt.values, i) @@ -1234,7 +1234,7 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: for stmt in CompactPeekIterator(compact, first(stmts), last(stmts)) stmt === nothing && continue isa(stmt, PhiNode) || break - i = findfirst(x-> x == from, stmt.edges) + i = findfirst(x::Int32->x==from, stmt.edges) if i !== nothing deleteat!(stmt.edges, i) deleteat!(stmt.values, i) @@ -1350,7 +1350,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr if cfg_transforms_enabled # Rename phi node edges let bb_rename_pred=bb_rename_pred - map!(i::Int32 -> bb_rename_pred[i], stmt.edges, stmt.edges) + map!(i::Int32->bb_rename_pred[i], stmt.edges, stmt.edges) end # Remove edges and values associated with dead blocks. Entries in @@ -1424,7 +1424,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr return result_idx end -function resize!(compact::IncrementalCompact, nnewnodes) +function resize!(compact::IncrementalCompact, nnewnodes::Int) old_length = length(compact.result) resize!(compact.result, nnewnodes) resize!(compact.used_ssas, nnewnodes) @@ -1434,7 +1434,8 @@ function resize!(compact::IncrementalCompact, nnewnodes) return compact end -function finish_current_bb!(compact::IncrementalCompact, active_bb, old_result_idx=compact.result_idx, unreachable=false) +function finish_current_bb!(compact::IncrementalCompact, active_bb::Int, + old_result_idx::Int=compact.result_idx, unreachable::Bool=false) (;result_bbs, cfg_transforms_enabled, bb_rename_succ) = compact.cfg_transform if compact.active_result_bb > length(result_bbs) #@assert compact.bb_rename[active_bb] == -1 @@ -1573,7 +1574,7 @@ function iterate_compact(compact::IncrementalCompact) if !(info.attach_after ? 
info.pos <= compact.idx - 1 : info.pos <= compact.idx) break end - heappop!(compact.pending_perm, By(x -> compact.pending_nodes.info[x].pos)) + heappop!(compact.pending_perm, By(x::Int -> compact.pending_nodes.info[x].pos)) end # Move to next block compact.idx += 1 @@ -1600,7 +1601,7 @@ function iterate_compact(compact::IncrementalCompact) elseif !isempty(compact.pending_perm) && (info = compact.pending_nodes.info[compact.pending_perm[1]]; info.attach_after ? info.pos == idx - 1 : info.pos == idx) - new_idx = heappop!(compact.pending_perm, By(x -> compact.pending_nodes.info[x].pos)) + new_idx = heappop!(compact.pending_perm, By(x::Int -> compact.pending_nodes.info[x].pos)) new_node_entry = compact.pending_nodes.stmts[new_idx] new_node_info = compact.pending_nodes.info[new_idx] new_idx += length(compact.ir.stmts) + length(compact.ir.new_nodes) diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 3b0159f5c3b85..780273ce7cd2e 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -841,11 +841,11 @@ end length(sig.parameters) >= 1 || return nothing i = let sig=sig - findfirst(j->has_typevar(sig.parameters[j], tvar), 1:length(sig.parameters)) + findfirst(j::Int->has_typevar(sig.parameters[j], tvar), 1:length(sig.parameters)) end i === nothing && return nothing let sig=sig - any(j->has_typevar(sig.parameters[j], tvar), i+1:length(sig.parameters)) + any(j::Int->has_typevar(sig.parameters[j], tvar), i+1:length(sig.parameters)) end && return nothing arg = sig.parameters[i] @@ -2182,15 +2182,15 @@ function cfg_simplify!(ir::IRCode) bb_rename_succ = bb_rename_succ # Compute (renamed) successors and predecessors given (renamed) block - function compute_succs(i) + function compute_succs(i::Int) orig_bb = follow_merged_succ(result_bbs[i]) return Int[bb_rename_succ[i] for i in bbs[orig_bb].succs] end - function compute_preds(i) + function compute_preds(i::Int) orig_bb = result_bbs[i] preds = bbs[orig_bb].preds res = Int[] - 
function scan_preds!(preds) + function scan_preds!(preds::Vector{Int}) for pred in preds if pred == 0 push!(res, 0) @@ -2223,7 +2223,7 @@ function cfg_simplify!(ir::IRCode) @assert length(new_bb.succs) <= 2 length(new_bb.succs) <= 1 && continue if new_bb.succs[1] == new_bb.succs[2] - old_bb2 = findfirst(x->x==bbidx, bb_rename_pred) + old_bb2 = findfirst(x::Int->x==bbidx, bb_rename_pred) terminator = ir[SSAValue(last(bbs[old_bb2].stmts))] @assert terminator[:inst] isa GotoIfNot # N.B.: The dest will be renamed in process_node! below From 4f0216a133aa2de4442f7ee6a974d82e316e4a93 Mon Sep 17 00:00:00 2001 From: Cody Tapscott <84105208+topolarity@users.noreply.github.com> Date: Wed, 21 Jun 2023 07:26:54 -0400 Subject: [PATCH 205/290] Clear specsigflags correctly for copied code instance (#50238) As provided by Jameson to fix an issue encountered in a downstream project. Co-authored-by: Jameson Nash --- src/gf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gf.c b/src/gf.c index ac2ac9ff4a7a6..294e1fccb8783 100644 --- a/src/gf.c +++ b/src/gf.c @@ -2392,7 +2392,8 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t if (jl_atomic_cmpswap_acqrel(&codeinst->specptr.fptr, &prev_fptr, fptr)) { jl_atomic_store_relaxed(&codeinst->specsigflags, specsigflags & 0b1); jl_atomic_store_release(&codeinst->invoke, invoke); - jl_atomic_store_release(&codeinst->specsigflags, specsigflags); + // unspec is probably not specsig, but might be using specptr + jl_atomic_store_release(&codeinst->specsigflags, specsigflags & ~0b1); // clear specsig flag } else { // someone else already compiled it while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) { From e7d00e1b6829beb5bbe11e43a3ad8d2efe3f46ad Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 21 Jun 2023 19:41:04 -0400 Subject: [PATCH 206/290] [LibGit2] fix wrong definition of ConfigStruct (#50247) --- stdlib/LibGit2/src/types.jl | 13 +++++++------ 1 file changed, 7 
insertions(+), 6 deletions(-) diff --git a/stdlib/LibGit2/src/types.jl b/stdlib/LibGit2/src/types.jl index 1ea6c797d1636..0b653f9b6ad21 100644 --- a/stdlib/LibGit2/src/types.jl +++ b/stdlib/LibGit2/src/types.jl @@ -904,12 +904,13 @@ end Matches the [`git_config_entry`](https://libgit2.org/libgit2/#HEAD/type/git_config_entry) struct. """ -@kwdef struct ConfigEntry - name::Cstring = Cstring(C_NULL) - value::Cstring = Cstring(C_NULL) - level::GIT_CONFIG = Consts.CONFIG_LEVEL_DEFAULT - free::Ptr{Cvoid} = C_NULL - payload::Any = nothing +struct ConfigEntry + name::Cstring + value::Cstring + include_depth::Cuint + level::GIT_CONFIG + free::Ptr{Cvoid} + payload::Ptr{Cvoid} # User is not permitted to read or write this field end @assert Base.allocatedinline(ConfigEntry) From fbb8d6cc459b859c604a4436a4e51a65496b612d Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 22 Jun 2023 10:28:30 +1000 Subject: [PATCH 207/290] Bump JuliaSyntax to 0.4.5 (#50253) Fixes an issue with error formatting in Meta.parse with nontrivial starting indices (#50245) --- deps/JuliaSyntax.version | 2 +- .../md5 | 1 + .../sha512 | 1 + .../md5 | 1 - .../sha512 | 1 - 5 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/md5 create mode 100644 deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/sha512 delete mode 100644 deps/checksums/JuliaSyntax-ec51994833d78f8c5525bc1647f448dfadc370c1.tar.gz/md5 delete mode 100644 deps/checksums/JuliaSyntax-ec51994833d78f8c5525bc1647f448dfadc370c1.tar.gz/sha512 diff --git a/deps/JuliaSyntax.version b/deps/JuliaSyntax.version index 2bd765e6f4535..b604eedaa43dd 100644 --- a/deps/JuliaSyntax.version +++ b/deps/JuliaSyntax.version @@ -1,4 +1,4 @@ JULIASYNTAX_BRANCH = main -JULIASYNTAX_SHA1 = ec51994833d78f8c5525bc1647f448dfadc370c1 +JULIASYNTAX_SHA1 = 8731bab86f14762cca8cf24224d8c7a6a89c21c5 JULIASYNTAX_GIT_URL := 
https://github.com/JuliaLang/JuliaSyntax.jl.git JULIASYNTAX_TAR_URL = https://api.github.com/repos/JuliaLang/JuliaSyntax.jl/tarball/$1 diff --git a/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/md5 b/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/md5 new file mode 100644 index 0000000000000..8bec9dde7fbae --- /dev/null +++ b/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/md5 @@ -0,0 +1 @@ +6fdeb9332af478502be39af642027387 diff --git a/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/sha512 b/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/sha512 new file mode 100644 index 0000000000000..50c676f808c5c --- /dev/null +++ b/deps/checksums/JuliaSyntax-8731bab86f14762cca8cf24224d8c7a6a89c21c5.tar.gz/sha512 @@ -0,0 +1 @@ +fbb4ab0b99de7e1f86b918b401c2d42883a2bf8e80f6af4d6b85b7ca263d97cca1c47b25aca48359f14dee91b658684c0c590b7f20240bd9e0ce6e960ccf6647 diff --git a/deps/checksums/JuliaSyntax-ec51994833d78f8c5525bc1647f448dfadc370c1.tar.gz/md5 b/deps/checksums/JuliaSyntax-ec51994833d78f8c5525bc1647f448dfadc370c1.tar.gz/md5 deleted file mode 100644 index e1f51dd3d711a..0000000000000 --- a/deps/checksums/JuliaSyntax-ec51994833d78f8c5525bc1647f448dfadc370c1.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -b1d1ccb00e422eb8b70b2120d7083bf3 diff --git a/deps/checksums/JuliaSyntax-ec51994833d78f8c5525bc1647f448dfadc370c1.tar.gz/sha512 b/deps/checksums/JuliaSyntax-ec51994833d78f8c5525bc1647f448dfadc370c1.tar.gz/sha512 deleted file mode 100644 index 2ac2b9ed7c903..0000000000000 --- a/deps/checksums/JuliaSyntax-ec51994833d78f8c5525bc1647f448dfadc370c1.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -e6df6dc2b5d2a5618da0d553eed793e1192147175d84d51f725c0ea8f7b6be92fbeb37de9abee2b2f548b0f0736f836ec7e3e20e93c12f77e1a2b2058bbfd6db From a34ff20549a8d87fc88b4dfaa87d5b1f0fd5298a Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> 
Date: Thu, 22 Jun 2023 15:15:33 +0900 Subject: [PATCH 208/290] allow `Meta.parse` to take optional `filename` argument (#50224) Code analysis tools such as JET.jl use `Meta.parse` to get a Julia code representation from textual data, or to display diagnostic messages when the input data is invalid. In such scenario it would be beneficial if these diagnostic messages could refer to the original file name of the input data. This commit allows `Meta.parse` to accept the optional `filename::AbstractString` argument for this purpose. If an error occurs, the `filename` can be included in the diagnostic information. The default value is set to `"none"`, thus preserving existing behavior. --- base/meta.jl | 22 ++++++++++++---------- test/meta.jl | 21 +++++++++++++++++++-- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/base/meta.jl b/base/meta.jl index ba2a5eeb6858b..31fef1b9697e3 100644 --- a/base/meta.jl +++ b/base/meta.jl @@ -202,7 +202,7 @@ function _parse_string(text::AbstractString, filename::AbstractString, end """ - parse(str, start; greedy=true, raise=true, depwarn=true) + parse(str, start; greedy=true, raise=true, depwarn=true, filename="none") Parse the expression string and return an expression (which could later be passed to eval for execution). `start` is the code unit index into `str` of the @@ -214,6 +214,7 @@ return `Expr(:incomplete, "(error message)")`. If `raise` is `true` (default), syntax errors other than incomplete expressions will raise an error. If `raise` is `false`, `parse` will return an expression that will raise an error upon evaluation. If `depwarn` is `false`, deprecation warnings will be suppressed. +The `filename` argument is used to display diagnostics when an error is raised. 
```jldoctest julia> Meta.parse("(α, β) = 3, 5", 1) # start of string @@ -232,10 +233,10 @@ julia> Meta.parse("(α, β) = 3, 5", 11, greedy=false) (3, 13) ``` """ -function parse(str::AbstractString, pos::Integer; greedy::Bool=true, raise::Bool=true, - depwarn::Bool=true) - ex, pos = _parse_string(str, "none", 1, pos, greedy ? :statement : :atom) - if raise && isa(ex,Expr) && ex.head === :error +function parse(str::AbstractString, pos::Integer; + filename="none", greedy::Bool=true, raise::Bool=true, depwarn::Bool=true) + ex, pos = _parse_string(str, String(filename), 1, pos, greedy ? :statement : :atom) + if raise && isexpr(ex, :error) err = ex.args[1] if err isa String err = ParseError(err) # For flisp parser @@ -246,13 +247,13 @@ function parse(str::AbstractString, pos::Integer; greedy::Bool=true, raise::Bool end """ - parse(str; raise=true, depwarn=true) + parse(str; raise=true, depwarn=true, filename="none") Parse the expression string greedily, returning a single expression. An error is thrown if there are additional characters after the first expression. If `raise` is `true` (default), syntax errors will raise an error; otherwise, `parse` will return an expression that will raise an error upon evaluation. If `depwarn` is `false`, deprecation warnings will be -suppressed. +suppressed. The `filename` argument is used to display diagnostics when an error is raised. 
```jldoctest; filter=r"(?<=Expr\\(:error).*|(?<=Expr\\(:incomplete).*" julia> Meta.parse("x = 3") @@ -272,9 +273,10 @@ julia> Meta.parse("x = ") :(\$(Expr(:incomplete, "incomplete: premature end of input"))) ``` """ -function parse(str::AbstractString; raise::Bool=true, depwarn::Bool=true) - ex, pos = parse(str, 1, greedy=true, raise=raise, depwarn=depwarn) - if isa(ex,Expr) && ex.head === :error +function parse(str::AbstractString; + filename="none", raise::Bool=true, depwarn::Bool=true) + ex, pos = parse(str, 1; filename, greedy=true, raise, depwarn) + if isexpr(ex, :error) return ex end if pos <= ncodeunits(str) diff --git a/test/meta.jl b/test/meta.jl index 399e106684a81..36a8acbfe08dd 100644 --- a/test/meta.jl +++ b/test/meta.jl @@ -221,8 +221,25 @@ let a = 1 @test @macroexpand @is_dollar_expr $a end -@test Meta.parseatom("@foo", 1, filename=:bar)[1].args[2].file === :bar -@test Meta.parseall("@foo", filename=:bar).args[1].file === :bar +let ex = Meta.parse("@foo"; filename=:bar) + @test Meta.isexpr(ex, :macrocall) + arg2 = ex.args[2] + @test isa(arg2, LineNumberNode) && arg2.file === :bar +end +let ex = Meta.parseatom("@foo", 1, filename=:bar)[1] + @test Meta.isexpr(ex, :macrocall) + arg2 = ex.args[2] + @test isa(arg2, LineNumberNode) && arg2.file === :bar +end +let ex = Meta.parseall("@foo", filename=:bar) + @test Meta.isexpr(ex, :toplevel) + arg1 = ex.args[1] + @test isa(arg1, LineNumberNode) && arg1.file === :bar + arg2 = ex.args[2] + @test Meta.isexpr(arg2, :macrocall) + arg2arg2 = arg2.args[2] + @test isa(arg2arg2, LineNumberNode) && arg2arg2.file === :bar +end _lower(m::Module, ex, world::UInt) = ccall(:jl_expand_in_world, Any, (Any, Ref{Module}, Cstring, Cint, Csize_t), ex, m, "none", 0, world) From de7670e8aa6050fe080559c288c407a6d7875193 Mon Sep 17 00:00:00 2001 From: Neal Gafter Date: Thu, 22 Jun 2023 05:58:54 -0700 Subject: [PATCH 209/290] Correct the documentation for `pkgdir`. 
(#50255) --- base/loading.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 56c40af68479f..4aefa9a08a210 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -491,12 +491,12 @@ end """ pkgdir(m::Module[, paths::String...]) -Return the root directory of the package that imported module `m`, -or `nothing` if `m` was not imported from a package. Optionally further +Return the root directory of the package that declared module `m`, +or `nothing` if `m` was not declared in a package. Optionally further path component strings can be provided to construct a path within the package root. -To get the root directory of the package that imported the current module +To get the root directory of the package that implements the current module the form `pkgdir(@__MODULE__)` can be used. ```julia-repl From ad120f49bd606d91351e71fd5e5261daf9a981f9 Mon Sep 17 00:00:00 2001 From: Max Horn Date: Thu, 22 Jun 2023 15:45:44 +0200 Subject: [PATCH 210/290] Fix jl_gc_internal_obj_base_ptr segfault regression (#50231) The function `jl_gc_internal_obj_base_ptr` takes a pointer and tries to determine if it is a valid object pointer. As such it has to carefully validate all data it reads, and abort whenever there are obvious inconsistencies. This patch adds a check which aborts when `meta->osize` is zero, just before we perform a division-with-remainder by this value, thus avoiding a potential division-by-zero exception. This fixes a crash we are seeing in our code. The crash did not happen before PR #49644 was merged because back then there was a check for `meta->ages` not being zero, which apparently was enough to detect invalid values for `meta` (e.g. when `meta` points into a null page). 
--- src/gc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gc.c b/src/gc.c index 00b0102f72653..930edbe9c67a8 100644 --- a/src/gc.c +++ b/src/gc.c @@ -4040,6 +4040,8 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) // offset within object size_t off2 = (off - GC_PAGE_OFFSET); size_t osize = meta->osize; + if (osize == 0) + return NULL; off2 %= osize; if (off - off2 + osize > GC_PAGE_SZ) return NULL; From 49ac54a6a45d5dae08b82154a217be73edef5a16 Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Wed, 21 Jun 2023 14:06:56 -0400 Subject: [PATCH 211/290] Fix ITTAPI for offline build of LLVM --- deps/llvm.mk | 4 ++- deps/patches/llvm-ittapi-cmake.patch | 47 ++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 deps/patches/llvm-ittapi-cmake.patch diff --git a/deps/llvm.mk b/deps/llvm.mk index 37f8f9d6ea7e7..2a8365dd73e75 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -230,6 +230,8 @@ $$(LLVM_BUILDDIR_withtype)/build-compiled: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patc LLVM_PATCH_PREV := $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied endef +$(eval $(call LLVM_PATCH,llvm-ittapi-cmake)) + ifeq ($(USE_SYSTEM_ZLIB), 0) $(LLVM_BUILDDIR_withtype)/build-configured: | $(build_prefix)/manifest/zlib endif @@ -303,7 +305,7 @@ fastcheck-llvm: #none check-llvm: $(LLVM_BUILDDIR_withtype)/build-checked ifeq ($(USE_INTEL_JITEVENTS),1) -extract-llvm: $(SRCCACHE)/$(ITTAPI_SRC_DIR)/source-extracted +$(SRCCACHE)/$(LLVM_SRC_DIR)/source-extracted: $(SRCCACHE)/$(ITTAPI_SRC_DIR)/source-extracted endif #todo: LLVM make check target is broken on julia.mit.edu (and really slow elsewhere) diff --git a/deps/patches/llvm-ittapi-cmake.patch b/deps/patches/llvm-ittapi-cmake.patch new file mode 100644 index 0000000000000..6746d21754283 --- /dev/null +++ b/deps/patches/llvm-ittapi-cmake.patch @@ -0,0 +1,47 @@ +diff --git a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt +index 
0c5017c359d6..92777133e9de 100644 +--- a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt ++++ b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt +@@ -12,23 +12,23 @@ if(NOT DEFINED ITTAPI_SOURCE_DIR) + set(ITTAPI_SOURCE_DIR ${PROJECT_BINARY_DIR}) + endif() + +-if(NOT EXISTS ${ITTAPI_SOURCE_DIR}/ittapi) +- execute_process(COMMAND ${GIT_EXECUTABLE} clone ${ITTAPI_GIT_REPOSITORY} +- WORKING_DIRECTORY ${ITTAPI_SOURCE_DIR} ++if(NOT EXISTS ${ITTAPI_SOURCE_DIR}) ++ execute_process(COMMAND ${GIT_EXECUTABLE} clone ${ITTAPI_GIT_REPOSITORY} ${ITTAPI_SOURCE_DIR} ++ WORKING_DIRECTORY ${ITTAPI_SOURCE_DIR}/.. + RESULT_VARIABLE GIT_CLONE_RESULT) + if(NOT GIT_CLONE_RESULT EQUAL "0") + message(FATAL_ERROR "git clone ${ITTAPI_GIT_REPOSITORY} failed with ${GIT_CLONE_RESULT}, please clone ${ITTAPI_GIT_REPOSITORY}") + endif() +-endif() + +-execute_process(COMMAND ${GIT_EXECUTABLE} checkout ${ITTAPI_GIT_TAG} +- WORKING_DIRECTORY ${ITTAPI_SOURCE_DIR}/ittapi +- RESULT_VARIABLE GIT_CHECKOUT_RESULT) +-if(NOT GIT_CHECKOUT_RESULT EQUAL "0") +- message(FATAL_ERROR "git checkout ${ITTAPI_GIT_TAG} failed with ${GIT_CHECKOUT_RESULT}, please checkout ${ITTAPI_GIT_TAG} at ${ITTAPI_SOURCE_DIR}/ittapi") ++ execute_process(COMMAND ${GIT_EXECUTABLE} checkout ${ITTAPI_GIT_TAG} ++ WORKING_DIRECTORY ${ITTAPI_SOURCE_DIR} ++ RESULT_VARIABLE GIT_CHECKOUT_RESULT) ++ if(NOT GIT_CHECKOUT_RESULT EQUAL "0") ++ message(FATAL_ERROR "git checkout ${ITTAPI_GIT_TAG} failed with ${GIT_CHECKOUT_RESULT}, please checkout ${ITTAPI_GIT_TAG} at ${ITTAPI_SOURCE_DIR}") ++ endif() + endif() + +-include_directories( ${ITTAPI_SOURCE_DIR}/ittapi/include/ ) ++include_directories( ${ITTAPI_SOURCE_DIR}/include/ ) + + if( HAVE_LIBDL ) + set(LLVM_INTEL_JIT_LIBS ${CMAKE_DL_LIBS}) +@@ -40,7 +40,7 @@ set(LLVM_INTEL_JIT_LIBS ${LLVM_PTHREAD_LIB} ${LLVM_INTEL_JIT_LIBS}) + add_llvm_component_library(LLVMIntelJITEvents + IntelJITEventListener.cpp + jitprofiling.c +- ${ITTAPI_SOURCE_DIR}/ittapi/src/ittnotify/ittnotify_static.c ++ 
${ITTAPI_SOURCE_DIR}/src/ittnotify/ittnotify_static.c + + LINK_LIBS ${LLVM_INTEL_JIT_LIBS} + From 8af82ea2b89682f3cf2ea5cec3b57889295e2ab0 Mon Sep 17 00:00:00 2001 From: K Pamnany Date: Wed, 21 Jun 2023 14:43:53 -0400 Subject: [PATCH 212/290] Set `CMAKE_INSTALL_LIBDIR` for non-Windows platforms This makes it so libgit2.so and libssh2.so go into `usr/lib` where they should be, rather than into `usr/lib64` on some platforms. --- deps/tools/common.mk | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/deps/tools/common.mk b/deps/tools/common.mk index c19886114c14e..5290ac475b665 100644 --- a/deps/tools/common.mk +++ b/deps/tools/common.mk @@ -20,6 +20,10 @@ CMAKE_CXX_ARG := $(CXX_ARG) CMAKE_COMMON := -DCMAKE_INSTALL_PREFIX:PATH=$(build_prefix) -DCMAKE_PREFIX_PATH=$(build_prefix) CMAKE_COMMON += -DLIB_INSTALL_DIR=$(build_shlibdir) +ifneq ($(OS),WINNT) +CMAKE_COMMON += -DCMAKE_INSTALL_LIBDIR=$(build_libdir) +endif + ifeq ($(OS), Darwin) CMAKE_COMMON += -DCMAKE_MACOSX_RPATH=1 endif From ef6d900d441ae37313ec6711f05ea5953a0c4ab8 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Thu, 22 Jun 2023 10:17:35 -0400 Subject: [PATCH 213/290] codegen: handle dead code with unsafe_store of FCA pointers (#50164) Fix #50125 --- src/intrinsics.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index 09e04eb683af1..810982370de19 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -695,7 +695,7 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) ai.decorateInst(load); return mark_julia_type(ctx, load, true, ety); } - else if (!jl_isbits(ety)) { + else if (!deserves_stack(ety)) { assert(jl_is_datatype(ety)); uint64_t size = jl_datatype_size(ety); Value *strct = emit_allocobj(ctx, (jl_datatype_t*)ety); @@ -716,7 +716,7 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) assert(!isboxed); if (!type_is_ghost(ptrty)) { Value *thePtr = emit_unbox(ctx, 
ptrty->getPointerTo(), e, e.typ); - auto load = typed_load(ctx, thePtr, im1, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, AtomicOrdering::NotAtomic, true, align_nb); + auto load = typed_load(ctx, thePtr, im1, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, AtomicOrdering::NotAtomic, false, align_nb); setName(ctx.emission_context, load.V, "pointerref"); return load; } @@ -775,7 +775,7 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv) jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_data); ai.decorateInst(store); } - else if (!jl_isbits(ety)) { + else if (x.ispointer()) { thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ); uint64_t size = jl_datatype_size(ety); im1 = ctx.builder.CreateMul(im1, ConstantInt::get(ctx.types().T_size, @@ -852,7 +852,7 @@ static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) return jl_cgval_t(); } - if (!jl_isbits(ety)) { + if (!deserves_stack(ety)) { assert(jl_is_datatype(ety)); Value *strct = emit_allocobj(ctx, (jl_datatype_t*)ety); setName(ctx.emission_context, strct, "atomic_pointerref_box"); @@ -876,7 +876,7 @@ static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) assert(!isboxed); if (!type_is_ghost(ptrty)) { Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ); - auto load = typed_load(ctx, thePtr, nullptr, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, llvm_order, true, nb); + auto load = typed_load(ctx, thePtr, nullptr, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, llvm_order, false, nb); setName(ctx.emission_context, load.V, "atomic_pointerref"); return load; } @@ -954,6 +954,7 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl } if (!jl_isbits(ety)) { + //if (!deserves_stack(ety)) //Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ); //uint64_t size = jl_datatype_size(ety); return emit_runtime_call(ctx, f, argv, nargs); // TODO: optimizations From 
330c79da4f2c7d4b77fdc661b857950887a4f035 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Thu, 22 Jun 2023 10:39:15 -0500 Subject: [PATCH 214/290] Add note to `load_path` docstring stating that it is not safe to mutate its return value (#50233) --- base/initdefs.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/base/initdefs.jl b/base/initdefs.jl index d1d37839a7c13..ed0aa3856f339 100644 --- a/base/initdefs.jl +++ b/base/initdefs.jl @@ -336,6 +336,10 @@ end Return the fully expanded value of [`LOAD_PATH`](@ref) that is searched for projects and packages. + +!!! note + `load_path` may return a reference to a cached value so it is not safe to modify the + returned vector. """ function load_path() cache = LOADING_CACHE[] From ec3319430bd88a32b588b09491aa9fd94bf75733 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Thu, 22 Jun 2023 12:04:24 -0400 Subject: [PATCH 215/290] Remove type_lift_pass!/OptimizerLattice (#50257) This pass (which has become somewhat misnamed) was inserting error checks, both for undefined slots and undefrefs from sroa'd getfields (at least in theory - this broke at some point - see #50250). It accomplished this partly by using the OptimizerLattice, which adjoins the `MaybeUndef` lattice element to the ordinary incidence lattice. This lattice element indicates that the SSAValue may potentially have come from an undef slot and gives it special semantics inside `:isdefined` and `:undefcheck`. However, in our more recent formalization of lattices, this element is ill-defined. It is not valid to widen it, because doing so would change semantics and cause crashes. It would be possible to have a correct version of this element, but it would require inverting the meaning (i.e. having all types be maybe-undef and using a NotUndef lattice element). However, such a change would be expensive and not worth it. 
This has been causing me some headaches downstream when trying to use custom lattices and custom pass pipelines, so I had some extra motivation to do something about it. This PR just does away with all this complexity. SSA conversion and SROA now directly insert the requisite `:throw_undef_if_not` checks. This does increase the size of the IR somewhat earlier in the pipeline, but on the other hand it saves a full scan over the IR later in the pipeline, so it's probably a wash overall. While we're here, we fix #50250 by properly inserting the requisite phi nest inside SROA. --- base/compiler/abstractlattice.jl | 13 -- base/compiler/optimize.jl | 1 - .../ssair/EscapeAnalysis/EscapeAnalysis.jl | 5 +- base/compiler/ssair/inlining.jl | 2 +- base/compiler/ssair/ir.jl | 2 +- base/compiler/ssair/passes.jl | 188 +++--------------- base/compiler/ssair/slot2ssa.jl | 146 ++++++++------ base/compiler/ssair/verify.jl | 2 +- base/compiler/tfuncs.jl | 7 - base/compiler/typelattice.jl | 35 +--- base/compiler/typelimits.jl | 15 -- base/compiler/types.jl | 6 +- doc/src/devdocs/ast.md | 4 - test/compiler/AbstractInterpreter.jl | 8 +- test/compiler/EscapeAnalysis/EAUtils.jl | 3 +- test/compiler/EscapeAnalysis/local.jl | 3 +- test/compiler/inference.jl | 28 +-- test/core.jl | 23 +++ 18 files changed, 157 insertions(+), 334 deletions(-) diff --git a/base/compiler/abstractlattice.jl b/base/compiler/abstractlattice.jl index 719b5fcf325e4..3c6c874a9a09c 100644 --- a/base/compiler/abstractlattice.jl +++ b/base/compiler/abstractlattice.jl @@ -95,19 +95,6 @@ end widenlattice(𝕃::InferenceLattice) = 𝕃.parent is_valid_lattice_norec(::InferenceLattice, @nospecialize(elem)) = isa(elem, LimitedAccuracy) -""" - struct OptimizerLattice{𝕃<:AbstractLattice} <: AbstractLattice - -The lattice used by the optimizer. -Extends a base lattice `𝕃` and adjoins `MaybeUndef`. 
-""" -struct OptimizerLattice{𝕃<:AbstractLattice} <: AbstractLattice - parent::𝕃 -end -OptimizerLattice() = OptimizerLattice(SimpleInferenceLattice.instance) -widenlattice(𝕃::OptimizerLattice) = 𝕃.parent -is_valid_lattice_norec(::OptimizerLattice, @nospecialize(elem)) = isa(elem, MaybeUndef) - """ tmeet(𝕃::AbstractLattice, a, b::Type) diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index 8810857ce81a7..3a8de06811cc2 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -512,7 +512,6 @@ function run_passes( @pass "compact 2" ir = compact!(ir) @pass "SROA" ir = sroa_pass!(ir, sv.inlining) @pass "ADCE" ir = adce_pass!(ir, sv.inlining) - @pass "type lift" ir = type_lift_pass!(ir) @pass "compact 3" ir = compact!(ir) if JLOptions().debug_level == 2 @timeit "verify 3" (verify_ir(ir); verify_linetable(ir.linetable)) diff --git a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl b/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl index 8bc173add6eaa..2469507fd3699 100644 --- a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl +++ b/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl @@ -27,7 +27,7 @@ import ._TOP_MOD: # Base definitions pop!, push!, pushfirst!, empty!, delete!, max, min, enumerate, unwrap_unionall, ismutabletype import Core.Compiler: # Core.Compiler specific definitions - Bottom, OptimizerLattice, InferenceResult, IRCode, IR_FLAG_NOTHROW, + Bottom, InferenceResult, IRCode, IR_FLAG_NOTHROW, SimpleInferenceLattice, isbitstype, isexpr, is_meta_expr_head, println, widenconst, argextype, singleton_type, fieldcount_noerror, try_compute_field, try_compute_fieldidx, hasintersect, ⊑, intrinsic_nothrow, array_builtin_common_typecheck, arrayset_typecheck, @@ -42,7 +42,7 @@ end const AInfo = IdSet{Any} const LivenessSet = BitSet -const 𝕃ₒ = OptimizerLattice() +const 𝕃ₒ = SimpleInferenceLattice.instance """ x::EscapeInfo @@ -707,7 +707,6 @@ function analyze_escapes(ir::IRCode, nargs::Int, call_resolved::Bool, get_escape 
continue elseif head === :static_parameter || # this exists statically, not interested in its escape head === :copyast || # XXX can this account for some escapes? - head === :undefcheck || # XXX can this account for some escapes? head === :isdefined || # just returns `Bool`, nothing accounts for any escapes head === :gc_preserve_begin || # `GC.@preserve` expressions themselves won't be used anywhere head === :gc_preserve_end # `GC.@preserve` expressions themselves won't be used anywhere diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index 17df27bd5f637..c3ba033efa356 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -488,7 +488,7 @@ function fix_va_argexprs!(insert_node!::Inserter, inline_target::Union{IRCode, I push!(tuple_call.args, arg) push!(tuple_typs, argextype(arg, inline_target)) end - tuple_typ = tuple_tfunc(OptimizerLattice(), tuple_typs) + tuple_typ = tuple_tfunc(SimpleInferenceLattice.instance, tuple_typs) tuple_inst = NewInstruction(tuple_call, tuple_typ, line_idx) push!(newargexprs, insert_node!(tuple_inst)) return newargexprs diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 00ac44d22c0a2..2d8c2a076b2eb 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -458,7 +458,7 @@ function is_relevant_expr(e::Expr) :new, :splatnew, :(=), :(&), :gc_preserve_begin, :gc_preserve_end, :foreigncall, :isdefined, :copyast, - :undefcheck, :throw_undef_if_not, + :throw_undef_if_not, :cfunction, :method, :pop_exception, :new_opaque_closure) end diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 780273ce7cd2e..882944810313c 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -360,6 +360,7 @@ struct LiftedValue LiftedValue(@nospecialize val) = new(val) end const LiftedLeaves = IdDict{Any, Union{Nothing,LiftedValue}} +const LiftedDefs = IdDict{Any, Bool} # try to compute lifted values that can replace 
`getfield(x, field)` call # where `x` is an immutable struct that are defined at any of `leaves` @@ -505,8 +506,6 @@ function walk_to_def(compact::IncrementalCompact, @nospecialize(leaf)) return Pair{Any, Any}(def, leaf) end -make_MaybeUndef(@nospecialize(typ)) = isa(typ, MaybeUndef) ? typ : MaybeUndef(typ) - """ lift_comparison!(cmp, compact::IncrementalCompact, idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice) @@ -620,7 +619,7 @@ end struct SkipToken end; const SKIP_TOKEN = SkipToken() function lifted_value(compact::IncrementalCompact, @nospecialize(old_node_ssa#=::AnySSAValue=#), @nospecialize(old_value), - lifted_philikes::Vector{LiftedPhilike}, lifted_leaves::LiftedLeaves, reverse_mapping::IdDict{AnySSAValue, Int}) + lifted_philikes::Vector{LiftedPhilike}, lifted_leaves::Union{LiftedLeaves, LiftedDefs}, reverse_mapping::IdDict{AnySSAValue, Int}) val = old_value if is_old(compact, old_node_ssa) && isa(val, SSAValue) val = OldSSAValue(val.id) @@ -630,6 +629,9 @@ function lifted_value(compact::IncrementalCompact, @nospecialize(old_node_ssa#=: end if val in keys(lifted_leaves) lifted_val = lifted_leaves[val] + if isa(lifted_leaves, LiftedDefs) + return lifted_val + end lifted_val === nothing && return UNDEF_TOKEN val = lifted_val.val if isa(val, AnySSAValue) @@ -653,7 +655,7 @@ end function perform_lifting!(compact::IncrementalCompact, visited_philikes::Vector{AnySSAValue}, @nospecialize(cache_key), lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}, - @nospecialize(result_t), lifted_leaves::LiftedLeaves, @nospecialize(stmt_val), + @nospecialize(result_t), lifted_leaves::Union{LiftedLeaves, LiftedDefs}, @nospecialize(stmt_val), lazydomtree::Union{LazyDomtree,Nothing}) reverse_mapping = IdDict{AnySSAValue, Int}() for id in 1:length(visited_philikes) @@ -912,10 +914,11 @@ In a case when all usages are fully eliminated, `struct` allocation may also be a result of succeeding dead code elimination. 
""" function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) - 𝕃ₒ = inlining === nothing ? OptimizerLattice() : optimizer_lattice(inlining.interp) + 𝕃ₒ = inlining === nothing ? SimpleInferenceLattice.instance : optimizer_lattice(inlining.interp) compact = IncrementalCompact(ir) defuses = nothing # will be initialized once we encounter mutability in order to reduce dynamic allocations lifting_cache = IdDict{Pair{AnySSAValue, Any}, AnySSAValue}() + def_lifting_cache = IdDict{Pair{AnySSAValue, Any}, AnySSAValue}() # initialization of domtree is delayed to avoid the expensive computation in many cases lazydomtree = LazyDomtree(ir) for ((_, idx), stmt) in compact @@ -1082,23 +1085,30 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) lifted_result === nothing && continue lifted_leaves, any_undef = lifted_result - if any_undef - result_t = make_MaybeUndef(result_t) - end - - val = perform_lifting!(compact, + lifted_val = perform_lifting!(compact, visited_philikes, field, lifting_cache, result_t, lifted_leaves, val, lazydomtree) # Insert the undef check if necessary - if any_undef && val === nothing + if any_undef + if lifted_val === nothing + def_val = false + else + lifted_leaves_def = LiftedDefs() + for (k, v) in pairs(lifted_leaves) + lifted_leaves_def[k] = v === nothing ? false : true + end + def_val = perform_lifting!(compact, + visited_philikes, field, def_lifting_cache, result_t, lifted_leaves_def, val, lazydomtree).val + end insert_node!(compact, SSAValue(idx), non_effect_free(NewInstruction( - Expr(:throw_undef_if_not, Symbol("##getfield##"), false), Nothing))) + Expr(:throw_undef_if_not, Symbol("##getfield##"), def_val), Nothing))) + else # val must be defined - @assert val !== nothing + @assert lifted_val !== nothing end - compact[idx] = val === nothing ? nothing : val.val + compact[idx] = lifted_val === nothing ? 
nothing : lifted_val.val compact[SSAValue(idx)][:flag] |= IR_FLAG_REFINED end @@ -1572,7 +1582,6 @@ function mark_phi_cycles!(compact::IncrementalCompact, safe_phis::SPCSet, phi::I end function is_some_union(@nospecialize(t)) - isa(t, MaybeUndef) && (t = t.typ) return isa(t, Union) end @@ -1626,7 +1635,7 @@ the `typeassert` elimination depends on the transformation by `canonicalize_type within `sroa_pass!` which redirects references of `typeassert`ed value to the corresponding `PiNode`. """ function adce_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) - 𝕃ₒ = inlining === nothing ? OptimizerLattice() : optimizer_lattice(inlining.interp) + 𝕃ₒ = inlining === nothing ? SimpleInferenceLattice.instance : optimizer_lattice(inlining.interp) phi_uses = fill(0, length(ir.stmts) + length(ir.new_nodes)) all_phis = Int[] unionphis = Pair{Int,Any}[] # sorted @@ -1751,153 +1760,6 @@ function adce_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) return complete(compact) end -function type_lift_pass!(ir::IRCode) - lifted_undef = IdDict{Int, Any}() - insts = ir.stmts - for idx in 1:length(insts) - stmt = insts[idx][:inst] - stmt isa Expr || continue - if (stmt.head === :isdefined || stmt.head === :undefcheck) - # after optimization, undef can only show up by being introduced in - # a phi node (or an UpsilonNode() argument to a PhiC node), so lift - # all these nodes that have maybe undef values - val = stmt.args[(stmt.head === :isdefined) ? 
1 : 2] - if stmt.head === :isdefined && (val isa GlobalRef || isexpr(val, :static_parameter) || - val isa Argument || val isa Symbol) - # this is a legal node, so assume it was not introduced by - # slot2ssa (at worst, we might leave in a runtime check that - # shouldn't have been there) - continue - end - # otherwise, we definitely have a corrupt node from slot2ssa, and - # must fix or delete that now - processed = IdDict{Int, Union{SSAValue, Bool}}() - def = val - while true - # peek through PiNodes - isa(val, SSAValue) || break - def = insts[val.id][:inst] - isa(def, PiNode) || break - val = def.val - end - if !isa(val, SSAValue) || (!isa(def, PhiNode) && !isa(def, PhiCNode)) - # in most cases, reaching this statement implies we had a value - if stmt.head === :undefcheck - insts[idx][:inst] = nothing - else - insts[idx][:inst] = true - end - continue - end - stmt_id = val.id - worklist = Tuple{Int, Int, SSAValue, Int}[(stmt_id, 0, SSAValue(0), 0)] - if !haskey(lifted_undef, stmt_id) - first = true - while !isempty(worklist) - item, w_up_id, which, use = pop!(worklist) - def = insts[item][:inst] - if isa(def, PhiNode) - edges = copy(def.edges) - values = Vector{Any}(undef, length(edges)) - new_phi = if length(values) == 0 - false - else - insert_node!(ir, item, NewInstruction(PhiNode(edges, values), Bool)) - end - else - def = def::PhiCNode - values = Vector{Any}(undef, length(def.values)) - new_phi = if length(values) == 0 - false - else - insert_node!(ir, item, NewInstruction(PhiCNode(values), Bool)) - end - end - processed[item] = new_phi - if first - lifted_undef[stmt_id] = new_phi - first = false - end - local id::Int = 0 - all_same = true - local last_val - for i = 1:length(values) - if !isassigned(def.values, i) - val = false - elseif !isa(def.values[i], SSAValue) - val = true - else - up_id = id = (def.values[i]::SSAValue).id - @label restart - if !isa(ir.stmts[id][:type], MaybeUndef) - val = true - else - node = insts[id][:inst] - if isa(node, 
UpsilonNode) - if !isdefined(node, :val) - val = false - elseif !isa(node.val, SSAValue) - val = true - else - id = (node.val::SSAValue).id - @goto restart - end - else - while isa(node, PiNode) - id = (node.val::SSAValue).id - node = insts[id][:inst] - end - if isa(node, Union{PhiNode, PhiCNode}) - if haskey(processed, id) - val = processed[id] - else - # TODO: Re-check after convergence whether all the values are the same - all_same = false - push!(worklist, (id, up_id, new_phi::SSAValue, i)) - continue - end - else - val = true - end - end - end - end - if isa(def, PhiNode) - if !@isdefined(last_val) - last_val = val - elseif all_same - all_same &= last_val === val - end - values[i] = val - else - values[i] = insert_node!(ir, up_id, NewInstruction(UpsilonNode(val), Bool)) - end - end - if all_same && @isdefined(last_val) - # Decay the PhiNode back to the single value - ir[new_phi][:inst] = last_val - isa(last_val, Bool) && (processed[item] = last_val) - end - if which !== SSAValue(0) - phi = ir[which][:inst] - if isa(phi, PhiNode) - phi.values[use] = new_phi - elseif isa(phi, PhiCNode) - phi.values[use] = insert_node!(ir, w_up_id, NewInstruction(UpsilonNode(new_phi), Bool)) - end - end - end - end - inst = lifted_undef[stmt_id] - if stmt.head === :undefcheck - inst = Expr(:throw_undef_if_not, stmt.args[1], inst) - end - insts[idx][:inst] = inst - end - end - ir -end - function is_bb_empty(ir::IRCode, bb::BasicBlock) isempty(bb.stmts) && return true if length(bb.stmts) == 1 diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl index 757fa1b98bedc..73bdb51702ded 100644 --- a/base/compiler/ssair/slot2ssa.jl +++ b/base/compiler/ssair/slot2ssa.jl @@ -89,18 +89,16 @@ function new_to_regular(@nospecialize(stmt), new_offset::Int) return urs[] end -function fixup_slot!(ir::IRCode, ci::CodeInfo, idx::Int, slot::Int, stmt::UnoptSlot, @nospecialize(ssa)) +function fixup_slot!(ir::IRCode, ci::CodeInfo, idx::Int, slot::Int, stmt::UnoptSlot, 
@nospecialize(ssa), @nospecialize(def_ssa)) # We don't really have the information here to get rid of these. # We'll do so later if ssa === UNDEF_TOKEN insert_node!(ir, idx, NewInstruction( Expr(:throw_undef_if_not, ci.slotnames[slot], false), Any)) return UNDEF_TOKEN - end - if !isa(ssa, Argument) && !(ssa === nothing) && ((ci.slotflags[slot] & SLOT_USEDUNDEF) != 0) - # insert a temporary node. type_lift_pass! will remove it + elseif def_ssa !== true insert_node!(ir, idx, NewInstruction( - Expr(:undefcheck, ci.slotnames[slot], ssa), Any)) + Expr(:throw_undef_if_not, ci.slotnames[slot], def_ssa), Any)) end if isa(stmt, SlotNumber) return ssa @@ -112,7 +110,7 @@ end function fixemup!(@specialize(slot_filter), @specialize(rename_slot), ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt)) if isa(stmt, UnoptSlot) && slot_filter(stmt) - return fixup_slot!(ir, ci, idx, slot_id(stmt), stmt, rename_slot(stmt)) + return fixup_slot!(ir, ci, idx, slot_id(stmt), stmt, rename_slot(stmt)...) end if isexpr(stmt, :(=)) stmt.args[2] = fixemup!(slot_filter, rename_slot, ir, ci, idx, stmt.args[2]) @@ -126,7 +124,7 @@ function fixemup!(@specialize(slot_filter), @specialize(rename_slot), ir::IRCode slot_filter(val) || continue bb_idx = block_for_inst(ir.cfg, Int(stmt.edges[i])) from_bb_terminator = last(ir.cfg.blocks[bb_idx].stmts) - stmt.values[i] = fixup_slot!(ir, ci, from_bb_terminator, slot_id(val), val, rename_slot(val)) + stmt.values[i] = fixup_slot!(ir, ci, from_bb_terminator, slot_id(val), val, rename_slot(val)...) end return stmt end @@ -137,15 +135,14 @@ function fixemup!(@specialize(slot_filter), @specialize(rename_slot), ir::IRCode if (ci.slotflags[slot] & SLOT_USEDUNDEF) == 0 return true else - ssa = rename_slot(val) + ssa, undef_ssa = rename_slot(val) if ssa === UNDEF_TOKEN return false elseif !isa(ssa, SSAValue) && !isa(ssa, NewSSAValue) return true end + return undef_ssa end - # temporarily corrupt the isdefined node. type_lift_pass! 
will fix it - stmt.args[1] = ssa end return stmt end @@ -153,7 +150,7 @@ function fixemup!(@specialize(slot_filter), @specialize(rename_slot), ir::IRCode for op in urs val = op[] if isa(val, UnoptSlot) && slot_filter(val) - x = fixup_slot!(ir, ci, idx, slot_id(val), val, rename_slot(val)) + x = fixup_slot!(ir, ci, idx, slot_id(val), val, rename_slot(val)...) # We inserted an undef error node. Delete subsequent statement # to avoid confusing the optimizer if x === UNDEF_TOKEN @@ -178,11 +175,11 @@ end function fixup_uses!(ir::IRCode, ci::CodeInfo, code::Vector{Any}, uses::Vector{Int}, slot::Int, @nospecialize(ssa)) for use in uses - code[use] = fixemup!(x::UnoptSlot->slot_id(x)==slot, stmt::UnoptSlot->ssa, ir, ci, use, code[use]) + code[use] = fixemup!(x::UnoptSlot->slot_id(x)==slot, stmt::UnoptSlot->(ssa, true), ir, ci, use, code[use]) end end -function rename_uses!(ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt), renames::Vector{Any}) +function rename_uses!(ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt), renames::Vector{Pair{Any, Any}}) return fixemup!(stmt::UnoptSlot->true, stmt::UnoptSlot->renames[slot_id(stmt)], ir, ci, idx, stmt) end @@ -577,22 +574,13 @@ function recompute_type(node::Union{PhiNode, PhiCNode}, ci::CodeInfo, ir::IRCode new_typ = Union{} for i = 1:length(node.values) if isa(node, PhiNode) && !isassigned(node.values, i) - if !isa(new_typ, MaybeUndef) - new_typ = MaybeUndef(new_typ) - end continue end typ = typ_for_val(node.values[i], ci, sptypes, -1, slottypes) - was_maybe_undef = false - if isa(typ, MaybeUndef) - typ = typ.typ - was_maybe_undef = true - end - @assert !isa(typ, MaybeUndef) while isa(typ, DelayedTyp) typ = types(ir)[new_to_regular(typ.phi::NewSSAValue, nstmts)] end - new_typ = tmerge(𝕃ₒ, new_typ, was_maybe_undef ? 
MaybeUndef(typ) : typ) + new_typ = tmerge(𝕃ₒ, new_typ, typ) end return new_typ end @@ -601,14 +589,18 @@ struct TryCatchRegion enter_block::Int leave_block::Int end -struct NewPhiNode +struct NewSlotPhi{Phi} ssaval::NewSSAValue - node::PhiNode + node::Phi + undef_ssaval::Union{NewSSAValue, Nothing} + undef_node::Union{Phi, Nothing} end -struct NewPhiCNode + +const NewPhiNode2 = NewSlotPhi{PhiNode} + +struct NewPhiCNode2 slot::SlotNumber - ssaval::NewSSAValue - node::PhiCNode + insert::NewSlotPhi{PhiCNode} end function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, @@ -638,10 +630,10 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, end phi_slots = Vector{Int}[Int[] for _ = 1:length(ir.cfg.blocks)] - new_phi_nodes = Vector{NewPhiNode}[NewPhiNode[] for _ = 1:length(cfg.blocks)] - new_phic_nodes = IdDict{Int, Vector{NewPhiCNode}}() + new_phi_nodes = Vector{NewPhiNode2}[NewPhiNode2[] for _ = 1:length(cfg.blocks)] + new_phic_nodes = IdDict{Int, Vector{NewPhiCNode2}}() for (; leave_block) in catch_entry_blocks - new_phic_nodes[leave_block] = NewPhiCNode[] + new_phic_nodes[leave_block] = NewPhiCNode2[] end @timeit "idf" for (idx, slot) in Iterators.enumerate(defuses) # No uses => no need for phi nodes @@ -653,7 +645,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, ssaval = Argument(idx) fixup_uses!(ir, ci, code, slot.uses, idx, ssaval) elseif isa(code[slot.defs[]], NewvarNode) - typ = MaybeUndef(Union{}) + typ = Union{} ssaval = nothing for use in slot.uses[] insert_node!(ir, use, @@ -676,10 +668,18 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, # Create a PhiC node in the catch entry block and # an upsilon node in the corresponding enter block node = PhiCNode(Any[]) + insertpoint = first_insert_for_bb(code, cfg, li) phic_ssa = NewSSAValue( - insert_node!(ir, first_insert_for_bb(code, cfg, li), + insert_node!(ir, insertpoint, NewInstruction(node, Union{})).id - length(ir.stmts)) - 
push!(new_phic_nodes[li], NewPhiCNode(SlotNumber(idx), phic_ssa, node)) + undef_node = undef_ssaval = nothing + if (ci.slotflags[idx] & SLOT_USEDUNDEF) != 0 + undef_node = PhiCNode(Any[]) + undef_ssaval = NewSSAValue(insert_node!(ir, + insertpoint, NewInstruction(undef_node, Bool)).id - length(ir.stmts)) + end + push!(new_phic_nodes[li], NewPhiCNode2(SlotNumber(idx), + NewSlotPhi{PhiCNode}(phic_ssa, node, undef_ssaval, undef_node))) # Inform IDF that we now have a def in the catch block if !(li in live.def_bbs) push!(live.def_bbs, li) @@ -692,20 +692,26 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, node = PhiNode() ssaval = NewSSAValue(insert_node!(ir, first_insert_for_bb(code, cfg, block), NewInstruction(node, Union{})).id - length(ir.stmts)) - push!(new_phi_nodes[block], NewPhiNode(ssaval, node)) + undef_node = undef_ssaval = nothing + if (ci.slotflags[idx] & SLOT_USEDUNDEF) != 0 + undef_node = PhiNode() + undef_ssaval = NewSSAValue(insert_node!(ir, + first_insert_for_bb(code, cfg, block), NewInstruction(undef_node, Bool)).id - length(ir.stmts)) + end + push!(new_phi_nodes[block], NewPhiNode2(ssaval, node, undef_ssaval, undef_node)) end end # Perform SSA renaming - initial_incoming_vals = Any[ + initial_incoming_vals = Pair{Any, Any}[ if 0 in defuses[x].defs - Argument(x) + Pair{Any, Any}(Argument(x), true) elseif !defuses[x].any_newvar - UNDEF_TOKEN + Pair{Any, Any}(UNDEF_TOKEN, false) else - SSAValue(-2) + Pair{Any, Any}(SSAValue(-2), false) end for x in 1:length(ci.slotflags) ] - worklist = Tuple{Int, Int, Vector{Any}}[(1, 0, initial_incoming_vals)] + worklist = Tuple{Int, Int, Vector{Pair{Any, Any}}}[(1, 0, initial_incoming_vals)] visited = BitSet() type_refine_phi = BitSet() new_nodes = ir.new_nodes @@ -729,8 +735,8 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, end # Insert phi nodes if necessary for (idx, slot) in Iterators.enumerate(phi_slots[item]) - (; ssaval, node) = new_phi_nodes[item][idx] - 
incoming_val = incoming_vals[slot] + (; ssaval, node, undef_ssaval, undef_node) = new_phi_nodes[item][idx] + (incoming_val, incoming_def) = incoming_vals[slot] if incoming_val === SSAValue(-1) # Optimistically omit this path. # Liveness analysis would probably have prevented us from inserting this phi node @@ -742,12 +748,18 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, else push!(node.values, incoming_val) end + outgoing_def = true + if (ci.slotflags[slot] & SLOT_USEDUNDEF) != 0 + push!(undef_node.edges, pred) + push!(undef_node.values, incoming_def) + outgoing_def = undef_ssaval + end # TODO: Remove the next line, it shouldn't be necessary push!(type_refine_phi, ssaval.id) if isa(incoming_val, NewSSAValue) push!(type_refine_phi, ssaval.id) end - typ = incoming_val === UNDEF_TOKEN ? MaybeUndef(Union{}) : typ_for_val(incoming_val, ci, ir.sptypes, -1, slottypes) + typ = incoming_val === UNDEF_TOKEN ? Union{} : typ_for_val(incoming_val, ci, ir.sptypes, -1, slottypes) old_entry = new_nodes.stmts[ssaval.id] if isa(typ, DelayedTyp) push!(type_refine_phi, ssaval.id) @@ -755,26 +767,34 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, new_typ = isa(typ, DelayedTyp) ? Union{} : tmerge(𝕃ₒ, old_entry[:type], typ) old_entry[:type] = new_typ old_entry[:inst] = node - incoming_vals[slot] = ssaval + incoming_vals[slot] = Pair{Any, Any}(ssaval, outgoing_def) end (item in visited) && continue # Record phi_C nodes if necessary if haskey(new_phic_nodes, item) - for (; slot, ssaval) in new_phic_nodes[item] - incoming_vals[slot_id(slot)] = ssaval + for (; slot, insert) in new_phic_nodes[item] + (; ssaval, undef_ssaval) = insert + incoming_vals[slot_id(slot)] = Pair{Any, Any}(ssaval, undef_ssaval === nothing ? 
true : undef_ssaval) end end # Record initial upsilon nodes if necessary eidx = findfirst((; enter_block)::TryCatchRegion->enter_block==item, catch_entry_blocks) if eidx !== nothing - for (; slot, node) in new_phic_nodes[catch_entry_blocks[eidx].leave_block] - ival = incoming_vals[slot_id(slot)] + for (; slot, insert) in new_phic_nodes[catch_entry_blocks[eidx].leave_block] + (; node, undef_node) = insert + (ival, idef) = incoming_vals[slot_id(slot)] ivalundef = ival === UNDEF_TOKEN Υ = NewInstruction(ivalundef ? UpsilonNode() : UpsilonNode(ival), - ivalundef ? MaybeUndef(Union{}) : typ_for_val(ival, ci, ir.sptypes, -1, slottypes)) + ivalundef ? Union{} : typ_for_val(ival, ci, ir.sptypes, -1, slottypes)) + insertpos = first_insert_for_bb(code, cfg, item) # insert `UpsilonNode` immediately before the `:enter` expression - Υssa = insert_node!(ir, first_insert_for_bb(code, cfg, item), Υ) + Υssa = insert_node!(ir, insertpos, Υ) push!(node.values, NewSSAValue(Υssa.id - length(ir.stmts))) + if undef_node !== nothing + Υundef = NewInstruction(UpsilonNode(idef), Bool) + Υssaundef = insert_node!(ir, insertpos, Υundef) + push!(undef_node.values, NewSSAValue(Υssaundef.id - length(ir.stmts))) + end end end push!(visited, item) @@ -782,7 +802,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, stmt = code[idx] (isa(stmt, PhiNode) || (isexpr(stmt, :(=)) && isa(stmt.args[2], PhiNode))) && continue if isa(stmt, NewvarNode) - incoming_vals[slot_id(stmt.slot)] = UNDEF_TOKEN + incoming_vals[slot_id(stmt.slot)] = Pair{Any, Any}(UNDEF_TOKEN, false) code[idx] = nothing else stmt = rename_uses!(ir, ci, idx, stmt, incoming_vals) @@ -801,23 +821,31 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, # Having UNDEF_TOKEN appear on the RHS is possible if we're on a dead branch. # Do something reasonable here, by marking the LHS as undef as well. 
if val !== UNDEF_TOKEN - incoming_vals[id] = make_ssa!(ci, code, idx, typ) + thisdef = true + thisval = make_ssa!(ci, code, idx, typ) else code[idx] = nothing - incoming_vals[id] = UNDEF_TOKEN + thisval = UNDEF_TOKEN + thisdef = false end + incoming_vals[id] = Pair{Any, Any}(thisval, thisdef) enter_block = item while haskey(exc_handlers, enter_block) (; enter_block, leave_block) = exc_handlers[enter_block] - cidx = findfirst((; slot)::NewPhiCNode->slot_id(slot)==id, new_phic_nodes[leave_block]) + cidx = findfirst((; slot)::NewPhiCNode2->slot_id(slot)==id, new_phic_nodes[leave_block]) if cidx !== nothing - node = UpsilonNode(incoming_vals[id]) + node = thisdef ? UpsilonNode(thisval) : UpsilonNode() if incoming_vals[id] === UNDEF_TOKEN node = UpsilonNode() - typ = MaybeUndef(Union{}) + typ = Union{} end - push!(new_phic_nodes[leave_block][cidx].node.values, + insert = new_phic_nodes[leave_block][cidx].insert + push!(insert.node.values, NewSSAValue(insert_node!(ir, idx, NewInstruction(node, typ), true).id - length(ir.stmts))) + if insert.undef_node !== nothing + push!(insert.undef_node.values, + NewSSAValue(insert_node!(ir, idx, NewInstruction(UpsilonNode(thisdef), Bool), true).id - length(ir.stmts))) + end end end end @@ -876,7 +904,8 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, end end for (_, nodes) in new_phic_nodes - for (; ssaval, node) in nodes + for (; insert) in nodes + (; node, ssaval) = insert new_typ = Union{} # TODO: This could just be the ones that depend on other phis push!(type_refine_phi, ssaval.id) @@ -885,7 +914,6 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, phic_values = (node[:inst]::PhiCNode).values for i = 1:length(phic_values) orig_typ = typ = typ_for_val(phic_values[i], ci, ir.sptypes, -1, slottypes) - @assert !isa(typ, MaybeUndef) while isa(typ, DelayedTyp) typ = types(ir)[new_to_regular(typ.phi::NewSSAValue, nstmts)] end diff --git a/base/compiler/ssair/verify.jl 
b/base/compiler/ssair/verify.jl index 8df42bd499631..316f7a7f6e4b9 100644 --- a/base/compiler/ssair/verify.jl +++ b/base/compiler/ssair/verify.jl @@ -87,7 +87,7 @@ end function verify_ir(ir::IRCode, print::Bool=true, allow_frontend_forms::Bool=false, - 𝕃ₒ::AbstractLattice = OptimizerLattice()) + 𝕃ₒ::AbstractLattice = SimpleInferenceLattice.instance) # For now require compact IR # @assert isempty(ir.new_nodes) # Verify CFG diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl index 79e3cfefc7ff1..7ead9dcb7fa6b 100644 --- a/base/compiler/tfuncs.jl +++ b/base/compiler/tfuncs.jl @@ -1086,13 +1086,6 @@ end return _getfield_tfunc(widenlattice(𝕃), s00, name, setfield) end -@nospecs function _getfield_tfunc(𝕃::OptimizerLattice, s00, name, setfield::Bool) - # If undef, that's a Union, but that doesn't affect the rt when tmerged - # into the unwrapped result. - isa(s00, MaybeUndef) && (s00 = s00.typ) - return _getfield_tfunc(widenlattice(𝕃), s00, name, setfield) -end - @nospecs function _getfield_tfunc(𝕃::AnyConditionalsLattice, s00, name, setfield::Bool) if isa(s00, AnyConditional) return Bottom # Bool has no fields diff --git a/base/compiler/typelattice.jl b/base/compiler/typelattice.jl index 75071d2a8a2e0..324f2b600cc44 100644 --- a/base/compiler/typelattice.jl +++ b/base/compiler/typelattice.jl @@ -156,14 +156,6 @@ struct PartialTypeVar PartialTypeVar(tv::TypeVar, lb_certain::Bool, ub_certain::Bool) = new(tv, lb_certain, ub_certain) end -# Wraps a type and represents that the value may also be undef at this point. -# (only used in optimize, not abstractinterpret) -# N.B. 
in the lattice, this is epsilon bigger than `typ` (even Any) -struct MaybeUndef - typ - MaybeUndef(@nospecialize(typ)) = new(typ) -end - struct StateUpdate var::SlotNumber vtype::VarState @@ -232,7 +224,7 @@ struct NotFound end const NOT_FOUND = NotFound() -const CompilerTypes = Union{MaybeUndef, Const, Conditional, MustAlias, NotFound, PartialStruct} +const CompilerTypes = Union{Const, Conditional, MustAlias, NotFound, PartialStruct} ==(x::CompilerTypes, y::CompilerTypes) = x === y ==(x::Type, y::CompilerTypes) = false ==(x::CompilerTypes, y::Type) = false @@ -420,16 +412,6 @@ ignorelimited(typ::LimitedAccuracy) = typ.typ return b.causes ⊆ a.causes end -@nospecializeinfer function ⊑(lattice::OptimizerLattice, @nospecialize(a), @nospecialize(b)) - if isa(a, MaybeUndef) - isa(b, MaybeUndef) || return false - a, b = a.typ, b.typ - elseif isa(b, MaybeUndef) - b = b.typ - end - return ⊑(widenlattice(lattice), a, b) -end - @nospecializeinfer function ⊑(lattice::AnyConditionalsLattice, @nospecialize(a), @nospecialize(b)) # Fast paths for common cases b === Any && return true @@ -560,14 +542,6 @@ end return is_lattice_equal(widenlattice(lattice), a, b) end -@nospecializeinfer function is_lattice_equal(lattice::OptimizerLattice, @nospecialize(a), @nospecialize(b)) - if isa(a, MaybeUndef) || isa(b, MaybeUndef) - # TODO: Unwrap these and recurse to is_lattice_equal - return ⊑(lattice, a, b) && ⊑(lattice, b, a) - end - return is_lattice_equal(widenlattice(lattice), a, b) -end - @nospecializeinfer function is_lattice_equal(lattice::AnyConditionalsLattice, @nospecialize(a), @nospecialize(b)) ConditionalT = isa(lattice, ConditionalsLattice) ? 
Conditional : InterConditional if isa(a, ConditionalT) || isa(b, ConditionalT) @@ -709,12 +683,6 @@ end return tmeet(widenlattice(𝕃), v, t) end -@nospecializeinfer function tmeet(lattice::OptimizerLattice, @nospecialize(v), @nospecialize(t::Type)) - # TODO: This can probably happen and should be handled - @assert !isa(v, MaybeUndef) - tmeet(widenlattice(lattice), v, t) -end - """ widenconst(x) -> t::Type @@ -723,7 +691,6 @@ Widens extended lattice element `x` to native `Type` representation. widenconst(::AnyConditional) = Bool widenconst(a::AnyMustAlias) = widenconst(widenmustalias(a)) widenconst(c::Const) = (v = c.val; isa(v, Type) ? Type{v} : typeof(v)) -widenconst(m::MaybeUndef) = widenconst(m.typ) widenconst(::PartialTypeVar) = TypeVar widenconst(t::PartialStruct) = t.typ widenconst(t::PartialOpaque) = t.typ diff --git a/base/compiler/typelimits.jl b/base/compiler/typelimits.jl index 11b09aeacead5..81a176dff1c9d 100644 --- a/base/compiler/typelimits.jl +++ b/base/compiler/typelimits.jl @@ -305,8 +305,6 @@ end # A simplified type_more_complex query over the extended lattice # (assumes typeb ⊑ typea) @nospecializeinfer function issimplertype(𝕃::AbstractLattice, @nospecialize(typea), @nospecialize(typeb)) - typea isa MaybeUndef && (typea = typea.typ) # n.b. does not appear in inference - typeb isa MaybeUndef && (typeb = typeb.typ) # n.b. does not appear in inference @assert !isa(typea, LimitedAccuracy) && !isa(typeb, LimitedAccuracy) "LimitedAccuracy not supported by simplertype lattice" # n.b. the caller was supposed to handle these typea === typeb && return true if typea isa PartialStruct @@ -379,19 +377,6 @@ end return nothing end -function tmerge(lattice::OptimizerLattice, @nospecialize(typea), @nospecialize(typeb)) - r = tmerge_fast_path(lattice, typea, typeb) - r !== nothing && return r - - # type-lattice for MaybeUndef wrapper - if isa(typea, MaybeUndef) || isa(typeb, MaybeUndef) - return MaybeUndef(tmerge(widenlattice(lattice), - isa(typea, MaybeUndef) ? 
typea.typ : typea, - isa(typeb, MaybeUndef) ? typeb.typ : typeb)) - end - return tmerge(widenlattice(lattice), typea, typeb) -end - function union_causes(causesa::IdSet{InferenceState}, causesb::IdSet{InferenceState}) if causesa ⊆ causesb return causesb diff --git a/base/compiler/types.jl b/base/compiler/types.jl index 71ec3670ad688..14f1c90dca0e9 100644 --- a/base/compiler/types.jl +++ b/base/compiler/types.jl @@ -465,15 +465,15 @@ infer_compilation_signature(::NativeInterpreter) = true typeinf_lattice(::AbstractInterpreter) = InferenceLattice(BaseInferenceLattice.instance) ipo_lattice(::AbstractInterpreter) = InferenceLattice(IPOResultLattice.instance) -optimizer_lattice(::AbstractInterpreter) = OptimizerLattice(SimpleInferenceLattice.instance) +optimizer_lattice(::AbstractInterpreter) = SimpleInferenceLattice.instance typeinf_lattice(interp::NativeInterpreter) = interp.irinterp ? - OptimizerLattice(InferenceLattice(SimpleInferenceLattice.instance)) : + InferenceLattice(SimpleInferenceLattice.instance) : InferenceLattice(BaseInferenceLattice.instance) ipo_lattice(interp::NativeInterpreter) = interp.irinterp ? InferenceLattice(SimpleInferenceLattice.instance) : InferenceLattice(IPOResultLattice.instance) -optimizer_lattice(interp::NativeInterpreter) = OptimizerLattice(SimpleInferenceLattice.instance) +optimizer_lattice(interp::NativeInterpreter) = SimpleInferenceLattice.instance """ switch_to_irinterp(interp::AbstractInterpreter) -> irinterp::AbstractInterpreter diff --git a/doc/src/devdocs/ast.md b/doc/src/devdocs/ast.md index 9fd03ad9a667a..1a11a5918d091 100644 --- a/doc/src/devdocs/ast.md +++ b/doc/src/devdocs/ast.md @@ -438,10 +438,6 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form. Yields the caught exception inside a `catch` block, as returned by `jl_current_exception()`. - * `undefcheck` - - Temporary node inserted by the compiler and will be processed in `type_lift_pass!`. - * `enter` Enters an exception handler (`setjmp`). 
`args[1]` is the label of the catch block to jump to on diff --git a/test/compiler/AbstractInterpreter.jl b/test/compiler/AbstractInterpreter.jl index 2cac29e76098b..632f4851cd248 100644 --- a/test/compiler/AbstractInterpreter.jl +++ b/test/compiler/AbstractInterpreter.jl @@ -125,7 +125,7 @@ end using Core: SlotNumber, Argument using Core.Compiler: slot_id, tmerge_fast_path import .CC: - AbstractLattice, BaseInferenceLattice, IPOResultLattice, InferenceLattice, OptimizerLattice, + AbstractLattice, BaseInferenceLattice, IPOResultLattice, InferenceLattice, widenlattice, is_valid_lattice_norec, typeinf_lattice, ipo_lattice, optimizer_lattice, widenconst, tmeet, tmerge, ⊑, abstract_eval_special_value, widenreturn @@ -146,7 +146,7 @@ const AnyTaintLattice{L} = Union{TaintLattice{L},InterTaintLattice{L}} CC.typeinf_lattice(::TaintInterpreter) = InferenceLattice(TaintLattice(BaseInferenceLattice.instance)) CC.ipo_lattice(::TaintInterpreter) = InferenceLattice(InterTaintLattice(IPOResultLattice.instance)) -CC.optimizer_lattice(::TaintInterpreter) = InterTaintLattice(OptimizerLattice()) +CC.optimizer_lattice(::TaintInterpreter) = InterTaintLattice(SimpleInferenceLattice.instance) struct Taint typ @@ -246,13 +246,13 @@ end # External lattice without `Conditional` import .CC: - AbstractLattice, ConstsLattice, PartialsLattice, InferenceLattice, OptimizerLattice, + AbstractLattice, ConstsLattice, PartialsLattice, InferenceLattice, typeinf_lattice, ipo_lattice, optimizer_lattice @newinterp NonconditionalInterpreter CC.typeinf_lattice(::NonconditionalInterpreter) = InferenceLattice(PartialsLattice(ConstsLattice())) CC.ipo_lattice(::NonconditionalInterpreter) = InferenceLattice(PartialsLattice(ConstsLattice())) -CC.optimizer_lattice(::NonconditionalInterpreter) = OptimizerLattice(PartialsLattice(ConstsLattice())) +CC.optimizer_lattice(::NonconditionalInterpreter) = PartialsLattice(ConstsLattice()) @test Base.return_types((Any,); interp=NonconditionalInterpreter()) do x c = isa(x, 
Int) || isa(x, Float64) diff --git a/test/compiler/EscapeAnalysis/EAUtils.jl b/test/compiler/EscapeAnalysis/EAUtils.jl index 6894733e0fa45..bb3273b3e707a 100644 --- a/test/compiler/EscapeAnalysis/EAUtils.jl +++ b/test/compiler/EscapeAnalysis/EAUtils.jl @@ -64,7 +64,7 @@ import Core: import .CC: InferenceResult, OptimizationState, IRCode, copy as cccopy, @timeit, convert_to_ircode, slot2reg, compact!, ssa_inlining_pass!, sroa_pass!, - adce_pass!, type_lift_pass!, JLOptions, verify_ir, verify_linetable + adce_pass!, JLOptions, verify_ir, verify_linetable import .EA: analyze_escapes, ArgEscapeCache, EscapeInfo, EscapeState, is_ipo_profitable # when working outside of Core.Compiler, @@ -224,7 +224,6 @@ function run_passes_with_ea(interp::EscapeAnalyzer, ci::CodeInfo, sv::Optimizati end @timeit "SROA" ir = sroa_pass!(ir) @timeit "ADCE" ir = adce_pass!(ir) - @timeit "type lift" ir = type_lift_pass!(ir) @timeit "compact 3" ir = compact!(ir) if JLOptions().debug_level == 2 @timeit "verify 3" (verify_ir(ir); verify_linetable(ir.linetable)) diff --git a/test/compiler/EscapeAnalysis/local.jl b/test/compiler/EscapeAnalysis/local.jl index dd324c3619dc7..27e4fdeee28c6 100644 --- a/test/compiler/EscapeAnalysis/local.jl +++ b/test/compiler/EscapeAnalysis/local.jl @@ -63,8 +63,7 @@ include(normpath(@__DIR__, "setup.jl")) return @isdefined(s) end i = findfirst(isT(Base.RefValue{String}), result.ir.stmts.type) # find allocation statement - @test !isnothing(i) - @test has_no_escape(result.state[SSAValue(i)]) + @test isnothing(i) || has_no_escape(result.state[SSAValue(i)]) end let # ϕ-node result = code_escapes((Bool,Any,Any)) do cond, a, b diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl index da5772744607d..9511abfb6d202 100644 --- a/test/compiler/inference.jl +++ b/test/compiler/inference.jl @@ -644,7 +644,6 @@ for (codetype, all_ssa) in Any[ local i for i = 1:length(code.ssavaluetypes) typ = code.ssavaluetypes[i] - typ isa Core.Compiler.MaybeUndef && (typ = 
typ.typ) @test isa(typ, Type) || isa(typ, Const) || isa(typ, Conditional) || typ end test_inferred_static(codetype, all_ssa) @@ -699,7 +698,7 @@ f_infer_abstract_fieldtype() = fieldtype(HasAbstractlyTypedField, :x) let fieldtype_tfunc(@nospecialize args...) = Core.Compiler.fieldtype_tfunc(Core.Compiler.fallback_lattice, args...), fieldtype_nothrow(@nospecialize(s0), @nospecialize(name)) = Core.Compiler.fieldtype_nothrow( - Core.Compiler.OptimizerLattice(), s0, name) + Core.Compiler.SimpleInferenceLattice.instance, s0, name) @test fieldtype_tfunc(Union{}, :x) == Union{} @test fieldtype_tfunc(Union{Type{Int32}, Int32}, Const(:x)) == Union{} @test fieldtype_tfunc(Union{Type{Base.RefValue{T}}, Type{Int32}} where {T<:Array}, Const(:x)) == Type{<:Array} @@ -729,19 +728,6 @@ let fieldtype_tfunc(@nospecialize args...) = @test TypeVar <: fieldtype_tfunc(Any, Any) end -import Core.Compiler: MaybeUndef, builtin_nothrow -let 𝕃ₒ = Core.Compiler.OptimizerLattice() - @test !builtin_nothrow(𝕃ₒ, setfield!, Any[Base.RefValue{String}, Core.Const(:x), MaybeUndef(String)], Any) - @test !builtin_nothrow(𝕃ₒ, setfield!, Any[Base.RefValue{String}, Core.Const(:x), MaybeUndef(String), Core.Const(:not_atomic)], Any) - @test !builtin_nothrow(𝕃ₒ, isdefined, Any[Any,MaybeUndef(Symbol)], Bool) - @test !builtin_nothrow(𝕃ₒ, fieldtype, Any[MaybeUndef(Any),Symbol], Any) - @test !builtin_nothrow(𝕃ₒ, isa, Any[Type,MaybeUndef(Type)], Any) - @test !builtin_nothrow(𝕃ₒ, <:, Any[MaybeUndef(Any),MaybeUndef(Any)], Any) - @test !builtin_nothrow(𝕃ₒ, Core.ifelse, Any[MaybeUndef(Bool),Any,Any], Any) - @test !builtin_nothrow(𝕃ₒ, typeassert, Any[MaybeUndef(Any),Type{Symbol}], Any) - @test !builtin_nothrow(𝕃ₒ, Core.get_binding_type, Any[Module,MaybeUndef(Symbol)], Any) -end - # issue #11480 @noinline f11480(x,y) = x let A = Ref @@ -1733,7 +1719,7 @@ let setfield!_tfunc(@nospecialize xs...) = @test setfield!_tfunc(ABCDconst, Const(4), Any) === Union{} end let setfield!_nothrow(@nospecialize xs...) 
= - Core.Compiler.setfield!_nothrow(Core.Compiler.OptimizerLattice(), xs...) + Core.Compiler.setfield!_nothrow(Core.Compiler.SimpleInferenceLattice.instance, xs...) @test setfield!_nothrow(Base.RefValue{Int}, Const(:x), Int) @test setfield!_nothrow(Base.RefValue{Int}, Const(1), Int) @test setfield!_nothrow(Base.RefValue{Any}, Const(:x), Int) @@ -2165,15 +2151,15 @@ mutable struct AliasableConstField{S,T} end import Core.Compiler: - InferenceLattice, OptimizerLattice, MustAliasesLattice, InterMustAliasesLattice, - BaseInferenceLattice, IPOResultLattice, typeinf_lattice, ipo_lattice, optimizer_lattice + InferenceLattice, MustAliasesLattice, InterMustAliasesLattice, + BaseInferenceLattice, SimpleInferenceLattice, IPOResultLattice, typeinf_lattice, ipo_lattice, optimizer_lattice include("newinterp.jl") @newinterp MustAliasInterpreter let CC = Core.Compiler CC.typeinf_lattice(::MustAliasInterpreter) = InferenceLattice(MustAliasesLattice(BaseInferenceLattice.instance)) CC.ipo_lattice(::MustAliasInterpreter) = InferenceLattice(InterMustAliasesLattice(IPOResultLattice.instance)) - CC.optimizer_lattice(::MustAliasInterpreter) = OptimizerLattice() + CC.optimizer_lattice(::MustAliasInterpreter) = SimpleInferenceLattice.instance end # lattice @@ -3397,7 +3383,7 @@ const DenseIdx = Union{IntRange,Integer} # Non uniformity in expressions with PartialTypeVar @test Core.Compiler.:⊑(Core.Compiler.PartialTypeVar(TypeVar(:N), true, true), TypeVar) let N = TypeVar(:N) - 𝕃 = Core.Compiler.OptimizerLattice() + 𝕃 = Core.Compiler.SimpleInferenceLattice.instance argtypes = Any[Core.Compiler.Const(NTuple), Core.Compiler.PartialTypeVar(N, true, true), Core.Compiler.Const(Any)] @@ -5009,7 +4995,7 @@ let src = code_typed1((Bool,Base.RefValue{String}, Base.RefValue{Any},Int,)) do end struct Issue49785{S, T<:S} end -let 𝕃 = Core.Compiler.OptimizerLattice() +let 𝕃 = Core.Compiler.SimpleInferenceLattice.instance argtypes = Any[Core.Compiler.Const(Issue49785), Union{Type{String},Type{Int}}, 
Union{Type{String},Type{Int}}] diff --git a/test/core.jl b/test/core.jl index f71baa843d25f..7eaa6458d3690 100644 --- a/test/core.jl +++ b/test/core.jl @@ -8010,3 +8010,26 @@ end # donotdelete should not taint consistency of the containing function f_donotdete(x) = (Core.Compiler.donotdelete(x); 1) @test Core.Compiler.is_consistent(Base.infer_effects(f_donotdete, (Tuple{Float64},))) + +# Test conditional UndefRefError (#50250) +struct Foo50250 + a::Int + x + Foo50250(a) = new() + Foo50250(a, x) = new(x) +end + +struct Bar50250 + a::Int + x + Bar50250(a) = new(a) + Bar50250(a, x) = new(a, x) +end + +foo50250(b, y) = (b ? Foo50250(y, y) : Foo50250(y)).x +bar50250(b, y) = (b ? Bar50250(y, y) : Bar50250(y)).x + +@test_throws UndefRefError foo50250(true, 1) +@test_throws UndefRefError foo50250(false, 1) +@test bar50250(true, 1) === 1 +@test_throws UndefRefError bar50250(false, 1) From 07f052573a41986d20b340ee3a3b5242e1b9041b Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Thu, 22 Jun 2023 17:20:10 +0100 Subject: [PATCH 216/290] Tweak cache pidlocking pt. 
2 (#50254) --- base/loading.jl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 4aefa9a08a210..1262872ffaf72 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -2226,14 +2226,14 @@ function compilecache_dir(pkg::PkgId) return joinpath(DEPOT_PATH[1], entrypath) end -function compilecache_path(pkg::PkgId, prefs_hash::UInt64)::String +function compilecache_path(pkg::PkgId, prefs_hash::UInt64; project::String=something(Base.active_project(), ""))::String entrypath, entryfile = cache_file_entry(pkg) cachepath = joinpath(DEPOT_PATH[1], entrypath) isdir(cachepath) || mkpath(cachepath) if pkg.uuid === nothing abspath(cachepath, entryfile) * ".ji" else - crc = _crc32c(something(Base.active_project(), "")) + crc = _crc32c(project) crc = _crc32c(unsafe_string(JLOptions().image_file), crc) crc = _crc32c(unsafe_string(JLOptions().julia_bin), crc) crc = _crc32c(ccall(:jl_cache_flags, UInt8, ()), crc) @@ -2823,17 +2823,17 @@ global mkpidlock_hook global trymkpidlock_hook global parse_pidfile_hook -# The preferences hash is only known after precompilation so just assume no preferences -# meaning that if all other conditions are equal, the same package cannot be precompiled -# with different preferences at the same time. -compilecache_pidfile_path(pkg::PkgId) = compilecache_path(pkg, UInt64(0)) * ".pidfile" +# The preferences hash is only known after precompilation so just assume no preferences. +# Also ignore the active project, which means that if all other conditions are equal, +# the same package cannot be precompiled from different projects and/or different preferences at the same time. +compilecache_pidfile_path(pkg::PkgId) = compilecache_path(pkg, UInt64(0); project="") * ".pidfile" # Allows processes to wait if another process is precompiling a given source already. 
# The lock file is deleted and precompilation will proceed after `stale_age` seconds if # - the locking process no longer exists # - the lock is held by another host, since processes cannot be checked remotely -# or after `stale_age * 25` seconds if it does still exist. -function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String; stale_age=60) +# or after `stale_age * 25` seconds if the process does still exist. +function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String; stale_age=300) if @isdefined(mkpidlock_hook) && @isdefined(trymkpidlock_hook) && @isdefined(parse_pidfile_hook) pidfile = compilecache_pidfile_path(pkg) cachefile = invokelatest(trymkpidlock_hook, f, pidfile; stale_age) From a8d76c674cd28713c57b60fbb9d11d7ff3d089f9 Mon Sep 17 00:00:00 2001 From: N5N3 <2642243996@qq.com> Date: Fri, 23 Jun 2023 00:20:54 +0800 Subject: [PATCH 217/290] typeintersect: fix `constraintkind` for non-covariant var (#50209) Co-authored-by: Jameson Nash --- src/subtype.c | 49 +++++++++++++++++++++++++++++++++++++++++-------- test/subtype.jl | 7 +++++++ 2 files changed, 48 insertions(+), 8 deletions(-) diff --git a/src/subtype.c b/src/subtype.c index 078bcf99e574c..5b05bb288ffc4 100644 --- a/src/subtype.c +++ b/src/subtype.c @@ -860,7 +860,7 @@ static int var_occurs_inside(jl_value_t *v, jl_tvar_t *var, int inside, int want typedef int (*tvar_callback)(void*, int8_t, jl_stenv_t *, int); -static int var_occurs_invariant(jl_value_t *v, jl_tvar_t *var, int inv) JL_NOTSAFEPOINT +static int var_occurs_invariant(jl_value_t *v, jl_tvar_t *var) JL_NOTSAFEPOINT { return var_occurs_inside(v, var, 0, 1); } @@ -909,7 +909,7 @@ static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8 // ( Tuple{Int, Int} <: Tuple{T, T} where T) but // !( Tuple{Int, String} <: Tuple{T, T} where T) // Then check concreteness by checking that the lower bound is not an abstract type. 
- int diagonal = vb.occurs_cov > 1 && !var_occurs_invariant(u->body, u->var, 0); + int diagonal = vb.occurs_cov > 1 && !var_occurs_invariant(u->body, u->var); if (ans && (vb.concrete || (diagonal && is_leaf_typevar(u->var)))) { if (vb.concrete && !diagonal && !is_leaf_bound(vb.ub)) { // a non-diagonal var can only be a subtype of a diagonal var if its @@ -941,8 +941,8 @@ static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8 jl_value_t *vl = btemp->lb; // TODO: this takes a significant amount of time if (btemp->depth0 != vb.depth0 && - ((vu != (jl_value_t*)vb.var && btemp->var->ub != vu && var_occurs_inside(vu, vb.var, 0, 1)) || - (vl != (jl_value_t*)vb.var && btemp->var->lb != vl && var_occurs_inside(vl, vb.var, 0, 1)))) { + ((vu != (jl_value_t*)vb.var && btemp->var->ub != vu && var_occurs_invariant(vu, vb.var)) || + (vl != (jl_value_t*)vb.var && btemp->var->lb != vl && var_occurs_invariant(vl, vb.var)))) { ans = 0; break; } btemp = btemp->prev; @@ -1988,7 +1988,7 @@ static int obvious_subtype(jl_value_t *x, jl_value_t *y, jl_value_t *y0, int *su jl_value_t *body = find_var_body(y0, (jl_tvar_t*)b); if (body == NULL) body = y0; - if (var_occurs_invariant(body, (jl_tvar_t*)b, 0)) + if (var_occurs_invariant(body, (jl_tvar_t*)b)) return 0; } if (nparams_expanded_x > npy && jl_is_typevar(b) && concrete_min(a1) > 1) { @@ -2973,7 +2973,7 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv res = intersect(u->body, t, e, param); } vb->concrete |= (vb->occurs_cov > 1 && is_leaf_typevar(u->var) && - !var_occurs_invariant(u->body, u->var, 0)); + !var_occurs_invariant(u->body, u->var)); // handle the "diagonal dispatch" rule, which says that a type var occurring more // than once, and only in covariant position, is constrained to concrete types. E.g. 
@@ -3014,6 +3014,38 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv return res; } +static int always_occurs_cov(jl_value_t *v, jl_tvar_t *var, int param) JL_NOTSAFEPOINT +{ + if (param > 1) { + return 0; + } + else if (v == (jl_value_t*)var) { + return param == 1; + } + else if (jl_is_uniontype(v)) { + return always_occurs_cov(((jl_uniontype_t*)v)->a, var, param) && + always_occurs_cov(((jl_uniontype_t*)v)->b, var, param); + } + else if (jl_is_unionall(v)) { + jl_unionall_t *ua = (jl_unionall_t*)v; + return ua->var != var && ( + always_occurs_cov(ua->var->ub, var, 0) || + always_occurs_cov(ua->body, var, param)); + } + else if (jl_is_vararg(v)) { + jl_vararg_t *vm = (jl_vararg_t*)v; + return vm->T && always_occurs_cov(vm->T, var, param); + } + else if (jl_is_datatype(v)) { + int nparam = jl_is_tuple_type(v) ? 1 : param; + for (size_t i = 0; i < jl_nparams(v); i++) { + if (always_occurs_cov(jl_tparam(v, i), var, nparam)) + return 1; + } + } + return 0; +} + static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param) { jl_value_t *res = NULL; @@ -3022,7 +3054,8 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_ e->invdepth, NULL, e->vars }; JL_GC_PUSH4(&res, &vb.lb, &vb.ub, &vb.innervars); save_env(e, &se, 1); - if (is_leaf_typevar(u->var) && !var_occurs_invariant(u->body, u->var, 0)) + int noinv = !var_occurs_invariant(u->body, u->var); + if (is_leaf_typevar(u->var) && noinv && always_occurs_cov(u->body, u->var, param)) vb.constraintkind = 1; res = intersect_unionall_(t, u, e, R, param, &vb); if (vb.limited) { @@ -3036,7 +3069,7 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_ vb.constraintkind = vb.concrete ? 
1 : 2; else if (u->var->lb != jl_bottom_type) vb.constraintkind = 2; - else if (vb.occurs_cov && !var_occurs_invariant(u->body, u->var, 0)) + else if (vb.occurs_cov && noinv) vb.constraintkind = 1; int reintersection = constraint1 != vb.constraintkind || vb.concrete; if (reintersection) { diff --git a/test/subtype.jl b/test/subtype.jl index c637fccb6552f..de11689e9e7c4 100644 --- a/test/subtype.jl +++ b/test/subtype.jl @@ -2550,3 +2550,10 @@ end #issue #49857 @test !<:(Type{Vector{Union{Base.BitInteger, Base.IEEEFloat, StridedArray, Missing, Nothing, Val{T}}}} where {T}, Type{Array{T}} where {T}) + +#issue 50195 +T50195{S} = Pair{S,Set{S}} +let a = Tuple{Type{X} where X<:Union{Nothing, Val{X1} where {X4, X1<:(Pair{X2, Val{X2}} where X2<:Val{X4})}}}, + b = Tuple{Type{Y} where Y<:(Val{Y1} where {Y4<:Src, Y1<:(Pair{Y2, Val{Y2}} where Y2<:Union{Val{Y4}, Y4})})} where Src + @test typeintersect(a, b) <: Any +end From 7785db77da5ff55cf8254d83a9fdc66b5d5a694f Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Thu, 22 Jun 2023 17:43:50 +0000 Subject: [PATCH 218/290] Check that we don't name LLVM constants (#50263) --- src/codegen.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index 137d3d78814af..632d9cfc6c129 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -169,7 +169,10 @@ typedef Instruction TerminatorInst; void setName(jl_codegen_params_t &params, Value *V, const Twine &Name) { - if (params.debug_level) { + // we do the constant check again later, duplicating it here just makes sure the assertion + // fires on debug builds even if debug info is not enabled + assert((isa<Constant>(V) || isa<Instruction>(V)) && "Should only set names on instructions!"); + if (params.debug_level && !isa<Constant>(V)) { V->setName(Name); } } @@ -2354,17 +2357,17 @@ std::unique_ptr<Module> jl_create_llvm_module(StringRef name, LLVMContext &conte static void jl_name_jlfunc_args(jl_codegen_params_t &params,
Function *F) { assert(F->arg_size() == 3); - setName(params, F->getArg(0), "function"); - setName(params, F->getArg(1), "args"); - setName(params, F->getArg(2), "nargs"); + F->getArg(0)->setName("function"); + F->getArg(1)->setName("args"); + F->getArg(2)->setName("nargs"); } static void jl_name_jlfuncparams_args(jl_codegen_params_t &params, Function *F) { assert(F->arg_size() == 4); - setName(params, F->getArg(0), "function"); - setName(params, F->getArg(1), "args"); - setName(params, F->getArg(2), "nargs"); - setName(params, F->getArg(3), "sparams"); + F->getArg(0)->setName("function"); + F->getArg(1)->setName("args"); + F->getArg(2)->setName("nargs"); + F->getArg(3)->setName("sparams"); } static void jl_init_function(Function *F, const Triple &TT) From c0f623d487f27de9f421d1768a0f8849b4964964 Mon Sep 17 00:00:00 2001 From: "Joe(y) Carpinelli" Date: Thu, 22 Jun 2023 17:34:20 -0400 Subject: [PATCH 219/290] Small fix to running-external-programs.md (#50220) The `run` method no longer throws an `ErrorException` on failure. It currently throws a `ProcessFailedException`. --- doc/src/manual/running-external-programs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/manual/running-external-programs.md b/doc/src/manual/running-external-programs.md index e643ffff3ee61..ed3fe85194d93 100644 --- a/doc/src/manual/running-external-programs.md +++ b/doc/src/manual/running-external-programs.md @@ -41,7 +41,7 @@ hello ``` The `hello` is the output of the `echo` command, sent to [`stdout`](@ref). If the external command fails to run -successfully, the run method throws an [`ErrorException`](@ref). +successfully, the run method throws a [`ProcessFailedException`](@ref).
If you want to read the output of the external command, [`read`](@ref) or [`readchomp`](@ref) can be used instead: From 85f19ded3f4dccd5d98c019e8b071f7e3374177c Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Fri, 23 Jun 2023 14:05:02 -0400 Subject: [PATCH 220/290] Document JULIA_CPU_TARGET in environment-variables.md (#50147) --- doc/src/devdocs/pkgimg.md | 4 +- doc/src/manual/command-line-interface.md | 2 +- doc/src/manual/environment-variables.md | 47 ++++++++++++++++++++++++ doc/src/manual/getting-started.md | 2 +- 4 files changed, 51 insertions(+), 4 deletions(-) diff --git a/doc/src/devdocs/pkgimg.md b/doc/src/devdocs/pkgimg.md index f97fc36750f18..d9fc1a33a4d24 100644 --- a/doc/src/devdocs/pkgimg.md +++ b/doc/src/devdocs/pkgimg.md @@ -1,4 +1,4 @@ -# Package Images +# [Package Images](@id pkgimages) Julia package images provide object (native code) caches for Julia packages. They are similar to Julia's [system image](@ref dev-sysimg) and support many of the same features. @@ -32,7 +32,7 @@ To avoid having to deal with `link.exe` we use `-flavor gnu`, effectively turnin Dynamic libraries on macOS need to link against `-lSystem`. On recent macOS versions, `-lSystem` is only available for linking when Xcode is available. To that effect we link with `-undefined dynamic_lookup`. -## Package images optimized for multiple microarchitectures +## [Package images optimized for multiple microarchitectures](@id pkgimgs-multi-versioning) Similar to [multi-versioning](@ref sysimg-multi-versioning) for system images, package images support multi-versioning. If you are in a heterogenous environment, with a unified cache, you can set the environment variable `JULIA_CPU_TARGET=generic` to multi-version the object caches. 
diff --git a/doc/src/manual/command-line-interface.md b/doc/src/manual/command-line-interface.md index 8164299f01250..e1651c61a3ec3 100644 --- a/doc/src/manual/command-line-interface.md +++ b/doc/src/manual/command-line-interface.md @@ -1,4 +1,4 @@ -# Command-line Interface +# [Command-line Interface](@id cli) ## Using arguments inside scripts diff --git a/doc/src/manual/environment-variables.md b/doc/src/manual/environment-variables.md index ac5a6fad6cc08..eb26063a5e61e 100644 --- a/doc/src/manual/environment-variables.md +++ b/doc/src/manual/environment-variables.md @@ -376,6 +376,53 @@ should have at the terminal. The formatting `Base.answer_color()` (default: normal, `"\033[0m"`) that output should have at the terminal. +## System and Package Image Building + +### `JULIA_CPU_TARGET` + +Modify the target machine architecture for (pre)compiling +[system](@ref sysimg-multi-versioning) and [package images](@ref pkgimgs-multi-versioning). +`JULIA_CPU_TARGET` only affects machine code image generation being output to a disk cache. +Unlike the `--cpu-target`, or `-C`, [command line option](@ref cli), it does not influence +just-in-time (JIT) code generation within a Julia session where machine code is only +stored in memory. + +Valid values for `JULIA_CPU_TARGET` can be obtained by executing `julia -C help`. + +Setting `JULIA_CPU_TARGET` is important for heterogeneous compute systems where processors of +distinct types or features may be present. This is commonly encountered in high performance +computing (HPC) clusters since the component nodes may be using distinct processors. + +The CPU target string is a list of strings separated by `;`. Each string starts with a CPU +or architecture name, followed by an optional list of features separated by `,`. +A `generic` or empty CPU name means the basic required feature set of the target ISA +which is at least the architecture the C/C++ runtime is compiled with. Each string +is interpreted by LLVM.
+ +A few special features are supported: +1. `clone_all` + + This forces the target to have all functions in sysimg cloned. + When used in negative form (i.e. `-clone_all`), this disables full clone that's + enabled by default for certain targets. + +2. `base([0-9]*)` + + This specifies the (0-based) base target index. The base target is the target + that the current target is based on, i.e. the functions that are not being cloned + will use the version in the base target. This option causes the base target to be + fully cloned (as if `clone_all` is specified for it) if it is not the default target (0). + The index can only be smaller than the current index. + +3. `opt_size` + + Optimize for size with minimum performance impact. Clang/GCC's `-Os`. + +4. `min_size` + + Optimize only for size. Clang's `-Oz`. + + ## Debugging and profiling ### `JULIA_DEBUG` diff --git a/doc/src/manual/getting-started.md b/doc/src/manual/getting-started.md index 16dab24afecf9..e972788022de6 100644 --- a/doc/src/manual/getting-started.md +++ b/doc/src/manual/getting-started.md @@ -34,7 +34,7 @@ command: $ julia script.jl ``` -You can pass additional arguments to Julia, and to your program `script.jl`. A detailed list of all the available options can be found under [Command-line Interface](@ref). +You can pass additional arguments to Julia, and to your program `script.jl`. A detailed list of all the available options can be found under [Command-line Interface](@ref cli). 
## Resources From 7b565e3d8dcd5b881aac82da866b3e9810d0ad34 Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Fri, 23 Jun 2023 21:25:09 +0000 Subject: [PATCH 221/290] Run llvmpasses tests with opaque pointers (#50266) --- .../alloc-opt-gcframe-addrspaces.jl | 40 --- .../alloc-opt-gcframe-addrspaces.ll | 42 +++ ...oc-opt-gcframe.jl => alloc-opt-gcframe.ll} | 287 +++++++++-------- test/llvmpasses/alloc-opt-pass.ll | 28 +- test/llvmpasses/alloc-opt-unsized.ll | 27 +- test/llvmpasses/cpu-features.ll | 7 +- test/llvmpasses/final-lower-gc-addrspaces.ll | 15 +- test/llvmpasses/final-lower-gc.ll | 65 ++-- test/llvmpasses/float16.ll | 7 +- test/llvmpasses/gcroots.ll | 304 ++++++++++++------ test/llvmpasses/julia-licm-fail.ll | 22 +- test/llvmpasses/julia-licm-missed.ll | 25 +- test/llvmpasses/julia-licm.ll | 34 +- test/llvmpasses/late-lower-gc-addrspaces.ll | 107 ++++-- test/llvmpasses/late-lower-gc.ll | 129 +++++--- test/llvmpasses/llvmcall.jl | 39 ++- test/llvmpasses/lower-handlers-addrspaces.ll | 7 +- test/llvmpasses/lower-handlers.ll | 7 +- test/llvmpasses/muladd.ll | 7 +- .../multiversioning-annotate-only.ll | 7 +- test/llvmpasses/multiversioning-clone-only.ll | 13 +- test/llvmpasses/pipeline-o0.jl | 9 + test/llvmpasses/pipeline-o2-allocs.jl | 7 + test/llvmpasses/pipeline-o2-broadcast.jl | 7 + test/llvmpasses/pipeline-o2.jl | 13 + test/llvmpasses/pipeline-prints.ll | 61 ++-- .../propagate-addrspace-non-zero.ll | 7 +- test/llvmpasses/propagate-addrspace.ll | 7 +- test/llvmpasses/refinements.ll | 53 ++- test/llvmpasses/remove-addrspaces.ll | 37 ++- test/llvmpasses/returnstwicegc.ll | 10 +- test/llvmpasses/simdloop.ll | 7 +- 32 files changed, 929 insertions(+), 508 deletions(-) delete mode 100644 test/llvmpasses/alloc-opt-gcframe-addrspaces.jl create mode 100644 test/llvmpasses/alloc-opt-gcframe-addrspaces.ll rename test/llvmpasses/{alloc-opt-gcframe.jl => alloc-opt-gcframe.ll} (50%) diff --git 
a/test/llvmpasses/alloc-opt-gcframe-addrspaces.jl b/test/llvmpasses/alloc-opt-gcframe-addrspaces.jl deleted file mode 100644 index 093c062deca64..0000000000000 --- a/test/llvmpasses/alloc-opt-gcframe-addrspaces.jl +++ /dev/null @@ -1,40 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -# RUN: julia --startup-file=no %s | opt -enable-new-pm=0 -load libjulia-codegen%shlibext -AllocOpt -S - | FileCheck %s -# RUN: julia --startup-file=no %s | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S - | FileCheck %s - -isz = sizeof(UInt) == 8 ? "i64" : "i32" - -println(""" -target triple = "amdgcn-amd-amdhsa" -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" - -@tag = external addrspace(10) global {} - -declare {}*** @julia.ptls_states() -declare {}*** @julia.get_pgcstack() -declare noalias {} addrspace(10)* @julia.gc_alloc_obj(i8*, $isz, {} addrspace(10)*) -declare {}* @julia.pointer_from_objref({} addrspace(11)*) -""") - -# Test that non-0 addrspace allocas are properly emitted and handled - -# CHECK-LABEL: @non_zero_addrspace -# CHECK: %1 = alloca i32, align 8, addrspace(5) -# CHECK: %2 = bitcast i32 addrspace(5)* %1 to i8 addrspace(5)* -# CHECK: %3 = bitcast i8 addrspace(5)* %2 to {} addrspace(5)* -# CHECK: %var1 = addrspacecast {} addrspace(5)* %3 to {} addrspace(10)* -# CHECK: call void @llvm.lifetime.start.p5i8(i64 4, i8 addrspace(5)* %2) -# CHECK: ret void -println(""" -define void @non_zero_addrspace() { - %pgcstack = call {}*** @julia.get_pgcstack() - %ptls = call {}*** @julia.ptls_states() - %ptls_i8 = bitcast {}*** %ptls to i8* - %var1 = call {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 4, {} addrspace(10)* @tag) - %var2 = addrspacecast {} addrspace(10)* %var1 to {} addrspace(11)* - %var3 = call {}* 
@julia.pointer_from_objref({} addrspace(11)* %var2) - ret void -} -""") -# CHECK-LABEL: }{{$}} diff --git a/test/llvmpasses/alloc-opt-gcframe-addrspaces.ll b/test/llvmpasses/alloc-opt-gcframe-addrspaces.ll new file mode 100644 index 0000000000000..b33f2cdac7dd4 --- /dev/null +++ b/test/llvmpasses/alloc-opt-gcframe-addrspaces.ll @@ -0,0 +1,42 @@ +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE + +target triple = "amdgcn-amd-amdhsa" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" + +@tag = external addrspace(10) global {} + +declare {}*** @julia.ptls_states() +declare {}*** @julia.get_pgcstack() +declare noalias {} addrspace(10)* @julia.gc_alloc_obj(i8*, i64, {} addrspace(10)*) +declare {}* @julia.pointer_from_objref({} addrspace(11)*) + +; Test that non-0 addrspace allocas are properly emitted and handled + +; CHECK-LABEL: @non_zero_addrspace +; CHECK: %1 = alloca i32, align 8, addrspace(5) + +; TYPED: %2 = bitcast i32 addrspace(5)* %1 to i8 addrspace(5)* +; TYPED: %3 = bitcast i8 addrspace(5)* %2 to {} addrspace(5)* +; TYPED: %var1 = addrspacecast {} addrspace(5)* %3 to {} addrspace(10)* +; TYPED: call void @llvm.lifetime.start.p5i8(i64 4, i8 
addrspace(5)* %2) + +; OPAQUE: %var1 = addrspacecast ptr addrspace(5) %1 to ptr addrspace(10) +; OPAQUE: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %1) + +; CHECK: ret void +define void @non_zero_addrspace() { + %pgcstack = call {}*** @julia.get_pgcstack() + %ptls = call {}*** @julia.ptls_states() + %ptls_i8 = bitcast {}*** %ptls to i8* + %var1 = call {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 4, {} addrspace(10)* @tag) + %var2 = addrspacecast {} addrspace(10)* %var1 to {} addrspace(11)* + %var3 = call {}* @julia.pointer_from_objref({} addrspace(11)* %var2) + ret void +} +; CHECK-LABEL: }{{$}} diff --git a/test/llvmpasses/alloc-opt-gcframe.jl b/test/llvmpasses/alloc-opt-gcframe.ll similarity index 50% rename from test/llvmpasses/alloc-opt-gcframe.jl rename to test/llvmpasses/alloc-opt-gcframe.ll index e7ddf12d79bc7..a04d6566cec0a 100644 --- a/test/llvmpasses/alloc-opt-gcframe.jl +++ b/test/llvmpasses/alloc-opt-gcframe.ll @@ -1,49 +1,54 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license +; This file is a part of Julia. License is MIT: https://julialang.org/license -# RUN: julia --startup-file=no %s | opt -enable-new-pm=0 -load libjulia-codegen%shlibext -AllocOpt -LateLowerGCFrame -FinalLowerGC -S - | FileCheck %s -# RUN: julia --startup-file=no %s | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt,LateLowerGCFrame),FinalLowerGC' -S - | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -AllocOpt -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt,LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED -isz = sizeof(UInt) == 8 ? 
"i64" : "i32" +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -AllocOpt -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt,LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE -println(""" target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @tag = external addrspace(10) global {} -""") - -# CHECK-LABEL: @return_obj -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK: %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 -# CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 -# CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 -# CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** -# CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* -# CHECK-NEXT: %v = call noalias nonnull dereferenceable({{[0-9]+}}) {} addrspace(10)* @ijl_gc_pool_alloc(i8* [[ptls_i8]], i32 [[SIZE_T:[0-9]+]], i32 16) -# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4 -println(""" + +; CHECK-LABEL: @return_obj +; CHECK-NOT: @julia.gc_alloc_obj + +; TYPED: %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 +; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 +; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 +; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** +; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* +; TYPED-NEXT: %v = call noalias nonnull dereferenceable({{[0-9]+}}) {} addrspace(10)* @ijl_gc_pool_alloc(i8* [[ptls_i8]], i32 [[SIZE_T:[0-9]+]], i32 16) +; TYPED: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4 + +; OPAQUE: %current_task = 
getelementptr inbounds ptr, ptr %gcstack, i64 -12 +; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 +; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; OPAQUE-NEXT: %v = call noalias nonnull dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_pool_alloc(ptr [[ptls_load]], i32 [[SIZE_T:[0-9]+]], i32 16) +; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4 + define {} addrspace(10)* @return_obj() { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) ret {} addrspace(10)* %v } -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @return_load -# CHECK: alloca i64 -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK-NOT: @jl_gc_pool_alloc -# CHECK: call void @llvm.lifetime.start{{.*}}(i64 8, i8* -# CHECK-NOT: @tag -# CHECK-NOT: @llvm.lifetime.end -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @return_load +; CHECK: alloca i64 +; CHECK-NOT: @julia.gc_alloc_obj +; CHECK-NOT: @jl_gc_pool_alloc +; TYPED: call void @llvm.lifetime.start{{.*}}(i64 8, i8* +; OPAQUE: call void @llvm.lifetime.start{{.*}}(i64 8, ptr +; CHECK-NOT: @tag +; CHECK-NOT: @llvm.lifetime.end define i64 @return_load(i64 %i) { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* %v64a11 = addrspacecast 
i64 addrspace(10)* %v64 to i64 addrspace(11)* store i64 %i, i64 addrspace(11)* %v64a11, align 16, !tbaa !4 @@ -51,43 +56,46 @@ define i64 @return_load(i64 %i) { %l = load i64, i64 addrspace(11)* %v64a11, align 16, !tbaa !4 ret i64 %l } -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @ccall_obj -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK: @ijl_gc_pool_alloc -# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4 -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @ccall_obj +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK-NOT: @julia.gc_alloc_obj +; CHECK: @ijl_gc_pool_alloc +; TYPED: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4 +; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4 define void @ccall_obj(i8* %fptr) { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) %f = bitcast i8* %fptr to void ({} addrspace(10)*)* call void %f({} addrspace(10)* %v) ret void } -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @ccall_ptr -# CHECK: alloca i64 -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK-NOT: @jl_gc_pool_alloc -# CHECK: call void @llvm.lifetime.start{{.*}}(i64 8, i8* -# CHECK: %f = bitcast i8* %fptr to void (i8*)* -# Currently the GC frame lowering pass strips away all operand bundles -# CHECK-NEXT: call void %f(i8* -# CHECK-NEXT: ret void -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @ccall_ptr +; CHECK: alloca i64 +; TYPED: call {}*** 
@julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK-NOT: @julia.gc_alloc_obj +; CHECK-NOT: @jl_gc_pool_alloc +; TYPED: call void @llvm.lifetime.start{{.*}}(i64 8, i8* +; TYPED: %f = bitcast i8* %fptr to void (i8*)* + +; OPAQUE: call void @llvm.lifetime.start{{.*}}(i64 8, ptr +; OPAQUE: %f = bitcast ptr %fptr to ptr +; Currently the GC frame lowering pass strips away all operand bundles +; TYPED-NEXT: call void %f(i8* +; OPAQUE-NEXT: call void %f(ptr +; CHECK-NEXT: ret void define void @ccall_ptr(i8* %fptr) { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)* %ptrj = call {}* @julia.pointer_from_objref({} addrspace(11)* %va) %ptr = bitcast {}* %ptrj to i8* @@ -95,20 +103,20 @@ define void @ccall_ptr(i8* %fptr) { call void %f(i8* %ptr) [ "jl_roots"({} addrspace(10)* %v), "unknown_bundle"(i8* %ptr) ] ret void } -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @ccall_unknown_bundle -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK: @ijl_gc_pool_alloc -# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4 -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @ccall_unknown_bundle +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK-NOT: @julia.gc_alloc_obj +; CHECK: @ijl_gc_pool_alloc +; TYPED: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4 +; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4 define void @ccall_unknown_bundle(i8* 
%fptr) { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)* %ptrj = call {}* @julia.pointer_from_objref({} addrspace(11)* %va) %ptr = bitcast {}* %ptrj to i8* @@ -116,26 +124,32 @@ define void @ccall_unknown_bundle(i8* %fptr) { call void %f(i8* %ptr) [ "jl_not_jl_roots"({} addrspace(10)* %v) ] ret void } -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @lifetime_branches -# CHECK: alloca i64 -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK: L1: -# CHECK-NEXT: call void @llvm.lifetime.start{{.*}}(i64 8, -# CHECK: %f = bitcast i8* %fptr to void (i8*)* -# CHECK-NEXT: call void %f(i8* -# CHECK-NEXT: br i1 %b2, label %L2, label %L3 - -# CHECK: L2: -# CHECK-NEXT: %f2 = bitcast i8* %fptr to void ({}*)* -# CHECK-NEXT: call void @llvm.lifetime.end{{.*}}(i64 8, -# CHECK-NEXT: call void %f2({}* null) - -# CHECK: L3: -# CHECK-NEXT: call void @llvm.lifetime.end{{.*}}(i64 8, -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @lifetime_branches +; CHECK: alloca i64 +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK: L1: +; CHECK-NEXT: call void @llvm.lifetime.start{{.*}}(i64 8, + +; TYPED: %f = bitcast i8* %fptr to void (i8*)* +; TYPED-NEXT: call void %f(i8* + +; OPAQUE: %f = bitcast ptr %fptr to ptr +; OPAQUE-NEXT: call void %f(ptr + +; CHECK-NEXT: br i1 %b2, label %L2, label %L3 + +; CHECK: L2: +; TYPED-NEXT: %f2 = bitcast i8* %fptr to void ({}*)* +; OPAQUE-NEXT: %f2 = bitcast ptr %fptr to ptr +; CHECK-NEXT: call void @llvm.lifetime.end{{.*}}(i64 8, +; TYPED-NEXT: call void %f2({}* null) +; OPAQUE-NEXT: call void %f2(ptr null) + +; CHECK: L3: +; 
CHECK-NEXT: call void @llvm.lifetime.end{{.*}}(i64 8, define void @lifetime_branches(i8* %fptr, i1 %b, i1 %b2) { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** @@ -143,7 +157,7 @@ define void @lifetime_branches(i8* %fptr, i1 %b, i1 %b2) { br i1 %b, label %L1, label %L3 L1: - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)* %ptrj = call {}* @julia.pointer_from_objref({} addrspace(11)* %va) %ptr = bitcast {}* %ptrj to i8* @@ -159,62 +173,60 @@ L2: L3: ret void } -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @object_field -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK-NOT: @jl_gc_pool_alloc -# CHECK-NOT: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}}, align 8, !tbaa !4 -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @object_field +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK-NOT: @julia.gc_alloc_obj +; CHECK-NOT: @jl_gc_pool_alloc +; CHECK-NOT: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}}, align 8, !tbaa !4 define void @object_field({} addrspace(10)* %field) { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)* %vab = bitcast {} addrspace(11)* %va to {} addrspace(10)* addrspace(11)* store {} addrspace(10)* %field, {} addrspace(10)* addrspace(11)* %vab, align 8 ret void } -""") -# 
CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @memcpy_opt -# CHECK: alloca [16 x i8], align 16 -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK-NOT: @jl_gc_pool_alloc -# CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @memcpy_opt +; CHECK: alloca [16 x i8], align 16 +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK-NOT: @julia.gc_alloc_obj +; CHECK-NOT: @jl_gc_pool_alloc +; TYPED: call void @llvm.memcpy.p0i8.p0i8.i64 +; OPAQUE: call void @llvm.memcpy.p0.p0.i64 define void @memcpy_opt(i8* %v22) { top: %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 16, {} addrspace(10)* @tag) + %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 16, {} addrspace(10)* @tag) %v20 = bitcast {} addrspace(10)* %v19 to i8 addrspace(10)* %v21 = addrspacecast i8 addrspace(10)* %v20 to i8 addrspace(11)* call void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* %v21, i8* %v22, i64 16, i32 8, i1 false) ret void } -""") -# CHECK-LABEL: }{{$}} - -# CHECK-LABEL: @preserve_opt -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK-NOT: @julia.gc_alloc_obj -# CHECK-NOT: @jl_gc_pool_alloc -# CHECK-NOT: @llvm.lifetime.end -# CHECK: @external_function -println(""" +; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @preserve_opt +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK-NOT: @julia.gc_alloc_obj +; CHECK-NOT: @jl_gc_pool_alloc +; CHECK-NOT: @llvm.lifetime.end +; CHECK: @external_function define void @preserve_opt(i8* %v22) { top: %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12 - %v19 = call noalias {} addrspace(10)* 
@julia.gc_alloc_obj({}** %current_task, $isz 16, {} addrspace(10)* @tag) + %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 16, {} addrspace(10)* @tag) %v20 = bitcast {} addrspace(10)* %v19 to i8 addrspace(10)* %v21 = addrspacecast i8 addrspace(10)* %v20 to i8 addrspace(11)* %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v19) @@ -223,21 +235,20 @@ top: call void @external_function() ret void } -""") -# CHECK-LABEL: }{{$}} +; CHECK-LABEL: }{{$}} -# CHECK-LABEL: @preserve_branches -# CHECK: call {}*** @julia.get_pgcstack() -# CHECK: L1: -# CHECK-NEXT: @external_function() -# CHECK-NEXT: br i1 %b2, label %L2, label %L3 +; CHECK-LABEL: @preserve_branches +; TYPED: call {}*** @julia.get_pgcstack() +; OPAQUE: call ptr @julia.get_pgcstack() +; CHECK: L1: +; CHECK-NEXT: @external_function() +; CHECK-NEXT: br i1 %b2, label %L2, label %L3 -# CHECK: L2: -# CHECK: @external_function() -# CHECK-NEXT: br label %L3 +; CHECK: L2: +; CHECK: @external_function() +; CHECK-NEXT: br label %L3 -# CHECK: L3: -println(""" +; CHECK: L3: define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) { %pgcstack = call {}*** @julia.get_pgcstack() %gcstack = bitcast {}*** %pgcstack to {}** @@ -245,7 +256,7 @@ define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) { br i1 %b, label %L1, label %L3 L1: - %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag) + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) %tok = call token (...) 
@llvm.julia.gc_preserve_begin({} addrspace(10)* %v) call void @external_function() br i1 %b2, label %L2, label %L3 @@ -257,15 +268,16 @@ L2: L3: ret void } -""") -# CHECK-LABEL: }{{$}} +; CHECK-LABEL: }{{$}} + +; TYPED: declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, +; TYPED: declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, -# CHECK: declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, -# CHECK: declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, -println(""" +; OPAQUE: declare noalias nonnull ptr addrspace(10) @ijl_gc_pool_alloc(ptr, +; OPAQUE: declare noalias nonnull ptr addrspace(10) @ijl_gc_big_alloc(ptr, declare void @external_function() declare {}*** @julia.get_pgcstack() -declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, $isz, {} addrspace(10)*) +declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*) declare {}* @julia.pointer_from_objref({} addrspace(11)*) declare void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) declare token @llvm.julia.gc_preserve_begin(...) @@ -278,4 +290,3 @@ declare void @llvm.julia.gc_preserve_end(token) !4 = !{!5, !5, i64 0} !5 = !{!"jtbaa_mutab", !6, i64 0} !6 = !{!"jtbaa_value", !2, i64 0} -""") diff --git a/test/llvmpasses/alloc-opt-pass.ll b/test/llvmpasses/alloc-opt-pass.ll index 4ce152669246f..30ee1754926d7 100644 --- a/test/llvmpasses/alloc-opt-pass.ll +++ b/test/llvmpasses/alloc-opt-pass.ll @@ -1,14 +1,18 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE @tag = external addrspace(10) global {} ; Test that the gc_preserve intrinsics are deleted directly. ; CHECK-LABEL: @preserve_branches -; CHECK: call {}*** @julia.ptls_states() +; TYPED: call {}*** @julia.ptls_states() +; OPAQUE: call ptr @julia.ptls_states() ; CHECK: L1: ; CHECK-NOT: @llvm.julia.gc_preserve_begin ; CHECK-NEXT: @external_function() @@ -41,9 +45,11 @@ L3: ; CHECK-LABEL: }{{$}} ; CHECK-LABEL: @preserve_branches2 -; CHECK: call {}*** @julia.ptls_states() +; TYPED: call {}*** @julia.ptls_states() +; OPAQUE: call ptr @julia.ptls_states() ; CHECK: L1: -; CHECK-NEXT: @llvm.julia.gc_preserve_begin{{.*}}{} addrspace(10)* %v2 +; TYPED-NEXT: @llvm.julia.gc_preserve_begin{{.*}}{} addrspace(10)* %v2 +; OPAQUE-NEXT: @llvm.julia.gc_preserve_begin{{.*}}ptr addrspace(10) %v2 ; CHECK-NEXT: @external_function() ; CHECK-NEXT: br i1 %b2, label %L2, label %L3 @@ -101,12 +107,16 @@ declare token @llvm.julia.gc_preserve_begin(...) 
declare void @llvm.julia.gc_preserve_end(token) ; CHECK-LABEL: @memref_collision -; CHECK: call {}*** @julia.ptls_states() -; CHECK-NOT: store {} +; TYPED: call {}*** @julia.ptls_states() +; OPAQUE: call ptr @julia.ptls_states() +; TYPED-NOT: store {} +; OPAQUE-NOT: store ptr ; CHECK: store i -; CHECK-NOT: store {} +; TYPED-NOT: store {} +; OPAQUE-NOT: store ptr ; CHECK: L1: -; CHECK: load {} +; TYPED: load {} +; OPAQUE: load ptr ; CHECK: L2: ; CHECK: load i define void @memref_collision(i64 %x) { diff --git a/test/llvmpasses/alloc-opt-unsized.ll b/test/llvmpasses/alloc-opt-unsized.ll index 8a21091ce558c..d3868548a00d7 100644 --- a/test/llvmpasses/alloc-opt-unsized.ll +++ b/test/llvmpasses/alloc-opt-unsized.ll @@ -1,6 +1,10 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext --passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s --check-prefixes=OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext --passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=OPAQUE source_filename = "text" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" @@ -27,11 +31,16 @@ entry: ret void } -; CHECK: %[[i0:.+]] = alloca {} addrspace(10)*, i64 1000, align 16 -; CHECK: %[[i1:.+]] = bitcast {} addrspace(10)** %[[i0]] to i8* -; CHECK: %i18 = bitcast i8* %[[i1]] to {}* -; CHECK: %_malloccache.i = bitcast {}* %i18 to {} addrspace(10)** -; CHECK: %i23 = getelementptr inbounds {} addrspace(10)*, {} 
addrspace(10)** %_malloccache.i, i64 %iv.i -; CHECK: store {} addrspace(10)* %arg, {} addrspace(10)** %i23, align 8 -; CHECK: %i24 = bitcast {} addrspace(10)** %_malloccache.i to {}* -; CHECK: %l = load {} addrspace(10)*, {} addrspace(10)** %i23, align 8 +; TYPED: %[[i0:.+]] = alloca {} addrspace(10)*, i64 1000, align 16 +; TYPED: %[[i1:.+]] = bitcast {} addrspace(10)** %[[i0]] to i8* +; TYPED: %i18 = bitcast i8* %[[i1]] to {}* +; TYPED: %_malloccache.i = bitcast {}* %i18 to {} addrspace(10)** +; TYPED: %i23 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %_malloccache.i, i64 %iv.i +; TYPED: store {} addrspace(10)* %arg, {} addrspace(10)** %i23, align 8 +; TYPED: %i24 = bitcast {} addrspace(10)** %_malloccache.i to {}* +; TYPED: %l = load {} addrspace(10)*, {} addrspace(10)** %i23, align 8 + +; OPAQUE: %[[i0:.+]] = alloca ptr addrspace(10), i64 1000, align 16 +; OPAQUE: %i23 = getelementptr inbounds ptr addrspace(10), ptr %i18, i64 %iv.i +; OPAQUE: store ptr addrspace(10) %arg, ptr %i23, align 8 +; OPAQUE: %l = load ptr addrspace(10), ptr %i23, align 8 diff --git a/test/llvmpasses/cpu-features.ll b/test/llvmpasses/cpu-features.ll index 1a04db5749b39..eea3d1b288204 100644 --- a/test/llvmpasses/cpu-features.ll +++ b/test/llvmpasses/cpu-features.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -CPUFeatures -simplifycfg -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -CPUFeatures -simplifycfg -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -CPUFeatures -simplifycfg -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s declare i1 @julia.cpu.have_fma.f64() declare double @with_fma(double %0, double %1, double %2) diff --git a/test/llvmpasses/final-lower-gc-addrspaces.ll b/test/llvmpasses/final-lower-gc-addrspaces.ll index 61e9e33875078..d3cdea7454972 100644 --- a/test/llvmpasses/final-lower-gc-addrspaces.ll +++ b/test/llvmpasses/final-lower-gc-addrspaces.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE target triple = "amdgcn-amd-amdhsa" target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" @@ -24,9 +27,11 @@ attributes #0 = { allocsize(1) } define void @gc_frame_addrspace(i64 %a, i64 %b) { top: ; CHECK-LABEL: @gc_frame_addrspace -; CHECK: %0 = alloca {} addrspace(10)*, i32 4, align 16, addrspace(5) -; CHECK: %gcframe = addrspacecast {} addrspace(10)* addrspace(5)* %0 to {} addrspace(10)** -; CHECK: %1 = bitcast {} addrspace(10)** %gcframe to i8* +; TYPED: %0 = alloca {} addrspace(10)*, i32 4, align 16, addrspace(5) +; OPAQUE: %0 = alloca ptr addrspace(10), i32 4, align 16, addrspace(5) +; TYPED: %gcframe = addrspacecast {} addrspace(10)* addrspace(5)* %0 to {} addrspace(10)** +; OPAQUE: %gcframe = addrspacecast ptr addrspace(5) %0 to ptr +; TYPED: %1 = bitcast {} addrspace(10)** %gcframe to i8* %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) %pgcstack = call {}*** @julia.get_pgcstack() call void @julia.push_gc_frame({} 
addrspace(10)** %gcframe, i32 2) diff --git a/test/llvmpasses/final-lower-gc.ll b/test/llvmpasses/final-lower-gc.ll index 6f1be3d240ae4..5bbaa2f4d81ea 100644 --- a/test/llvmpasses/final-lower-gc.ll +++ b/test/llvmpasses/final-lower-gc.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE @tag = external addrspace(10) global {} @@ -22,34 +25,50 @@ attributes #0 = { allocsize(1) } define void @gc_frame_lowering(i64 %a, i64 %b) { top: ; CHECK-LABEL: @gc_frame_lowering -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) -; CHECK: [[GCFRAME_SLOT:%.*]] = call {}*** @julia.get_pgcstack() +; TYPED: [[GCFRAME_SLOT:%.*]] = call {}*** @julia.get_pgcstack() +; OPAQUE: [[GCFRAME_SLOT:%.*]] = call ptr @julia.get_pgcstack() %pgcstack = call {}*** @julia.get_pgcstack() -; CHECK-DAG: [[GCFRAME_SIZE_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 0 -; CHECK-DAG: [[GCFRAME_SIZE_PTR2:%.*]] = bitcast {} 
addrspace(10)** [[GCFRAME_SIZE_PTR]] to i64* -; CHECK-DAG: store i64 8, i64* [[GCFRAME_SIZE_PTR2]], align 8, !tbaa !0 -; CHECK-DAG: [[PREV_GCFRAME_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1 -; CHECK-DAG: [[PREV_GCFRAME_PTR2:%.*]] = bitcast {} addrspace(10)** [[PREV_GCFRAME_PTR]] to {}*** -; CHECK-DAG: [[PREV_GCFRAME:%.*]] = load {}**, {}*** [[GCFRAME_SLOT]], align 8 -; CHECK-DAG: store {}** [[PREV_GCFRAME]], {}*** [[PREV_GCFRAME_PTR2]], align 8, !tbaa !0 -; CHECK-DAG: [[GCFRAME_SLOT2:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)*** -; CHECK-NEXT: store {} addrspace(10)** %gcframe, {} addrspace(10)*** [[GCFRAME_SLOT2]], align 8 +; TYPED-DAG: [[GCFRAME_SIZE_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 0 +; TYPED-DAG: [[GCFRAME_SIZE_PTR2:%.*]] = bitcast {} addrspace(10)** [[GCFRAME_SIZE_PTR]] to i64* +; TYPED-DAG: store i64 8, i64* [[GCFRAME_SIZE_PTR2]], align 8, !tbaa !0 +; TYPED-DAG: [[PREV_GCFRAME_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1 +; TYPED-DAG: [[PREV_GCFRAME_PTR2:%.*]] = bitcast {} addrspace(10)** [[PREV_GCFRAME_PTR]] to {}*** +; TYPED-DAG: [[PREV_GCFRAME:%.*]] = load {}**, {}*** [[GCFRAME_SLOT]], align 8 +; TYPED-DAG: store {}** [[PREV_GCFRAME]], {}*** [[PREV_GCFRAME_PTR2]], align 8, !tbaa !0 +; TYPED-DAG: [[GCFRAME_SLOT2:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)*** +; TYPED-NEXT: store {} addrspace(10)** %gcframe, {} addrspace(10)*** [[GCFRAME_SLOT2]], align 8 + +; OPAQUE-DAG: [[GCFRAME_SIZE_PTR:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 0 +; OPAQUE-DAG: store i64 8, ptr [[GCFRAME_SIZE_PTR]], align 8, !tbaa !0 +; OPAQUE-DAG: [[PREV_GCFRAME_PTR:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 1 +; OPAQUE-DAG: [[PREV_GCFRAME:%.*]] = load ptr, ptr [[GCFRAME_SLOT]], align 8 +; OPAQUE-DAG: store ptr [[PREV_GCFRAME]], ptr [[PREV_GCFRAME_PTR]], align 8, 
!tbaa !0 +; OPAQUE-NEXT: store ptr %gcframe, ptr [[GCFRAME_SLOT]], align 8 call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2) %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a) -; CHECK: %frame_slot_1 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 3 +; TYPED: %frame_slot_1 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 3 +; OPAQUE: %frame_slot_1 = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 3 %frame_slot_1 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 1) store {} addrspace(10)* %aboxed, {} addrspace(10)** %frame_slot_1, align 8 %bboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %b) -; CHECK: %frame_slot_2 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; TYPED: %frame_slot_2 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; OPAQUE: %frame_slot_2 = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 %frame_slot_2 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 0) store {} addrspace(10)* %bboxed, {} addrspace(10)** %frame_slot_2, align 8 -; CHECK: call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) +; TYPED: call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) +; OPAQUE: call void @boxed_simple(ptr addrspace(10) %aboxed, ptr addrspace(10) %bboxed) call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) -; CHECK-NEXT: [[PREV_GCFRAME_PTR3:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1 -; CHECK-NEXT: [[PREV_GCFRAME_PTR4:%.*]] = load {} addrspace(10)*, {} addrspace(10)** [[PREV_GCFRAME_PTR3]], align 8, !tbaa !0 -; CHECK-NEXT: [[GCFRAME_SLOT4:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)** -; CHECK-NEXT: store {} addrspace(10)* [[PREV_GCFRAME_PTR4]], {} addrspace(10)** [[GCFRAME_SLOT4]], align 
8, !tbaa !0 +; TYPED-NEXT: [[PREV_GCFRAME_PTR3:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1 +; TYPED-NEXT: [[PREV_GCFRAME_PTR4:%.*]] = load {} addrspace(10)*, {} addrspace(10)** [[PREV_GCFRAME_PTR3]], align 8, !tbaa !0 +; TYPED-NEXT: [[GCFRAME_SLOT4:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)** +; TYPED-NEXT: store {} addrspace(10)* [[PREV_GCFRAME_PTR4]], {} addrspace(10)** [[GCFRAME_SLOT4]], align 8, !tbaa !0 + +; OPAQUE-NEXT: [[PREV_GCFRAME_PTR3:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 1 +; OPAQUE-NEXT: [[PREV_GCFRAME_PTR4:%.*]] = load ptr addrspace(10), ptr [[PREV_GCFRAME_PTR3]], align 8, !tbaa !0 +; OPAQUE-NEXT: store ptr addrspace(10) [[PREV_GCFRAME_PTR4]], ptr [[GCFRAME_SLOT]], align 8, !tbaa !0 call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) ; CHECK-NEXT: ret void ret void @@ -61,7 +80,8 @@ top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* -; CHECK: %v = call noalias nonnull dereferenceable({{[0-9]+}}) {} addrspace(10)* @ijl_gc_pool_alloc +; TYPED: %v = call noalias nonnull dereferenceable({{[0-9]+}}) {} addrspace(10)* @ijl_gc_pool_alloc +; OPAQUE: %v = call noalias nonnull dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_pool_alloc %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 8) %0 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* %1 = getelementptr {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %0, i64 -1 @@ -76,7 +96,8 @@ top: %ptls = call {}*** @julia.ptls_states() %ptls_i8 = bitcast {}*** %ptls to i8* ; CHECK: %0 = add i64 %size, 8 -; CHECK: %v = call noalias nonnull dereferenceable(8) {} addrspace(10)* @ijl_gc_alloc_typed(i8* %ptls_i8, i64 %0, i8* null) +; TYPED: %v = call noalias nonnull dereferenceable(8) {} addrspace(10)* @ijl_gc_alloc_typed(i8* %ptls_i8, i64 %0, i8* null) +; OPAQUE: %v = call noalias nonnull dereferenceable(8) ptr 
addrspace(10) @ijl_gc_alloc_typed(ptr %ptls_i8, i64 %0, ptr null) %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 %size) %0 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* %1 = getelementptr {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %0, i64 -1 diff --git a/test/llvmpasses/float16.ll b/test/llvmpasses/float16.ll index 668c6ff3dd261..ab1425ec12fa5 100644 --- a/test/llvmpasses/float16.ll +++ b/test/llvmpasses/float16.ll @@ -1,6 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -DemoteFloat16 -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='DemoteFloat16' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -DemoteFloat16 -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='DemoteFloat16' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -DemoteFloat16 -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='DemoteFloat16' -S %s | FileCheck %s define half @demotehalf_test(half %a, half %b) #0 { top: diff --git a/test/llvmpasses/gcroots.ll b/test/llvmpasses/gcroots.ll index eefd847bf68fa..7d29a9e3b1f9e 100644 --- a/test/llvmpasses/gcroots.ll +++ b/test/llvmpasses/gcroots.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*) @@ -16,17 +19,28 @@ top: ; CHECK-LABEL: @simple %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 -; CHECK: call {} addrspace(10)* @jl_box_int64 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 +; TYPED: call {} addrspace(10)* @jl_box_int64 +; OPAQUE: call ptr addrspace(10) @jl_box_int64 %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] -; CHECK-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]] +; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; TYPED-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]] + +; OPAQUE: 
[[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]] %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) ; CHECK-NEXT: %bboxed = ; Make sure the same gc slot isn't re-used -; CHECK-NOT: getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0]] -; CHECK: [[GEP1:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]] -; CHECK-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]] + +; TYPED-NOT: getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0]] +; TYPED: [[GEP1:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]] +; TYPED-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]] + +; OPAQUE-NOT: getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0]] +; OPAQUE: [[GEP1:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]] +; OPAQUE-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]] + ; CHECK-NEXT: call void @boxed_simple call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) @@ -37,7 +51,8 @@ define void @leftover_alloca({} addrspace(10)* %a) { ; If this pass encounters an alloca, it'll just sink it into the gcframe, ; relying on mem2reg to catch simple cases such as this earlier ; CHECK-LABEL: @leftover_alloca -; CHECK: %var = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe +; TYPED: %var = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe +; OPAQUE: %var = getelementptr inbounds ptr addrspace(10), ptr %gcframe %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %var = alloca {} addrspace(10)* @@ -55,11 +70,16 @@ define void @simple_union() { ; CHECK-LABEL: @simple_union %pgcstack = call {}*** @julia.get_pgcstack() %ptls = 
call {}*** @julia.ptls_states() -; CHECK: %a = call { {} addrspace(10)*, i8 } @union_ret() +; TYPED: %a = call { {} addrspace(10)*, i8 } @union_ret() +; OPAQUE: %a = call { ptr addrspace(10), i8 } @union_ret() %a = call { {} addrspace(10)*, i8 } @union_ret() -; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] -; CHECK-NEXT: [[EXTRACT:%.*]] = extractvalue { {} addrspace(10)*, i8 } %a, 0 -; CHECK-NEXT: store {} addrspace(10)* [[EXTRACT]], {} addrspace(10)** [[GEP0]] +; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; TYPED-NEXT: [[EXTRACT:%.*]] = extractvalue { {} addrspace(10)*, i8 } %a, 0 +; TYPED-NEXT: store {} addrspace(10)* [[EXTRACT]], {} addrspace(10)** [[GEP0]] + +; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; OPAQUE-NEXT: [[EXTRACT:%.*]] = extractvalue { ptr addrspace(10), i8 } %a, 0 +; OPAQUE-NEXT: store ptr addrspace(10) [[EXTRACT]], ptr [[GEP0]] call void @union_arg({{} addrspace(10)*, i8} %a) ret void } @@ -81,7 +101,8 @@ define void @select_simple(i64 %a, i64 %b) { define void @phi_simple(i64 %a, i64 %b) { top: ; CHECK-LABEL: @phi_simple -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %cmp = icmp eq i64 %a, %b @@ -94,8 +115,11 @@ blabel: br label %common common: %phi = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ] -; CHECK: [[GEP:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; CHECK: store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP]] +; TYPED: [[GEP:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; TYPED: store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP]] + 
+; OPAQUE: [[GEP:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 +; OPAQUE: store ptr addrspace(10) %phi, ptr [[GEP]] call void @one_arg_boxed({} addrspace(10)* %phi) ret void } @@ -104,7 +128,8 @@ declare void @one_arg_decayed(i64 addrspace(12)*) define void @select_lift(i64 %a, i64 %b) { ; CHECK-LABEL: @select_lift -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) @@ -112,7 +137,8 @@ define void @select_lift(i64 %a, i64 %b) { %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) %bdecayed = addrspacecast {} addrspace(10)* %bboxed to i64 addrspace(12)* %cmp = icmp eq i64 %a, %b -; CHECK: %gclift = select i1 %cmp, {} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed +; TYPED: %gclift = select i1 %cmp, {} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed +; OPAQUE: %gclift = select i1 %cmp, ptr addrspace(10) %aboxed, ptr addrspace(10) %bboxed %selectb = select i1 %cmp, i64 addrspace(12)* %adecayed, i64 addrspace(12)* %bdecayed call void @one_arg_decayed(i64 addrspace(12)* %selectb) ret void @@ -121,7 +147,8 @@ define void @select_lift(i64 %a, i64 %b) { define void @phi_lift(i64 %a, i64 %b) { top: ; CHECK-LABEL: @phi_lift -; CHECK: %gclift = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ], [ %gclift, %common ] +; TYPED: %gclift = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ], [ %gclift, %common ] +; OPAQUE: %gclift = phi ptr addrspace(10) [ %aboxed, %alabel ], [ %bboxed, %blabel ], [ %gclift, %common ] %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %cmp = icmp eq i64 %a, %b @@ -150,7 +177,8 @@ top: br i1 %cmp, label %alabel, label %blabel alabel: %u = call { {} addrspace(10)*, i8 } @union_ret() -; CHECK: 
%aboxed = extractvalue { {} addrspace(10)*, i8 } %u, 0 +; TYPED: %aboxed = extractvalue { {} addrspace(10)*, i8 } %u, 0 +; OPAQUE: %aboxed = extractvalue { ptr addrspace(10), i8 } %u, 0 %aboxed = extractvalue { {} addrspace(10)*, i8 } %u, 0 %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)* ; CHECK: br label %common @@ -160,7 +188,8 @@ blabel: %bdecayed = addrspacecast {} addrspace(10)* %bboxed to i64 addrspace(12)* br label %common common: -; CHECK: %gclift = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ] +; TYPED: %gclift = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ] +; OPAQUE: %gclift = phi ptr addrspace(10) [ %aboxed, %alabel ], [ %bboxed, %blabel ] %phi = phi i64 addrspace(12)* [ %adecayed, %alabel ], [ %bdecayed, %blabel ] call void @one_arg_decayed(i64 addrspace(12)* %phi) ret void @@ -169,7 +198,8 @@ common: define void @live_if_live_out(i64 %a, i64 %b) { ; CHECK-LABEL: @live_if_live_out top: -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() ; The failure case is failing to realize that `aboxed` is live across the first @@ -187,25 +217,34 @@ succ: ; safepoint define {} addrspace(10)* @ret_use(i64 %a, i64 %b) { ; CHECK-LABEL: @ret_use -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: store {} addrspace(10)* %aboxed +; TYPED: store {} addrspace(10)* %aboxed +; OPAQUE: store ptr addrspace(10) %aboxed %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) ret {} addrspace(10)* %aboxed } define {{} addrspace(10)*, i8} @ret_use_struct() { ; 
CHECK-LABEL: @ret_use_struct -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() -; CHECK: %aunion = call { {} addrspace(10)*, i8 } @union_ret() +; TYPED: %aunion = call { {} addrspace(10)*, i8 } @union_ret() +; OPAQUE: %aunion = call { ptr addrspace(10), i8 } @union_ret() %aunion = call { {} addrspace(10)*, i8 } @union_ret() -; CHECK-DAG: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] -; CHECK-DAG: [[EXTRACT:%.*]] = extractvalue { {} addrspace(10)*, i8 } %aunion, 0 -; CHECK-NEXT: store {} addrspace(10)* [[EXTRACT]], {} addrspace(10)** [[GEP0]] +; TYPED-DAG: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; TYPED-DAG: [[EXTRACT:%.*]] = extractvalue { {} addrspace(10)*, i8 } %aunion, 0 +; TYPED-NEXT: store {} addrspace(10)* [[EXTRACT]], {} addrspace(10)** [[GEP0]] + +; OPAQUE-DAG: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; OPAQUE-DAG: [[EXTRACT:%.*]] = extractvalue { ptr addrspace(10), i8 } %aunion, 0 +; OPAQUE-NEXT: store ptr addrspace(10) [[EXTRACT]], ptr [[GEP0]] + ; CHECK-NEXT: call void @jl_safepoint() call void @jl_safepoint() ret {{} addrspace(10)*, i8} %aunion @@ -234,23 +273,27 @@ top: define void @global_ref() { ; CHECK-LABEL: @global_ref -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %loaded = load {} addrspace(10)*, {} addrspace(10)** getelementptr ({} addrspace(10)*, {} addrspace(10)** inttoptr (i64 140540744325952 to {} addrspace(10)**), i64 1) -; CHECK: store {} addrspace(10)* %loaded, {} 
addrspace(10)** +; TYPED: store {} addrspace(10)* %loaded, {} addrspace(10)** +; OPAQUE: store ptr addrspace(10) %loaded, ptr call void @one_arg_boxed({} addrspace(10)* %loaded) ret void } define {} addrspace(10)* @no_redundant_rerooting(i64 %a, i1 %cond) { ; CHECK-LABEL: @no_redundant_rerooting -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: store {} addrspace(10)* %aboxed +; TYPED: store {} addrspace(10)* %aboxed +; OPAQUE: store ptr addrspace(10) %aboxed ; CHECK-NEXT: call void @jl_safepoint() call void @jl_safepoint() br i1 %cond, label %blocka, label %blockb @@ -270,12 +313,14 @@ declare void @llvm.memcpy.p064.p10i8.i64(i64*, i8 addrspace(10)*, i64, i32, i1) define void @memcpy_use(i64 %a, i64 *%aptr) { ; CHECK-LABEL: @memcpy_use -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: store {} addrspace(10)* %aboxed +; TYPED: store {} addrspace(10)* %aboxed +; OPAQUE: store ptr addrspace(10) %aboxed call void @jl_safepoint() %acast = bitcast {} addrspace(10)* %aboxed to i8 addrspace(10)* call void @llvm.memcpy.p064.p10i8.i64(i64* %aptr, i8 addrspace(10)* %acast, i64 8, i32 1, i1 false) @@ -287,20 +332,24 @@ declare void @llvm.julia.gc_preserve_end(token) define void @gc_preserve(i64 %a) { ; CHECK-LABEL: @gc_preserve -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls 
= call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: store {} addrspace(10)* %aboxed +; TYPED: store {} addrspace(10)* %aboxed +; OPAQUE: store ptr addrspace(10) %aboxed call void @jl_safepoint() %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %aboxed) %aboxed2 = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: store {} addrspace(10)* %aboxed2 +; TYPED: store {} addrspace(10)* %aboxed2 +; OPAQUE: store ptr addrspace(10) %aboxed2 call void @jl_safepoint() call void @llvm.julia.gc_preserve_end(token %tok) %aboxed3 = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: store {} addrspace(10)* %aboxed3 +; TYPED: store {} addrspace(10)* %aboxed3 +; OPAQUE: store ptr addrspace(10) %aboxed3 call void @jl_safepoint() call void @one_arg_boxed({} addrspace(10)* %aboxed2) call void @one_arg_boxed({} addrspace(10)* %aboxed3) @@ -309,23 +358,37 @@ top: define void @gc_preserve_vec([2 x <2 x {} addrspace(10)*>] addrspace(11)* nocapture nonnull readonly dereferenceable(16)) { ; CHECK-LABEL: @gc_preserve_vec -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 6 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 6 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 6 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %v = load [2 x <2 x {} addrspace(10)*>], [2 x <2 x {} addrspace(10)*>] addrspace(11)* %0, align 8 -; CHECK-DAG: [[EXTRACT11:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 0 -; CHECK-DAG: [[EXTRACT12:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 0 -; CHECK-DAG: [[EXTRACT21:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 1 -; CHECK-DAG: [[EXTRACT22:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 1 -; CHECK-DAG: [[V11:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT11]], i32 0 -; CHECK-DAG: [[V12:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT12]], i32 1 -; CHECK-DAG: [[V21:%.*]] = 
extractelement <2 x {} addrspace(10)*> [[EXTRACT21]], i32 0 -; CHECK-DAG: [[V22:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT22]], i32 1 -; CHECK-DAG: store {} addrspace(10)* [[V11]] -; CHECK-DAG: store {} addrspace(10)* [[V12]] -; CHECK-DAG: store {} addrspace(10)* [[V21]] -; CHECK-DAG: store {} addrspace(10)* [[V22]] +; TYPED-DAG: [[EXTRACT11:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 0 +; TYPED-DAG: [[EXTRACT12:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 0 +; TYPED-DAG: [[EXTRACT21:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 1 +; TYPED-DAG: [[EXTRACT22:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 1 +; TYPED-DAG: [[V11:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT11]], i32 0 +; TYPED-DAG: [[V12:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT12]], i32 1 +; TYPED-DAG: [[V21:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT21]], i32 0 +; TYPED-DAG: [[V22:%.*]] = extractelement <2 x {} addrspace(10)*> [[EXTRACT22]], i32 1 +; TYPED-DAG: store {} addrspace(10)* [[V11]] +; TYPED-DAG: store {} addrspace(10)* [[V12]] +; TYPED-DAG: store {} addrspace(10)* [[V21]] +; TYPED-DAG: store {} addrspace(10)* [[V22]] + +; OPAQUE-DAG: [[EXTRACT11:%.*]] = extractvalue [2 x <2 x ptr addrspace(10)>] %v, 0 +; OPAQUE-DAG: [[EXTRACT12:%.*]] = extractvalue [2 x <2 x ptr addrspace(10)>] %v, 0 +; OPAQUE-DAG: [[EXTRACT21:%.*]] = extractvalue [2 x <2 x ptr addrspace(10)>] %v, 1 +; OPAQUE-DAG: [[EXTRACT22:%.*]] = extractvalue [2 x <2 x ptr addrspace(10)>] %v, 1 +; OPAQUE-DAG: [[V11:%.*]] = extractelement <2 x ptr addrspace(10)> [[EXTRACT11]], i32 0 +; OPAQUE-DAG: [[V12:%.*]] = extractelement <2 x ptr addrspace(10)> [[EXTRACT12]], i32 1 +; OPAQUE-DAG: [[V21:%.*]] = extractelement <2 x ptr addrspace(10)> [[EXTRACT21]], i32 0 +; OPAQUE-DAG: [[V22:%.*]] = extractelement <2 x ptr addrspace(10)> [[EXTRACT22]], i32 1 +; OPAQUE-DAG: store ptr addrspace(10) [[V11]] +; OPAQUE-DAG: store ptr addrspace(10) [[V12]] +; 
OPAQUE-DAG: store ptr addrspace(10) [[V21]] +; OPAQUE-DAG: store ptr addrspace(10) [[V22]] %tok = call token (...) @llvm.julia.gc_preserve_begin([2 x <2 x {} addrspace(10)*>] %v, i64 addrspace(10)* null, {}*** %ptls) call void @jl_safepoint() ret void @@ -365,7 +428,8 @@ declare {} addrspace(10) *@alloc() define {} addrspace(10)* @vec_loadobj() { ; CHECK-LABEL: @vec_loadobj -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %v4 = call {}*** @julia.ptls_states() %obj = call {} addrspace(10) *@alloc() @@ -379,7 +443,8 @@ define {} addrspace(10)* @vec_loadobj() { define {} addrspace(10)* @vec_gep() { ; CHECK-LABEL: @vec_gep -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %v4 = call {}*** @julia.ptls_states() %obj = call {} addrspace(10) *@alloc() @@ -394,7 +459,8 @@ define {} addrspace(10)* @vec_gep() { declare i1 @check_property({} addrspace(10)* %val) define void @loopyness(i1 %cond1, {} addrspace(10) *%arg) { ; CHECK-LABEL: @loopyness -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -407,15 +473,21 @@ header: a: ; This needs a store ; CHECK-LABEL: a: -; CHECK: [[GEP1:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] -; CHECK: store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP1]] +; TYPED: [[GEP1:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; TYPED: store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP1]] + +; OPAQUE: [[GEP1:%.*]] = getelementptr 
inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]] +; OPAQUE: store ptr addrspace(10) %phi, ptr [[GEP1]] call void @one_arg_boxed({} addrspace(10)* %phi) br label %latch latch: ; This as well in case we went the other path -; CHECK: [[GEP2:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0]] -; CHECK: store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP2]] +; TYPED: [[GEP2:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 [[GEPSLOT0]] +; TYPED: store {} addrspace(10)* %phi, {} addrspace(10)** [[GEP2]] + +; OPAQUE: [[GEP2:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 [[GEPSLOT0]] +; OPAQUE: store ptr addrspace(10) %phi, ptr [[GEP2]] %obj = call {} addrspace(10)* @alloc() %cond = call i1 @check_property({} addrspace(10)* %phi) br i1 %cond, label %exit, label %header @@ -426,7 +498,8 @@ exit: define {} addrspace(10)* @phi_union(i1 %cond) { ; CHECK-LABEL: @phi_union -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -451,7 +524,8 @@ join: define {} addrspace(10)* @select_union(i1 %cond) { ; CHECK-LABEL: @select_union -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -467,7 +541,8 @@ top: define i8 @simple_arrayptr() { ; CHECK-LABEL: @simple_arrayptr -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -484,7 +559,8 @@ top: define {} addrspace(10)* 
@vecstoreload(<2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecstoreload -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -498,7 +574,8 @@ top: define void @vecphi(i1 %cond, <2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecphi -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -524,7 +601,8 @@ common: define i8 @phi_arrayptr(i1 %cond) { ; CHECK-LABEL: @phi_arrayptr -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -563,7 +641,8 @@ common: define void @vecselect(i1 %cond, <2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecselect -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -580,13 +659,15 @@ top: define void @vecselect_lift(i1 %cond, <2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecselect_lift -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg %decayed = addrspacecast <2 x {} addrspace(10)*> %loaded to <2 x i64 addrspace(12)*> call void @jl_safepoint() -; CHECK: %gclift = select i1 %cond, {} 
addrspace(10)* null, {} addrspace(10)* %{{[0-9]+}} +; TYPED: %gclift = select i1 %cond, {} addrspace(10)* null, {} addrspace(10)* %{{[0-9]+}} +; OPAQUE: %gclift = select i1 %cond, ptr addrspace(10) null, ptr addrspace(10) %{{[0-9]+}} %select = select i1 %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %decayed call void @jl_safepoint() %el1 = extractelement <2 x i64 addrspace(12)*> %select, i32 0 @@ -598,13 +679,15 @@ define void @vecselect_lift(i1 %cond, <2 x {} addrspace(10)*> *%arg) { define void @vecvecselect_lift(<2 x i1> %cond, <2 x {} addrspace(10)*> *%arg) { ; CHECK-LABEL: @vecvecselect_lift -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg %decayed = addrspacecast <2 x {} addrspace(10)*> %loaded to <2 x i64 addrspace(12)*> call void @jl_safepoint() -; CHECK: %gclift = select i1 %{{[0-9]+}}, {} addrspace(10)* null, {} addrspace(10)* %{{[0-9]+}} +; TYPED: %gclift = select i1 %{{[0-9]+}}, {} addrspace(10)* null, {} addrspace(10)* %{{[0-9]+}} +; OPAQUE: %gclift = select i1 %{{[0-9]+}}, ptr addrspace(10) null, ptr addrspace(10) %{{[0-9]+}} %select = select <2 x i1> %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %decayed call void @jl_safepoint() %el1 = extractelement <2 x i64 addrspace(12)*> %select, i32 0 @@ -616,14 +699,16 @@ define void @vecvecselect_lift(<2 x i1> %cond, <2 x {} addrspace(10)*> *%arg) { define void @vecscalarselect_lift(<2 x i1> %cond, i64 %a) { ; CHECK-LABEL: @vecscalarselect_lift -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed 
= call {} addrspace(10)* @jl_box_int64(i64 signext %a) %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)* %avec = getelementptr i64, i64 addrspace(12)* %adecayed, <2 x i32> zeroinitializer call void @jl_safepoint() -; CHECK: %gclift = select i1 %{{[0-9]+}}, {} addrspace(10)* null, {} addrspace(10)* %aboxed +; TYPED: %gclift = select i1 %{{[0-9]+}}, {} addrspace(10)* null, {} addrspace(10)* %aboxed +; OPAQUE: %gclift = select i1 %{{[0-9]+}}, ptr addrspace(10) null, ptr addrspace(10) %aboxed %select = select <2 x i1> %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %avec call void @jl_safepoint() %el1 = extractelement <2 x i64 addrspace(12)*> %select, i32 0 @@ -635,14 +720,16 @@ define void @vecscalarselect_lift(<2 x i1> %cond, i64 %a) { define void @scalarvecselect_lift(i1 %cond, i64 %a) { ; CHECK-LABEL: @scalarvecselect_lift -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)* %avec = getelementptr i64, i64 addrspace(12)* %adecayed, <2 x i32> zeroinitializer call void @jl_safepoint() -; CHECK: %gclift = select i1 %cond, {} addrspace(10)* null, {} addrspace(10)* %aboxed +; TYPED: %gclift = select i1 %cond, {} addrspace(10)* null, {} addrspace(10)* %aboxed +; OPAQUE: %gclift = select i1 %cond, ptr addrspace(10) null, ptr addrspace(10) %aboxed %select = select i1 %cond, <2 x i64 addrspace(12)*> zeroinitializer, <2 x i64 addrspace(12)*> %avec call void @jl_safepoint() %el1 = extractelement <2 x i64 addrspace(12)*> %select, i32 0 @@ -654,7 +741,8 @@ define void @scalarvecselect_lift(i1 %cond, i64 %a) { define i8 @select_arrayptr(i1 %cond) { ; CHECK-LABEL: @select_arrayptr -; CHECK: %gcframe = alloca 
{} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -681,9 +769,14 @@ top: define i8 @vector_arrayptrs() { ; CHECK-LABEL: @vector_arrayptrs -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 -; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 + +; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] + +; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 +; OPAQUE: store ptr addrspace(10) %obj1, ptr [[GEP0]] ; top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -702,10 +795,16 @@ declare <2 x i8 addrspace(13)*> @llvm.masked.load.v2p13i8.p11v2p13i8 (<2 x i8 ad define i8 @masked_arrayptrs() { ; CHECK-LABEL: @masked_arrayptrs -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 -; CHECK: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.load.v2p13i8.p11v2p13i8(<2 x i8 addrspace(13)*> addrspace(11)* %arrayptrptr, i32 16, <2 x i1> , <2 x i8 addrspace(13)*> zeroinitializer) -; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 + +; TYPED: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.load.v2p13i8.p11v2p13i8(<2 x i8 addrspace(13)*> addrspace(11)* %arrayptrptr, i32 16, <2 x i1> , <2 x i8 addrspace(13)*> zeroinitializer) +; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} 
addrspace(10)** %gcframe, i32 2 +; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] + +; OPAQUE: %arrayptrs = call <2 x ptr addrspace(13)> @llvm.masked.load.v2p13.p11(ptr addrspace(11) %arrayptrptr, i32 16, <2 x i1> , <2 x ptr addrspace(13)> zeroinitializer) +; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 +; OPAQUE: store ptr addrspace(10) %obj1, ptr [[GEP0]] ; top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -724,10 +823,16 @@ declare <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8 (<2 x i8 define i8 @gather_arrayptrs() { ; CHECK-LABEL: @gather_arrayptrs -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 -; CHECK: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8(<2 x i8 addrspace(13)* addrspace(11)*> %arrayptrptrs, i32 16, <2 x i1> , <2 x i8 addrspace(13)*> zeroinitializer) -; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 + +; TYPED: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8(<2 x i8 addrspace(13)* addrspace(11)*> %arrayptrptrs, i32 16, <2 x i1> , <2 x i8 addrspace(13)*> zeroinitializer) +; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] + +; OPAQUE: %arrayptrs = call <2 x ptr addrspace(13)> @llvm.masked.gather.v2p13.v2p11(<2 x ptr addrspace(11)> %arrayptrptrs, i32 16, <2 x i1> , <2 x ptr addrspace(13)> zeroinitializer) +; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 +; OPAQUE: store ptr addrspace(10) %obj1, ptr [[GEP0]] ; top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -745,10 +850,16 @@ top: define i8 @gather_arrayptrs_alltrue() { 
; CHECK-LABEL: @gather_arrayptrs -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 -; CHECK: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8(<2 x i8 addrspace(13)* addrspace(11)*> %arrayptrptrs, i32 16, <2 x i1> , <2 x i8 addrspace(13)*> zeroinitializer) -; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 + +; TYPED: %arrayptrs = call <2 x i8 addrspace(13)*> @llvm.masked.gather.v2p13i8.v2p11p13i8(<2 x i8 addrspace(13)* addrspace(11)*> %arrayptrptrs, i32 16, <2 x i1> , <2 x i8 addrspace(13)*> zeroinitializer) +; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; TYPED: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]] + +; OPAQUE: %arrayptrs = call <2 x ptr addrspace(13)> @llvm.masked.gather.v2p13.v2p11(<2 x ptr addrspace(11)> %arrayptrptrs, i32 16, <2 x i1> , <2 x ptr addrspace(13)> zeroinitializer) +; OPAQUE: [[GEP0:%.*]] = getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 +; OPAQUE: store ptr addrspace(10) %obj1, ptr [[GEP0]] ; top: %pgcstack = call {}*** @julia.get_pgcstack() @@ -766,9 +877,14 @@ top: define i8 @lost_select_decayed(i1 %arg1) { ; CHECK-LABEL: @lost_select_decayed -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 -; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 -; CHECK: store {} addrspace(10)* [[SOMETHING:%.*]], {} addrspace(10)** [[GEP0]] +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 + +; TYPED: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2 +; TYPED: store {} addrspace(10)* [[SOMETHING:%.*]], {} addrspace(10)** [[GEP0]] + +; OPAQUE: [[GEP0:%.*]] = 
getelementptr inbounds ptr addrspace(10), ptr %gcframe, i32 2 +; OPAQUE: store ptr addrspace(10) [[SOMETHING:%.*]], ptr [[GEP0]] top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() diff --git a/test/llvmpasses/julia-licm-fail.ll b/test/llvmpasses/julia-licm-fail.ll index 250ad620b05e6..464a96f1413d9 100644 --- a/test/llvmpasses/julia-licm-fail.ll +++ b/test/llvmpasses/julia-licm-fail.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE ; COM: This file contains functions that should not trigger allocations to be hoisted out of loops @@ -22,9 +25,11 @@ preheader: br label %loop ; CHECK: loop: loop: -; CHECK-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag) %alloc = call noalias nonnull {} addrspace(10)* 
@julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) -; CHECK-NEXT: %ignore = call {} addrspace(10)* @escape({} addrspace(10)* %alloc) +; TYPED-NEXT: %ignore = call {} addrspace(10)* @escape({} addrspace(10)* %alloc) +; OPAQUE-NEXT: %ignore = call ptr addrspace(10) @escape(ptr addrspace(10) %alloc) %ignore = call {} addrspace(10)* @escape({} addrspace(10)* %alloc) br i1 %ret, label %return, label %loop return: @@ -46,11 +51,14 @@ preheader: br label %loop ; CHECK: loop: loop: -; CHECK-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag) %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) -; CHECK-NEXT: %cast = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)* +; TYPED-NEXT: %cast = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)* +; OPAQUE-NEXT: %cast = addrspacecast ptr addrspace(10) %alloc to ptr addrspace(11) %cast = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)* -; CHECK-NEXT: %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %cast) +; TYPED-NEXT: %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %cast) +; OPAQUE-NEXT: %ptr = call nonnull ptr @julia.pointer_from_objref(ptr addrspace(11) %cast) %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %cast) br i1 %ret, label %return, label %loop return: diff --git a/test/llvmpasses/julia-licm-missed.ll b/test/llvmpasses/julia-licm-missed.ll index 977b8e2a787f9..941b2d072a1cc 100644 --- a/test/llvmpasses/julia-licm-missed.ll +++ b/test/llvmpasses/julia-licm-missed.ll @@ -1,7 
+1,10 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE ; COM: This file contains functions that currently do not trigger allocations to be hoisted out of loops ; COM: i.e. 
they are missed optimizations @@ -26,13 +29,17 @@ preheader: br label %loop ; CHECK: loop: loop: -; CHECK-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag) %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) -; CHECK-NEXT: %derived = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)* +; TYPED-NEXT: %derived = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)* +; OPAQUE-NEXT: %derived = addrspacecast ptr addrspace(10) %alloc to ptr addrspace(11) %derived = addrspacecast {} addrspace(10)* %alloc to {} addrspace(11)* -; CHECK-NEXT: %ptr = bitcast {} addrspace(11)* %derived to {} addrspace(10)* addrspace(11)* +; TYPED-NEXT: %ptr = bitcast {} addrspace(11)* %derived to {} addrspace(10)* addrspace(11)* +; OPAQUE-NEXT: %ptr = bitcast ptr addrspace(11) %derived to ptr addrspace(11) %ptr = bitcast {} addrspace(11)* %derived to {} addrspace(10)* addrspace(11)* -; CHECK-NEXT: store {} addrspace(10)* %obj, {} addrspace(10)* addrspace(11)* %ptr, align 8 +; TYPED-NEXT: store {} addrspace(10)* %obj, {} addrspace(10)* addrspace(11)* %ptr, align 8 +; OPAQUE-NEXT: store ptr addrspace(10) %obj, ptr addrspace(11) %ptr, align 8 store {} addrspace(10)* %obj, {} addrspace(10)* addrspace(11)* %ptr, align 8 br i1 %ret, label %return, label %loop return: @@ -56,12 +63,14 @@ preheader: br label %loop ; CHECK: loop: loop: -; CHECK-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* 
@julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag) %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) br label %other ; CHECK: other: other: -; CHECK-NEXT: %phi = phi {} addrspace(10)* [ %alloc, %loop ] +; TYPED-NEXT: %phi = phi {} addrspace(10)* [ %alloc, %loop ] +; OPAQUE-NEXT: %phi = phi ptr addrspace(10) [ %alloc, %loop ] %phi = phi {} addrspace(10)* [ %alloc, %loop ] br i1 %ret, label %return, label %loop return: diff --git a/test/llvmpasses/julia-licm.ll b/test/llvmpasses/julia-licm.ll index 6fc6f85de7c26..8a39f5e50aff5 100644 --- a/test/llvmpasses/julia-licm.ll +++ b/test/llvmpasses/julia-licm.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE @tag = external addrspace(10) global {}, align 16 @@ -28,16 +31,23 @@ L3: ; preds = %L3.loopexit, %top L4: ; preds = %top %current_task112 = getelementptr inbounds {}**, {}*** %1, i64 -12 %current_task1 = bitcast {}*** 
%current_task112 to {}** - ; CHECK: %3 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 8, {} addrspace(10)* @tag) - ; CHECK-NEXT: %4 = bitcast {} addrspace(10)* %3 to i8 addrspace(10)* - ; CHECK-NEXT: call void @llvm.memset.p10i8.i64(i8 addrspace(10)* align {{[0-9]+}} %4, i8 0, i64 8, i1 false) + ; TYPED: %3 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 8, {} addrspace(10)* @tag) + ; TYPED-NEXT: %4 = bitcast {} addrspace(10)* %3 to i8 addrspace(10)* + ; TYPED-NEXT: call void @llvm.memset.p10i8.i64(i8 addrspace(10)* align {{[0-9]+}} %4, i8 0, i64 8, i1 false) + + ; OPAQUE: %3 = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task1, i64 8, ptr addrspace(10) @tag) + ; OPAQUE-NEXT: call void @llvm.memset.p10.i64(ptr addrspace(10) align {{[0-9]+}} %3, i8 0, i64 8, i1 false) + ; CHECK-NEXT: br label %L22 br label %L22 L22: ; preds = %L4, %L22 %value_phi5 = phi i64 [ 1, %L4 ], [ %5, %L22 ] - ; CHECK: %value_phi5 = phi i64 [ 1, %L4 ], [ %6, %L22 ] - ; CHECK-NEXT %5 = bitcast {} addrspace(10)* %3 to i64 addrspace(10)* + ; TYPED: %value_phi5 = phi i64 [ 1, %L4 ], [ %6, %L22 ] + ; TYPED-NEXT %5 = bitcast {} addrspace(10)* %3 to i64 addrspace(10)* + + ; OPAQUE: %value_phi5 = phi i64 [ 1, %L4 ], [ %5, %L22 ] + ; OPAQUE-NEXT %4 = bitcast ptr addrspace(10) %3 to ptr addrspace(10) %3 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 8, {} addrspace(10)* @tag) #1 %4 = bitcast {} addrspace(10)* %3 to i64 addrspace(10)* store i64 %value_phi5, i64 addrspace(10)* %4, align 8, !tbaa !2 @@ -56,9 +66,13 @@ top: br label %preheader ; CHECK: preheader: preheader: -; CHECK-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) -; CHECK-NEXT: [[casted:%.*]] = bitcast {} addrspace(10)* %alloc to i8 addrspace(10)* -; CHECK-NEXT: call void 
@llvm.memset.p10i8.i64(i8 addrspace(10)* align {{[0-9]+}} [[casted]], i8 0, i64 8, i1 false) +; TYPED-NEXT: %alloc = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task, i64 8, {} addrspace(10)* @tag) +; TYPED-NEXT: [[casted:%.*]] = bitcast {} addrspace(10)* %alloc to i8 addrspace(10)* +; TYPED-NEXT: call void @llvm.memset.p10i8.i64(i8 addrspace(10)* align {{[0-9]+}} [[casted]], i8 0, i64 8, i1 false) + +; OPAQUE-NEXT: %alloc = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr nonnull %current_task, i64 8, ptr addrspace(10) @tag) +; OPAQUE-NEXT: call void @llvm.memset.p10.i64(ptr addrspace(10) align {{[0-9]+}} %alloc, i8 0, i64 8, i1 false) + ; CHECK-NEXT: br label %loop br label %loop loop: diff --git a/test/llvmpasses/late-lower-gc-addrspaces.ll b/test/llvmpasses/late-lower-gc-addrspaces.ll index 84a6da9f2554d..9849f432fb9a7 100644 --- a/test/llvmpasses/late-lower-gc-addrspaces.ll +++ b/test/llvmpasses/late-lower-gc-addrspaces.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE target triple = "amdgcn-amd-amdhsa" target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" @@ -19,24 +22,39 @@ declare i32 @rooting_callee({} addrspace(12)*, {} addrspace(12)*) define void @gc_frame_lowering(i64 %a, i64 %b) { top: ; CHECK-LABEL: @gc_frame_lowering -; CHECK: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) -; CHECK: %pgcstack = call {}*** @julia.get_pgcstack() +; TYPED: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) +; TYPED: %pgcstack = call {}*** @julia.get_pgcstack() + +; OPAQUE: %gcframe = call ptr @julia.new_gc_frame(i32 2) +; OPAQUE: %pgcstack = call ptr @julia.get_pgcstack() %pgcstack = call {}*** @julia.get_pgcstack() -; CHECK-NEXT: call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2) -; CHECK-NEXT: call {} addrspace(10)* @jl_box_int64 +; TYPED-NEXT: call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2) +; 
TYPED-NEXT: call {} addrspace(10)* @jl_box_int64 + +; OPAQUE-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2) +; OPAQUE-NEXT: call ptr addrspace(10) @jl_box_int64 %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: [[GEP0:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]) -; CHECK-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]] +; TYPED: [[GEP0:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]) +; TYPED-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]] + +; OPAQUE: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]) +; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]] %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) ; CHECK-NEXT: %bboxed = ; Make sure the same gc slot isn't re-used -; CHECK-NOT: call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0]]) -; CHECK: [[GEP1:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]]) -; CHECK-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]] +; TYPED-NOT: call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0]]) +; TYPED: [[GEP1:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]]) +; TYPED-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]] + +; OPAQUE-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]]) +; OPAQUE: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]]) +; OPAQUE-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]] + ; CHECK-NEXT: call void @boxed_simple call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) -; CHECK-NEXT: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) 
+; TYPED-NEXT: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) +; OPAQUE-NEXT: call void @julia.pop_gc_frame(ptr %gcframe) ret void } @@ -46,17 +64,25 @@ top: %pgcstack = call {}*** @julia.get_pgcstack() %0 = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; CHECK: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 -; CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 -; CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** -; CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* -; CHECK-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) -; CHECK-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* -; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 -; CHECK-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 +; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 +; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 +; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 +; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** +; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* +; TYPED-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) +; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* +; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 +; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 + +; OPAQUE: 
%current_task = getelementptr inbounds ptr, ptr %0, i64 -12 +; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 +; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; OPAQUE-NEXT: %v = call ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8) +; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 +; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) -; CHECK-NEXT: ret {} addrspace(10)* %v +; TYPED-NEXT: ret {} addrspace(10)* %v +; OPAQUE-NEXT: ret ptr addrspace(10) %v ret {} addrspace(10)* %v } @@ -71,23 +97,34 @@ top: %pgcstack = call {}*** @julia.get_pgcstack() %0 = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; CHECK: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 -; CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 -; CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** -; CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* -; CHECK-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) -; CHECK-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* -; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 -; CHECK-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 +; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 +; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 +; 
TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 +; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** +; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* +; TYPED-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) +; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* +; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 +; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 + +; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 +; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 +; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; OPAQUE-NEXT: %v = call ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8) +; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 +; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) -; CHECK-NEXT: %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* +; TYPED-NEXT: %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* +; OPAQUE-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10) %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* -; CHECK-NEXT: %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !7 +; TYPED-NEXT: %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !7 +; OPAQUE-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7 %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !invariant.load !1 -; CHECK-NEXT: store i64 
%loadedval, i64 addrspace(10)* %v64, align 8, !noalias !8 +; TYPED-NEXT: store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !8 +; OPAQUE-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8 store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !2 -; CHECK-NEXT: %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !tbaa !11, !range !7 +; TYPED-NEXT: %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !tbaa !11, !range !7 +; OPAQUE-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7 %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !tbaa !4 ; CHECK-NEXT: ret void ret void diff --git a/test/llvmpasses/late-lower-gc.ll b/test/llvmpasses/late-lower-gc.ll index 98c472771aaf9..36e581993c176 100644 --- a/test/llvmpasses/late-lower-gc.ll +++ b/test/llvmpasses/late-lower-gc.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s -check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s -check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE @tag = external addrspace(10) global {}, align 16 @@ -16,24 +19,39 @@ declare i32 @rooting_callee({} addrspace(12)*, {} addrspace(12)*) define void 
@gc_frame_lowering(i64 %a, i64 %b) { top: ; CHECK-LABEL: @gc_frame_lowering -; CHECK: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) -; CHECK: %pgcstack = call {}*** @julia.get_pgcstack() +; TYPED: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) +; TYPED: %pgcstack = call {}*** @julia.get_pgcstack() + +; OPAQUE: %gcframe = call ptr @julia.new_gc_frame(i32 2) +; OPAQUE: %pgcstack = call ptr @julia.get_pgcstack() %pgcstack = call {}*** @julia.get_pgcstack() -; CHECK-NEXT: call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2) -; CHECK-NEXT: call {} addrspace(10)* @jl_box_int64 +; TYPED-NEXT: call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2) +; TYPED-NEXT: call {} addrspace(10)* @jl_box_int64 + +; OPAQUE-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2) +; OPAQUE-NEXT: call ptr addrspace(10) @jl_box_int64 %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; CHECK: [[GEP0:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]) -; CHECK-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]] +; TYPED: [[GEP0:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0:[0-9]+]]) +; TYPED-NEXT: store {} addrspace(10)* %aboxed, {} addrspace(10)** [[GEP0]] + +; OPAQUE: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]) +; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]] %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) ; CHECK-NEXT: %bboxed = ; Make sure the same gc slot isn't re-used -; CHECK-NOT: call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0]]) -; CHECK: [[GEP1:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]]) -; CHECK-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]] +; TYPED-NOT: call {} addrspace(10)** 
@julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT0]]) +; TYPED: [[GEP1:%.*]] = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 [[GEPSLOT1:[0-9]+]]) +; TYPED-NEXT: store {} addrspace(10)* %bboxed, {} addrspace(10)** [[GEP1]] + +; OPAQUE-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]]) +; OPAQUE: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]]) +; OPAQUE-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]] + ; CHECK-NEXT: call void @boxed_simple call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) -; CHECK-NEXT: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) +; TYPED-NEXT: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) +; OPAQUE-NEXT: call void @julia.pop_gc_frame(ptr %gcframe) ret void } @@ -43,17 +61,25 @@ top: %pgcstack = call {}*** @julia.get_pgcstack() %0 = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; CHECK: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 -; CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 -; CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** -; CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* -; CHECK-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) -; CHECK-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* -; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 -; CHECK-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 +; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 +; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** 
%current_task, i64 16 +; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 +; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** +; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* +; TYPED-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) +; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* +; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 +; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 + +; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 +; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 +; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; OPAQUE-NEXT: %v = call ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8) +; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 +; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) -; CHECK-NEXT: ret {} addrspace(10)* %v +; TYPED-NEXT: ret {} addrspace(10)* %v +; OPAQUE-NEXT: ret ptr addrspace(10) %v ret {} addrspace(10)* %v } @@ -68,23 +94,34 @@ top: %pgcstack = call {}*** @julia.get_pgcstack() %0 = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; CHECK: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 -; CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 -; CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* 
[[ptls_load]] to {}** -; CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* -; CHECK-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) -; CHECK-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* -; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 -; CHECK-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 +; TYPED: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 +; TYPED-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 16 +; TYPED-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0 +; TYPED-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}** +; TYPED-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8* +; TYPED-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8) +; TYPED-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)* +; TYPED-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1 +; TYPED-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4 + +; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 +; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 +; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; OPAQUE-NEXT: %v = call ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8) +; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 +; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 %v = call noalias {} addrspace(10)* 
@julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) -; CHECK-NEXT: %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* +; TYPED-NEXT: %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* +; OPAQUE-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10) %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* -; CHECK-NEXT: %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !7 +; TYPED-NEXT: %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !7 +; OPAQUE-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7 %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !invariant.load !1 -; CHECK-NEXT: store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !8 +; TYPED-NEXT: store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !8 +; OPAQUE-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8 store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !2 -; CHECK-NEXT: %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !tbaa !11, !range !7 +; TYPED-NEXT: %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !tbaa !11, !range !7 +; OPAQUE-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7 %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !tbaa !4 ; CHECK-NEXT: ret void ret void @@ -161,13 +198,21 @@ define void @decayar([2 x {} addrspace(10)* addrspace(11)*] %ar) { } ; CHECK-LABEL: @decayar -; CHECK: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) -; CHECK: %1 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 1) -; CHECK: store {} addrspace(10)* %l0, {} addrspace(10)** %1, align 8 -; CHECK: %2 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 0) -; CHECK: store {} addrspace(10)* %l1, {} addrspace(10)** %2, align 8 -; CHECK: %r = call i32 @callee_root({} addrspace(10)* %l0, {} addrspace(10)* %l1) -; CHECK: call void 
@julia.pop_gc_frame({} addrspace(10)** %gcframe) +; TYPED: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2) +; TYPED: %1 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 1) +; TYPED: store {} addrspace(10)* %l0, {} addrspace(10)** %1, align 8 +; TYPED: %2 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 0) +; TYPED: store {} addrspace(10)* %l1, {} addrspace(10)** %2, align 8 +; TYPED: %r = call i32 @callee_root({} addrspace(10)* %l0, {} addrspace(10)* %l1) +; TYPED: call void @julia.pop_gc_frame({} addrspace(10)** %gcframe) + +; OPAQUE: %gcframe = call ptr @julia.new_gc_frame(i32 2) +; OPAQUE: %1 = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 1) +; OPAQUE: store ptr addrspace(10) %l0, ptr %1, align 8 +; OPAQUE: %2 = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0) +; OPAQUE: store ptr addrspace(10) %l1, ptr %2, align 8 +; OPAQUE: %r = call i32 @callee_root(ptr addrspace(10) %l0, ptr addrspace(10) %l1) +; OPAQUE: call void @julia.pop_gc_frame(ptr %gcframe) !0 = !{i64 0, i64 23} !1 = !{!1} diff --git a/test/llvmpasses/llvmcall.jl b/test/llvmpasses/llvmcall.jl index d39f64d5f839c..3e0df7a8885a7 100644 --- a/test/llvmpasses/llvmcall.jl +++ b/test/llvmpasses/llvmcall.jl @@ -1,7 +1,14 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0" + +# RUN: julia --startup-file=no %s %t +# RUN: cat %t/* | FileCheck %s --check-prefixes=CHECK,TYPED + +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1" + # RUN: julia --startup-file=no %s %t -# RUN: cat %t/* | FileCheck %s +# RUN: cat %t/* | FileCheck %s --check-prefixes=CHECK,OPAQUE include(joinpath("..", "testhelpers", "llvmpasses.jl")) @@ -28,7 +35,8 @@ emit(foo, Float16) # CHECK-NOT: { # CHECK-NOT: } # CHECK: define -# CHECK-SAME: nonnull {} addrspace(10)* @jfptr +# TYPED-SAME: nonnull {} addrspace(10)* @jfptr +# OPAQUE-SAME: nonnull ptr addrspace(10) @jfptr # CHECK-SAME: { # CHECK: define @@ -46,7 +54,8 @@ emit(foo, NTuple{2, Float16}) # CHECK-NOT: { # CHECK-NOT: } # CHECK: define -# CHECK-SAME: nonnull {} addrspace(10)* @jfptr +# TYPED-SAME: nonnull {} addrspace(10)* @jfptr +# OPAQUE-SAME: nonnull ptr addrspace(10) @jfptr # CHECK-SAME: { # CHECK: define @@ -64,16 +73,20 @@ emit(foo, NTuple{2, VecElement{Float16}}) # CHECK-NOT: { # CHECK-NOT: } # CHECK: define -# CHECK-SAME: nonnull {} addrspace(10)* @jfptr +# TYPED-SAME: nonnull {} addrspace(10)* @jfptr +# OPAQUE-SAME: nonnull ptr addrspace(10) @jfptr # CHECK-SAME: { # CHECK: define -# CHECK-SAME: i8 addrspace(3)* @julia_foo +# TYPED-SAME: i8 addrspace(3)* @julia_foo +# OPAQUE-SAME: ptr addrspace(3) @julia_foo # CHECK-SAME: { # CHECK-NOT: define -# CHECK: [[FOO_RET:%.*]] call i8 addrspace(3)* @foo(i8 addrspace(3)* [[FOO_ARG:%.*]]) +# TYPED: [[FOO_RET:%.*]] call i8 addrspace(3)* @foo(i8 addrspace(3)* [[FOO_ARG:%.*]]) +# OPAQUE: [[FOO_RET:%.*]] call ptr addrspace(3) @foo(ptr addrspace(3) [[FOO_ARG:%.*]]) # CHECK-NOT: define -# CHECK: ret i8 addrspace(3)* +# TYPED: ret i8 addrspace(3)* +# OPAQUE: ret ptr addrspace(3) # CHECK-NOT: define # CHECK: } emit(foo, Core.LLVMPtr{Float32, 3}) @@ -82,7 +95,8 @@ emit(foo, Core.LLVMPtr{Float32, 3}) # CHECK-NOT: { # CHECK-NOT: } # CHECK: define -# CHECK-SAME: nonnull {} 
addrspace(10)* @jfptr +# TYPED-SAME: nonnull {} addrspace(10)* @jfptr +# OPAQUE-SAME: nonnull ptr addrspace(10) @jfptr # CHECK-SAME: { # CHECK: define @@ -100,12 +114,14 @@ emit(foo, Foo) # CHECK-NOT: { # CHECK-NOT: } # CHECK: define -# CHECK-SAME: nonnull {} addrspace(10)* @jfptr +# TYPED-SAME: nonnull {} addrspace(10)* @jfptr +# OPAQUE-SAME: nonnull ptr addrspace(10) @jfptr # CHECK-SAME: { # CHECK: define # CHECK-SAME: <2 x half> @julia_bar -# CHECK-SAME: [2 x half] +# TYPED-SAME: [2 x half] +# OPAQUE-SAME: ptr # CHECK-SAME: { # CHECK-NOT: define # CHECK: ret <2 x half> @@ -117,5 +133,6 @@ emit(bar, NTuple{2, Float16}) # CHECK-NOT: { # CHECK-NOT: } # CHECK: define -# CHECK-SAME: nonnull {} addrspace(10)* @jfptr +# TYPED-SAME: nonnull {} addrspace(10)* @jfptr +# OPAQUE-SAME: nonnull ptr addrspace(10) @jfptr # CHECK-SAME: { diff --git a/test/llvmpasses/lower-handlers-addrspaces.ll b/test/llvmpasses/lower-handlers-addrspaces.ll index fcc4dc0114c21..744bf09082646 100644 --- a/test/llvmpasses/lower-handlers-addrspaces.ll +++ b/test/llvmpasses/lower-handlers-addrspaces.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LowerExcHandlers -print-before-all -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LowerExcHandlers -print-before-all -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LowerExcHandlers -print-before-all -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s target triple = "amdgcn-amd-amdhsa" target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" diff --git a/test/llvmpasses/lower-handlers.ll b/test/llvmpasses/lower-handlers.ll index c3d51f2e94c30..2f5dea6cf0892 100644 --- a/test/llvmpasses/lower-handlers.ll +++ b/test/llvmpasses/lower-handlers.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LowerExcHandlers -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LowerExcHandlers -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LowerExcHandlers -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s attributes #1 = { returns_twice } declare i32 @julia.except_enter() #1 diff --git a/test/llvmpasses/muladd.ll b/test/llvmpasses/muladd.ll index f93940db392af..afeb068317844 100644 --- a/test/llvmpasses/muladd.ll +++ b/test/llvmpasses/muladd.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -CombineMulAdd -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CombineMulAdd' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -CombineMulAdd -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='CombineMulAdd' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -CombineMulAdd -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CombineMulAdd' -S %s | FileCheck %s ; CHECK-LABEL: @fast_muladd1 diff --git a/test/llvmpasses/multiversioning-annotate-only.ll b/test/llvmpasses/multiversioning-annotate-only.ll index ababb4fc74b8a..0109010f4c1a1 100644 --- a/test/llvmpasses/multiversioning-annotate-only.ll +++ b/test/llvmpasses/multiversioning-annotate-only.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s ; COM: This test checks that multiversioning correctly picks up on features that should trigger cloning ; COM: Note that for annotations alone, we don't need jl_fvars or jl_gvars diff --git a/test/llvmpasses/multiversioning-clone-only.ll b/test/llvmpasses/multiversioning-clone-only.ll index 897652700c335..e37eefdc362f7 100644 --- a/test/llvmpasses/multiversioning-clone-only.ll +++ b/test/llvmpasses/multiversioning-clone-only.ll @@ -1,12 +1,17 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s --allow-unused-prefixes=false -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s --allow-unused-prefixes=false +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -JuliaMultiVersioning -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaMultiVersioning' -S %s | FileCheck %s --allow-unused-prefixes=false --check-prefixes=CHECK,OPAQUE ; CHECK: @jl_fvar_idxs = hidden constant [1 x i32] zeroinitializer ; CHECK: @jl_gvar_idxs = hidden constant [0 x i32] zeroinitializer -; CHECK: @subtarget_cloned_gv = hidden global i64* null -; CHECK: @subtarget_cloned.reloc_slot = hidden global i32 (i32)* null +; TYPED: @subtarget_cloned_gv = hidden global i64* null +; OPAQUE: @subtarget_cloned_gv = hidden global ptr null +; TYPED: @subtarget_cloned.reloc_slot = hidden global i32 (i32)* null +; OPAQUE: @subtarget_cloned.reloc_slot = hidden global ptr null ; CHECK: @jl_fvar_offsets = hidden constant [2 x i32] [i32 1, i32 0] ; CHECK: @jl_gvar_base = hidden constant i64 0 ; CHECK: @jl_gvar_offsets = hidden constant [1 x i32] zeroinitializer diff --git a/test/llvmpasses/pipeline-o0.jl b/test/llvmpasses/pipeline-o0.jl index 1075d126c59ca..e48a5f7df111f 100644 --- a/test/llvmpasses/pipeline-o0.jl +++ 
b/test/llvmpasses/pipeline-o0.jl @@ -1,5 +1,14 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0" + +# RUN: julia --startup-file=no -O0 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s +# RUN: julia --startup-file=no -O1 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s +# RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s +# RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s + +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1" + # RUN: julia --startup-file=no -O0 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s # RUN: julia --startup-file=no -O1 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s # RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s diff --git a/test/llvmpasses/pipeline-o2-allocs.jl b/test/llvmpasses/pipeline-o2-allocs.jl index 86e1ded3f11e5..86ab9125f2f27 100644 --- a/test/llvmpasses/pipeline-o2-allocs.jl +++ b/test/llvmpasses/pipeline-o2-allocs.jl @@ -1,5 +1,12 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0" + +# RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s +# RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s + +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1" + # RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s # RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s diff --git a/test/llvmpasses/pipeline-o2-broadcast.jl b/test/llvmpasses/pipeline-o2-broadcast.jl index 584e8855f0f8c..83a4450522c79 100644 --- a/test/llvmpasses/pipeline-o2-broadcast.jl +++ b/test/llvmpasses/pipeline-o2-broadcast.jl @@ -1,5 +1,12 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0" + +# RUN: julia --startup-file=no -O2 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s +# RUN: julia --startup-file=no -O3 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s + +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1" + # RUN: julia --startup-file=no -O2 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s # RUN: julia --startup-file=no -O3 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s diff --git a/test/llvmpasses/pipeline-o2.jl b/test/llvmpasses/pipeline-o2.jl index fcb2161de7614..9fd42562f96aa 100644 --- a/test/llvmpasses/pipeline-o2.jl +++ b/test/llvmpasses/pipeline-o2.jl @@ -1,5 +1,18 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=0" + +# RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL +# RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL + +# RUN: julia --startup-file=no -O2 --check-bounds=no %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_OFF +# RUN: julia --startup-file=no -O3 --check-bounds=no %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_OFF + +# RUN: julia --startup-file=no -O2 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_AUTO +# RUN: julia --startup-file=no -O3 --check-bounds=auto %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL,BC_AUTO + +# RUN: export JULIA_LLVM_ARGS="--opaque-pointers=1" + # RUN: julia --startup-file=no -O2 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL # RUN: julia --startup-file=no -O3 --check-bounds=yes %s %t -O && llvm-link -S %t/* | FileCheck %s --check-prefixes=ALL diff --git a/test/llvmpasses/pipeline-prints.ll b/test/llvmpasses/pipeline-prints.ll index 0ea25aa9fb0f0..0c0d81420d9fe 100644 --- a/test/llvmpasses/pipeline-prints.ll +++ b/test/llvmpasses/pipeline-prints.ll @@ -1,25 +1,46 @@ ; COM: This is a newpm-only test, no legacypm command ; COM: we run all the prefixes even though some don't have tests because we want to make sure they don't crash -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREOPTIMIZATION -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYSIMPLIFICATION -; RUN: opt -enable-new-pm=1 
--load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYSIMPLIFICATION -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYOPTIMIZATION -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYOPTIMIZATION -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPOPTIMIZATION -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELICM -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLICM -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPSIMPLIFICATION -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPSIMPLIFICATION -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPOPTIMIZATION -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORESCALAROPTIMIZATION -; RUN: opt 
-enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERSCALAROPTIMIZATION -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREVECTORIZATION -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERVECTORIZATION -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREINTRINSICLOWERING -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERINTRINSICLOWERING -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORECLEANUP -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERCLEANUP -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEROPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s 
--check-prefixes=BEFOREEARLYSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELICM +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLICM +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s 
--check-prefixes=AFTERLOOPOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORESCALAROPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERSCALAROPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREVECTORIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERVECTORIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREINTRINSICLOWERING +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERINTRINSICLOWERING +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORECLEANUP +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERCLEANUP +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterOptimization -o /dev/null %s 2>&1 | FileCheck %s 
--check-prefixes=AFTEROPTIMIZATION + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlySimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREEARLYOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterEarlyOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEREARLYOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELOOPOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORELICM +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLICM -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLICM +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s 
--check-prefixes=BEFORELOOPSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopSimplification -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPSIMPLIFICATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterLoopOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERLOOPOPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFORESCALAROPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterScalarOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERSCALAROPTIMIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREVECTORIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterVectorization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERVECTORIZATION +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=BEFOREINTRINSICLOWERING +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterIntrinsicLowering -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERINTRINSICLOWERING +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=BeforeCleanup -o /dev/null %s 2>&1 | FileCheck 
%s --check-prefixes=BEFORECLEANUP +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterCleanup -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTERCLEANUP +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='julia' --print-before=AfterOptimization -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=AFTEROPTIMIZATION ; ModuleID = 'f' source_filename = "f" diff --git a/test/llvmpasses/propagate-addrspace-non-zero.ll b/test/llvmpasses/propagate-addrspace-non-zero.ll index c1ba2069102ac..ac491000ba1e5 100644 --- a/test/llvmpasses/propagate-addrspace-non-zero.ll +++ b/test/llvmpasses/propagate-addrspace-non-zero.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s target triple = "amdgcn-amd-amdhsa" target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" diff --git a/test/llvmpasses/propagate-addrspace.ll 
b/test/llvmpasses/propagate-addrspace.ll index 92bf68578477f..ffed83ddb615a 100644 --- a/test/llvmpasses/propagate-addrspace.ll +++ b/test/llvmpasses/propagate-addrspace.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s define i64 @simple() { ; CHECK-LABEL: @simple diff --git a/test/llvmpasses/refinements.ll b/test/llvmpasses/refinements.ll index 3600fb76804ab..4637fc4b45071 100644 --- a/test/llvmpasses/refinements.ll +++ b/test/llvmpasses/refinements.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE declare {}*** @julia.ptls_states() @@ -27,13 +30,15 @@ define void @argument_refinement({} addrspace(10)* %a) { ; Check that we reuse the gc slot from the box define void @heap_refinement1(i64 %a) { ; CHECK-LABEL: @heap_refinement1 -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a) %casted1 = bitcast {} addrspace(10)* %aboxed to {} addrspace(10)* addrspace(10)* %loaded1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1 -; CHECK: store {} addrspace(10)* %aboxed +; TYPED: store {} addrspace(10)* %aboxed +; OPAQUE: store ptr addrspace(10) %aboxed call void @jl_safepoint() %casted2 = bitcast {} addrspace(10)* %loaded1 to i64 addrspace(10)* %loaded2 = load i64, i64 addrspace(10)* 
%casted2 @@ -44,13 +49,15 @@ define void @heap_refinement1(i64 %a) { ; Check that we don't root the allocated value here, just the derived value define void @heap_refinement2(i64 %a) { ; CHECK-LABEL: @heap_refinement2 -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a) %casted1 = bitcast {} addrspace(10)* %aboxed to {} addrspace(10)* addrspace(10)* %loaded1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1 -; CHECK: store {} addrspace(10)* %loaded1 +; TYPED: store {} addrspace(10)* %loaded1 +; OPAQUE: store ptr addrspace(10) %loaded1 call void @jl_safepoint() %casted2 = bitcast {} addrspace(10)* %loaded1 to i64 addrspace(10)* %loaded2 = load i64, i64 addrspace(10)* %casted2 @@ -60,24 +67,33 @@ define void @heap_refinement2(i64 %a) { ; Check that the way we compute rooting is compatible with refinements define void @issue22770() { ; CHECK-LABEL: @issue22770 -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() %y = call {} addrspace(10)* @allocate_some_value() %casted1 = bitcast {} addrspace(10)* %y to {} addrspace(10)* addrspace(10)* %x = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1 -; CHECK: store {} addrspace(10)* %y, +; TYPED: store {} addrspace(10)* %y, +; OPAQUE: store ptr addrspace(10) %y, %a = call {} addrspace(10)* @allocate_some_value() -; CHECK: store {} addrspace(10)* %a -; CHECK: call void @one_arg_boxed({} addrspace(10)* %x) -; CHECK: call void @one_arg_boxed({} addrspace(10)* %a) -; CHECK: call void @one_arg_boxed({} addrspace(10)* %y) +; TYPED: store {} 
addrspace(10)* %a +; TYPED: call void @one_arg_boxed({} addrspace(10)* %x) +; TYPED: call void @one_arg_boxed({} addrspace(10)* %a) +; TYPED: call void @one_arg_boxed({} addrspace(10)* %y) + +; OPAQUE: store ptr addrspace(10) %a +; OPAQUE: call void @one_arg_boxed(ptr addrspace(10) %x) +; OPAQUE: call void @one_arg_boxed(ptr addrspace(10) %a) +; OPAQUE: call void @one_arg_boxed(ptr addrspace(10) %y) call void @one_arg_boxed({} addrspace(10)* %x) call void @one_arg_boxed({} addrspace(10)* %a) call void @one_arg_boxed({} addrspace(10)* %y) -; CHECK: store {} addrspace(10)* %x +; TYPED: store {} addrspace(10)* %x +; OPAQUE: store ptr addrspace(10) %x %c = call {} addrspace(10)* @allocate_some_value() -; CHECK: store {} addrspace(10)* %c +; TYPED: store {} addrspace(10)* %c +; OPAQUE: store ptr addrspace(10) %c call void @one_arg_boxed({} addrspace(10)* %x) call void @one_arg_boxed({} addrspace(10)* %c) ret void @@ -107,7 +123,8 @@ L3: define void @dont_refine_loop({} addrspace(10)* %x) { ; CHECK-LABEL: @dont_refine_loop -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -150,7 +167,8 @@ L2: define void @refine_loop_indirect({} addrspace(10)* %x) { ; CHECK-LABEL: @refine_loop_indirect -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call {}*** @julia.ptls_states() @@ -175,7 +193,8 @@ L2: define void @refine_loop_indirect2({} addrspace(10)* %x) { ; CHECK-LABEL: @refine_loop_indirect2 -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 3 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 3 top: %pgcstack = call {}*** @julia.get_pgcstack() %ptls = call 
{}*** @julia.ptls_states() diff --git a/test/llvmpasses/remove-addrspaces.ll b/test/llvmpasses/remove-addrspaces.ll index 4710f9bd6c4d6..b2d14ae49c8e7 100644 --- a/test/llvmpasses/remove-addrspaces.ll +++ b/test/llvmpasses/remove-addrspaces.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -RemoveJuliaAddrspaces -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='RemoveJuliaAddrspaces' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -RemoveJuliaAddrspaces -S %s | FileCheck %s --check-prefixes=CHECK,TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='RemoveJuliaAddrspaces' -S %s | FileCheck %s --check-prefixes=CHECK,TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -RemoveJuliaAddrspaces -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='RemoveJuliaAddrspaces' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE define i64 @getindex({} addrspace(10)* nonnull align 16 dereferenceable(40)) { @@ -34,7 +37,8 @@ top: define nonnull {} addrspace(10)* @constexpr(i64) { ; CHECK-LABEL: @constexpr top: -; CHECK: call {}* inttoptr (i64 139806640486784 to {}* ({}*, i64)*)({}* inttoptr (i64 139806425039920 to {}*), i64 1) +; TYPED: call {}* inttoptr (i64 139806640486784 to {}* ({}*, i64)*)({}* inttoptr (i64 139806425039920 to {}*), i64 1) +; OPAQUE: call ptr inttoptr (i64 139806640486784 to ptr)(ptr inttoptr (i64 139806425039920 to ptr), i64 1) %1 = call {} addrspace(10)* inttoptr (i64 139806640486784 to {} addrspace(10)* ({} addrspace(10)*, i64)*)({} addrspace(10)* addrspacecast ({}* inttoptr (i64 139806425039920 to {}*) to {} addrspace(10)*), i64 1) ; CHECK-NOT: 
addrspacecast ; CHECK-NOT: addrspace @@ -63,23 +67,23 @@ top: %c.cdr = getelementptr %list, %list* %c, i32 0, i32 1 ; COM: Allow remove-addrspaces to rename the type but expect it to use the same prefix. ; CHECK: getelementptr %list -; CHECK-SAME: %list -; CHECK-SAME: * %a +; TYPED-SAME: %list* %a +; OPAQUE-SAME: ptr %a ; CHECK: getelementptr %list -; CHECK-SAME: %list -; CHECK-SAME: * %a +; TYPED-SAME: %list* %a +; OPAQUE-SAME: ptr %a ; CHECK: getelementptr %list -; CHECK-SAME: %list -; CHECK-SAME: * %b +; TYPED-SAME: %list* %b +; OPAQUE-SAME: ptr %b ; CHECK: getelementptr %list -; CHECK-SAME: %list -; CHECK-SAME: * %b +; TYPED-SAME: %list* %b +; OPAQUE-SAME: ptr %b ; CHECK: getelementptr %list -; CHECK-SAME: %list -; CHECK-SAME: * %c +; TYPED-SAME: %list* %c +; OPAQUE-SAME: ptr %c ; CHECK: getelementptr %list -; CHECK-SAME: %list -; CHECK-SAME: * %c +; TYPED-SAME: %list* %c +; OPAQUE-SAME: ptr %c store i64 111, i64* %a.car store i64 222, i64* %b.car store i64 333, i64* %c.car @@ -108,7 +112,8 @@ exit: ; COM: check that address spaces in byval types are processed correctly define void @byval_type([1 x {} addrspace(10)*] addrspace(11)* byval([1 x {} addrspace(10)*]) %0) { -; CHECK: define void @byval_type([1 x {}*]* byval([1 x {}*]) %0) +; TYPED: define void @byval_type([1 x {}*]* byval([1 x {}*]) %0) +; OPAQUE: define void @byval_type(ptr byval([1 x ptr]) %0) ret void } diff --git a/test/llvmpasses/returnstwicegc.ll b/test/llvmpasses/returnstwicegc.ll index 404330ac3f7e1..699d89f7257d0 100644 --- a/test/llvmpasses/returnstwicegc.ll +++ b/test/llvmpasses/returnstwicegc.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=TYPED +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=TYPED + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s --check-prefixes=OPAQUE +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s --check-prefixes=OPAQUE declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*) @@ -14,7 +17,8 @@ declare void @one_arg_boxed({} addrspace(10)*) define void @try_catch(i64 %a, i64 %b) { ; Because of the returns_twice function, we need to keep aboxed live everywhere -; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4 +; TYPED: %gcframe = alloca {} addrspace(10)*, i32 4 +; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4 top: %sigframe = alloca [208 x i8], align 16 %sigframe.sub = getelementptr inbounds [208 x i8], [208 x i8]* %sigframe, i64 0, i64 0 diff --git a/test/llvmpasses/simdloop.ll b/test/llvmpasses/simdloop.ll index 142250212984e..bc4b2da007dc2 100644 --- a/test/llvmpasses/simdloop.ll +++ b/test/llvmpasses/simdloop.ll @@ -1,7 +1,10 @@ ; This file is a part of Julia. 
License is MIT: https://julialang.org/license -; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LowerSIMDLoop -S %s | FileCheck %s -; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S %s | FileCheck %s +; RUN: opt -enable-new-pm=0 --opaque-pointers=0 -load libjulia-codegen%shlibext -LowerSIMDLoop -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S %s | FileCheck %s + +; RUN: opt -enable-new-pm=0 --opaque-pointers=1 -load libjulia-codegen%shlibext -LowerSIMDLoop -S %s | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S %s | FileCheck %s declare void @julia.loopinfo_marker() From c59468a48f0a9775a936d7177125a9dc9782c311 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 23 Jun 2023 20:52:11 -0400 Subject: [PATCH 222/290] Limit memory use during 32bit build (#50272) Co-authored-by: Gabriel Baraldi --- Make.inc | 6 ++++++ contrib/generate_precompile.jl | 1 + sysimage.mk | 6 +++--- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/Make.inc b/Make.inc index 937f146b0150c..bc8f5b94f259b 100644 --- a/Make.inc +++ b/Make.inc @@ -1500,6 +1500,12 @@ endef # Overridable in Make.user WINE ?= wine +ifeq ($(BINARY),32) +HEAPLIM := --heap-size-hint=500M +else +HEAPLIM := +endif + # many of the following targets must be = not := because the expansion of the makefile functions (and $1) shouldn't happen until later ifeq ($(BUILD_OS), WINNT) # MSYS spawn = $(1) diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl index 7312726fe2eaa..fea4ca6bc1fe3 100644 --- a/contrib/generate_precompile.jl +++ b/contrib/generate_precompile.jl @@ -465,6 +465,7 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe print("Total ─────── "); Base.time_print(stdout, tot_time); println() finally 
fancyprint && print(ansi_enablecursor) + GC.gc(true); GC.gc(false); # reduce memory footprint return end diff --git a/sysimage.mk b/sysimage.mk index e5bbfad119131..993ee9a990058 100644 --- a/sysimage.mk +++ b/sysimage.mk @@ -59,14 +59,14 @@ RELBUILDROOT := $(call rel_path,$(JULIAHOME)/base,$(BUILDROOT)/base)/ # <-- make $(build_private_libdir)/corecompiler.ji: $(COMPILER_SRCS) @$(call PRINT_JULIA, cd $(JULIAHOME)/base && \ - $(call spawn,$(JULIA_EXECUTABLE)) -C "$(JULIA_CPU_TARGET)" --output-ji $(call cygpath_w,$@).tmp \ + $(call spawn,$(JULIA_EXECUTABLE)) -C "$(JULIA_CPU_TARGET)" $(HEAPLIM) --output-ji $(call cygpath_w,$@).tmp \ --startup-file=no --warn-overwrite=yes -g$(BOOTSTRAP_DEBUG_LEVEL) -O0 compiler/compiler.jl) @mv $@.tmp $@ $(build_private_libdir)/sys.ji: $(build_private_libdir)/corecompiler.ji $(JULIAHOME)/VERSION $(BASE_SRCS) $(STDLIB_SRCS) @$(call PRINT_JULIA, cd $(JULIAHOME)/base && \ if ! JULIA_BINDIR=$(call cygpath_w,$(build_bindir)) WINEPATH="$(call cygpath_w,$(build_bindir));$$WINEPATH" \ - $(call spawn, $(JULIA_EXECUTABLE)) -g1 -O0 -C "$(JULIA_CPU_TARGET)" --output-ji $(call cygpath_w,$@).tmp $(JULIA_SYSIMG_BUILD_FLAGS) \ + $(call spawn, $(JULIA_EXECUTABLE)) -g1 -O0 -C "$(JULIA_CPU_TARGET)" $(HEAPLIM) --output-ji $(call cygpath_w,$@).tmp $(JULIA_SYSIMG_BUILD_FLAGS) \ --startup-file=no --warn-overwrite=yes --sysimage $(call cygpath_w,$<) sysimg.jl $(RELBUILDROOT); then \ echo '*** This error might be fixed by running `make clean`. If the error persists$(COMMA) try `make cleanall`. 
***'; \ false; \ @@ -82,7 +82,7 @@ $$(build_private_libdir)/sys$1-o.a $$(build_private_libdir)/sys$1-bc.a : $$(buil JULIA_PROJECT= \ JULIA_DEPOT_PATH=':' \ JULIA_NUM_THREADS=1 \ - $$(call spawn, $3) $2 -C "$$(JULIA_CPU_TARGET)" --output-$$* $$(call cygpath_w,$$@).tmp $$(JULIA_SYSIMG_BUILD_FLAGS) \ + $$(call spawn, $3) $2 -C "$$(JULIA_CPU_TARGET)" $$(HEAPLIM) --output-$$* $$(call cygpath_w,$$@).tmp $$(JULIA_SYSIMG_BUILD_FLAGS) \ --startup-file=no --warn-overwrite=yes --sysimage $$(call cygpath_w,$$<) $$(call cygpath_w,$$(JULIAHOME)/contrib/generate_precompile.jl) $(JULIA_PRECOMPILE); then \ echo '*** This error is usually fixed by running `make clean`. If the error persists$$(COMMA) try `make cleanall`. ***'; \ false; \ From f2c6580f654adcd490a81ad4ce8344c178218faf Mon Sep 17 00:00:00 2001 From: Dilum Aluthge Date: Fri, 23 Jun 2023 23:42:35 -0400 Subject: [PATCH 223/290] `SuiteSparse_jll`: only`dlopen` the libraries if `Base.USE_GPL_LIBS` is true (#50267) --- stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl | 50 ++++++++++--------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl b/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl index 6b87d417fc2a8..a347a91721bad 100644 --- a/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl +++ b/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl @@ -82,30 +82,32 @@ else end function __init__() - global libamd_handle = dlopen(libamd) - global libamd_path = dlpath(libamd_handle) - global libbtf_handle = dlopen(libbtf) - global libbtf_path = dlpath(libbtf_handle) - global libcamd_handle = dlopen(libcamd) - global libcamd_path = dlpath(libcamd_handle) - global libccolamd_handle = dlopen(libccolamd) - global libccolamd_path = dlpath(libccolamd_handle) - global libcholmod_handle = dlopen(libcholmod) - global libcholmod_path = dlpath(libcholmod_handle) - global libcolamd_handle = dlopen(libcolamd) - global libcolamd_path = dlpath(libcolamd_handle) - global libklu_handle = dlopen(libklu) - 
global libklu_path = dlpath(libklu_handle) - global libldl_handle = dlopen(libldl) - global libldl_path = dlpath(libldl_handle) - global librbio_handle = dlopen(librbio) - global librbio_path = dlpath(librbio_handle) - global libspqr_handle = dlopen(libspqr) - global libspqr_path = dlpath(libspqr_handle) - global libsuitesparseconfig_handle = dlopen(libsuitesparseconfig) - global libsuitesparseconfig_path = dlpath(libsuitesparseconfig_handle) - global libumfpack_handle = dlopen(libumfpack) - global libumfpack_path = dlpath(libumfpack_handle) + if Base.USE_GPL_LIBS + global libamd_handle = dlopen(libamd) + global libamd_path = dlpath(libamd_handle) + global libbtf_handle = dlopen(libbtf) + global libbtf_path = dlpath(libbtf_handle) + global libcamd_handle = dlopen(libcamd) + global libcamd_path = dlpath(libcamd_handle) + global libccolamd_handle = dlopen(libccolamd) + global libccolamd_path = dlpath(libccolamd_handle) + global libcholmod_handle = dlopen(libcholmod) + global libcholmod_path = dlpath(libcholmod_handle) + global libcolamd_handle = dlopen(libcolamd) + global libcolamd_path = dlpath(libcolamd_handle) + global libklu_handle = dlopen(libklu) + global libklu_path = dlpath(libklu_handle) + global libldl_handle = dlopen(libldl) + global libldl_path = dlpath(libldl_handle) + global librbio_handle = dlopen(librbio) + global librbio_path = dlpath(librbio_handle) + global libspqr_handle = dlopen(libspqr) + global libspqr_path = dlpath(libspqr_handle) + global libsuitesparseconfig_handle = dlopen(libsuitesparseconfig) + global libsuitesparseconfig_path = dlpath(libsuitesparseconfig_handle) + global libumfpack_handle = dlopen(libumfpack) + global libumfpack_path = dlpath(libumfpack_handle) + end global artifact_dir = dirname(Sys.BINDIR) end From fdc50d8ba73d04be99ae2e08c9e18ac99ad36e84 Mon Sep 17 00:00:00 2001 From: Paul Berg Date: Sat, 24 Jun 2023 07:59:34 +0200 Subject: [PATCH 224/290] Update docstring for `StackFrame.linfo` (#49971) --- base/stacktraces.jl | 5 
+++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/base/stacktraces.jl b/base/stacktraces.jl index 523ca827897d5..9c942814eefad 100644 --- a/base/stacktraces.jl +++ b/base/stacktraces.jl @@ -20,9 +20,10 @@ Stack information representing execution context, with the following fields: The name of the function containing the execution context. -- `linfo::Union{Core.MethodInstance, CodeInfo, Nothing}` +- `linfo::Union{Core.MethodInstance, Method, Module, Core.CodeInfo, Nothing}` - The MethodInstance containing the execution context (if it could be found). + The MethodInstance or CodeInfo containing the execution context (if it could be found), \ + or Module (for macro expansions)" - `file::Symbol` From 406ba123cedcfcc66cb50d15bb5eaeb2bcefea5b Mon Sep 17 00:00:00 2001 From: Ujjwal Sarswat <76774914+vmpyr@users.noreply.github.com> Date: Sat, 24 Jun 2023 11:45:23 +0530 Subject: [PATCH 225/290] add handling of `sqrt()` function for empty matrices (#50270) --- stdlib/LinearAlgebra/src/dense.jl | 4 +++- stdlib/LinearAlgebra/test/dense.jl | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/stdlib/LinearAlgebra/src/dense.jl b/stdlib/LinearAlgebra/src/dense.jl index 56c5954cc28fe..b8a44159de8bd 100644 --- a/stdlib/LinearAlgebra/src/dense.jl +++ b/stdlib/LinearAlgebra/src/dense.jl @@ -875,7 +875,9 @@ julia> sqrt(A) sqrt(::AbstractMatrix) function sqrt(A::AbstractMatrix{T}) where {T<:Union{Real,Complex}} - if ishermitian(A) + if checksquare(A) == 0 + return copy(A) + elseif ishermitian(A) sqrtHermA = sqrt(Hermitian(A)) return ishermitian(sqrtHermA) ? 
copytri!(parent(sqrtHermA), 'U', true) : parent(sqrtHermA) elseif istriu(A) diff --git a/stdlib/LinearAlgebra/test/dense.jl b/stdlib/LinearAlgebra/test/dense.jl index 1546f3247acf4..efeedf93ebd1f 100644 --- a/stdlib/LinearAlgebra/test/dense.jl +++ b/stdlib/LinearAlgebra/test/dense.jl @@ -1213,6 +1213,11 @@ end @test exp(log(A2)) ≈ A2 end +@testset "sqrt of empty Matrix of type $T" for T in [Int,Float32,Float64,ComplexF32,ComplexF64] + @test sqrt(Matrix{T}(undef, 0, 0)) == Matrix{T}(undef, 0, 0) + @test_throws DimensionMismatch sqrt(Matrix{T}(undef, 0, 3)) +end + struct TypeWithoutZero end Base.zero(::Type{TypeWithoutZero}) = TypeWithZero() struct TypeWithZero end From 5939e2d27a50605415ff6488eca7fa05a968defc Mon Sep 17 00:00:00 2001 From: Diogo Netto <61364108+d-netto@users.noreply.github.com> Date: Sun, 25 Jun 2023 05:57:17 -0300 Subject: [PATCH 226/290] Improve scalability of page allocator (#50137) Reduces contention in allocation heavy parallel workloads. --- src/gc-debug.c | 100 ++++------------- src/gc-pages.c | 245 +++++++++-------------------------------- src/gc.c | 222 +++++++++++++------------------------ src/gc.h | 253 ++++++++++++++++++++++++++----------------- src/julia_threads.h | 3 + src/partr.c | 14 ++- src/support/dtypes.h | 17 +++ 7 files changed, 325 insertions(+), 529 deletions(-) diff --git a/src/gc-debug.c b/src/gc-debug.c index 02addaa98e44c..d6cb89f574022 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -27,19 +27,16 @@ jl_gc_pagemeta_t *jl_gc_page_metadata(void *data) // the end of the page. 
JL_DLLEXPORT jl_taggedvalue_t *jl_gc_find_taggedvalue_pool(char *p, size_t *osize_p) { - if (!page_metadata(p)) + if (!gc_alloc_map_is_set(p)) // Not in the pool return NULL; - struct jl_gc_metadata_ext info = page_metadata_ext(p); + jl_gc_pagemeta_t *meta = page_metadata(p); char *page_begin = gc_page_data(p) + GC_PAGE_OFFSET; // In the page header if (p < page_begin) return NULL; size_t ofs = p - page_begin; - // Check if this is a free page - if (!(info.pagetable0->allocmap[info.pagetable0_i32] & (uint32_t)(1 << info.pagetable0_i))) - return NULL; - int osize = info.meta->osize; + int osize = meta->osize; // Shouldn't be needed, just in case if (osize == 0) return NULL; @@ -111,44 +108,14 @@ static void gc_clear_mark_page(jl_gc_pagemeta_t *pg, int bits) } } -static void gc_clear_mark_pagetable0(pagetable0_t *pagetable0, int bits) -{ - for (int pg_i = 0; pg_i < REGION0_PG_COUNT / 32; pg_i++) { - uint32_t line = pagetable0->allocmap[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - gc_clear_mark_page(pagetable0->meta[pg_i * 32 + j], bits); - } - } - } - } -} - -static void gc_clear_mark_pagetable1(pagetable1_t *pagetable1, int bits) -{ - for (int pg_i = 0; pg_i < REGION1_PG_COUNT / 32; pg_i++) { - uint32_t line = pagetable1->allocmap0[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - gc_clear_mark_pagetable0(pagetable1->meta0[pg_i * 32 + j], bits); - } - } - } - } -} - -static void gc_clear_mark_pagetable(int bits) +static void gc_clear_mark_outer(int bits) { - for (int pg_i = 0; pg_i < (REGION2_PG_COUNT + 31) / 32; pg_i++) { - uint32_t line = memory_map.allocmap1[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - gc_clear_mark_pagetable1(memory_map.meta1[pg_i * 32 + j], bits); - } - } + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + jl_gc_pagemeta_t *pg = ptls2->page_metadata_allocd; + while (pg != NULL) { + gc_clear_mark_page(pg, 
bits); + pg = pg->next; } } } @@ -184,7 +151,7 @@ static void clear_mark(int bits) v = v->next; } - gc_clear_mark_pagetable(bits); + gc_clear_mark_outer(bits); } static void restore(void) @@ -561,7 +528,6 @@ void gc_scrub_record_task(jl_task_t *t) JL_NO_ASAN static void gc_scrub_range(char *low, char *high) { - jl_ptls_t ptls = jl_current_task->ptls; jl_jmp_buf *old_buf = jl_get_safe_restore(); jl_jmp_buf buf; if (jl_setjmp(buf, 0)) { @@ -1168,7 +1134,7 @@ void gc_stats_big_obj(void) static int64_t poolobj_sizes[4]; static int64_t empty_pages; -static void gc_count_pool_page(jl_gc_pagemeta_t *pg) +static void gc_count_pool_page(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT { int osize = pg->osize; char *data = pg->data; @@ -1187,44 +1153,16 @@ static void gc_count_pool_page(jl_gc_pagemeta_t *pg) } } -static void gc_count_pool_pagetable0(pagetable0_t *pagetable0) -{ - for (int pg_i = 0; pg_i < REGION0_PG_COUNT / 32; pg_i++) { - uint32_t line = pagetable0->allocmap[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - gc_count_pool_page(pagetable0->meta[pg_i * 32 + j]); - } - } - } - } -} - -static void gc_count_pool_pagetable1(pagetable1_t *pagetable1) -{ - for (int pg_i = 0; pg_i < REGION1_PG_COUNT / 32; pg_i++) { - uint32_t line = pagetable1->allocmap0[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - gc_count_pool_pagetable0(pagetable1->meta0[pg_i * 32 + j]); - } - } - } - } -} - static void gc_count_pool_pagetable(void) { - for (int pg_i = 0; pg_i < (REGION2_PG_COUNT + 31) / 32; pg_i++) { - uint32_t line = memory_map.allocmap1[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - gc_count_pool_pagetable1(memory_map.meta1[pg_i * 32 + j]); - } + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + jl_gc_pagemeta_t *pg = ptls2->page_metadata_allocd; + while (pg != NULL) { + if (gc_alloc_map_is_set(pg->data)) { + gc_count_pool_page(pg); } + pg = pg->next; } } 
} diff --git a/src/gc-pages.c b/src/gc-pages.c index 28daa9d67a9ed..3e8207460d37b 100644 --- a/src/gc-pages.c +++ b/src/gc-pages.c @@ -19,7 +19,6 @@ extern "C" { #define MIN_BLOCK_PG_ALLOC (1) // 16 KB static int block_pg_cnt = DEFAULT_BLOCK_PG_ALLOC; -static size_t current_pg_count = 0; void jl_gc_init_page(void) { @@ -33,7 +32,7 @@ void jl_gc_init_page(void) // Try to allocate a memory block for multiple pages // Return `NULL` if allocation failed. Result is aligned to `GC_PAGE_SZ`. -static char *jl_gc_try_alloc_pages(int pg_cnt) JL_NOTSAFEPOINT +char *jl_gc_try_alloc_pages_(int pg_cnt) JL_NOTSAFEPOINT { size_t pages_sz = GC_PAGE_SZ * pg_cnt; #ifdef _OS_WINDOWS_ @@ -63,13 +62,12 @@ static char *jl_gc_try_alloc_pages(int pg_cnt) JL_NOTSAFEPOINT // smaller `MIN_BLOCK_PG_ALLOC` a `jl_memory_exception` is thrown. // Assumes `gc_perm_lock` is acquired, the lock is released before the // exception is thrown. -static jl_gc_pagemeta_t *jl_gc_alloc_new_page(void) JL_NOTSAFEPOINT +char *jl_gc_try_alloc_pages(void) JL_NOTSAFEPOINT { - // try to allocate a large block of memory (or a small one) - unsigned pg, pg_cnt = block_pg_cnt; + unsigned pg_cnt = block_pg_cnt; char *mem = NULL; while (1) { - if (__likely((mem = jl_gc_try_alloc_pages(pg_cnt)))) + if (__likely((mem = jl_gc_try_alloc_pages_(pg_cnt)))) break; size_t min_block_pg_alloc = MIN_BLOCK_PG_ALLOC; if (GC_PAGE_SZ * min_block_pg_alloc < jl_page_size) @@ -86,201 +84,70 @@ static jl_gc_pagemeta_t *jl_gc_alloc_new_page(void) JL_NOTSAFEPOINT jl_throw(jl_memory_exception); } } - - // now need to insert these pages into the pagetable metadata - // if any allocation fails, this just stops recording more pages from that point - // and will free (munmap) the remainder - jl_gc_pagemeta_t *page_meta = - (jl_gc_pagemeta_t*)jl_gc_perm_alloc_nolock(pg_cnt * sizeof(jl_gc_pagemeta_t), 1, - sizeof(void*), 0); - pg = 0; - if (page_meta) { - for (; pg < pg_cnt; pg++) { - struct jl_gc_metadata_ext info; - uint32_t msk; - unsigned i; - 
pagetable1_t **ppagetable1; - pagetable0_t **ppagetable0; - jl_gc_pagemeta_t **pmeta; - - char *ptr = mem + (GC_PAGE_SZ * pg); - page_meta[pg].data = ptr; - - // create & store the level 2 / outermost info - i = REGION_INDEX(ptr); - info.pagetable_i = i % 32; - info.pagetable_i32 = i / 32; - msk = (1u << info.pagetable_i); - if ((memory_map.freemap1[info.pagetable_i32] & msk) == 0) - memory_map.freemap1[info.pagetable_i32] |= msk; // has free - info.pagetable1 = *(ppagetable1 = &memory_map.meta1[i]); - if (!info.pagetable1) { - info.pagetable1 = (pagetable1_t*)jl_gc_perm_alloc_nolock(sizeof(pagetable1_t), 1, - sizeof(void*), 0); - *ppagetable1 = info.pagetable1; - if (!info.pagetable1) - break; - } - - // create & store the level 1 info - i = REGION1_INDEX(ptr); - info.pagetable1_i = i % 32; - info.pagetable1_i32 = i / 32; - msk = (1u << info.pagetable1_i); - if ((info.pagetable1->freemap0[info.pagetable1_i32] & msk) == 0) - info.pagetable1->freemap0[info.pagetable1_i32] |= msk; // has free - info.pagetable0 = *(ppagetable0 = &info.pagetable1->meta0[i]); - if (!info.pagetable0) { - info.pagetable0 = (pagetable0_t*)jl_gc_perm_alloc_nolock(sizeof(pagetable0_t), 1, - sizeof(void*), 0); - *ppagetable0 = info.pagetable0; - if (!info.pagetable0) - break; - } - - // create & store the level 0 / page info - i = REGION0_INDEX(ptr); - info.pagetable0_i = i % 32; - info.pagetable0_i32 = i / 32; - msk = (1u << info.pagetable0_i); - info.pagetable0->freemap[info.pagetable0_i32] |= msk; // is free - pmeta = &info.pagetable0->meta[i]; - info.meta = (*pmeta = &page_meta[pg]); - } - } - - if (pg < pg_cnt) { -#ifndef _OS_WINDOWS_ - // Trim the allocation to only cover the region - // that we successfully created the metadata for. - // This is not supported by the Windows kernel, - // so we have to just skip it there and just lose these virtual addresses. 
- munmap(mem + LLT_ALIGN(GC_PAGE_SZ * pg, jl_page_size), - GC_PAGE_SZ * pg_cnt - LLT_ALIGN(GC_PAGE_SZ * pg, jl_page_size)); -#endif - if (pg == 0) { - uv_mutex_unlock(&gc_perm_lock); - jl_throw(jl_memory_exception); - } - } - return page_meta; + return mem; } // get a new page, either from the freemap // or from the kernel if none are available NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT { - struct jl_gc_metadata_ext info; - uv_mutex_lock(&gc_perm_lock); - int last_errno = errno; #ifdef _OS_WINDOWS_ DWORD last_error = GetLastError(); #endif - // scan over memory_map page-table for existing allocated but unused pages - for (info.pagetable_i32 = memory_map.lb; info.pagetable_i32 < (REGION2_PG_COUNT + 31) / 32; info.pagetable_i32++) { - uint32_t freemap1 = memory_map.freemap1[info.pagetable_i32]; - for (info.pagetable_i = 0; freemap1; info.pagetable_i++, freemap1 >>= 1) { - unsigned next = ffs_u32(freemap1); - info.pagetable_i += next; - freemap1 >>= next; - info.pagetable1 = memory_map.meta1[info.pagetable_i + info.pagetable_i32 * 32]; - // repeat over page-table level 1 - for (info.pagetable1_i32 = info.pagetable1->lb; info.pagetable1_i32 < REGION1_PG_COUNT / 32; info.pagetable1_i32++) { - uint32_t freemap0 = info.pagetable1->freemap0[info.pagetable1_i32]; - for (info.pagetable1_i = 0; freemap0; info.pagetable1_i++, freemap0 >>= 1) { - unsigned next = ffs_u32(freemap0); - info.pagetable1_i += next; - freemap0 >>= next; - info.pagetable0 = info.pagetable1->meta0[info.pagetable1_i + info.pagetable1_i32 * 32]; - // repeat over page-table level 0 - for (info.pagetable0_i32 = info.pagetable0->lb; info.pagetable0_i32 < REGION0_PG_COUNT / 32; info.pagetable0_i32++) { - uint32_t freemap = info.pagetable0->freemap[info.pagetable0_i32]; - if (freemap) { - info.pagetable0_i = ffs_u32(freemap); - info.meta = info.pagetable0->meta[info.pagetable0_i + info.pagetable0_i32 * 32]; - assert(info.meta->data); - // new pages available starting at min of lb and 
pagetable_i32 - if (memory_map.lb < info.pagetable_i32) - memory_map.lb = info.pagetable_i32; - if (info.pagetable1->lb < info.pagetable1_i32) - info.pagetable1->lb = info.pagetable1_i32; - if (info.pagetable0->lb < info.pagetable0_i32) - info.pagetable0->lb = info.pagetable0_i32; - goto have_free_page; // break out of all of these loops - } - } - info.pagetable1->freemap0[info.pagetable1_i32] &= ~(uint32_t)(1u << info.pagetable1_i); // record that this was full - } - } - memory_map.freemap1[info.pagetable_i32] &= ~(uint32_t)(1u << info.pagetable_i); // record that this was full - } - } + jl_gc_pagemeta_t *meta = NULL; - // no existing pages found, allocate a new one - { - jl_gc_pagemeta_t *meta = jl_gc_alloc_new_page(); - info = page_metadata_ext(meta->data); - assert(meta == info.meta); - // new pages are now available starting at max of lb and pagetable_i32 - if (memory_map.lb > info.pagetable_i32) - memory_map.lb = info.pagetable_i32; - if (info.pagetable1->lb > info.pagetable1_i32) - info.pagetable1->lb = info.pagetable1_i32; - if (info.pagetable0->lb > info.pagetable0_i32) - info.pagetable0->lb = info.pagetable0_i32; + // try to get page from `pool_clean` + meta = pop_lf_page_metadata_back(&global_page_pool_clean); + if (meta != NULL) { + gc_alloc_map_set(meta->data, 1); + goto exit; } -have_free_page: - // in-use pages are now ending at min of ub and pagetable_i32 - if (memory_map.ub < info.pagetable_i32) - memory_map.ub = info.pagetable_i32; - if (info.pagetable1->ub < info.pagetable1_i32) - info.pagetable1->ub = info.pagetable1_i32; - if (info.pagetable0->ub < info.pagetable0_i32) - info.pagetable0->ub = info.pagetable0_i32; - - // mark this entry as in-use and not free - info.pagetable0->freemap[info.pagetable0_i32] &= ~(uint32_t)(1u << info.pagetable0_i); - info.pagetable0->allocmap[info.pagetable0_i32] |= (uint32_t)(1u << info.pagetable0_i); - info.pagetable1->allocmap0[info.pagetable1_i32] |= (uint32_t)(1u << info.pagetable1_i); - 
memory_map.allocmap1[info.pagetable_i32] |= (uint32_t)(1u << info.pagetable_i); + // try to get page from `pool_freed` + meta = pop_lf_page_metadata_back(&global_page_pool_freed); + if (meta != NULL) { + gc_alloc_map_set(meta->data, 1); + goto exit; + } + uv_mutex_lock(&gc_perm_lock); + // another thread may have allocated a large block while we're waiting... + meta = pop_lf_page_metadata_back(&global_page_pool_clean); + if (meta != NULL) { + uv_mutex_unlock(&gc_perm_lock); + gc_alloc_map_set(meta->data, 1); + goto exit; + } + // must map a new set of pages + char *data = jl_gc_try_alloc_pages(); + meta = (jl_gc_pagemeta_t*)malloc_s(block_pg_cnt * sizeof(jl_gc_pagemeta_t)); + for (int i = 0; i < block_pg_cnt; i++) { + jl_gc_pagemeta_t *pg = &meta[i]; + pg->data = data + GC_PAGE_SZ * i; + gc_alloc_map_maybe_create(pg->data); + if (i == 0) { + gc_alloc_map_set(pg->data, 1); + } + else { + push_lf_page_metadata_back(&global_page_pool_clean, pg); + } + } + uv_mutex_unlock(&gc_perm_lock); +exit: #ifdef _OS_WINDOWS_ - VirtualAlloc(info.meta->data, GC_PAGE_SZ, MEM_COMMIT, PAGE_READWRITE); -#endif -#ifdef _OS_WINDOWS_ + VirtualAlloc(meta->data, GC_PAGE_SZ, MEM_COMMIT, PAGE_READWRITE); SetLastError(last_error); #endif errno = last_errno; - current_pg_count++; - gc_final_count_page(current_pg_count); - uv_mutex_unlock(&gc_perm_lock); - return info.meta; + return meta; } // return a page to the freemap allocator -void jl_gc_free_page(void *p) JL_NOTSAFEPOINT +void jl_gc_free_page(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT { - // update the allocmap and freemap to indicate this contains a free entry - struct jl_gc_metadata_ext info = page_metadata_ext(p); - uint32_t msk; - msk = (uint32_t)(1u << info.pagetable0_i); - assert(!(info.pagetable0->freemap[info.pagetable0_i32] & msk)); - assert(info.pagetable0->allocmap[info.pagetable0_i32] & msk); - info.pagetable0->allocmap[info.pagetable0_i32] &= ~msk; - info.pagetable0->freemap[info.pagetable0_i32] |= msk; - - msk = (uint32_t)(1u << 
info.pagetable1_i); - assert(info.pagetable1->allocmap0[info.pagetable1_i32] & msk); - if ((info.pagetable1->freemap0[info.pagetable1_i32] & msk) == 0) - info.pagetable1->freemap0[info.pagetable1_i32] |= msk; - - msk = (uint32_t)(1u << info.pagetable_i); - assert(memory_map.allocmap1[info.pagetable_i32] & msk); - if ((memory_map.freemap1[info.pagetable_i32] & msk) == 0) - memory_map.freemap1[info.pagetable_i32] |= msk; - + void *p = pg->data; + gc_alloc_map_set((char*)p, 0); // tell the OS we don't need these pages right now size_t decommit_size = GC_PAGE_SZ; if (GC_PAGE_SZ < jl_page_size) { @@ -290,10 +157,9 @@ void jl_gc_free_page(void *p) JL_NOTSAFEPOINT void *otherp = (void*)((uintptr_t)p & ~(jl_page_size - 1)); // round down to the nearest physical page p = otherp; while (n_pages--) { - struct jl_gc_metadata_ext info = page_metadata_ext(otherp); - msk = (uint32_t)(1u << info.pagetable0_i); - if (info.pagetable0->allocmap[info.pagetable0_i32] & msk) - goto no_decommit; + if (gc_alloc_map_is_set((char*)otherp)) { + return; + } otherp = (void*)((char*)otherp + GC_PAGE_SZ); } } @@ -313,20 +179,7 @@ void jl_gc_free_page(void *p) JL_NOTSAFEPOINT #else madvise(p, decommit_size, MADV_DONTNEED); #endif - /* TODO: Should we leave this poisoned and rather allow the GC to read poisoned pointers from - * the page when it sweeps pools? 
- */ msan_unpoison(p, decommit_size); - -no_decommit: - // new pages are now available starting at max of lb and pagetable_i32 - if (memory_map.lb > info.pagetable_i32) - memory_map.lb = info.pagetable_i32; - if (info.pagetable1->lb > info.pagetable1_i32) - info.pagetable1->lb = info.pagetable1_i32; - if (info.pagetable0->lb > info.pagetable0_i32) - info.pagetable0->lb = info.pagetable0_i32; - current_pg_count--; } #ifdef __cplusplus diff --git a/src/gc.c b/src/gc.c index 930edbe9c67a8..213eebae33b1a 100644 --- a/src/gc.c +++ b/src/gc.c @@ -17,9 +17,6 @@ _Atomic(int) gc_n_threads_marking; _Atomic(int) gc_master_tid; // `tid` of first GC thread int gc_first_tid; -// Mutex/cond used to synchronize sleep/wakeup of GC threads -uv_mutex_t gc_threads_lock; -uv_cond_t gc_threads_cond; // Linked list of callback functions @@ -178,8 +175,6 @@ JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) return jl_buff_tag; } -pagetable_t memory_map; - // List of marked big objects. Not per-thread. Accessed only by master thread. bigval_t *big_objects_marked = NULL; @@ -819,7 +814,7 @@ FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_N STATIC_INLINE void gc_setmark_big(jl_ptls_t ptls, jl_taggedvalue_t *o, uint8_t mark_mode) JL_NOTSAFEPOINT { - assert(!page_metadata(o)); + assert(!gc_alloc_map_is_set((char*)o)); bigval_t *hdr = bigval_header(o); if (mark_mode == GC_OLD_MARKED) { ptls->gc_cache.perm_scanned_bytes += hdr->sz & ~3; @@ -842,13 +837,11 @@ STATIC_INLINE void gc_setmark_big(jl_ptls_t ptls, jl_taggedvalue_t *o, // This function should be called exactly once during marking for each pool // object being marked to update the page metadata. 
STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o, - uint8_t mark_mode, - jl_gc_pagemeta_t *page) JL_NOTSAFEPOINT + uint8_t mark_mode, jl_gc_pagemeta_t *page) JL_NOTSAFEPOINT { #ifdef MEMDEBUG gc_setmark_big(ptls, o, mark_mode); #else - jl_assume(page); if (mark_mode == GC_OLD_MARKED) { ptls->gc_cache.perm_scanned_bytes += page->osize; static_assert(sizeof(_Atomic(uint16_t)) == sizeof(page->nold), ""); @@ -869,7 +862,7 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o, STATIC_INLINE void gc_setmark_pool(jl_ptls_t ptls, jl_taggedvalue_t *o, uint8_t mark_mode) JL_NOTSAFEPOINT { - gc_setmark_pool_(ptls, o, mark_mode, page_metadata(o)); + gc_setmark_pool_(ptls, o, mark_mode, page_metadata((char*)o)); } STATIC_INLINE void gc_setmark(jl_ptls_t ptls, jl_taggedvalue_t *o, @@ -893,9 +886,9 @@ STATIC_INLINE void gc_setmark_buf_(jl_ptls_t ptls, void *o, uint8_t mark_mode, s // sure. if (__likely(gc_try_setmark_tag(buf, mark_mode)) && !gc_verifying) { if (minsz <= GC_MAX_SZCLASS) { - jl_gc_pagemeta_t *page = page_metadata(buf); - if (page != NULL) { - gc_setmark_pool_(ptls, buf, bits, page); + jl_gc_pagemeta_t *meta = page_metadata(buf); + if (meta != NULL) { + gc_setmark_pool_(ptls, buf, bits, meta); return; } } @@ -1213,29 +1206,12 @@ static void sweep_malloced_arrays(void) JL_NOTSAFEPOINT } // pool allocation -STATIC_INLINE jl_taggedvalue_t *gc_reset_page(jl_ptls_t ptls2, const jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_taggedvalue_t *fl) JL_NOTSAFEPOINT +STATIC_INLINE jl_taggedvalue_t *gc_reset_page(jl_ptls_t ptls2, const jl_gc_pool_t *p, jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT { assert(GC_PAGE_OFFSET >= sizeof(void*)); pg->nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / p->osize; pg->pool_n = p - ptls2->heap.norm_pools; jl_taggedvalue_t *beg = (jl_taggedvalue_t*)(pg->data + GC_PAGE_OFFSET); - jl_taggedvalue_t *next = (jl_taggedvalue_t*)pg->data; - if (fl == NULL) { - next->next = NULL; - } - else { - // Insert free page after first 
page. - // This prevents unnecessary fragmentation from multiple pages - // being allocated from at the same time. Instead, objects will - // only ever be allocated from the first object in the list. - // This is specifically being relied on by the implementation - // of jl_gc_internal_obj_base_ptr() so that the function does - // not have to traverse the entire list. - jl_taggedvalue_t *flpage = (jl_taggedvalue_t *)gc_page_data(fl); - next->next = flpage->next; - flpage->next = beg; - beg = fl; - } pg->has_young = 0; pg->has_marked = 0; pg->fl_begin_offset = UINT16_MAX; @@ -1243,6 +1219,11 @@ STATIC_INLINE jl_taggedvalue_t *gc_reset_page(jl_ptls_t ptls2, const jl_gc_pool_ return beg; } +jl_gc_global_page_pool_t global_page_pool_lazily_freed; +jl_gc_global_page_pool_t global_page_pool_clean; +jl_gc_global_page_pool_t global_page_pool_freed; +pagetable_t alloc_map; + // Add a new page to the pool. Discards any pages in `p->newpages` before. static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT { @@ -1252,7 +1233,9 @@ static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT jl_gc_pagemeta_t *pg = jl_gc_alloc_page(); pg->osize = p->osize; pg->thread_n = ptls->tid; - jl_taggedvalue_t *fl = gc_reset_page(ptls, p, pg, NULL); + set_page_metadata(pg); + push_page_metadata_back(&ptls->page_metadata_allocd, pg); + jl_taggedvalue_t *fl = gc_reset_page(ptls, p, pg); p->newpages = fl; return fl; } @@ -1282,7 +1265,7 @@ STATIC_INLINE jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset if (__unlikely(gc_page_data(v) != gc_page_data(next))) { // we only update pg's fields when the freelist changes page // since pg's metadata is likely not in cache - jl_gc_pagemeta_t *pg = jl_assume(page_metadata(v)); + jl_gc_pagemeta_t *pg = jl_assume(page_metadata_unsafe(v)); assert(pg->osize == p->osize); pg->nfree = 0; pg->has_young = 1; @@ -1300,11 +1283,19 @@ STATIC_INLINE jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int 
pool_offset if (v != NULL) { // like the freelist case, // but only update the page metadata when it is full - jl_gc_pagemeta_t *pg = jl_assume(page_metadata((char*)v - 1)); + jl_gc_pagemeta_t *pg = jl_assume(page_metadata_unsafe((char*)v - 1)); assert(pg->osize == p->osize); pg->nfree = 0; pg->has_young = 1; - v = *(jl_taggedvalue_t**)cur_page; + pg = pop_page_metadata_back(&ptls->page_metadata_lazily_freed); + if (pg != NULL) { + v = gc_reset_page(ptls, p, pg); + pg->osize = p->osize; + push_page_metadata_back(&ptls->page_metadata_allocd, pg); + } + else { + v = NULL; + } } // Not an else!! if (v == NULL) { @@ -1348,7 +1339,8 @@ int jl_gc_classify_pools(size_t sz, int *osize) int64_t lazy_freed_pages = 0; // Returns pointer to terminal pointer of list rooted at *pfl. -static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_taggedvalue_t **pfl, int sweep_full, int osize) JL_NOTSAFEPOINT +static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allocd, + jl_gc_pagemeta_t **lazily_freed, jl_gc_pagemeta_t *pg, jl_taggedvalue_t **pfl, int sweep_full, int osize) JL_NOTSAFEPOINT { char *data = pg->data; jl_taggedvalue_t *v = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET); @@ -1356,24 +1348,23 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t size_t old_nfree = pg->nfree; size_t nfree; + int reuse_page = 1; + int freed_lazily = 0; int freedall = 1; int pg_skpd = 1; if (!pg->has_marked) { + reuse_page = 0; + #ifdef _P64 // lazy version: (empty) if the whole page was already unused, free it (return it to the pool) // eager version: (freedall) free page as soon as possible // the eager one uses less memory. 
// FIXME - need to do accounting on a per-thread basis // on quick sweeps, keep a few pages empty but allocated for performance if (!sweep_full && lazy_freed_pages <= default_collect_interval / GC_PAGE_SZ) { - jl_ptls_t ptls2 = gc_all_tls_states[pg->thread_n]; - jl_taggedvalue_t *begin = gc_reset_page(ptls2, p, pg, p->newpages); - p->newpages = begin; - begin->next = NULL; lazy_freed_pages++; + freed_lazily = 1; } - else { - jl_gc_free_page(data); - } + #endif nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / osize; goto done; } @@ -1440,97 +1431,31 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t nfree = pg->nfree; done: + if (reuse_page) { + push_page_metadata_back(allocd, pg); + } + else if (freed_lazily) { + push_page_metadata_back(lazily_freed, pg); + } + else { + jl_gc_free_page(pg); + push_lf_page_metadata_back(&global_page_pool_freed, pg); + } gc_time_count_page(freedall, pg_skpd); gc_num.freed += (nfree - old_nfree) * osize; return pfl; } // the actual sweeping over all allocated pages in a memory pool -STATIC_INLINE void sweep_pool_page(jl_taggedvalue_t ***pfl, jl_gc_pagemeta_t *pg, int sweep_full) JL_NOTSAFEPOINT +STATIC_INLINE void gc_sweep_pool_page(jl_taggedvalue_t ***pfl, jl_gc_pagemeta_t **allocd, + jl_gc_pagemeta_t **lazily_freed, jl_gc_pagemeta_t *pg, int sweep_full) JL_NOTSAFEPOINT { int p_n = pg->pool_n; int t_n = pg->thread_n; jl_ptls_t ptls2 = gc_all_tls_states[t_n]; jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n]; int osize = pg->osize; - pfl[t_n * JL_GC_N_POOLS + p_n] = sweep_page(p, pg, pfl[t_n * JL_GC_N_POOLS + p_n], sweep_full, osize); -} - -// sweep over a pagetable0 for all allocated pages -STATIC_INLINE int sweep_pool_pagetable0(jl_taggedvalue_t ***pfl, pagetable0_t *pagetable0, int sweep_full) JL_NOTSAFEPOINT -{ - unsigned ub = 0; - unsigned alloc = 0; - for (unsigned pg_i = 0; pg_i <= pagetable0->ub; pg_i++) { - uint32_t line = pagetable0->allocmap[pg_i]; - unsigned j; - if (!line) - continue; - ub = pg_i; - 
alloc = 1; - for (j = 0; line; j++, line >>= 1) { - unsigned next = ffs_u32(line); - j += next; - line >>= next; - jl_gc_pagemeta_t *pg = pagetable0->meta[pg_i * 32 + j]; - sweep_pool_page(pfl, pg, sweep_full); - } - } - pagetable0->ub = ub; - return alloc; -} - -// sweep over pagetable1 for all pagetable0 that may contain allocated pages -STATIC_INLINE int sweep_pool_pagetable1(jl_taggedvalue_t ***pfl, pagetable1_t *pagetable1, int sweep_full) JL_NOTSAFEPOINT -{ - unsigned ub = 0; - unsigned alloc = 0; - for (unsigned pg_i = 0; pg_i <= pagetable1->ub; pg_i++) { - uint32_t line = pagetable1->allocmap0[pg_i]; - unsigned j; - for (j = 0; line; j++, line >>= 1) { - unsigned next = ffs_u32(line); - j += next; - line >>= next; - pagetable0_t *pagetable0 = pagetable1->meta0[pg_i * 32 + j]; - if (pagetable0 && !sweep_pool_pagetable0(pfl, pagetable0, sweep_full)) - pagetable1->allocmap0[pg_i] &= ~(1 << j); // no allocations found, remember that for next time - } - if (pagetable1->allocmap0[pg_i]) { - ub = pg_i; - alloc = 1; - } - } - pagetable1->ub = ub; - return alloc; -} - -// sweep over all memory for all pagetable1 that may contain allocated pages -static void sweep_pool_pagetable(jl_taggedvalue_t ***pfl, int sweep_full) JL_NOTSAFEPOINT -{ - if (REGION2_PG_COUNT == 1) { // compile-time optimization - pagetable1_t *pagetable1 = memory_map.meta1[0]; - if (pagetable1 != NULL) - sweep_pool_pagetable1(pfl, pagetable1, sweep_full); - return; - } - unsigned ub = 0; - for (unsigned pg_i = 0; pg_i <= memory_map.ub; pg_i++) { - uint32_t line = memory_map.allocmap1[pg_i]; - unsigned j; - for (j = 0; line; j++, line >>= 1) { - unsigned next = ffs_u32(line); - j += next; - line >>= next; - pagetable1_t *pagetable1 = memory_map.meta1[pg_i * 32 + j]; - if (pagetable1 && !sweep_pool_pagetable1(pfl, pagetable1, sweep_full)) - memory_map.allocmap1[pg_i] &= ~(1 << j); // no allocations found, remember that for next time - } - if (memory_map.allocmap1[pg_i]) { - ub = pg_i; - } - } - 
memory_map.ub = ub; + pfl[t_n * JL_GC_N_POOLS + p_n] = gc_sweep_page(p, allocd, lazily_freed, pg, pfl[t_n * JL_GC_N_POOLS + p_n], sweep_full, osize); } // sweep over all memory that is being used and not in a pool @@ -1584,7 +1509,7 @@ static void gc_sweep_pool(int sweep_full) jl_gc_pool_t *p = &ptls2->heap.norm_pools[i]; jl_taggedvalue_t *last = p->freelist; if (last != NULL) { - jl_gc_pagemeta_t *pg = jl_assume(page_metadata(last)); + jl_gc_pagemeta_t *pg = jl_assume(page_metadata_unsafe(last)); gc_pool_sync_nfree(pg, last); pg->has_young = 1; } @@ -1594,17 +1519,35 @@ static void gc_sweep_pool(int sweep_full) last = p->newpages; if (last != NULL) { char *last_p = (char*)last; - jl_gc_pagemeta_t *pg = jl_assume(page_metadata(last_p - 1)); + jl_gc_pagemeta_t *pg = jl_assume(page_metadata_unsafe(last_p - 1)); assert(last_p - gc_page_data(last_p - 1) >= GC_PAGE_OFFSET); pg->nfree = (GC_PAGE_SZ - (last_p - gc_page_data(last_p - 1))) / p->osize; pg->has_young = 1; } p->newpages = NULL; } + jl_gc_pagemeta_t *pg = ptls2->page_metadata_lazily_freed; + while (pg != NULL) { + jl_gc_pagemeta_t *pg2 = pg->next; + lazy_freed_pages++; + pg = pg2; + } } // the actual sweeping - sweep_pool_pagetable(pfl, sweep_full); + for (int t_i = 0; t_i < n_threads; t_i++) { + jl_ptls_t ptls2 = gc_all_tls_states[t_i]; + if (ptls2 != NULL) { + jl_gc_pagemeta_t *allocd = NULL; + jl_gc_pagemeta_t *pg = ptls2->page_metadata_allocd; + while (pg != NULL) { + jl_gc_pagemeta_t *pg2 = pg->next; + gc_sweep_pool_page(pfl, &allocd, &ptls2->page_metadata_lazily_freed, pg, sweep_full); + pg = pg2; + } + ptls2->page_metadata_allocd = allocd; + } + } // null out terminal pointers of free lists for (int t_i = 0; t_i < n_threads; t_i++) { @@ -2796,29 +2739,19 @@ void gc_mark_and_steal(jl_ptls_t ptls) } } -#define GC_BACKOFF_MIN 4 -#define GC_BACKOFF_MAX 12 - -void gc_mark_backoff(int *i) -{ - if (*i < GC_BACKOFF_MAX) { - (*i)++; - } - for (int j = 0; j < (1 << *i); j++) { - jl_cpu_pause(); - } -} - void 
gc_mark_loop_parallel(jl_ptls_t ptls, int master) { int backoff = GC_BACKOFF_MIN; if (master) { jl_atomic_store(&gc_master_tid, ptls->tid); // Wake threads up and try to do some work - uv_mutex_lock(&gc_threads_lock); jl_atomic_fetch_add(&gc_n_threads_marking, 1); - uv_cond_broadcast(&gc_threads_cond); - uv_mutex_unlock(&gc_threads_lock); + for (int i = gc_first_tid; i < gc_first_tid + jl_n_gcthreads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + uv_mutex_lock(&ptls2->sleep_lock); + uv_cond_signal(&ptls2->wake_signal); + uv_mutex_unlock(&ptls2->sleep_lock); + } gc_mark_and_steal(ptls); jl_atomic_fetch_add(&gc_n_threads_marking, -1); } @@ -2830,7 +2763,7 @@ void gc_mark_loop_parallel(jl_ptls_t ptls, int master) } jl_atomic_fetch_add(&gc_n_threads_marking, -1); // Failed to steal - gc_mark_backoff(&backoff); + gc_backoff(&backoff); } } @@ -3574,13 +3507,10 @@ void jl_init_thread_heap(jl_ptls_t ptls) // System-wide initializations void jl_gc_init(void) { - JL_MUTEX_INIT(&heapsnapshot_lock, "heapsnapshot_lock"); JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock"); uv_mutex_init(&gc_cache_lock); uv_mutex_init(&gc_perm_lock); - uv_mutex_init(&gc_threads_lock); - uv_cond_init(&gc_threads_cond); jl_gc_init_page(); jl_gc_debug_init(); @@ -4031,7 +3961,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) { p = (char *) p - 1; jl_gc_pagemeta_t *meta = page_metadata(p); - if (meta) { + if (meta != NULL) { char *page = gc_page_data(p); // offset within page. 
size_t off = (char *)p - page; diff --git a/src/gc.h b/src/gc.h index 47aab660c0981..bfa2a0fba8f59 100644 --- a/src/gc.h +++ b/src/gc.h @@ -143,7 +143,8 @@ typedef struct _mallocarray_t { } mallocarray_t; // pool page metadata -typedef struct { +typedef struct _jl_gc_pagemeta_t { + struct _jl_gc_pagemeta_t *next; // index of pool that owns this page uint8_t pool_n; // Whether any cell in the page is marked @@ -177,28 +178,55 @@ typedef struct { char *data; } jl_gc_pagemeta_t; -// Page layout: -// Newpage freelist: sizeof(void*) -// Padding: GC_PAGE_OFFSET - sizeof(void*) -// Blocks: osize * n -// Tag: sizeof(jl_taggedvalue_t) -// Data: <= osize - sizeof(jl_taggedvalue_t) +typedef struct { + _Atomic(jl_gc_pagemeta_t *) page_metadata_back; +} jl_gc_global_page_pool_t; + +extern jl_gc_global_page_pool_t global_page_pool_clean; +extern jl_gc_global_page_pool_t global_page_pool_freed; + +#define GC_BACKOFF_MIN 4 +#define GC_BACKOFF_MAX 12 + +STATIC_INLINE void gc_backoff(int *i) JL_NOTSAFEPOINT +{ + if (*i < GC_BACKOFF_MAX) { + (*i)++; + } + for (int j = 0; j < (1 << *i); j++) { + jl_cpu_pause(); + } +} + +// Lock-free stack implementation taken +// from Herlihy's "The Art of Multiprocessor Programming" + +STATIC_INLINE void push_lf_page_metadata_back(jl_gc_global_page_pool_t *pool, jl_gc_pagemeta_t *elt) JL_NOTSAFEPOINT +{ + while (1) { + jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->page_metadata_back); + elt->next = old_back; + if (jl_atomic_cmpswap(&pool->page_metadata_back, &old_back, elt)) { + break; + } + jl_cpu_pause(); + } +} + +STATIC_INLINE jl_gc_pagemeta_t *pop_lf_page_metadata_back(jl_gc_global_page_pool_t *pool) JL_NOTSAFEPOINT +{ + while (1) { + jl_gc_pagemeta_t *old_back = jl_atomic_load_relaxed(&pool->page_metadata_back); + if (old_back == NULL) { + return NULL; + } + if (jl_atomic_cmpswap(&pool->page_metadata_back, &old_back, old_back->next)) { + return old_back; + } + jl_cpu_pause(); + } +} -// Memory map: -// The complete address space 
is divided up into a multi-level page table. -// The three levels have similar but slightly different structures: -// - pagetable0_t: the bottom/leaf level (covers the contiguous addresses) -// - pagetable1_t: the middle level -// - pagetable2_t: the top/leaf level (covers the entire virtual address space) -// Corresponding to these similar structures is a large amount of repetitive -// code that is nearly the same but not identical. It could be made less -// repetitive with C macros, but only at the cost of debuggability. The specialized -// structure of this representation allows us to partially unroll and optimize -// various conditions at each level. - -// The following constants define the branching factors at each level. -// The constants and GC_PAGE_LG2 must therefore sum to sizeof(void*). -// They should all be multiples of 32 (sizeof(uint32_t)) except that REGION2_PG_COUNT may also be 1. #ifdef _P64 #define REGION0_PG_COUNT (1 << 16) #define REGION1_PG_COUNT (1 << 16) @@ -217,35 +245,112 @@ typedef struct { // define the representation of the levels of the page-table (0 to 2) typedef struct { - jl_gc_pagemeta_t *meta[REGION0_PG_COUNT]; - uint32_t allocmap[REGION0_PG_COUNT / 32]; - uint32_t freemap[REGION0_PG_COUNT / 32]; - // store a lower bound of the first free page in each region - int lb; - // an upper bound of the last non-free page - int ub; + uint8_t meta[REGION0_PG_COUNT]; } pagetable0_t; typedef struct { pagetable0_t *meta0[REGION1_PG_COUNT]; - uint32_t allocmap0[REGION1_PG_COUNT / 32]; - uint32_t freemap0[REGION1_PG_COUNT / 32]; - // store a lower bound of the first free page in each region - int lb; - // an upper bound of the last non-free page - int ub; } pagetable1_t; typedef struct { pagetable1_t *meta1[REGION2_PG_COUNT]; - uint32_t allocmap1[(REGION2_PG_COUNT + 31) / 32]; - uint32_t freemap1[(REGION2_PG_COUNT + 31) / 32]; - // store a lower bound of the first free page in each region - int lb; - // an upper bound of the last non-free page - 
int ub; } pagetable_t; +extern pagetable_t alloc_map; + +STATIC_INLINE uint8_t gc_alloc_map_is_set(char *_data) JL_NOTSAFEPOINT +{ + uintptr_t data = ((uintptr_t)_data); + unsigned i; + i = REGION_INDEX(data); + pagetable1_t *r1 = alloc_map.meta1[i]; + if (r1 == NULL) + return 0; + i = REGION1_INDEX(data); + pagetable0_t *r0 = r1->meta0[i]; + if (r0 == NULL) + return 0; + i = REGION0_INDEX(data); + return r0->meta[i]; +} + +STATIC_INLINE void gc_alloc_map_set(char *_data, uint8_t v) JL_NOTSAFEPOINT +{ + uintptr_t data = ((uintptr_t)_data); + unsigned i; + i = REGION_INDEX(data); + pagetable1_t *r1 = alloc_map.meta1[i]; + assert(r1 != NULL); + i = REGION1_INDEX(data); + pagetable0_t *r0 = r1->meta0[i]; + assert(r0 != NULL); + i = REGION0_INDEX(data); + r0->meta[i] = v; +} + +STATIC_INLINE void gc_alloc_map_maybe_create(char *_data) JL_NOTSAFEPOINT +{ + uintptr_t data = ((uintptr_t)_data); + unsigned i; + i = REGION_INDEX(data); + pagetable1_t *r1 = alloc_map.meta1[i]; + if (r1 == NULL) { + r1 = (pagetable1_t*)calloc_s(sizeof(pagetable1_t)); + alloc_map.meta1[i] = r1; + } + i = REGION1_INDEX(data); + pagetable0_t *r0 = r1->meta0[i]; + if (r0 == NULL) { + r0 = (pagetable0_t*)calloc_s(sizeof(pagetable0_t)); + r1->meta0[i] = r0; + } +} + +// Page layout: +// Metadata pointer: sizeof(jl_gc_pagemeta_t*) +// Padding: GC_PAGE_OFFSET - sizeof(jl_gc_pagemeta_t*) +// Blocks: osize * n +// Tag: sizeof(jl_taggedvalue_t) +// Data: <= osize - sizeof(jl_taggedvalue_t) + +STATIC_INLINE char *gc_page_data(void *x) JL_NOTSAFEPOINT +{ + return (char*)(((uintptr_t)x >> GC_PAGE_LG2) << GC_PAGE_LG2); +} + +STATIC_INLINE jl_gc_pagemeta_t *page_metadata_unsafe(void *_data) JL_NOTSAFEPOINT +{ + return *(jl_gc_pagemeta_t**)(gc_page_data(_data)); +} + +STATIC_INLINE jl_gc_pagemeta_t *page_metadata(void *_data) JL_NOTSAFEPOINT +{ + if (!gc_alloc_map_is_set((char*)_data)) { + return NULL; + } + return page_metadata_unsafe(_data); +} + +STATIC_INLINE void set_page_metadata(jl_gc_pagemeta_t *pg) 
JL_NOTSAFEPOINT +{ + *(jl_gc_pagemeta_t**)(pg->data) = pg; +} + +STATIC_INLINE void push_page_metadata_back(jl_gc_pagemeta_t **ppg, jl_gc_pagemeta_t *elt) JL_NOTSAFEPOINT +{ + elt->next = *ppg; + *ppg = elt; +} + +STATIC_INLINE jl_gc_pagemeta_t *pop_page_metadata_back(jl_gc_pagemeta_t **ppg) JL_NOTSAFEPOINT +{ + jl_gc_pagemeta_t *v = *ppg; + if (*ppg != NULL) { + *ppg = (*ppg)->next; + } + return v; +} + #ifdef __clang_gcanalyzer__ /* clang may not have __builtin_ffs */ unsigned ffs_u32(uint32_t bitvec) JL_NOTSAFEPOINT; #else @@ -256,11 +361,11 @@ STATIC_INLINE unsigned ffs_u32(uint32_t bitvec) #endif extern jl_gc_num_t gc_num; -extern pagetable_t memory_map; extern bigval_t *big_objects_marked; extern arraylist_t finalizer_list_marked; extern arraylist_t to_finalize; extern int64_t lazy_freed_pages; +extern int gc_first_tid; extern int gc_n_threads; extern jl_ptls_t* gc_all_tls_states; @@ -269,12 +374,6 @@ STATIC_INLINE bigval_t *bigval_header(jl_taggedvalue_t *o) JL_NOTSAFEPOINT return container_of(o, bigval_t, header); } -// round an address inside a gcpage's data to its beginning -STATIC_INLINE char *gc_page_data(void *x) JL_NOTSAFEPOINT -{ - return (char*)(((uintptr_t)x >> GC_PAGE_LG2) << GC_PAGE_LG2); -} - STATIC_INLINE jl_taggedvalue_t *page_pfl_beg(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT { return (jl_taggedvalue_t*)(p->data + p->fl_begin_offset); @@ -312,52 +411,6 @@ STATIC_INLINE void *gc_ptr_clear_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT NOINLINE uintptr_t gc_get_stack_ptr(void); -STATIC_INLINE jl_gc_pagemeta_t *page_metadata(void *_data) JL_NOTSAFEPOINT -{ - uintptr_t data = ((uintptr_t)_data); - unsigned i; - i = REGION_INDEX(data); - pagetable1_t *r1 = memory_map.meta1[i]; - if (!r1) - return NULL; - i = REGION1_INDEX(data); - pagetable0_t *r0 = r1->meta0[i]; - if (!r0) - return NULL; - i = REGION0_INDEX(data); - return r0->meta[i]; -} - -struct jl_gc_metadata_ext { - pagetable1_t *pagetable1; - pagetable0_t *pagetable0; - jl_gc_pagemeta_t *meta; - 
unsigned pagetable_i32, pagetable_i; - unsigned pagetable1_i32, pagetable1_i; - unsigned pagetable0_i32, pagetable0_i; -}; - -STATIC_INLINE struct jl_gc_metadata_ext page_metadata_ext(void *_data) JL_NOTSAFEPOINT -{ - uintptr_t data = (uintptr_t)_data; - struct jl_gc_metadata_ext info; - unsigned i; - i = REGION_INDEX(data); - info.pagetable_i = i % 32; - info.pagetable_i32 = i / 32; - info.pagetable1 = memory_map.meta1[i]; - i = REGION1_INDEX(data); - info.pagetable1_i = i % 32; - info.pagetable1_i32 = i / 32; - info.pagetable0 = info.pagetable1->meta0[i]; - i = REGION0_INDEX(data); - info.pagetable0_i = i % 32; - info.pagetable0_i32 = i / 32; - info.meta = info.pagetable0->meta[i]; - assert(info.meta); - return info; -} - STATIC_INLINE void gc_big_object_unlink(const bigval_t *hdr) JL_NOTSAFEPOINT { *hdr->prev = hdr->next; @@ -375,14 +428,10 @@ STATIC_INLINE void gc_big_object_link(bigval_t *hdr, bigval_t **list) JL_NOTSAFE *list = hdr; } -extern uv_mutex_t gc_threads_lock; -extern uv_cond_t gc_threads_cond; extern _Atomic(int) gc_n_threads_marking; void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq); -void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, - jl_value_t **fl_end) JL_NOTSAFEPOINT; -void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, - size_t start) JL_NOTSAFEPOINT; +void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t **fl_end) JL_NOTSAFEPOINT; +void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, size_t start) JL_NOTSAFEPOINT; void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq); void gc_mark_loop_serial(jl_ptls_t ptls); void gc_mark_loop_parallel(jl_ptls_t ptls, int master); @@ -391,9 +440,9 @@ void jl_gc_debug_init(void); // GC pages -void jl_gc_init_page(void); +void jl_gc_init_page(void) JL_NOTSAFEPOINT; NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT; -void jl_gc_free_page(void *p) JL_NOTSAFEPOINT; +void jl_gc_free_page(jl_gc_pagemeta_t 
*p) JL_NOTSAFEPOINT; // GC debug diff --git a/src/julia_threads.h b/src/julia_threads.h index c8242d6d6eb0f..f4c235243e684 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -198,6 +198,7 @@ typedef struct { } jl_gc_mark_cache_t; struct _jl_bt_element_t; +struct _jl_gc_pagemeta_t; // This includes all the thread local states we care about for a thread. // Changes to TLS field types must be reflected in codegen. @@ -260,6 +261,8 @@ typedef struct _jl_tls_states_t { #endif jl_thread_t system_id; arraylist_t finalizers; + struct _jl_gc_pagemeta_t *page_metadata_allocd; + struct _jl_gc_pagemeta_t *page_metadata_lazily_freed; jl_gc_markqueue_t mark_queue; jl_gc_mark_cache_t gc_cache; arraylist_t sweep_objs; diff --git a/src/partr.c b/src/partr.c index 403f911b1284f..fb140032aaa1f 100644 --- a/src/partr.c +++ b/src/partr.c @@ -108,6 +108,12 @@ void jl_init_threadinginfra(void) void JL_NORETURN jl_finish_task(jl_task_t *t); + +static int may_mark(void) JL_NOTSAFEPOINT +{ + return (jl_atomic_load(&gc_n_threads_marking) > 0); +} + // gc thread function void jl_gc_threadfun(void *arg) { @@ -124,11 +130,11 @@ void jl_gc_threadfun(void *arg) free(targ); while (1) { - uv_mutex_lock(&gc_threads_lock); - while (jl_atomic_load(&gc_n_threads_marking) == 0) { - uv_cond_wait(&gc_threads_cond, &gc_threads_lock); + uv_mutex_lock(&ptls->sleep_lock); + while (!may_mark()) { + uv_cond_wait(&ptls->wake_signal, &ptls->sleep_lock); } - uv_mutex_unlock(&gc_threads_lock); + uv_mutex_unlock(&ptls->sleep_lock); gc_mark_loop_parallel(ptls, 0); } } diff --git a/src/support/dtypes.h b/src/support/dtypes.h index a30fe85ccc0d0..da570921c101c 100644 --- a/src/support/dtypes.h +++ b/src/support/dtypes.h @@ -340,6 +340,23 @@ STATIC_INLINE void jl_store_unaligned_i16(void *ptr, uint16_t val) JL_NOTSAFEPOI memcpy(ptr, &val, 2); } +STATIC_INLINE void *calloc_s(size_t sz) JL_NOTSAFEPOINT { + int last_errno = errno; +#ifdef _OS_WINDOWS_ + DWORD last_error = GetLastError(); +#endif + void *p = 
calloc(sz == 0 ? 1 : sz, 1); + if (p == NULL) { + perror("(julia) calloc"); + abort(); + } +#ifdef _OS_WINDOWS_ + SetLastError(last_error); +#endif + errno = last_errno; + return p; +} + STATIC_INLINE void *malloc_s(size_t sz) JL_NOTSAFEPOINT { int last_errno = errno; #ifdef _OS_WINDOWS_ From dd1f03df8d3b35f26b7f2ec50e52ee66351f2d44 Mon Sep 17 00:00:00 2001 From: Daniel Karrasch Date: Sun, 25 Jun 2023 14:01:20 +0200 Subject: [PATCH 227/290] Mild `AbstractQ` review and refactoring (#49714) --- stdlib/LinearAlgebra/src/LinearAlgebra.jl | 12 +- stdlib/LinearAlgebra/src/abstractq.jl | 227 +++++++++------------- stdlib/LinearAlgebra/src/hessenberg.jl | 5 +- stdlib/LinearAlgebra/src/lq.jl | 8 +- stdlib/LinearAlgebra/src/qr.jl | 10 +- stdlib/LinearAlgebra/test/abstractq.jl | 15 +- stdlib/LinearAlgebra/test/hessenberg.jl | 2 +- stdlib/LinearAlgebra/test/lq.jl | 3 +- stdlib/LinearAlgebra/test/qr.jl | 4 +- 9 files changed, 122 insertions(+), 164 deletions(-) diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl index 50d82c497282d..386de771d666f 100644 --- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl +++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl @@ -9,14 +9,14 @@ module LinearAlgebra import Base: \, /, *, ^, +, -, == import Base: USE_BLAS64, abs, acos, acosh, acot, acoth, acsc, acsch, adjoint, asec, asech, - asin, asinh, atan, atanh, axes, big, broadcast, ceil, cis, conj, convert, copy, copyto!, - copymutable, cos, cosh, cot, coth, csc, csch, eltype, exp, fill!, floor, getindex, hcat, - getproperty, imag, inv, isapprox, isequal, isone, iszero, IndexStyle, kron, kron!, - length, log, map, ndims, one, oneunit, parent, permutedims, power_by_squaring, - print_matrix, promote_rule, real, round, sec, sech, setindex!, show, similar, sin, + asin, asinh, atan, atanh, axes, big, broadcast, ceil, cis, collect, conj, convert, copy, + copyto!, copymutable, cos, cosh, cot, coth, csc, csch, eltype, exp, fill!, floor, + getindex, hcat, 
getproperty, imag, inv, isapprox, isequal, isone, iszero, IndexStyle, + kron, kron!, length, log, map, ndims, one, oneunit, parent, permutedims, + power_by_squaring, promote_rule, real, sec, sech, setindex!, show, similar, sin, sincos, sinh, size, sqrt, strides, stride, tan, tanh, transpose, trunc, typed_hcat, vec, view, zero -using Base: IndexLinear, promote_eltype, promote_op, promote_typeof, +using Base: IndexLinear, promote_eltype, promote_op, promote_typeof, print_matrix, @propagate_inbounds, reduce, typed_hvcat, typed_vcat, require_one_based_indexing, splat using Base.Broadcast: Broadcasted, broadcasted diff --git a/stdlib/LinearAlgebra/src/abstractq.jl b/stdlib/LinearAlgebra/src/abstractq.jl index 88610dac2e6f6..93358d052d50b 100644 --- a/stdlib/LinearAlgebra/src/abstractq.jl +++ b/stdlib/LinearAlgebra/src/abstractq.jl @@ -35,6 +35,7 @@ convert(::Type{AbstractQ{T}}, adjQ::AdjointQ{T}) where {T} = adjQ convert(::Type{AbstractQ{T}}, adjQ::AdjointQ) where {T} = convert(AbstractQ{T}, adjQ.Q)' # ... to matrix +collect(Q::AbstractQ) = copyto!(Matrix{eltype(Q)}(undef, size(Q)), Q) Matrix{T}(Q::AbstractQ) where {T} = convert(Matrix{T}, Q*I) # generic fallback, yields square matrix Matrix{T}(adjQ::AdjointQ{S}) where {T,S} = convert(Matrix{T}, lmul!(adjQ, Matrix{S}(I, size(adjQ)))) Matrix(Q::AbstractQ{T}) where {T} = Matrix{T}(Q) @@ -56,6 +57,15 @@ function size(Q::AbstractQ, dim::Integer) end size(adjQ::AdjointQ) = reverse(size(adjQ.Q)) +# comparison +(==)(Q::AbstractQ, A::AbstractMatrix) = lmul!(Q, Matrix{eltype(Q)}(I, size(A))) == A +(==)(A::AbstractMatrix, Q::AbstractQ) = Q == A +(==)(Q::AbstractQ, P::AbstractQ) = Matrix(Q) == Matrix(P) +isapprox(Q::AbstractQ, A::AbstractMatrix; kwargs...) = + isapprox(lmul!(Q, Matrix{eltype(Q)}(I, size(A))), A, kwargs...) +isapprox(A::AbstractMatrix, Q::AbstractQ; kwargs...) = isapprox(Q, A, kwargs...) +isapprox(Q::AbstractQ, P::AbstractQ; kwargs...) = isapprox(Matrix(Q), Matrix(P), kwargs...) 
+ # pseudo-array behaviour, required for indexing with `begin` or `end` axes(Q::AbstractQ) = map(Base.oneto, size(Q)) axes(Q::AbstractQ, d::Integer) = d in (1, 2) ? axes(Q)[d] : Base.OneTo(1) @@ -125,14 +135,31 @@ function show(io::IO, ::MIME{Symbol("text/plain")}, Q::AbstractQ) end # multiplication +# generically, treat AbstractQ like a matrix with its definite size +qsize_check(Q::AbstractQ, B::AbstractVecOrMat) = + size(Q, 2) == size(B, 1) || + throw(DimensionMismatch("second dimension of Q, $(size(Q,2)), must coincide with first dimension of B, $(size(B,1))")) +qsize_check(A::AbstractVecOrMat, Q::AbstractQ) = + size(A, 2) == size(Q, 1) || + throw(DimensionMismatch("second dimension of A, $(size(A,2)), must coincide with first dimension of Q, $(size(Q,1))")) +qsize_check(Q::AbstractQ, P::AbstractQ) = + size(Q, 2) == size(P, 1) || + throw(DimensionMismatch("second dimension of A, $(size(Q,2)), must coincide with first dimension of B, $(size(P,1))")) + (*)(Q::AbstractQ, J::UniformScaling) = Q*J.λ function (*)(Q::AbstractQ, b::Number) T = promote_type(eltype(Q), typeof(b)) lmul!(convert(AbstractQ{T}, Q), Matrix{T}(b*I, size(Q))) end -function (*)(A::AbstractQ, B::AbstractVecOrMat) - T = promote_type(eltype(A), eltype(B)) - lmul!(convert(AbstractQ{T}, A), copy_similar(B, T)) +function (*)(Q::AbstractQ, B::AbstractVector) + T = promote_type(eltype(Q), eltype(B)) + qsize_check(Q, B) + mul!(similar(B, T, size(Q, 1)), convert(AbstractQ{T}, Q), B) +end +function (*)(Q::AbstractQ, B::AbstractMatrix) + T = promote_type(eltype(Q), eltype(B)) + qsize_check(Q, B) + mul!(similar(B, T, (size(Q, 1), size(B, 2))), convert(AbstractQ{T}, Q), B) end (*)(J::UniformScaling, Q::AbstractQ) = J.λ*Q @@ -140,21 +167,28 @@ function (*)(a::Number, Q::AbstractQ) T = promote_type(typeof(a), eltype(Q)) rmul!(Matrix{T}(a*I, size(Q)), convert(AbstractQ{T}, Q)) end -*(a::AbstractVector, Q::AbstractQ) = reshape(a, length(a), 1) * Q +function (*)(A::AbstractVector, Q::AbstractQ) + T = 
promote_type(eltype(A), eltype(Q)) + qsize_check(A, Q) + return mul!(similar(A, T, length(A)), A, convert(AbstractQ{T}, Q)) +end function (*)(A::AbstractMatrix, Q::AbstractQ) T = promote_type(eltype(A), eltype(Q)) - return rmul!(copy_similar(A, T), convert(AbstractQ{T}, Q)) + qsize_check(A, Q) + return mul!(similar(A, T, (size(A, 1), size(Q, 2))), A, convert(AbstractQ{T}, Q)) end (*)(u::AdjointAbsVec, Q::AbstractQ) = (Q'u')' ### Q*Q (including adjoints) -*(Q::AbstractQ, P::AbstractQ) = Q * (P*I) +(*)(Q::AbstractQ, P::AbstractQ) = Q * (P*I) ### mul! -function mul!(C::AbstractVecOrMat{T}, Q::AbstractQ{T}, B::Union{AbstractVecOrMat{T},AbstractQ{T}}) where {T} +function mul!(C::AbstractVecOrMat{T}, Q::AbstractQ{T}, B::Union{AbstractVecOrMat,AbstractQ}) where {T} require_one_based_indexing(C, B) - mB = size(B, 1) - mC = size(C, 1) + mB, nB = size(B, 1), size(B, 2) + mC, nC = size(C, 1), size(C, 2) + qsize_check(Q, B) + nB != nC && throw(DimensionMismatch()) if mB < mC inds = CartesianIndices(axes(B)) copyto!(view(C, inds), B) @@ -164,9 +198,21 @@ function mul!(C::AbstractVecOrMat{T}, Q::AbstractQ{T}, B::Union{AbstractVecOrMat return lmul!(Q, copyto!(C, B)) end end -mul!(C::AbstractVecOrMat{T}, A::AbstractVecOrMat{T}, Q::AbstractQ{T}) where {T} = rmul!(copyto!(C, A), Q) -mul!(C::AbstractVecOrMat{T}, adjQ::AdjointQ{T}, B::AbstractVecOrMat{T}) where {T} = lmul!(adjQ, copyto!(C, B)) -mul!(C::AbstractVecOrMat{T}, A::AbstractVecOrMat{T}, adjQ::AdjointQ{T}) where {T} = rmul!(copyto!(C, A), adjQ) +function mul!(C::AbstractVecOrMat{T}, A::AbstractVecOrMat, Q::AbstractQ{T}) where {T} + require_one_based_indexing(C, A) + mA, nA = size(A, 1), size(A, 2) + mC, nC = size(C, 1), size(C, 2) + mA != mC && throw(DimensionMismatch()) + qsize_check(A, Q) + if nA < nC + inds = CartesianIndices(axes(A)) + copyto!(view(C, inds), A) + C[CartesianIndices((axes(C, 1), nA+1:nC))] .= zero(T) + return rmul!(C, Q) + else + return rmul!(copyto!(C, A), Q) + end +end ### division \(Q::AbstractQ, 
A::AbstractVecOrMat) = Q'*A @@ -319,7 +365,7 @@ rmul!(A::StridedVecOrMat{T}, B::QRCompactWYQ{T,<:StridedMatrix}) where {T<:BlasF LAPACK.gemqrt!('R', 'N', B.factors, B.T, A) rmul!(A::StridedVecOrMat{T}, B::QRPackedQ{T,<:StridedMatrix}) where {T<:BlasFloat} = LAPACK.ormqr!('R', 'N', B.factors, B.τ, A) -function rmul!(A::AbstractMatrix, Q::QRPackedQ) +function rmul!(A::AbstractVecOrMat, Q::QRPackedQ) require_one_based_indexing(A) mQ, nQ = size(Q.factors) mA, nA = size(A,1), size(A,2) @@ -354,7 +400,7 @@ rmul!(A::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:QRPackedQ{T}}) where {T<:Bla (Q = adjQ.Q; LAPACK.ormqr!('R', 'T', Q.factors, Q.τ, A)) rmul!(A::StridedVecOrMat{T}, adjQ::AdjointQ{<:Any,<:QRPackedQ{T}}) where {T<:BlasComplex} = (Q = adjQ.Q; LAPACK.ormqr!('R', 'C', Q.factors, Q.τ, A)) -function rmul!(A::AbstractMatrix, adjQ::AdjointQ{<:Any,<:QRPackedQ}) +function rmul!(A::AbstractVecOrMat, adjQ::AdjointQ{<:Any,<:QRPackedQ}) require_one_based_indexing(A) Q = adjQ.Q mQ, nQ = size(Q.factors) @@ -459,42 +505,12 @@ lmul!(adjQ::AdjointQ{<:Any,<:HessenbergQ{T}}, X::Adjoint{T,<:StridedVecOrMat{T}} rmul!(X::Adjoint{T,<:StridedVecOrMat{T}}, adjQ::AdjointQ{<:Any,<:HessenbergQ{T}}) where {T} = lmul!(adjQ', X')' # flexible left-multiplication (and adjoint right-multiplication) -function (*)(Q::Union{QRPackedQ,QRCompactWYQ,HessenbergQ}, b::AbstractVector) - T = promote_type(eltype(Q), eltype(b)) - if size(Q.factors, 1) == length(b) - bnew = copy_similar(b, T) - elseif size(Q.factors, 2) == length(b) - bnew = [b; zeros(T, size(Q.factors, 1) - length(b))] - else - throw(DimensionMismatch("vector must have length either $(size(Q.factors, 1)) or $(size(Q.factors, 2))")) - end - lmul!(convert(AbstractQ{T}, Q), bnew) -end -function (*)(Q::Union{QRPackedQ,QRCompactWYQ,HessenbergQ}, B::AbstractMatrix) - T = promote_type(eltype(Q), eltype(B)) - if size(Q.factors, 1) == size(B, 1) - Bnew = copy_similar(B, T) - elseif size(Q.factors, 2) == size(B, 1) - Bnew = [B; zeros(T, size(Q.factors, 1) - 
size(B,1), size(B, 2))] - else - throw(DimensionMismatch("first dimension of matrix must have size either $(size(Q.factors, 1)) or $(size(Q.factors, 2))")) - end - lmul!(convert(AbstractQ{T}, Q), Bnew) -end -function (*)(A::AbstractMatrix, adjQ::AdjointQ{<:Any,<:Union{QRPackedQ,QRCompactWYQ,HessenbergQ}}) - Q = adjQ.Q - T = promote_type(eltype(A), eltype(adjQ)) - adjQQ = convert(AbstractQ{T}, adjQ) - if size(A, 2) == size(Q.factors, 1) - AA = copy_similar(A, T) - return rmul!(AA, adjQQ) - elseif size(A, 2) == size(Q.factors, 2) - return rmul!([A zeros(T, size(A, 1), size(Q.factors, 1) - size(Q.factors, 2))], adjQQ) - else - throw(DimensionMismatch("matrix A has dimensions $(size(A)) but Q-matrix B has dimensions $(size(adjQ))")) - end -end -(*)(u::AdjointAbsVec, Q::AdjointQ{<:Any,<:Union{QRPackedQ,QRCompactWYQ,HessenbergQ}}) = (Q'u')' +qsize_check(Q::Union{QRPackedQ,QRCompactWYQ,HessenbergQ}, B::AbstractVecOrMat) = + size(B, 1) in size(Q.factors) || + throw(DimensionMismatch("first dimension of B, $(size(B,1)), must equal one of the dimensions of Q, $(size(Q.factors))")) +qsize_check(A::AbstractVecOrMat, adjQ::AdjointQ{<:Any,<:Union{QRPackedQ,QRCompactWYQ,HessenbergQ}}) = + (Q = adjQ.Q; size(A, 2) in size(Q.factors) || + throw(DimensionMismatch("second dimension of A, $(size(A,2)), must equal one of the dimensions of Q, $(size(Q.factors))"))) det(Q::HessenbergQ) = _det_tau(Q.τ) @@ -518,104 +534,41 @@ convert(::Type{AbstractQ{T}}, Q::LQPackedQ) where {T} = LQPackedQ{T}(Q) size(Q::LQPackedQ) = (n = size(Q.factors, 2); return n, n) ## Multiplication -### QB / QcB -lmul!(A::LQPackedQ{T}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = LAPACK.ormlq!('L','N',A.factors,A.τ,B) -lmul!(adjA::AdjointQ{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasReal} = - (A = adjA.Q; LAPACK.ormlq!('L', 'T', A.factors, A.τ, B)) -lmul!(adjA::AdjointQ{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} = - (A = adjA.Q; LAPACK.ormlq!('L', 'C', A.factors, A.τ, B)) +# 
out-of-place right application of LQPackedQs +# +# these methods: (1) check whether the applied-to matrix's (A's) appropriate dimension +# (columns for A_*, rows for Ac_*) matches the number of columns (nQ) of the LQPackedQ (Q), +# and if so effectively apply Q's square form to A without additional shenanigans; and +# (2) if the preceding dimensions do not match, check whether the appropriate dimension of +# A instead matches the number of rows of the matrix of which Q is a factor (i.e. +# size(Q.factors, 1)), and if so implicitly apply Q's truncated form to A by zero extending +# A as necessary for check (1) to pass (if possible) and then applying Q's square form -function (*)(adjA::AdjointQ{<:Any,<:LQPackedQ}, B::AbstractVector) - A = adjA.Q - T = promote_type(eltype(A), eltype(B)) - if length(B) == size(A.factors, 2) - C = copy_similar(B, T) - elseif length(B) == size(A.factors, 1) - C = [B; zeros(T, size(A.factors, 2) - size(A.factors, 1), size(B, 2))] - else - throw(DimensionMismatch("length of B, $(length(B)), must equal one of the dimensions of A, $(size(A))")) - end - lmul!(convert(AbstractQ{T}, adjA), C) -end -function (*)(adjA::AdjointQ{<:Any,<:LQPackedQ}, B::AbstractMatrix) - A = adjA.Q - T = promote_type(eltype(A), eltype(B)) - if size(B,1) == size(A.factors,2) - C = copy_similar(B, T) - elseif size(B,1) == size(A.factors,1) - C = [B; zeros(T, size(A.factors, 2) - size(A.factors, 1), size(B, 2))] - else - throw(DimensionMismatch("first dimension of B, $(size(B,1)), must equal one of the dimensions of A, $(size(A))")) - end - lmul!(convert(AbstractQ{T}, adjA), C) -end +qsize_check(adjQ::AdjointQ{<:Any,<:LQPackedQ}, B::AbstractVecOrMat) = + size(B, 1) in size(adjQ.Q.factors) || + throw(DimensionMismatch("first dimension of B, $(size(B,1)), must equal one of the dimensions of Q, $(size(adjQ.Q.factors))")) +qsize_check(A::AbstractVecOrMat, Q::LQPackedQ) = + size(A, 2) in size(Q.factors) || + throw(DimensionMismatch("second dimension of A, $(size(A,2)), must 
equal one of the dimensions of Q, $(size(Q.factors))")) # in-place right-application of LQPackedQs # these methods require that the applied-to matrix's (A's) number of columns # match the number of columns (nQ) of the LQPackedQ (Q) (necessary for in-place # operation, and the underlying LAPACK routine (ormlq) treats the implicit Q # as its (nQ-by-nQ) square form) -rmul!(A::StridedMatrix{T}, B::LQPackedQ{T}) where {T<:BlasFloat} = +rmul!(A::StridedVecOrMat{T}, B::LQPackedQ{T}) where {T<:BlasFloat} = LAPACK.ormlq!('R', 'N', B.factors, B.τ, A) -rmul!(A::StridedMatrix{T}, adjB::AdjointQ{<:Any,<:LQPackedQ{T}}) where {T<:BlasReal} = +rmul!(A::StridedVecOrMat{T}, adjB::AdjointQ{<:Any,<:LQPackedQ{T}}) where {T<:BlasReal} = (B = adjB.Q; LAPACK.ormlq!('R', 'T', B.factors, B.τ, A)) -rmul!(A::StridedMatrix{T}, adjB::AdjointQ{<:Any,<:LQPackedQ{T}}) where {T<:BlasComplex} = +rmul!(A::StridedVecOrMat{T}, adjB::AdjointQ{<:Any,<:LQPackedQ{T}}) where {T<:BlasComplex} = (B = adjB.Q; LAPACK.ormlq!('R', 'C', B.factors, B.τ, A)) -# out-of-place right application of LQPackedQs -# -# these methods: (1) check whether the applied-to matrix's (A's) appropriate dimension -# (columns for A_*, rows for Ac_*) matches the number of columns (nQ) of the LQPackedQ (Q), -# and if so effectively apply Q's square form to A without additional shenanigans; and -# (2) if the preceding dimensions do not match, check whether the appropriate dimension of -# A instead matches the number of rows of the matrix of which Q is a factor (i.e. 
-# size(Q.factors, 1)), and if so implicitly apply Q's truncated form to A by zero extending -# A as necessary for check (1) to pass (if possible) and then applying Q's square form -# -function (*)(A::AbstractVector, Q::LQPackedQ) - T = promote_type(eltype(A), eltype(Q)) - if 1 == size(Q.factors, 2) - C = copy_similar(A, T) - elseif 1 == size(Q.factors, 1) - C = zeros(T, length(A), size(Q.factors, 2)) - copyto!(C, 1, A, 1, length(A)) - else - _rightappdimmismatch("columns") - end - return rmul!(C, convert(AbstractQ{T}, Q)) -end -function (*)(A::AbstractMatrix, Q::LQPackedQ) - T = promote_type(eltype(A), eltype(Q)) - if size(A, 2) == size(Q.factors, 2) - C = copy_similar(A, T) - elseif size(A, 2) == size(Q.factors, 1) - C = zeros(T, size(A, 1), size(Q.factors, 2)) - copyto!(C, 1, A, 1, length(A)) - else - _rightappdimmismatch("columns") - end - return rmul!(C, convert(AbstractQ{T}, Q)) -end -function (*)(adjA::AdjointAbsMat, Q::LQPackedQ) - A = adjA.parent - T = promote_type(eltype(A), eltype(Q)) - if size(A, 1) == size(Q.factors, 2) - C = copy_similar(adjA, T) - elseif size(A, 1) == size(Q.factors, 1) - C = zeros(T, size(A, 2), size(Q.factors, 2)) - adjoint!(view(C, :, 1:size(A, 1)), A) - else - _rightappdimmismatch("rows") - end - return rmul!(C, convert(AbstractQ{T}, Q)) -end -(*)(u::AdjointAbsVec, Q::LQPackedQ) = (Q'u')' - -_rightappdimmismatch(rowsorcols) = - throw(DimensionMismatch(string("the number of $(rowsorcols) of the matrix on the left ", - "must match either (1) the number of columns of the (LQPackedQ) matrix on the right ", - "or (2) the number of rows of that (LQPackedQ) matrix's internal representation ", - "(the factorization's originating matrix's number of rows)"))) +### QB / QcB +lmul!(A::LQPackedQ{T}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = LAPACK.ormlq!('L','N',A.factors,A.τ,B) +lmul!(adjA::AdjointQ{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasReal} = + (A = adjA.Q; LAPACK.ormlq!('L', 'T', A.factors, A.τ, B)) 
+lmul!(adjA::AdjointQ{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} = + (A = adjA.Q; LAPACK.ormlq!('L', 'C', A.factors, A.τ, B)) # In LQ factorization, `Q` is expressed as the product of the adjoint of the # reflectors. Thus, `det` has to be conjugated. diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl index b5071b178de10..179f93f2cd6f2 100644 --- a/stdlib/LinearAlgebra/src/hessenberg.jl +++ b/stdlib/LinearAlgebra/src/hessenberg.jl @@ -449,8 +449,7 @@ julia> A = [4. 9. 7.; 4. 4. 1.; 4. 3. 2.] julia> F = hessenberg(A) Hessenberg{Float64, UpperHessenberg{Float64, Matrix{Float64}}, Matrix{Float64}, Vector{Float64}, Bool} -Q factor: -3×3 LinearAlgebra.HessenbergQ{Float64, Matrix{Float64}, Vector{Float64}, false} +Q factor: 3×3 LinearAlgebra.HessenbergQ{Float64, Matrix{Float64}, Vector{Float64}, false} H factor: 3×3 UpperHessenberg{Float64, Matrix{Float64}}: 4.0 -11.3137 -1.41421 @@ -477,7 +476,7 @@ function show(io::IO, mime::MIME"text/plain", F::Hessenberg) if !iszero(F.μ) print("\nwith shift μI for μ = ", F.μ) end - println(io, "\nQ factor:") + print(io, "\nQ factor: ") show(io, mime, F.Q) println(io, "\nH factor:") show(io, mime, F.H) diff --git a/stdlib/LinearAlgebra/src/lq.jl b/stdlib/LinearAlgebra/src/lq.jl index 33d794906c7e6..07d918c4374a5 100644 --- a/stdlib/LinearAlgebra/src/lq.jl +++ b/stdlib/LinearAlgebra/src/lq.jl @@ -27,8 +27,7 @@ L factor: 2×2 Matrix{Float64}: -8.60233 0.0 4.41741 -0.697486 -Q factor: -2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}} +Q factor: 2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}} julia> S.L * S.Q 2×2 Matrix{Float64}: @@ -97,8 +96,7 @@ L factor: 2×2 Matrix{Float64}: -8.60233 0.0 4.41741 -0.697486 -Q factor: -2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}} +Q factor: 2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}} julia> S.L * S.Q 2×2 Matrix{Float64}: @@ -154,7 +152,7 @@ 
function show(io::IO, mime::MIME{Symbol("text/plain")}, F::LQ) summary(io, F); println(io) println(io, "L factor:") show(io, mime, F.L) - println(io, "\nQ factor:") + print(io, "\nQ factor: ") show(io, mime, F.Q) end diff --git a/stdlib/LinearAlgebra/src/qr.jl b/stdlib/LinearAlgebra/src/qr.jl index 43d04ac5fa415..fe40fec78e801 100644 --- a/stdlib/LinearAlgebra/src/qr.jl +++ b/stdlib/LinearAlgebra/src/qr.jl @@ -314,8 +314,7 @@ julia> a = [1. 2.; 3. 4.] julia> qr!(a) LinearAlgebra.QRCompactWY{Float64, Matrix{Float64}, Matrix{Float64}} -Q factor: -2×2 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}} +Q factor: 2×2 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}} R factor: 2×2 Matrix{Float64}: -3.16228 -4.42719 @@ -379,7 +378,7 @@ Multiplication with respect to either full/square or non-full/square `Q` is allo and `F.Q*A` are supported. A `Q` matrix can be converted into a regular matrix with [`Matrix`](@ref). This operation returns the "thin" Q factor, i.e., if `A` is `m`×`n` with `m>=n`, then `Matrix(F.Q)` yields an `m`×`n` matrix with orthonormal columns. To retrieve the "full" Q factor, an -`m`×`m` orthogonal matrix, use `F.Q*I`. If `m<=n`, then `Matrix(F.Q)` yields an `m`×`m` +`m`×`m` orthogonal matrix, use `F.Q*I` or `collect(F.Q)`. If `m<=n`, then `Matrix(F.Q)` yields an `m`×`m` orthogonal matrix. 
The block size for QR decomposition can be specified by keyword argument @@ -399,8 +398,7 @@ julia> A = [3.0 -6.0; 4.0 -8.0; 0.0 1.0] julia> F = qr(A) LinearAlgebra.QRCompactWY{Float64, Matrix{Float64}, Matrix{Float64}} -Q factor: -3×3 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}} +Q factor: 3×3 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}} R factor: 2×2 Matrix{Float64}: -5.0 10.0 @@ -452,7 +450,7 @@ Array(F::QRPivoted) = Matrix(F) function show(io::IO, mime::MIME{Symbol("text/plain")}, F::Union{QR, QRCompactWY, QRPivoted}) summary(io, F); println(io) - println(io, "Q factor:") + print(io, "Q factor: ") show(io, mime, F.Q) println(io, "\nR factor:") show(io, mime, F.R) diff --git a/stdlib/LinearAlgebra/test/abstractq.jl b/stdlib/LinearAlgebra/test/abstractq.jl index e3f48c7b2e3fd..83a26c6050484 100644 --- a/stdlib/LinearAlgebra/test/abstractq.jl +++ b/stdlib/LinearAlgebra/test/abstractq.jl @@ -20,8 +20,8 @@ n = 5 Base.size(Q::MyQ) = size(Q.Q) LinearAlgebra.lmul!(Q::MyQ, B::AbstractVecOrMat) = lmul!(Q.Q, B) LinearAlgebra.lmul!(adjQ::AdjointQ{<:Any,<:MyQ}, B::AbstractVecOrMat) = lmul!(parent(adjQ).Q', B) - LinearAlgebra.rmul!(A::AbstractMatrix, Q::MyQ) = rmul!(A, Q.Q) - LinearAlgebra.rmul!(A::AbstractMatrix, adjQ::AdjointQ{<:Any,<:MyQ}) = rmul!(A, parent(adjQ).Q') + LinearAlgebra.rmul!(A::AbstractVecOrMat, Q::MyQ) = rmul!(A, Q.Q) + LinearAlgebra.rmul!(A::AbstractVecOrMat, adjQ::AdjointQ{<:Any,<:MyQ}) = rmul!(A, parent(adjQ).Q') Base.convert(::Type{AbstractQ{T}}, Q::MyQ) where {T} = MyQ{T}(Q.Q) LinearAlgebra.det(Q::MyQ) = det(Q.Q) @@ -84,6 +84,17 @@ n = 5 @test Q * x ≈ Q.Q * x @test Q' * x ≈ Q.Q' * x end + A = rand(Float64, 5, 3) + F = qr(A) + Q = MyQ(F.Q) + Prect = Matrix(F.Q) + Psquare = collect(F.Q) + @test Q == Prect + @test Q == Psquare + @test Q == F.Q*I + @test Q ≈ Prect + @test Q ≈ Psquare + @test Q ≈ F.Q*I end end # module diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl 
b/stdlib/LinearAlgebra/test/hessenberg.jl index 91e4e1b1b3df0..61e498211ca7b 100644 --- a/stdlib/LinearAlgebra/test/hessenberg.jl +++ b/stdlib/LinearAlgebra/test/hessenberg.jl @@ -158,7 +158,7 @@ let n = 10 hessstring = sprint((t, s) -> show(t, "text/plain", s), H) qstring = sprint((t, s) -> show(t, "text/plain", s), H.Q) hstring = sprint((t, s) -> show(t, "text/plain", s), H.H) - @test hessstring == "$(summary(H))\nQ factor:\n$qstring\nH factor:\n$hstring" + @test hessstring == "$(summary(H))\nQ factor: $qstring\nH factor:\n$hstring" #iterate q,h = H diff --git a/stdlib/LinearAlgebra/test/lq.jl b/stdlib/LinearAlgebra/test/lq.jl index 8b4af6a0a5f8d..6bdc4efa5d6dd 100644 --- a/stdlib/LinearAlgebra/test/lq.jl +++ b/stdlib/LinearAlgebra/test/lq.jl @@ -213,8 +213,7 @@ L factor: 0.0 1.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 -Q factor: -4×4 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}}""" +Q factor: 4×4 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}}""" end @testset "adjoint of LQ" begin diff --git a/stdlib/LinearAlgebra/test/qr.jl b/stdlib/LinearAlgebra/test/qr.jl index 6e2e9a7b20603..184971da304f7 100644 --- a/stdlib/LinearAlgebra/test/qr.jl +++ b/stdlib/LinearAlgebra/test/qr.jl @@ -69,7 +69,7 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = Matrix(Q) qrstring = sprint((t, s) -> show(t, "text/plain", s), qra) rstring = sprint((t, s) -> show(t, "text/plain", s), r) qstring = sprint((t, s) -> show(t, "text/plain", s), q) - @test qrstring == "$(summary(qra))\nQ factor:\n$qstring\nR factor:\n$rstring" + @test qrstring == "$(summary(qra))\nQ factor: $qstring\nR factor:\n$rstring" # iterate q, r = qra @test q*r ≈ a @@ -155,7 +155,7 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = Matrix(Q) rstring = sprint((t, s) -> show(t, "text/plain", s), r) qstring = sprint((t, s) -> show(t, "text/plain", s), q) pstring = sprint((t, s) -> show(t, "text/plain", s), p) - @test qrstring == "$(summary(qrpa))\nQ factor:\n$qstring\nR 
factor:\n$rstring\npermutation:\n$pstring" + @test qrstring == "$(summary(qrpa))\nQ factor: $qstring\nR factor:\n$rstring\npermutation:\n$pstring" # iterate q, r, p = qrpa @test q*r[:,invperm(p)] ≈ a[:,1:n1] From 39a4013ded7775304554a303de53c46a467fd488 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Sun, 25 Jun 2023 12:23:11 -0400 Subject: [PATCH 228/290] Fix indexing error in TwoPhaseDefUseMap (#50280) There was an off-by-one in the indexing for TwoPhaseDefUseMap, causing def-use chains to not be properly visited. We don't use this functionality much in base, because it's only active for irinterp on functions with loops that were shown to terminate, which the compiler currently does not generally have the power to do, but I saw it in some downstream experiments. --- base/compiler/inferencestate.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl index 97a7ed66ab9b5..c4608dd5781e1 100644 --- a/base/compiler/inferencestate.jl +++ b/base/compiler/inferencestate.jl @@ -148,8 +148,9 @@ function kill_def_use!(tpdum::TwoPhaseDefUseMap, def::Int, use::Int) ndata = tpdum.data[idx+1] ndata == 0 && break tpdum.data[idx] = ndata + idx += 1 end - tpdum.data[idx + 1] = 0 + tpdum.data[idx] = 0 end end kill_def_use!(tpdum::TwoPhaseDefUseMap, def::SSAValue, use::Int) = From 879f6d482420e181f17af60d361b601cbcc204f9 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Sun, 25 Jun 2023 14:12:43 -0400 Subject: [PATCH 229/290] SROA: re-compute the type for inserted phi nodes (#50287) Fixes the immutable case of #50285. 
--- base/compiler/ssair/passes.jl | 22 +++++++++------------- test/compiler/irpasses.jl | 19 +++++++++++++++++++ 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 882944810313c..3adf16941764b 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -364,7 +364,7 @@ const LiftedDefs = IdDict{Any, Bool} # try to compute lifted values that can replace `getfield(x, field)` call # where `x` is an immutable struct that are defined at any of `leaves` -function lift_leaves(compact::IncrementalCompact, @nospecialize(result_t), field::Int, +function lift_leaves(compact::IncrementalCompact, field::Int, leaves::Vector{Any}, 𝕃ₒ::AbstractLattice) # For every leaf, the lifted value lifted_leaves = LiftedLeaves() @@ -394,15 +394,6 @@ function lift_leaves(compact::IncrementalCompact, @nospecialize(result_t), field continue end return nothing - # Expand the Expr(:new) to include it's element Expr(:new) nodes up until the one we want - compact[leaf] = nothing - for i = (length(def.args) + 1):(1+field) - ftyp = fieldtype(typ, i - 1) - isbitstype(ftyp) || return nothing - ninst = effect_free(NewInstruction(Expr(:new, ftyp), result_t)) - push!(def.args, insert_node!(compact, leaf, ninst)) - end - compact[leaf] = def end lift_arg!(compact, leaf, cache_key, def, 1+field, lifted_leaves) continue @@ -1080,11 +1071,16 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) leaves, visited_philikes = collect_leaves(compact, val, struct_typ, 𝕃ₒ) isempty(leaves) && continue - result_t = argextype(SSAValue(idx), compact) - lifted_result = lift_leaves(compact, result_t, field, leaves, 𝕃ₒ) + lifted_result = lift_leaves(compact, field, leaves, 𝕃ₒ) lifted_result === nothing && continue lifted_leaves, any_undef = lifted_result + result_t = Union{} + for v in values(lifted_leaves) + v === nothing && continue + result_t = tmerge(𝕃ₒ, result_t, argextype(v.val, compact)) + 
end + lifted_val = perform_lifting!(compact, visited_philikes, field, lifting_cache, result_t, lifted_leaves, val, lazydomtree) @@ -1098,7 +1094,7 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) lifted_leaves_def[k] = v === nothing ? false : true end def_val = perform_lifting!(compact, - visited_philikes, field, def_lifting_cache, result_t, lifted_leaves_def, val, lazydomtree).val + visited_philikes, field, def_lifting_cache, Bool, lifted_leaves_def, val, lazydomtree).val end insert_node!(compact, SSAValue(idx), non_effect_free(NewInstruction( Expr(:throw_undef_if_not, Symbol("##getfield##"), def_val), Nothing))) diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl index f3c74df884cad..3ec8c15c919e2 100644 --- a/test/compiler/irpasses.jl +++ b/test/compiler/irpasses.jl @@ -1270,3 +1270,22 @@ end return strct.b end @test fully_eliminated(one_const_field_partial; retval=5) + +# Test that SROA updates the type of intermediate phi nodes (#50285) +struct Immut50285 + x::Any +end + +function immut50285(b, x, y) + if b + z = Immut50285(x) + else + z = Immut50285(y) + end + z.x::Union{Float64, Int} +end + +let src = code_typed1(immut50285, Tuple{Bool, Int, Float64}) + @test count(isnew, src.code) == 0 + @test count(iscall((src, typeassert)), src.code) == 0 +end From ed338d000a874241032b3d553440d1ac75c3dd21 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Mon, 26 Jun 2023 03:27:33 -0400 Subject: [PATCH 230/290] lattice: Restore `tmerge_fast_path` for optimizer lattice (#50291) This function is slightly misnamed, because, while it does perform some fast pathing, the rest of the tmerge functions assume it has been run. When I removed the OptimizerLattice in #50257, we accidentally lost the call to `tmerge_fast_path` in the optimizer_lattice path, regressing tmerge quality in the optimizer (which admittedly we don't use a lot, but I did just add another use for in #50287). 
The proper fix is probably to split the tmerge into a helper that always calls the fast path and then delegates to the current implementation for everything else, but for now, just try moving the fast path to the start of the PartialsLattice, which should restore correctness and have reasonable performance, since there's only a handful of pointer checks between the entry to the inference tmerge and the partials lattice. --- base/compiler/typelimits.jl | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/base/compiler/typelimits.jl b/base/compiler/typelimits.jl index 81a176dff1c9d..b648144ea3bd1 100644 --- a/base/compiler/typelimits.jl +++ b/base/compiler/typelimits.jl @@ -456,8 +456,6 @@ end return tmerge_limited(lattice, typea, typeb) end - r = tmerge_fast_path(widenlattice(lattice), typea, typeb) - r !== nothing && return r return tmerge(widenlattice(lattice), typea, typeb) end @@ -491,8 +489,13 @@ end end return Bool end - typea = widenconditional(typea) - typeb = widenconditional(typeb) + if isa(typea, Conditional) + typeb === Union{} && return typea + typea = widenconditional(typea) + elseif isa(typeb, Conditional) + typea === Union{} && return typeb + typeb = widenconditional(typeb) + end return tmerge(widenlattice(lattice), typea, typeb) end @@ -526,14 +529,25 @@ end end return Bool end - typea = widenconditional(typea) - typeb = widenconditional(typeb) + if isa(typea, InterConditional) + typeb === Union{} && return typea + typea = widenconditional(typea) + elseif isa(typeb, InterConditional) + typea === Union{} && return typeb + typeb = widenconditional(typeb) + end return tmerge(widenlattice(lattice), typea, typeb) end @nospecializeinfer function tmerge(𝕃::AnyMustAliasesLattice, @nospecialize(typea), @nospecialize(typeb)) - typea = widenmustalias(typea) - typeb = widenmustalias(typeb) + if is_valid_lattice_norec(𝕃, typea) + typeb === Union{} && return typea + typea = widenmustalias(typea) + end + if 
is_valid_lattice_norec(𝕃, typeb) + typea === Union{} && return typeb + typeb = widenmustalias(typeb) + end return tmerge(widenlattice(𝕃), typea, typeb) end @@ -598,6 +612,9 @@ end end @nospecializeinfer function tmerge(lattice::PartialsLattice, @nospecialize(typea), @nospecialize(typeb)) + r = tmerge_fast_path(lattice, typea, typeb) + r !== nothing && return r + # type-lattice for Const and PartialStruct wrappers aps = isa(typea, PartialStruct) bps = isa(typeb, PartialStruct) From d01b8cd32abd04d422b4b198901f3de206c051c6 Mon Sep 17 00:00:00 2001 From: Daniel Karrasch Date: Mon, 26 Jun 2023 11:06:22 +0200 Subject: [PATCH 231/290] Add type assertion to inner HermOrSym constructor (#50282) --- stdlib/LinearAlgebra/src/symmetric.jl | 4 ++-- stdlib/LinearAlgebra/test/symmetric.jl | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/stdlib/LinearAlgebra/src/symmetric.jl b/stdlib/LinearAlgebra/src/symmetric.jl index 1fc0c6aa0d94d..fa3464e93230b 100644 --- a/stdlib/LinearAlgebra/src/symmetric.jl +++ b/stdlib/LinearAlgebra/src/symmetric.jl @@ -5,7 +5,7 @@ struct Symmetric{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T} data::S uplo::Char - function Symmetric{T,S}(data, uplo) where {T,S<:AbstractMatrix{<:T}} + function Symmetric{T,S}(data, uplo::Char) where {T,S<:AbstractMatrix{<:T}} require_one_based_indexing(data) (uplo != 'U' && uplo != 'L') && throw_uplo() new{T,S}(data, uplo) @@ -98,7 +98,7 @@ struct Hermitian{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T} data::S uplo::Char - function Hermitian{T,S}(data, uplo) where {T,S<:AbstractMatrix{<:T}} + function Hermitian{T,S}(data, uplo::Char) where {T,S<:AbstractMatrix{<:T}} require_one_based_indexing(data) (uplo != 'U' && uplo != 'L') && throw_uplo() new{T,S}(data, uplo) diff --git a/stdlib/LinearAlgebra/test/symmetric.jl b/stdlib/LinearAlgebra/test/symmetric.jl index c901f9b2facc3..224b7b31a50df 100644 --- a/stdlib/LinearAlgebra/test/symmetric.jl +++ b/stdlib/LinearAlgebra/test/symmetric.jl @@ -64,6 +64,9 
@@ end @test_throws ArgumentError Symmetric(asym, :R) @test_throws ArgumentError Hermitian(asym, :R) + @test_throws MethodError Symmetric{eltya,typeof(asym)}(asym, :L) + @test_throws MethodError Hermitian{eltya,typeof(aherm)}(aherm, :L) + # mixed cases with Hermitian/Symmetric if eltya <: Real @test Symmetric(Hermitian(aherm, :U)) === Symmetric(aherm, :U) From aa9b9d0e0eb61e1efac262076e82079eecc75f78 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Mon, 26 Jun 2023 05:36:20 -0400 Subject: [PATCH 232/290] compact: Propagate IR_FLAG_REFINED (#50286) This is a more invasive version of #50281, implementing alternative 2. Because this solves the same problem, the motivation is the same, so quoth much of the commit message: In #49340, I added an ir flag (currently only set by sroa) to allow sparse-reinference of ir after optimization passes that may improve type information. However, as currently implemented, this flag gets dropped when the IR is compacted, defeating the purpose of the flag, because it can no longer be reliably used for sparse re-inference. This commit adds a special `Refined` marker in ssa_rename that lets compact propagate the IR_FLAG_REFINED flag to any users of statements that may have been compacted away. 
--- base/compiler/ssair/ir.jl | 87 ++++++++++++++++++++++++--------- base/compiler/ssair/irinterp.jl | 4 +- base/compiler/ssair/passes.jl | 32 +++++++++--- 3 files changed, 91 insertions(+), 32 deletions(-) diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 2d8c2a076b2eb..699ee7ba98091 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -1078,11 +1078,19 @@ function getindex(view::TypesView, idx::NewSSAValue) return view.ir[idx][:type] end +# N.B.: Don't make this <: Function to avoid ::Function deopt +struct Refiner + result_flags::Vector{UInt8} + result_idx::Int +end +(this::Refiner)() = (this.result_flags[this.result_idx] |= IR_FLAG_REFINED; nothing) + function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int}, processed_idx::Int, result_idx::Int, ssa_rename::Vector{Any}, used_ssas::Vector{Int}, new_new_used_ssas::Vector{Int}, - do_rename_ssa::Bool) + do_rename_ssa::Bool, + mark_refined!::Union{Refiner, Nothing}) values = Vector{Any}(undef, length(old_values)) for i = 1:length(old_values) isassigned(old_values, i) || continue @@ -1093,7 +1101,7 @@ function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int} push!(late_fixup, result_idx) val = OldSSAValue(val.id) else - val = renumber_ssa2(val, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa) + val = renumber_ssa2(val, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!) end else used_ssas[val.id] += 1 @@ -1103,7 +1111,7 @@ function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int} push!(late_fixup, result_idx) else # Always renumber these. do_rename_ssa applies only to actual SSAValues - val = renumber_ssa2(SSAValue(val.id), ssa_rename, used_ssas, new_new_used_ssas, true) + val = renumber_ssa2(SSAValue(val.id), ssa_rename, used_ssas, new_new_used_ssas, true, mark_refined!) 
end elseif isa(val, NewSSAValue) if val.id < 0 @@ -1120,7 +1128,7 @@ function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int} end function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssas::Vector{Int}, - new_new_used_ssas::Vector{Int}, do_rename_ssa::Bool) + new_new_used_ssas::Vector{Int}, do_rename_ssa::Bool, mark_refined!::Union{Refiner, Nothing}) id = val.id if do_rename_ssa if id > length(ssanums) @@ -1128,6 +1136,10 @@ function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssas::Vector{In end val = ssanums[id] end + if isa(val, Refined) + val = val.val + mark_refined! !== nothing && mark_refined!() + end if isa(val, SSAValue) used_ssas[val.id] += 1 end @@ -1135,7 +1147,7 @@ function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssas::Vector{In end function renumber_ssa2(val::NewSSAValue, ssanums::Vector{Any}, used_ssas::Vector{Int}, - new_new_used_ssas::Vector{Int}, do_rename_ssa::Bool) + new_new_used_ssas::Vector{Int}, do_rename_ssa::Bool, mark_refined!::Union{Refiner, Nothing}) if val.id < 0 new_new_used_ssas[-val.id] += 1 return val @@ -1145,7 +1157,7 @@ function renumber_ssa2(val::NewSSAValue, ssanums::Vector{Any}, used_ssas::Vector end end -function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssas::Vector{Int}, new_new_used_ssas::Vector{Int}, late_fixup::Vector{Int}, result_idx::Int, do_rename_ssa::Bool) +function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssas::Vector{Int}, new_new_used_ssas::Vector{Int}, late_fixup::Vector{Int}, result_idx::Int, do_rename_ssa::Bool, mark_refined!::Union{Refiner, Nothing}) urs = userefs(stmt) for op in urs val = op[] @@ -1153,7 +1165,7 @@ function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssas::Ve push!(late_fixup, result_idx) end if isa(val, Union{SSAValue, NewSSAValue}) - val = renumber_ssa2(val, ssanums, used_ssas, new_new_used_ssas, do_rename_ssa) + val = renumber_ssa2(val, ssanums, used_ssas, 
new_new_used_ssas, do_rename_ssa, mark_refined!) end if isa(val, OldSSAValue) || isa(val, NewSSAValue) push!(late_fixup, result_idx) @@ -1245,11 +1257,16 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: nothing end +struct Refined + val::Any + Refined(@nospecialize(val)) = new(val) +end + function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instruction, idx::Int, processed_idx::Int, active_bb::Int, do_rename_ssa::Bool) stmt = inst[:inst] (; result, ssa_rename, late_fixup, used_ssas, new_new_used_ssas) = compact (; cfg_transforms_enabled, fold_constant_branches, bb_rename_succ, bb_rename_pred, result_bbs) = compact.cfg_transform - ssa_rename[idx] = SSAValue(result_idx) + mark_refined! = Refiner(result.flag, result_idx) if stmt === nothing ssa_rename[idx] = stmt elseif isa(stmt, OldSSAValue) @@ -1257,6 +1274,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr elseif isa(stmt, GotoNode) && cfg_transforms_enabled label = bb_rename_succ[stmt.label] @assert label > 0 + ssa_rename[idx] = SSAValue(result_idx) result[result_idx][:inst] = GotoNode(label) result_idx += 1 elseif isa(stmt, GlobalRef) @@ -1265,14 +1283,16 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr if (flag & total_flags) == total_flags ssa_rename[idx] = stmt else + ssa_rename[idx] = SSAValue(result_idx) result[result_idx][:inst] = stmt result_idx += 1 end elseif isa(stmt, GotoNode) + ssa_rename[idx] = SSAValue(result_idx) result[result_idx][:inst] = stmt result_idx += 1 elseif isa(stmt, GotoIfNot) && cfg_transforms_enabled - stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa)::GotoIfNot + stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!)::GotoIfNot result[result_idx][:inst] = stmt cond = stmt.cond if fold_constant_branches @@ -1284,12 +1304,14 @@ 
function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr isa(cond, Bool) || @goto bail end if cond + ssa_rename[idx] = nothing result[result_idx][:inst] = nothing kill_edge!(compact, active_bb, active_bb, stmt.dest) # Don't increment result_idx => Drop this statement else label = bb_rename_succ[stmt.dest] @assert label > 0 + ssa_rename[idx] = SSAValue(result_idx) result[result_idx][:inst] = GotoNode(label) kill_edge!(compact, active_bb, active_bb, active_bb+1) result_idx += 1 @@ -1298,11 +1320,12 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr @label bail label = bb_rename_succ[stmt.dest] @assert label > 0 + ssa_rename[idx] = SSAValue(result_idx) result[result_idx][:inst] = GotoIfNot(cond, label) result_idx += 1 end elseif isa(stmt, Expr) - stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa)::Expr + stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!)::Expr if cfg_transforms_enabled && isexpr(stmt, :enter) label = bb_rename_succ[stmt.args[1]::Int] @assert label > 0 @@ -1312,16 +1335,18 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr if isa(cond, Bool) && cond === true # cond was folded to true - this statement # is dead. + ssa_rename[idx] = nothing return result_idx end end + ssa_rename[idx] = SSAValue(result_idx) result[result_idx][:inst] = stmt result_idx += 1 elseif isa(stmt, PiNode) # As an optimization, we eliminate any trivial pinodes. For performance, we use === # type equality. 
We may want to consider using == in either a separate pass or if # performance turns out ok - stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa)::PiNode + stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!)::PiNode pi_val = stmt.val if isa(pi_val, SSAValue) if stmt.typ === result[pi_val.id][:type] @@ -1341,12 +1366,17 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr return result_idx end end + ssa_rename[idx] = SSAValue(result_idx) result[result_idx][:inst] = stmt result_idx += 1 elseif isa(stmt, ReturnNode) || isa(stmt, UpsilonNode) || isa(stmt, GotoIfNot) - result[result_idx][:inst] = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa) + ssa_rename[idx] = SSAValue(result_idx) + result[result_idx][:inst] = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa, mark_refined!) result_idx += 1 elseif isa(stmt, PhiNode) + # N.B.: For PhiNodes, this needs to be at the top, since PhiNodes + # can self-reference. + ssa_rename[idx] = SSAValue(result_idx) if cfg_transforms_enabled # Rename phi node edges let bb_rename_pred=bb_rename_pred @@ -1364,6 +1394,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr # not a value we can copy), we copy only the edges and (defined) # values we want to keep to new arrays initialized with undefined # elements. 
+ edges = Vector{Int32}(undef, length(stmt.edges)) values = Vector{Any}(undef, length(stmt.values)) new_index = 1 @@ -1383,7 +1414,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr values = stmt.values end - values = process_phinode_values(values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa) + values = process_phinode_values(values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!) # Don't remove the phi node if it is before the definition of its value # because doing so can create forward references. This should only # happen with dead loops, but can cause problems when optimization @@ -1407,18 +1438,25 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr result_idx += 1 end elseif isa(stmt, PhiCNode) - result[result_idx][:inst] = PhiCNode(process_phinode_values(stmt.values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa)) + ssa_rename[idx] = SSAValue(result_idx) + result[result_idx][:inst] = PhiCNode(process_phinode_values(stmt.values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa, mark_refined!)) result_idx += 1 - elseif isa(stmt, SSAValue) - # identity assign, replace uses of this ssa value with its result - if do_rename_ssa - stmt = ssa_rename[stmt.id] - end - ssa_rename[idx] = stmt - elseif isa(stmt, NewSSAValue) - ssa_rename[idx] = SSAValue(stmt.id) else - # Constant assign, replace uses of this ssa value with its result + if isa(stmt, SSAValue) + # identity assign, replace uses of this ssa value with its result + if do_rename_ssa + stmt = ssa_rename[stmt.id] + end + elseif isa(stmt, NewSSAValue) + stmt = SSAValue(stmt.id) + else + # Constant assign, replace uses of this ssa value with its result + end + if (inst[:flag] & IR_FLAG_REFINED) != 0 && !isa(stmt, Refined) + # If we're 
compacting away an instruction that was marked as refined, + # leave a marker in the ssa_rename, so we can taint any users. + stmt = Refined(stmt) + end ssa_rename[idx] = stmt end return result_idx @@ -1689,6 +1727,9 @@ function fixup_node(compact::IncrementalCompact, @nospecialize(stmt), reify_new_ end elseif isa(stmt, OldSSAValue) val = compact.ssa_rename[stmt.id] + if isa(val, Refined) + val = val.val + end if isa(val, SSAValue) compact.used_ssas[val.id] += 1 end diff --git a/base/compiler/ssair/irinterp.jl b/base/compiler/ssair/irinterp.jl index 1ba751f6d3fd4..0a738371aca6e 100644 --- a/base/compiler/ssair/irinterp.jl +++ b/base/compiler/ssair/irinterp.jl @@ -122,7 +122,7 @@ function reprocess_instruction!(interp::AbstractInterpreter, idx::Int, bb::Union rt = nothing if isa(inst, Expr) head = inst.head - if head === :call || head === :foreigncall || head === :new || head === :splatnew + if head === :call || head === :foreigncall || head === :new || head === :splatnew || head === :static_parameter || head === :isdefined (; rt, effects) = abstract_eval_statement_expr(interp, inst, nothing, irsv) ir.stmts[idx][:flag] |= flags_for_effects(effects) elseif head === :invoke @@ -149,7 +149,7 @@ function reprocess_instruction!(interp::AbstractInterpreter, idx::Int, bb::Union elseif isa(inst, GlobalRef) # GlobalRef is not refinable else - error("reprocess_instruction!: unhandled instruction found") + rt = argextype(inst, irsv.ir) end if rt !== nothing if isa(rt, Const) diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 3adf16941764b..305d8a9d1e318 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -190,6 +190,9 @@ function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSA if isa(defssa, OldSSAValue) if already_inserted(compact, defssa) rename = compact.ssa_rename[defssa.id] + if isa(rename, Refined) + rename = rename.val + end if isa(rename, AnySSAValue) defssa = rename continue @@ -463,6 
+466,9 @@ function lift_arg!( lifted = OldSSAValue(lifted.id) if already_inserted(compact, lifted) lifted = compact.ssa_rename[lifted.id] + if isa(lifted, Refined) + lifted = lifted.val + end end end if isa(lifted, GlobalRef) || isa(lifted, Expr) @@ -481,6 +487,9 @@ end function walk_to_def(compact::IncrementalCompact, @nospecialize(leaf)) if isa(leaf, OldSSAValue) && already_inserted(compact, leaf) leaf = compact.ssa_rename[leaf.id] + if isa(leaf, Refined) + leaf = leaf.val + end if isa(leaf, AnySSAValue) leaf = simple_walk(compact, leaf) end @@ -886,6 +895,14 @@ end # which can be very large sometimes, and program counters in question are often very sparse const SPCSet = IdSet{Int} +struct IntermediaryCollector + intermediaries::SPCSet +end +function (this::IntermediaryCollector)(@nospecialize(pi), @nospecialize(ssa)) + push!(this.intermediaries, ssa.id) + return false +end + """ sroa_pass!(ir::IRCode) -> newir::IRCode @@ -1013,7 +1030,6 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) # analyze `getfield` / `isdefined` / `setfield!` call val = stmt.args[2] end - struct_typ = unwrap_unionall(widenconst(argextype(val, compact))) if isa(struct_typ, Union) && struct_typ <: Tuple struct_typ = unswitchtupleunion(struct_typ) @@ -1030,14 +1046,12 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) continue end + # analyze this mutable struct here for the later pass if ismutabletype(struct_typ) isa(val, SSAValue) || continue let intermediaries = SPCSet() - callback = function (@nospecialize(pi), @nospecialize(ssa)) - push!(intermediaries, ssa.id) - return false - end + callback = IntermediaryCollector(intermediaries) def = simple_walk(compact, val, callback) # Mutable stuff here isa(def, SSAValue) || continue @@ -1105,7 +1119,11 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) end compact[idx] = lifted_val === nothing ? 
nothing : lifted_val.val - compact[SSAValue(idx)][:flag] |= IR_FLAG_REFINED + if lifted_val !== nothing + if !⊑(𝕃ₒ, compact[SSAValue(idx)][:type], result_t) + compact[SSAValue(idx)][:flag] |= IR_FLAG_REFINED + end + end end non_dce_finish!(compact) @@ -2118,7 +2136,7 @@ function cfg_simplify!(ir::IRCode) (; ssa_rename, late_fixup, used_ssas, new_new_used_ssas) = compact ssa_rename[i] = SSAValue(compact.result_idx) processed_idx = i - renamed_values = process_phinode_values(values, late_fixup, processed_idx, compact.result_idx, ssa_rename, used_ssas, new_new_used_ssas, true) + renamed_values = process_phinode_values(values, late_fixup, processed_idx, compact.result_idx, ssa_rename, used_ssas, new_new_used_ssas, true, nothing) edges = Int32[] values = Any[] sizehint!(edges, length(phi.edges)); sizehint!(values, length(renamed_values)) From 0d52f8d58337ce95a2afa3899b715d9f1f6625d2 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Mon, 26 Jun 2023 22:21:03 +0900 Subject: [PATCH 233/290] SROA (mutable): re-compute the type for inserted phi nodes (#50294) Fixes the mutable case of #50285. closes #50285 --- base/compiler/ssair/passes.jl | 8 ++++++-- test/compiler/irpasses.jl | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 305d8a9d1e318..f2ef2e9d47ee1 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -1336,6 +1336,7 @@ function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse end function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing, InliningState}) + 𝕃ₒ = inlining === nothing ? 
SimpleInferenceLattice.instance : optimizer_lattice(inlining.interp) lazypostdomtree = LazyPostDomtree(ir) for (idx, (intermediaries, defuse)) in defuses intermediaries = collect(intermediaries) @@ -1491,11 +1492,14 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse end for b in phiblocks n = ir[phinodes[b]][:inst]::PhiNode + result_t = Bottom for p in ir.cfg.blocks[b].preds push!(n.edges, p) - push!(n.values, compute_value_for_block(ir, domtree, - allblocks, du, phinodes, fidx, p)) + v = compute_value_for_block(ir, domtree, allblocks, du, phinodes, fidx, p) + push!(n.values, v) + result_t = tmerge(𝕃ₒ, result_t, argextype(v, ir)) end + ir[phinodes[b]][:type] = result_t end end all_eliminated || continue diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl index 3ec8c15c919e2..f27961c526559 100644 --- a/test/compiler/irpasses.jl +++ b/test/compiler/irpasses.jl @@ -1289,3 +1289,18 @@ let src = code_typed1(immut50285, Tuple{Bool, Int, Float64}) @test count(isnew, src.code) == 0 @test count(iscall((src, typeassert)), src.code) == 0 end + +function mut50285(b, x, y) + z = Ref{Any}() + if b + z[] = x + else + z[] = y + end + z[]::Union{Float64, Int} +end + +let src = code_typed1(mut50285, Tuple{Bool, Int, Float64}) + @test count(isnew, src.code) == 0 + @test count(iscall((src, typeassert)), src.code) == 0 +end From 48c92c13573c764bde9b877e52435b757ee479ee Mon Sep 17 00:00:00 2001 From: Diogo Netto <61364108+d-netto@users.noreply.github.com> Date: Mon, 26 Jun 2023 12:29:41 -0300 Subject: [PATCH 234/290] initialize prev_nold and nold in gc_reset_page (#50289) --- src/gc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gc.c b/src/gc.c index 213eebae33b1a..02551dd965eb5 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1214,6 +1214,8 @@ STATIC_INLINE jl_taggedvalue_t *gc_reset_page(jl_ptls_t ptls2, const jl_gc_pool_ jl_taggedvalue_t *beg = (jl_taggedvalue_t*)(pg->data + GC_PAGE_OFFSET); pg->has_young = 0; pg->has_marked = 0; + 
pg->prev_nold = 0; + pg->nold = 0; pg->fl_begin_offset = UINT16_MAX; pg->fl_end_offset = UINT16_MAX; return beg; From 863057689bc3444c363147a101770250b6b8977c Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Mon, 26 Jun 2023 16:06:42 +0000 Subject: [PATCH 235/290] Reduce memory usage during image build (#50237) --- src/aotcompile.cpp | 508 ++++++++++++++++++++++--------------------- src/codegen-stubs.c | 2 +- src/julia_internal.h | 2 +- src/precompile.c | 43 ++-- 4 files changed, 280 insertions(+), 275 deletions(-) diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index b1bef232e0915..1f02a014175b4 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -952,10 +952,15 @@ struct ShardTimers { void emitFloat16Wrappers(Module &M, bool external); +struct AOTOutputs { + SmallVector unopt, opt, obj, asm_; +}; + // Perform the actual optimization and emission of the output files -static void add_output_impl(Module &M, TargetMachine &SourceTM, std::string *outputs, const std::string *names, - NewArchiveMember *unopt, NewArchiveMember *opt, NewArchiveMember *obj, NewArchiveMember *asm_, - ShardTimers &timers, unsigned shardidx) { +static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimers &timers, + bool unopt, bool opt, bool obj, bool asm_) { + assert((unopt || opt || obj || asm_) && "no output requested"); + AOTOutputs out; auto TM = std::unique_ptr( SourceTM.getTarget().createTargetMachine( SourceTM.getTargetTriple().str(), @@ -968,17 +973,16 @@ static void add_output_impl(Module &M, TargetMachine &SourceTM, std::string *out if (unopt) { timers.unopt.startTimer(); - raw_string_ostream OS(*outputs); + raw_svector_ostream OS(out.unopt); PassBuilder PB; AnalysisManagers AM{*TM, PB, OptimizationLevel::O0}; ModulePassManager MPM; MPM.addPass(BitcodeWriterPass(OS)); MPM.run(M, AM.MAM); - *unopt = NewArchiveMember(MemoryBufferRef(*outputs++, *names++)); timers.unopt.stopTimer(); } if (!opt && 
!obj && !asm_) { - return; + return out; } assert(!verifyModule(M, &errs())); @@ -1037,43 +1041,38 @@ static void add_output_impl(Module &M, TargetMachine &SourceTM, std::string *out if (opt) { timers.opt.startTimer(); - raw_string_ostream OS(*outputs); + raw_svector_ostream OS(out.opt); PassBuilder PB; AnalysisManagers AM{*TM, PB, OptimizationLevel::O0}; ModulePassManager MPM; MPM.addPass(BitcodeWriterPass(OS)); MPM.run(M, AM.MAM); - *opt = NewArchiveMember(MemoryBufferRef(*outputs++, *names++)); timers.opt.stopTimer(); } if (obj) { timers.obj.startTimer(); - SmallVector Buffer; - raw_svector_ostream OS(Buffer); + raw_svector_ostream OS(out.obj); legacy::PassManager emitter; addTargetPasses(&emitter, TM->getTargetTriple(), TM->getTargetIRAnalysis()); if (TM->addPassesToEmitFile(emitter, OS, nullptr, CGFT_ObjectFile, false)) jl_safe_printf("ERROR: target does not support generation of object files\n"); emitter.run(M); - *outputs = { Buffer.data(), Buffer.size() }; - *obj = NewArchiveMember(MemoryBufferRef(*outputs++, *names++)); timers.obj.stopTimer(); } if (asm_) { timers.asm_.startTimer(); - SmallVector Buffer; - raw_svector_ostream OS(Buffer); + raw_svector_ostream OS(out.asm_); legacy::PassManager emitter; addTargetPasses(&emitter, TM->getTargetTriple(), TM->getTargetIRAnalysis()); if (TM->addPassesToEmitFile(emitter, OS, nullptr, CGFT_AssemblyFile, false)) jl_safe_printf("ERROR: target does not support generation of assembly files\n"); emitter.run(M); - *outputs = { Buffer.data(), Buffer.size() }; - *asm_ = NewArchiveMember(MemoryBufferRef(*outputs++, *names++)); timers.asm_.stopTimer(); } + + return out; } // serialize module to bitcode @@ -1233,20 +1232,12 @@ static void dropUnusedGlobals(Module &M) { // Entrypoint to optionally-multithreaded image compilation. This handles global coordination of the threading, // as well as partitioning, serialization, and deserialization. 
-static void add_output(Module &M, TargetMachine &TM, std::vector &outputs, StringRef name, - std::vector &unopt, std::vector &opt, - std::vector &obj, std::vector &asm_, - bool unopt_out, bool opt_out, bool obj_out, bool asm_out, - unsigned threads, ModuleInfo module_info) { - unsigned outcount = unopt_out + opt_out + obj_out + asm_out; - assert(outcount); - outputs.resize(outputs.size() + outcount * threads * 2); - auto names_start = outputs.data() + outputs.size() - outcount * threads * 2; - auto outputs_start = names_start + outcount * threads; - unopt.resize(unopt.size() + unopt_out * threads); - opt.resize(opt.size() + opt_out * threads); - obj.resize(obj.size() + obj_out * threads); - asm_.resize(asm_.size() + asm_out * threads); +template +static SmallVector add_output(Module &M, TargetMachine &TM, StringRef name, unsigned threads, + bool unopt_out, bool opt_out, bool obj_out, bool asm_out, ModuleReleasedFunc module_released) { + SmallVector outputs(threads); + assert(threads); + assert(unopt_out || opt_out || obj_out || asm_out); // Timers for timing purposes TimerGroup timer_group("add_output", ("Time to optimize and emit LLVM module " + name).str()); SmallVector timers(threads); @@ -1282,28 +1273,13 @@ static void add_output(Module &M, TargetMachine &TM, std::vector &o errs() << "WARNING: Invalid value for JULIA_IMAGE_TIMINGS: " << env << "\n"; } } - for (unsigned i = 0; i < threads; ++i) { - auto start = names_start + i * outcount; - auto istr = std::to_string(i); - if (unopt_out) - *start++ = (name + "_unopt#" + istr + ".bc").str(); - if (opt_out) - *start++ = (name + "_opt#" + istr + ".bc").str(); - if (obj_out) - *start++ = (name + "#" + istr + ".o").str(); - if (asm_out) - *start++ = (name + "#" + istr + ".s").str(); - } // Single-threaded case if (threads == 1) { output_timer.startTimer(); - add_output_impl(M, TM, outputs_start, names_start, - unopt_out ? unopt.data() + unopt.size() - 1 : nullptr, - opt_out ? 
opt.data() + opt.size() - 1 : nullptr, - obj_out ? obj.data() + obj.size() - 1 : nullptr, - asm_out ? asm_.data() + asm_.size() - 1 : nullptr, - timers[0], 0); + outputs[0] = add_output_impl(M, TM, timers[0], unopt_out, opt_out, obj_out, asm_out); output_timer.stopTimer(); + // Don't need M anymore + module_released(M); if (!report_timings) { timer_group.clear(); @@ -1313,7 +1289,7 @@ static void add_output(Module &M, TargetMachine &TM, std::vector &o t.print(dbgs(), true); } } - return; + return outputs; } partition_timer.startTimer(); @@ -1332,17 +1308,15 @@ static void add_output(Module &M, TargetMachine &TM, std::vector &o auto serialized = serializeModule(M); serialize_timer.stopTimer(); - output_timer.startTimer(); + // Don't need M anymore, since we'll only read from serialized from now on + module_released(M); - auto unoptstart = unopt_out ? unopt.data() + unopt.size() - threads : nullptr; - auto optstart = opt_out ? opt.data() + opt.size() - threads : nullptr; - auto objstart = obj_out ? obj.data() + obj.size() - threads : nullptr; - auto asmstart = asm_out ? asm_.data() + asm_.size() - threads : nullptr; + output_timer.startTimer(); // Start all of the worker threads std::vector workers(threads); for (unsigned i = 0; i < threads; i++) { - workers[i] = std::thread([&, i](){ + workers[i] = std::thread([&, i]() { LLVMContext ctx; // Lazily deserialize the entire module timers[i].deserialize.startTimer(); @@ -1367,12 +1341,7 @@ static void add_output(Module &M, TargetMachine &TM, std::vector &o dropUnusedGlobals(*M); timers[i].deletion.stopTimer(); - add_output_impl(*M, TM, outputs_start + i * outcount, names_start + i * outcount, - unoptstart ? unoptstart + i : nullptr, - optstart ? optstart + i : nullptr, - objstart ? objstart + i : nullptr, - asmstart ? 
asmstart + i : nullptr, - timers[i], i); + outputs[i] = add_output_impl(*M, TM, timers[i], unopt_out, opt_out, obj_out, asm_out); }); } @@ -1400,6 +1369,7 @@ static void add_output(Module &M, TargetMachine &TM, std::vector &o } dbgs() << "]\n"; } + return outputs; } static unsigned compute_image_thread_count(const ModuleInfo &info) { @@ -1463,7 +1433,7 @@ extern "C" JL_DLLEXPORT_CODEGEN void jl_dump_native_impl(void *native_code, const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname, - const char *sysimg_data, size_t sysimg_len, ios_t *s) + ios_t *z, ios_t *s) { JL_TIMING(NATIVE_AOT, NATIVE_Dump); jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code; @@ -1472,14 +1442,11 @@ void jl_dump_native_impl(void *native_code, delete data; return; } - auto TSCtx = data->M.getContext(); - auto lock = TSCtx.getLock(); - LLVMContext &Context = *TSCtx.getContext(); // We don't want to use MCJIT's target machine because // it uses the large code model and we may potentially // want less optimizations there. // make sure to emit the native object format, even if FORCE_ELF was set in codegen - Triple TheTriple(data->M.getModuleUnlocked()->getTargetTriple()); + Triple TheTriple(data->M.withModuleDo([](Module &M) { return M.getTargetTriple(); })); if (TheTriple.isOSWindows()) { TheTriple.setObjectFormat(Triple::COFF); } else if (TheTriple.isOSDarwin()) { @@ -1505,21 +1472,48 @@ void jl_dump_native_impl(void *native_code, CMModel, CodeGenOpt::Aggressive // -O3 TODO: respect command -O0 flag? 
)); + auto DL = jl_create_datalayout(*SourceTM); + std::string StackProtectorGuard; + unsigned OverrideStackAlignment; + data->M.withModuleDo([&](Module &M) { + StackProtectorGuard = M.getStackProtectorGuard().str(); + OverrideStackAlignment = M.getOverrideStackAlignment(); + }); + auto compile = [&](Module &M, StringRef name, unsigned threads, auto module_released) { + return add_output(M, *SourceTM, name, threads, !!unopt_bc_fname, !!bc_fname, !!obj_fname, !!asm_fname, module_released); + }; - std::vector bc_Archive; - std::vector obj_Archive; - std::vector asm_Archive; - std::vector unopt_bc_Archive; - std::vector outputs; - - // Reset the target triple to make sure it matches the new target machine - auto dataM = data->M.getModuleUnlocked(); - dataM->setTargetTriple(SourceTM->getTargetTriple().str()); - dataM->setDataLayout(jl_create_datalayout(*SourceTM)); - - Type *T_size = dataM->getDataLayout().getIntPtrType(Context); - Type *T_psize = T_size->getPointerTo(); + SmallVector sysimg_outputs; + SmallVector data_outputs; + SmallVector metadata_outputs; + if (z) { + LLVMContext Context; + Module sysimgM("sysimg", Context); + sysimgM.setTargetTriple(TheTriple.str()); + sysimgM.setDataLayout(DL); + sysimgM.setStackProtectorGuard(StackProtectorGuard); + sysimgM.setOverrideStackAlignment(OverrideStackAlignment); + Constant *data = ConstantDataArray::get(Context, + ArrayRef((const unsigned char*)z->buf, z->size)); + auto sysdata = new GlobalVariable(sysimgM, data->getType(), false, + GlobalVariable::ExternalLinkage, + data, "jl_system_image_data"); + sysdata->setAlignment(Align(64)); + addComdat(sysdata, TheTriple); + Constant *len = ConstantInt::get(sysimgM.getDataLayout().getIntPtrType(Context), z->size); + addComdat(new GlobalVariable(sysimgM, len->getType(), true, + GlobalVariable::ExternalLinkage, + len, "jl_system_image_size"), TheTriple); + // Free z here, since we've copied out everything into data + // Results in serious memory savings + ios_close(z); + 
free(z); + // Note that we don't set z to null, this allows the check in WRITE_ARCHIVE + // to function as expected + // no need to free the module/context, destructor handles that + sysimg_outputs = compile(sysimgM, "sysimg", 1, [](Module &) {}); + } bool imaging_mode = imaging_default() || jl_options.outputo; @@ -1527,191 +1521,201 @@ void jl_dump_native_impl(void *native_code, unsigned nfvars = 0; unsigned ngvars = 0; - ModuleInfo module_info = compute_module_info(*dataM); - LLVM_DEBUG(dbgs() - << "Dumping module with stats:\n" - << " globals: " << module_info.globals << "\n" - << " functions: " << module_info.funcs << "\n" - << " basic blocks: " << module_info.bbs << "\n" - << " instructions: " << module_info.insts << "\n" - << " clones: " << module_info.clones << "\n" - << " weight: " << module_info.weight << "\n" - ); - - // add metadata information - if (imaging_mode) { - multiversioning_preannotate(*dataM); - { - DenseSet fvars(data->jl_sysimg_fvars.begin(), data->jl_sysimg_fvars.end()); - for (auto &F : *dataM) { - if (F.hasFnAttribute("julia.mv.reloc") || F.hasFnAttribute("julia.mv.fvar")) { - if (fvars.insert(&F).second) { - data->jl_sysimg_fvars.push_back(&F); + // Reset the target triple to make sure it matches the new target machine + + bool has_veccall = false; + + data->M.withModuleDo([&](Module &dataM) { + dataM.setTargetTriple(TheTriple.str()); + dataM.setDataLayout(DL); + auto &Context = dataM.getContext(); + + Type *T_psize = dataM.getDataLayout().getIntPtrType(Context)->getPointerTo(); + + // add metadata information + if (imaging_mode) { + multiversioning_preannotate(dataM); + { + DenseSet fvars(data->jl_sysimg_fvars.begin(), data->jl_sysimg_fvars.end()); + for (auto &F : dataM) { + if (F.hasFnAttribute("julia.mv.reloc") || F.hasFnAttribute("julia.mv.fvar")) { + if (fvars.insert(&F).second) { + data->jl_sysimg_fvars.push_back(&F); + } } } } - } - threads = compute_image_thread_count(module_info); - LLVM_DEBUG(dbgs() << "Using " << threads << " 
to emit aot image\n"); - nfvars = data->jl_sysimg_fvars.size(); - ngvars = data->jl_sysimg_gvars.size(); - emit_offset_table(*dataM, data->jl_sysimg_gvars, "jl_gvars", T_psize); - emit_offset_table(*dataM, data->jl_sysimg_fvars, "jl_fvars", T_psize); - std::vector idxs; - idxs.resize(data->jl_sysimg_gvars.size()); - std::iota(idxs.begin(), idxs.end(), 0); - auto gidxs = ConstantDataArray::get(Context, idxs); - auto gidxs_var = new GlobalVariable(*dataM, gidxs->getType(), true, - GlobalVariable::ExternalLinkage, - gidxs, "jl_gvar_idxs"); - gidxs_var->setVisibility(GlobalValue::HiddenVisibility); - gidxs_var->setDSOLocal(true); - idxs.clear(); - idxs.resize(data->jl_sysimg_fvars.size()); - std::iota(idxs.begin(), idxs.end(), 0); - auto fidxs = ConstantDataArray::get(Context, idxs); - auto fidxs_var = new GlobalVariable(*dataM, fidxs->getType(), true, - GlobalVariable::ExternalLinkage, - fidxs, "jl_fvar_idxs"); - fidxs_var->setVisibility(GlobalValue::HiddenVisibility); - fidxs_var->setDSOLocal(true); - dataM->addModuleFlag(Module::Error, "julia.mv.suffix", MDString::get(Context, "_0")); - - // reflect the address of the jl_RTLD_DEFAULT_handle variable - // back to the caller, so that we can check for consistency issues - GlobalValue *jlRTLD_DEFAULT_var = jl_emit_RTLD_DEFAULT_var(dataM); - addComdat(new GlobalVariable(*dataM, - jlRTLD_DEFAULT_var->getType(), - true, - GlobalVariable::ExternalLinkage, - jlRTLD_DEFAULT_var, - "jl_RTLD_DEFAULT_handle_pointer"), TheTriple); - // let the compiler know we are going to internalize a copy of this, - // if it has a current usage with ExternalLinkage - auto small_typeof_copy = dataM->getGlobalVariable("small_typeof"); - if (small_typeof_copy) { - small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility); - small_typeof_copy->setDSOLocal(true); + ModuleInfo module_info = compute_module_info(dataM); + LLVM_DEBUG(dbgs() + << "Dumping module with stats:\n" + << " globals: " << module_info.globals << "\n" + << " functions: " << 
module_info.funcs << "\n" + << " basic blocks: " << module_info.bbs << "\n" + << " instructions: " << module_info.insts << "\n" + << " clones: " << module_info.clones << "\n" + << " weight: " << module_info.weight << "\n" + ); + threads = compute_image_thread_count(module_info); + LLVM_DEBUG(dbgs() << "Using " << threads << " to emit aot image\n"); + nfvars = data->jl_sysimg_fvars.size(); + ngvars = data->jl_sysimg_gvars.size(); + emit_offset_table(dataM, data->jl_sysimg_gvars, "jl_gvars", T_psize); + emit_offset_table(dataM, data->jl_sysimg_fvars, "jl_fvars", T_psize); + std::vector idxs; + idxs.resize(data->jl_sysimg_gvars.size()); + std::iota(idxs.begin(), idxs.end(), 0); + auto gidxs = ConstantDataArray::get(Context, idxs); + auto gidxs_var = new GlobalVariable(dataM, gidxs->getType(), true, + GlobalVariable::ExternalLinkage, + gidxs, "jl_gvar_idxs"); + gidxs_var->setVisibility(GlobalValue::HiddenVisibility); + gidxs_var->setDSOLocal(true); + idxs.clear(); + idxs.resize(data->jl_sysimg_fvars.size()); + std::iota(idxs.begin(), idxs.end(), 0); + auto fidxs = ConstantDataArray::get(Context, idxs); + auto fidxs_var = new GlobalVariable(dataM, fidxs->getType(), true, + GlobalVariable::ExternalLinkage, + fidxs, "jl_fvar_idxs"); + fidxs_var->setVisibility(GlobalValue::HiddenVisibility); + fidxs_var->setDSOLocal(true); + dataM.addModuleFlag(Module::Error, "julia.mv.suffix", MDString::get(Context, "_0")); + + // reflect the address of the jl_RTLD_DEFAULT_handle variable + // back to the caller, so that we can check for consistency issues + GlobalValue *jlRTLD_DEFAULT_var = jl_emit_RTLD_DEFAULT_var(&dataM); + addComdat(new GlobalVariable(dataM, + jlRTLD_DEFAULT_var->getType(), + true, + GlobalVariable::ExternalLinkage, + jlRTLD_DEFAULT_var, + "jl_RTLD_DEFAULT_handle_pointer"), TheTriple); + + // let the compiler know we are going to internalize a copy of this, + // if it has a current usage with ExternalLinkage + auto small_typeof_copy = 
dataM.getGlobalVariable("small_typeof"); + if (small_typeof_copy) { + small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility); + small_typeof_copy->setDSOLocal(true); + } } - } - - // Reserve space for the output files and names - // DO NOT DELETE, this is necessary to ensure memorybuffers - // have a stable backing store for both their object files and - // their names - outputs.reserve((threads + 1) * (!!unopt_bc_fname + !!bc_fname + !!obj_fname + !!asm_fname) * 2); - - auto compile = [&](Module &M, StringRef name, unsigned threads) { add_output( - M, *SourceTM, outputs, name, - unopt_bc_Archive, bc_Archive, obj_Archive, asm_Archive, - !!unopt_bc_fname, !!bc_fname, !!obj_fname, !!asm_fname, - threads, module_info - ); }; - compile(*dataM, "text", threads); - - auto sysimageM = std::make_unique("sysimage", Context); - sysimageM->setTargetTriple(dataM->getTargetTriple()); - sysimageM->setDataLayout(dataM->getDataLayout()); -#if JL_LLVM_VERSION >= 130000 - sysimageM->setStackProtectorGuard(dataM->getStackProtectorGuard()); - sysimageM->setOverrideStackAlignment(dataM->getOverrideStackAlignment()); -#endif + has_veccall = !!dataM.getModuleFlag("julia.mv.veccall"); + }); - if (TheTriple.isOSWindows()) { - // Windows expect that the function `_DllMainStartup` is present in an dll. - // Normal compilers use something like Zig's crtdll.c instead we provide a - // a stub implementation. 
- auto T_pvoid = Type::getInt8Ty(Context)->getPointerTo(); - auto T_int32 = Type::getInt32Ty(Context); - auto FT = FunctionType::get(T_int32, {T_pvoid, T_int32, T_pvoid}, false); - auto F = Function::Create(FT, Function::ExternalLinkage, "_DllMainCRTStartup", *sysimageM); - F->setCallingConv(CallingConv::X86_StdCall); - - llvm::IRBuilder<> builder(BasicBlock::Create(Context, "top", F)); - builder.CreateRet(ConstantInt::get(T_int32, 1)); - } - bool has_veccall = dataM->getModuleFlag("julia.mv.veccall"); - data->M = orc::ThreadSafeModule(); // free memory for data->M - - if (sysimg_data) { - Constant *data = ConstantDataArray::get(Context, - ArrayRef((const unsigned char*)sysimg_data, sysimg_len)); - auto sysdata = new GlobalVariable(*sysimageM, data->getType(), false, - GlobalVariable::ExternalLinkage, - data, "jl_system_image_data"); - sysdata->setAlignment(Align(64)); - addComdat(sysdata, TheTriple); - Constant *len = ConstantInt::get(T_size, sysimg_len); - addComdat(new GlobalVariable(*sysimageM, len->getType(), true, - GlobalVariable::ExternalLinkage, - len, "jl_system_image_size"), TheTriple); - } - if (imaging_mode) { - auto specs = jl_get_llvm_clone_targets(); - const uint32_t base_flags = has_veccall ? 
JL_TARGET_VEC_CALL : 0; - std::vector data; - auto push_i32 = [&] (uint32_t v) { - uint8_t buff[4]; - memcpy(buff, &v, 4); - data.insert(data.end(), buff, buff + 4); - }; - push_i32(specs.size()); - for (uint32_t i = 0; i < specs.size(); i++) { - push_i32(base_flags | (specs[i].flags & JL_TARGET_UNKNOWN_NAME)); - auto &specdata = specs[i].data; - data.insert(data.end(), specdata.begin(), specdata.end()); + { + // Don't use withModuleDo here since we delete the TSM midway through + auto TSCtx = data->M.getContext(); + auto lock = TSCtx.getLock(); + auto dataM = data->M.getModuleUnlocked(); + + // Delete data when add_output thinks it's done with it + // Saves memory for use when multithreading + data_outputs = compile(*dataM, "text", threads, [data](Module &) { delete data; }); + } + + { + LLVMContext Context; + Module metadataM("metadata", Context); + metadataM.setTargetTriple(TheTriple.str()); + metadataM.setDataLayout(DL); + metadataM.setStackProtectorGuard(StackProtectorGuard); + metadataM.setOverrideStackAlignment(OverrideStackAlignment); + + Type *T_size = DL.getIntPtrType(Context); + Type *T_psize = T_size->getPointerTo(); + + if (TheTriple.isOSWindows()) { + // Windows expect that the function `_DllMainStartup` is present in an dll. + // Normal compilers use something like Zig's crtdll.c instead we provide a + // a stub implementation. 
+ auto T_pvoid = Type::getInt8Ty(Context)->getPointerTo(); + auto T_int32 = Type::getInt32Ty(Context); + auto FT = FunctionType::get(T_int32, {T_pvoid, T_int32, T_pvoid}, false); + auto F = Function::Create(FT, Function::ExternalLinkage, "_DllMainCRTStartup", metadataM); + F->setCallingConv(CallingConv::X86_StdCall); + + llvm::IRBuilder<> builder(BasicBlock::Create(Context, "top", F)); + builder.CreateRet(ConstantInt::get(T_int32, 1)); } - auto value = ConstantDataArray::get(Context, data); - auto target_ids = new GlobalVariable(*sysimageM, value->getType(), true, - GlobalVariable::InternalLinkage, - value, "jl_dispatch_target_ids"); - auto shards = emit_shard_table(*sysimageM, T_size, T_psize, threads); - auto ptls = emit_ptls_table(*sysimageM, T_size, T_psize); - auto header = emit_image_header(*sysimageM, threads, nfvars, ngvars); - auto AT = ArrayType::get(T_size, sizeof(small_typeof) / sizeof(void*)); - auto small_typeof_copy = new GlobalVariable(*sysimageM, AT, false, - GlobalVariable::ExternalLinkage, - Constant::getNullValue(AT), - "small_typeof"); - small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility); - small_typeof_copy->setDSOLocal(true); - AT = ArrayType::get(T_psize, 5); - auto pointers = new GlobalVariable(*sysimageM, AT, false, - GlobalVariable::ExternalLinkage, - ConstantArray::get(AT, { - ConstantExpr::getBitCast(header, T_psize), - ConstantExpr::getBitCast(shards, T_psize), - ConstantExpr::getBitCast(ptls, T_psize), - ConstantExpr::getBitCast(small_typeof_copy, T_psize), - ConstantExpr::getBitCast(target_ids, T_psize) - }), - "jl_image_pointers"); - addComdat(pointers, TheTriple); - if (s) { - write_int32(s, data.size()); - ios_write(s, (const char *)data.data(), data.size()); + if (imaging_mode) { + auto specs = jl_get_llvm_clone_targets(); + const uint32_t base_flags = has_veccall ? 
JL_TARGET_VEC_CALL : 0; + std::vector data; + auto push_i32 = [&] (uint32_t v) { + uint8_t buff[4]; + memcpy(buff, &v, 4); + data.insert(data.end(), buff, buff + 4); + }; + push_i32(specs.size()); + for (uint32_t i = 0; i < specs.size(); i++) { + push_i32(base_flags | (specs[i].flags & JL_TARGET_UNKNOWN_NAME)); + auto &specdata = specs[i].data; + data.insert(data.end(), specdata.begin(), specdata.end()); + } + auto value = ConstantDataArray::get(Context, data); + auto target_ids = new GlobalVariable(metadataM, value->getType(), true, + GlobalVariable::InternalLinkage, + value, "jl_dispatch_target_ids"); + auto shards = emit_shard_table(metadataM, T_size, T_psize, threads); + auto ptls = emit_ptls_table(metadataM, T_size, T_psize); + auto header = emit_image_header(metadataM, threads, nfvars, ngvars); + auto AT = ArrayType::get(T_size, sizeof(small_typeof) / sizeof(void*)); + auto small_typeof_copy = new GlobalVariable(metadataM, AT, false, + GlobalVariable::ExternalLinkage, + Constant::getNullValue(AT), + "small_typeof"); + small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility); + small_typeof_copy->setDSOLocal(true); + AT = ArrayType::get(T_psize, 5); + auto pointers = new GlobalVariable(metadataM, AT, false, + GlobalVariable::ExternalLinkage, + ConstantArray::get(AT, { + ConstantExpr::getBitCast(header, T_psize), + ConstantExpr::getBitCast(shards, T_psize), + ConstantExpr::getBitCast(ptls, T_psize), + ConstantExpr::getBitCast(small_typeof_copy, T_psize), + ConstantExpr::getBitCast(target_ids, T_psize) + }), + "jl_image_pointers"); + addComdat(pointers, TheTriple); + if (s) { + write_int32(s, data.size()); + ios_write(s, (const char *)data.data(), data.size()); + } } - } - compile(*sysimageM, "data", 1); + // no need to free module/context, destructor handles that + metadata_outputs = compile(metadataM, "data", 1, [](Module &) {}); + } object::Archive::Kind Kind = getDefaultForHost(TheTriple); - if (unopt_bc_fname) - 
handleAllErrors(writeArchive(unopt_bc_fname, unopt_bc_Archive, true, - Kind, true, false), reportWriterError); - if (bc_fname) - handleAllErrors(writeArchive(bc_fname, bc_Archive, true, - Kind, true, false), reportWriterError); - if (obj_fname) - handleAllErrors(writeArchive(obj_fname, obj_Archive, true, - Kind, true, false), reportWriterError); - if (asm_fname) - handleAllErrors(writeArchive(asm_fname, asm_Archive, true, - Kind, true, false), reportWriterError); - - delete data; +#define WRITE_ARCHIVE(fname, field, prefix, suffix) \ + if (fname) {\ + std::vector archive; \ + SmallVector filenames; \ + SmallVector buffers; \ + for (size_t i = 0; i < threads; i++) { \ + filenames.push_back((StringRef("text") + prefix + "#" + Twine(i) + suffix).str()); \ + buffers.push_back(StringRef(data_outputs[i].field.data(), data_outputs[i].field.size())); \ + } \ + filenames.push_back("metadata" prefix suffix); \ + buffers.push_back(StringRef(metadata_outputs[0].field.data(), metadata_outputs[0].field.size())); \ + if (z) { \ + filenames.push_back("sysimg" prefix suffix); \ + buffers.push_back(StringRef(sysimg_outputs[0].field.data(), sysimg_outputs[0].field.size())); \ + } \ + for (size_t i = 0; i < filenames.size(); i++) { \ + archive.push_back(NewArchiveMember(MemoryBufferRef(buffers[i], filenames[i]))); \ + } \ + handleAllErrors(writeArchive(fname, archive, true, Kind, true, false), reportWriterError); \ + } + + WRITE_ARCHIVE(unopt_bc_fname, unopt, "_unopt", ".bc"); + WRITE_ARCHIVE(bc_fname, opt, "_opt", ".bc"); + WRITE_ARCHIVE(obj_fname, obj, "", ".o"); + WRITE_ARCHIVE(asm_fname, asm_, "", ".s"); } void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis) diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c index f2c8c705bd3dc..9f0250d1f14f9 100644 --- a/src/codegen-stubs.c +++ b/src/codegen-stubs.c @@ -12,7 +12,7 @@ JL_DLLEXPORT void jl_dump_native_fallback(void *native_code, const char *bc_fname, const char *unopt_bc_fname, 
const char *obj_fname, const char *asm_fname, - const char *sysimg_data, size_t sysimg_len, ios_t *s) UNAVAILABLE + ios_t *z, ios_t *s) UNAVAILABLE JL_DLLEXPORT void jl_get_llvm_gvs_fallback(void *native_code, arraylist_t *gvs) UNAVAILABLE JL_DLLEXPORT void jl_get_llvm_external_fns_fallback(void *native_code, arraylist_t *gvs) UNAVAILABLE diff --git a/src/julia_internal.h b/src/julia_internal.h index 87a35b80516c2..cf65521770681 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -1667,7 +1667,7 @@ JL_DLLIMPORT jl_value_t *jl_dump_function_asm(jl_llvmf_dump_t *dump, char emit_m JL_DLLIMPORT void *jl_create_native(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int policy, int imaging_mode, int cache, size_t world); JL_DLLIMPORT void jl_dump_native(void *native_code, const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname, - const char *sysimg_data, size_t sysimg_len, ios_t *s); + ios_t *z, ios_t *s); JL_DLLIMPORT void jl_get_llvm_gvs(void *native_code, arraylist_t *gvs); JL_DLLIMPORT void jl_get_llvm_external_fns(void *native_code, arraylist_t *gvs); JL_DLLIMPORT void jl_get_function_id(void *native_code, jl_code_instance_t *ncode, diff --git a/src/precompile.c b/src/precompile.c index 4aac28ff9a790..a7174492cf0e1 100644 --- a/src/precompile.c +++ b/src/precompile.c @@ -111,7 +111,9 @@ JL_DLLEXPORT void jl_write_compiler_output(void) bool_t emit_native = jl_options.outputo || jl_options.outputbc || jl_options.outputunoptbc || jl_options.outputasm; - bool_t emit_split = jl_options.outputji && emit_native; + const char *outputji = jl_options.outputji; + + bool_t emit_split = outputji && emit_native; ios_t *s = NULL; ios_t *z = NULL; @@ -123,40 +125,39 @@ JL_DLLEXPORT void jl_write_compiler_output(void) if (!emit_split) z = s; + ios_t f; + + if (outputji) { + if (ios_file(&f, outputji, 1, 1, 1, 1) == NULL) + jl_errorf("cannot open system image file \"%s\" for writing", 
outputji); + ios_write(&f, (const char *)s->buf, (size_t)s->size); + ios_close(s); + free(s); + } + // jl_dump_native writes the clone_targets into `s` // We need to postpone the srctext writing after that. if (native_code) { + ios_t *targets = outputji ? &f : NULL; + // jl_dump_native will close and free z when appropriate + // this is a horrible abstraction, but + // this helps reduce live memory significantly jl_dump_native(native_code, jl_options.outputbc, jl_options.outputunoptbc, jl_options.outputo, jl_options.outputasm, - (const char*)z->buf, (size_t)z->size, s); + z, targets); jl_postoutput_hook(); } - if ((jl_options.outputji || emit_native) && jl_options.incremental) { - write_srctext(s, udeps, srctextpos); - } - - if (jl_options.outputji) { - ios_t f; - if (ios_file(&f, jl_options.outputji, 1, 1, 1, 1) == NULL) - jl_errorf("cannot open system image file \"%s\" for writing", jl_options.outputji); - ios_write(&f, (const char*)s->buf, (size_t)s->size); + if (outputji) { + if (jl_options.incremental) { + write_srctext(&f, udeps, srctextpos); + } ios_close(&f); } - if (s) { - ios_close(s); - free(s); - } - - if (emit_split) { - ios_close(z); - free(z); - } - for (size_t i = 0; i < jl_current_modules.size; i += 2) { if (jl_current_modules.table[i + 1] != HT_NOTFOUND) { jl_printf(JL_STDERR, "\nWARNING: detected unclosed module: "); From 269d3501a645b1a7807c1676092acd0c9719459d Mon Sep 17 00:00:00 2001 From: Max Horn Date: Mon, 26 Jun 2023 20:56:38 +0200 Subject: [PATCH 236/290] Remove obsolete admonition from asyncmap docstring (#50297) --- base/asyncmap.jl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/base/asyncmap.jl b/base/asyncmap.jl index 0b3678f6b4b9b..be16ba1b27610 100644 --- a/base/asyncmap.jl +++ b/base/asyncmap.jl @@ -70,12 +70,6 @@ julia> asyncmap(batch_func, 1:5; ntasks=2, batch_size=2) "args_tuple: (4,), element_val: 4, task: 4904288162898683522" "args_tuple: (5,), element_val: 5, task: 9118321258196414413" ``` - -!!! 
note - Currently, all tasks in Julia are executed in a single OS thread co-operatively. Consequently, - `asyncmap` is beneficial only when the mapping function involves any I/O - disk, network, remote - worker invocation, etc. - """ function asyncmap(f, c...; ntasks=0, batch_size=nothing) return async_usemap(f, c...; ntasks=ntasks, batch_size=batch_size) From 76c906ed2cccb7326abe3d3a60d2ef98f172a454 Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Tue, 27 Jun 2023 10:18:15 +0530 Subject: [PATCH 237/290] Remove unnecessary methods for `CartesianIndices` (#50258) * Remove unnecessary methods for CartesianIndices * remove IteratorSize * Add tests --- base/multidimensional.jl | 13 ++----------- test/abstractarray.jl | 13 +++++++++++++ 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/base/multidimensional.jl b/base/multidimensional.jl index b76c2637d44f0..ba4e6eb12695a 100644 --- a/base/multidimensional.jl +++ b/base/multidimensional.jl @@ -2,8 +2,8 @@ ### Multidimensional iterators module IteratorsMD - import .Base: eltype, length, size, first, last, in, getindex, setindex!, IndexStyle, - min, max, zero, oneunit, isless, eachindex, ndims, IteratorSize, + import .Base: eltype, length, size, first, last, in, getindex, setindex!, + min, max, zero, oneunit, isless, eachindex, convert, show, iterate, promote_rule import .Base: +, -, *, (:) @@ -342,7 +342,6 @@ module IteratorsMD # AbstractArray implementation Base.axes(iter::CartesianIndices{N,R}) where {N,R} = map(Base.axes1, iter.indices) - Base.IndexStyle(::Type{CartesianIndices{N,R}}) where {N,R} = IndexCartesian() Base.has_offset_axes(iter::CartesianIndices) = Base.has_offset_axes(iter.indices...) 
@propagate_inbounds function isassigned(iter::CartesianIndices{N,R}, I::Vararg{Int, N}) where {N,R} for i in 1:N @@ -390,10 +389,6 @@ module IteratorsMD getindex(c, C) end - ndims(R::CartesianIndices) = ndims(typeof(R)) - ndims(::Type{CartesianIndices{N}}) where {N} = N - ndims(::Type{CartesianIndices{N,TT}}) where {N,TT} = N - eachindex(::IndexCartesian, A::AbstractArray) = CartesianIndices(axes(A)) @inline function eachindex(::IndexCartesian, A::AbstractArray, B::AbstractArray...) @@ -402,10 +397,6 @@ module IteratorsMD CartesianIndices(axsA) end - eltype(::Type{CartesianIndices{N}}) where {N} = CartesianIndex{N} - eltype(::Type{CartesianIndices{N,TT}}) where {N,TT} = CartesianIndex{N} - IteratorSize(::Type{<:CartesianIndices{N}}) where {N} = Base.HasShape{N}() - @inline function iterate(iter::CartesianIndices) iterfirst = first(iter) if !all(map(in, iterfirst.I, iter.indices)) diff --git a/test/abstractarray.jl b/test/abstractarray.jl index 7fc0652ae6488..912e0d5883d12 100644 --- a/test/abstractarray.jl +++ b/test/abstractarray.jl @@ -236,6 +236,19 @@ end end end +@testset "AbstractArray fallbacks for CartesianIndices" begin + @test ndims(CartesianIndices{3}) == 3 + @test eltype(CartesianIndices{3}) == CartesianIndex{3} + for t in ((1:2, 1:2), (3:4,), ()) + C2 = CartesianIndices(t) + @test ndims(C2) == length(t) + @test ndims(typeof(C2)) == length(t) + @test IndexStyle(C2) == IndexCartesian() + @test eltype(C2) == CartesianIndex{length(t)} + @test Base.IteratorSize(C2) isa Base.HasShape{length(t)} + end +end + @testset "LinearIndices" begin @testset "constructors" begin for oinds in [ From fc8b7003c92490b31067f3c89c7aa64756a1bf5d Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Tue, 27 Jun 2023 14:02:19 +0200 Subject: [PATCH 238/290] Docs: Bang has other meaning for IO and RNG functions (#50138) * Docs: Bang has other meaning for IO and RNG functions I have seen multiple people ask why functions such as `write` or `rand` does not end with a `!`, given that 
they mutate the input IO or RNG object. Mention in the style guide that `!` means something slightly different for functions taking IO or RNG arguments. * Update doc/src/manual/style-guide.md Co-authored-by: Thomas Christensen * Update doc/src/manual/style-guide.md Co-authored-by: Thomas Christensen * Update doc/src/manual/style-guide.md Co-authored-by: Lilith Orion Hafner * Increment -> advance --------- Co-authored-by: Thomas Christensen Co-authored-by: Lilith Orion Hafner --- doc/src/manual/style-guide.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/src/manual/style-guide.md b/doc/src/manual/style-guide.md index d250fdd811387..d567bf7627073 100644 --- a/doc/src/manual/style-guide.md +++ b/doc/src/manual/style-guide.md @@ -119,6 +119,10 @@ with both copying and modifying forms (e.g., [`sort`](@ref) and [`sort!`](@ref)) which are just modifying (e.g., [`push!`](@ref), [`pop!`](@ref), [`splice!`](@ref)). It is typical for such functions to also return the modified array for convenience. +Functions related to IO or making use of random number generators (RNG) are notable exceptions: +Since these functions almost invariably must mutate the IO or RNG, functions ending with `!` are used to signify a mutation _other_ than mutating the IO or advancing the RNG state. +For example, `rand(x)` mutates the RNG, whereas `rand!(x)` mutates both the RNG and `x`; similarly, `read(io)` mutates `io`, whereas `read!(io, x)` mutates both arguments. + ## Avoid strange type `Union`s Types such as `Union{Function,AbstractString}` are often a sign that some design could be cleaner. 
From 6f26e4fd099505f046a2bfb96e76e68a66f0b748 Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Tue, 27 Jun 2023 19:23:31 +0530 Subject: [PATCH 239/290] Throw on non-promotion in fallback `UnitRange` constructor (#50292) * Throw on promotion failure in UnitRange * rename variables --- base/range.jl | 6 +++++- test/ranges.jl | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/base/range.jl b/base/range.jl index f0bcc0dd20ae8..6b701d31b0358 100644 --- a/base/range.jl +++ b/base/range.jl @@ -404,7 +404,11 @@ struct UnitRange{T<:Real} <: AbstractUnitRange{T} end UnitRange{T}(start, stop) where {T<:Real} = UnitRange{T}(convert(T, start), convert(T, stop)) UnitRange(start::T, stop::T) where {T<:Real} = UnitRange{T}(start, stop) -UnitRange(start, stop) = UnitRange(promote(start, stop)...) +function UnitRange(start, stop) + startstop_promoted = promote(start, stop) + not_sametype((start, stop), startstop_promoted) + UnitRange(startstop_promoted...) +end # if stop and start are integral, we know that their difference is a multiple of 1 unitrange_last(start::Integer, stop::Integer) = diff --git a/test/ranges.jl b/test/ranges.jl index 98233267d03a9..b263e6d4d530d 100644 --- a/test/ranges.jl +++ b/test/ranges.jl @@ -59,6 +59,9 @@ using InteractiveUtils: code_llvm @test last(10:0.2:3) === 9.8 @test step(10:0.2:3) === 0.2 @test isempty(10:0.2:3) + + unitrangeerrstr = "promotion of types Char and Char failed to change any arguments" + @test_throws unitrangeerrstr UnitRange('a', 'b') end using Dates, Random From 0e147eb8be67898afb424a900df96229ad558997 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 27 Jun 2023 12:13:40 -0400 Subject: [PATCH 240/290] irinterp: Add handling for :throw_undef_if_not (#50303) This addresses an existing TODO to terminate irinterp on discovering a :throw_undef_if_not that is dead. The underlying infrastructure to do this was added in #49692, so this just needed to be wired up properly. 
--- base/compiler/ssair/irinterp.jl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/base/compiler/ssair/irinterp.jl b/base/compiler/ssair/irinterp.jl index 0a738371aca6e..bafa70ab22b03 100644 --- a/base/compiler/ssair/irinterp.jl +++ b/base/compiler/ssair/irinterp.jl @@ -130,8 +130,16 @@ function reprocess_instruction!(interp::AbstractInterpreter, idx::Int, bb::Union if nothrow ir.stmts[idx][:flag] |= IR_FLAG_NOTHROW end - elseif head === :throw_undef_if_not || # TODO: Terminate interpretation early if known false? - head === :gc_preserve_begin || + elseif head === :throw_undef_if_not + condval = maybe_extract_const_bool(argextype(inst.args[2], ir)) + condval isa Bool || return false + if condval + ir.stmts[idx][:inst] = nothing + # We simplified the IR, but we did not update the type + return false + end + rt = Union{} + elseif head === :gc_preserve_begin || head === :gc_preserve_end return false else From 3b854f44711ca57cb678711af61959b3cebb130b Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 27 Jun 2023 16:19:43 -0400 Subject: [PATCH 241/290] Fix uniquerep predicate in codegen (#50295) Fixes #50293. This code probably predates us being clear on what the uniquerep predicate is. 
--- src/codegen.cpp | 11 +++++------ test/core.jl | 7 +++++++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index 632d9cfc6c129..a81bb6eb20cc8 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1862,6 +1862,7 @@ static inline jl_cgval_t ghostValue(jl_codectx_t &ctx, jl_value_t *typ) typ = (jl_value_t*)jl_typeofbottom_type->super; } if (jl_is_type_type(typ)) { + assert(is_uniquerep_Type(typ)); // replace T::Type{T} with T, by assuming that T must be a leaftype of some sort jl_cgval_t constant(NULL, true, typ, NULL, best_tbaa(ctx.tbaa(), typ)); constant.constant = jl_tparam0(typ); @@ -1933,16 +1934,14 @@ static inline jl_cgval_t value_to_pointer(jl_codectx_t &ctx, const jl_cgval_t &v static inline jl_cgval_t mark_julia_type(jl_codectx_t &ctx, Value *v, bool isboxed, jl_value_t *typ) { - if (jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ)) { - // no need to explicitly load/store a constant/ghost value - return ghostValue(ctx, typ); - } if (jl_is_type_type(typ)) { - jl_value_t *tp0 = jl_tparam0(typ); - if (jl_is_concrete_type(tp0) || tp0 == jl_bottom_type) { + if (is_uniquerep_Type(typ)) { // replace T::Type{T} with T return ghostValue(ctx, typ); } + } else if (jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ)) { + // no need to explicitly load/store a constant/ghost value + return ghostValue(ctx, typ); } Type *T = julia_type_to_llvm(ctx, typ); if (type_is_ghost(T)) { diff --git a/test/core.jl b/test/core.jl index 7eaa6458d3690..a87c45b698e49 100644 --- a/test/core.jl +++ b/test/core.jl @@ -8033,3 +8033,10 @@ bar50250(b, y) = (b ? 
Bar50250(y, y) : Bar50250(y)).x @test_throws UndefRefError foo50250(false, 1) @test bar50250(true, 1) === 1 @test_throws UndefRefError bar50250(false, 1) + +# Test that Type{typeof(Union{})} doesn't get codegen'ed as a constant (#50293) +baz50293(x::Union{Type, Core.Const}) = Base.issingletontype(x) +bar50293(@nospecialize(u)) = (Base.issingletontype(u.a), baz50293(u.a)) +let u = Union{Type{Union{}}, Type{Any}}, ab = bar50293(u) + @test ab[1] == ab[2] == false +end From 290c619478fc0ae2c41a24a9166d1b5facaba2de Mon Sep 17 00:00:00 2001 From: Paul Berg Date: Tue, 27 Jun 2023 22:21:14 +0200 Subject: [PATCH 242/290] AllocOpt: Handle objref with no preserve_end in a single block. (#50277) This fixes the AllocOpt pass when there is a single block and no gc_preserve_end. The dominator tree traversal now considers the starting (gc_preserve_begin) block as well and does not introduce a lifetime end when there is no gc_preserve_end for the objref. --- src/llvm-alloc-opt.cpp | 51 ++++++++++++++++++------------- test/llvmpasses/alloc-opt-pass.ll | 23 ++++++++++++++ 2 files changed, 53 insertions(+), 21 deletions(-) diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index acb2d673d6760..5bcc28e7dad6c 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -433,36 +433,45 @@ void Optimizer::insertLifetime(Value *ptr, Constant *sz, Instruction *orig) abort(); } #endif - // Record extra BBs that contain invisible uses. + + // Record extra BBs that contain invisible uses with gc_preserve_{begin,end}. + // We traverse the dominator tree starting at each `gc_preserve_begin` and marking blocks + // as users until a corresponding `gc_preserve_end` is found. Blocks containing + // the `gc_preserve_end` have already been marked in the previous step. SmallSet extra_use; SmallVector*, 8> dominated; for (auto preserve: use_info.preserves) { - for (auto RN = DT.getNode(preserve->getParent()); RN; - RN = dominated.empty() ? 
nullptr : dominated.pop_back_val()) { - for (auto N: *RN) { - auto bb = N->getBlock(); - if (extra_use.count(bb)) - continue; - bool ended = false; - for (auto end: preserve->users()) { - auto end_bb = cast(end)->getParent(); - auto end_node = DT.getNode(end_bb); - if (end_bb == bb || (end_node && DT.dominates(end_node, N))) { - ended = true; - break; - } + assert(dominated.empty()); + dominated.push_back(DT.getNode(preserve->getParent())); + while (!dominated.empty()) { + auto N = dominated.pop_back_val(); + if (!N) { + dominated.clear(); + break; + } + auto bb = N->getBlock(); + if (extra_use.count(bb)) + continue; + bool ended = false; + for (auto end: preserve->users()) { + auto end_bb = cast(end)->getParent(); + auto end_node = DT.getNode(end_bb); + if (end_bb == bb || (end_node && DT.dominates(end_node, N))) { + ended = true; + break; } - if (ended) - continue; - bbs.insert(bb); - extra_use.insert(bb); - dominated.push_back(N); } + if (ended) + continue; + bbs.insert(bb); + extra_use.insert(bb); + dominated.append(N->begin(), N->end()); } - assert(dominated.empty()); } + // For each BB, find the first instruction(s) where the allocation is possibly dead. // If all successors are live, then there isn't one. + // If the BB has "invisible" uses, then there isn't one. // If all successors are dead, then it's the first instruction after the last use // within the BB. 
// If some successors are live and others are dead, it's the first instruction in diff --git a/test/llvmpasses/alloc-opt-pass.ll b/test/llvmpasses/alloc-opt-pass.ll index 30ee1754926d7..b7e0647263caa 100644 --- a/test/llvmpasses/alloc-opt-pass.ll +++ b/test/llvmpasses/alloc-opt-pass.ll @@ -103,6 +103,7 @@ declare {}*** @julia.get_pgcstack() declare noalias {} addrspace(10)* @julia.gc_alloc_obj(i8*, i64, {} addrspace(10)*) declare {}* @julia.pointer_from_objref({} addrspace(11)*) declare void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) declare token @llvm.julia.gc_preserve_begin(...) declare void @llvm.julia.gc_preserve_end(token) @@ -139,3 +140,25 @@ L2: ret void } ; CHECK-LABEL: }{{$}} + +; CHECK-LABEL: @lifetime_no_preserve_end +; CHECK: alloca +; CHECK-NOT: call token(...) @llvm.julia.gc_preserve_begin +; CHECK: call void @llvm.lifetime.start +; CHECK-NOT: call void @llvm.lifetime.end +define void @lifetime_no_preserve_end({}* noalias nocapture noundef nonnull sret({}) %0) { + %pgcstack = call {}*** @julia.get_pgcstack() + %ptls = call {}*** @julia.ptls_states() + %ptls_i8 = bitcast {}*** %ptls to i8* + %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag) + %token = call token (...) 
@llvm.julia.gc_preserve_begin({} addrspace(10)* %v) + %v_derived = addrspacecast {} addrspace(10)* %v to {} addrspace(11)* + %ptr = call nonnull {}* @julia.pointer_from_objref({} addrspace(11)* %v_derived) + %ptr_raw = bitcast {}* %ptr to i8* + call void @external_function() ; safepoint + %ret_raw = bitcast {}* %0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %ret_raw, i8 * align 8 %ptr_raw, i64 0, i1 false) + %ret_raw2 = bitcast {}* %0 to i8* + ret void +} +; CHECK-LABEL: }{{$}} From ba0e4843cb67eeea6882ddabf30c8c26bf22e1f1 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Tue, 27 Jun 2023 22:41:56 +0200 Subject: [PATCH 243/290] Add section about multithreaded linear algebra to performance tips (#50124) * Add section about multithreaded linear algebra to performance tips * Mention linear algebra backends --------- Co-authored-by: Viral B. Shah --- doc/src/manual/performance-tips.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/doc/src/manual/performance-tips.md b/doc/src/manual/performance-tips.md index ffb84333e8e78..c86630ce2a8f1 100644 --- a/doc/src/manual/performance-tips.md +++ b/doc/src/manual/performance-tips.md @@ -1631,3 +1631,32 @@ will not require this degree of programmer annotation to attain performance. In the mean time, some user-contributed packages like [FastClosures](https://github.com/c42f/FastClosures.jl) automate the insertion of `let` statements as in `abmult3`. + +## [Multithreading and linear algebra](@id man-multithreading-linear-algebra) + +This section applies to multithreaded Julia code which, in each thread, performs linear algebra operations. +Indeed, these linear algebra operations involve BLAS / LAPACK calls, which are themselves multithreaded. +In this case, one must ensure that cores aren't oversubscribed due to the two different types of multithreading. 
+ +Julia compiles and uses its own copy of OpenBLAS for linear algebra, whose number of threads is controlled by the environment variable `OPENBLAS_NUM_THREADS`. +It can either be set as a command line option when launching Julia, or modified during the Julia session with `BLAS.set_num_threads(N)` (the submodule `BLAS` is exported by `using LinearAlgebra`). +Its current value can be accessed with `BLAS.get_num_threads()`. + +When the user does not specify anything, Julia tries to choose a reasonable value for the number of OpenBLAS threads (e.g. based on the platform, the Julia version, etc.). +However, it is generally recommended to check and set the value manually. +The OpenBLAS behavior is as follows: + +* If `OPENBLAS_NUM_THREADS=1`, OpenBLAS uses the calling Julia thread(s), i.e. it "lives in" the Julia thread that runs the computation. +* If `OPENBLAS_NUM_THREADS=N>1`, OpenBLAS creates and manages its own pool of threads (`N` in total). There is just one OpenBLAS thread pool shared among all Julia threads. + +When you start Julia in multithreaded mode with `JULIA_NUM_THREADS=X`, it is generally recommended to set `OPENBLAS_NUM_THREADS=1`. +Given the behavior described above, increasing the number of BLAS threads to `N>1` can very easily lead to worse performance, in particular when `N< Date: Tue, 27 Jun 2023 17:49:58 -0400 Subject: [PATCH 244/290] Fix memoryssa preservation bug --- src/llvm-julia-licm.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/llvm-julia-licm.cpp b/src/llvm-julia-licm.cpp index fc867252318c5..da1df1ea8bea7 100644 --- a/src/llvm-julia-licm.cpp +++ b/src/llvm-julia-licm.cpp @@ -176,7 +176,7 @@ struct JuliaLICM : public JuliaPassContext { // Lazy initialization of exit blocks insertion points. 
bool exit_pts_init = false; SmallVector _exit_pts; - auto get_exit_pts = [&] () -> ArrayRef { + auto get_exit_pts = [&] () -> MutableArrayRef { if (!exit_pts_init) { exit_pts_init = true; SmallVector exit_bbs; @@ -242,6 +242,7 @@ struct JuliaLICM : public JuliaPassContext { } ++SunkPreserveEnd; moveInstructionBefore(*call, *exit_pts[0], MSSAU, SE, MemorySSA::Beginning); + exit_pts[0] = call; LLVM_DEBUG(dbgs() << "Sunk gc_preserve_end: " << *call << "\n"); REMARK([&](){ return OptimizationRemark(DEBUG_TYPE, "Sunk", call) @@ -250,6 +251,7 @@ struct JuliaLICM : public JuliaPassContext { for (unsigned i = 1; i < exit_pts.size(); i++) { // Clone exit auto CI = CallInst::Create(call, {}, exit_pts[i]); + exit_pts[i] = CI; createNewInstruction(CI, call, MSSAU); LLVM_DEBUG(dbgs() << "Cloned and sunk gc_preserve_end: " << *CI << "\n"); REMARK([&](){ From 5ea7f0be94377adb9ebb70666108b8649e3dce3d Mon Sep 17 00:00:00 2001 From: Prem Chintalapudi Date: Tue, 27 Jun 2023 18:49:58 -0400 Subject: [PATCH 245/290] Add some memoryssa preservation tests --- test/llvmpasses/julia-licm-memoryssa.ll | 104 ++++++++++++++++++++++++ test/llvmpasses/julia-licm.ll | 69 ++++++++++++++++ 2 files changed, 173 insertions(+) create mode 100644 test/llvmpasses/julia-licm-memoryssa.ll diff --git a/test/llvmpasses/julia-licm-memoryssa.ll b/test/llvmpasses/julia-licm-memoryssa.ll new file mode 100644 index 0000000000000..fcc53b78b4e09 --- /dev/null +++ b/test/llvmpasses/julia-licm-memoryssa.ll @@ -0,0 +1,104 @@ +; COM: NewPM-only test, tests that memoryssa is preserved correctly + +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(JuliaLICM),print)' -S -o /dev/null %s 2>&1 | FileCheck %s + +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(JuliaLICM),print)' -S -o /dev/null %s 2>&1 | FileCheck %s + +@tag = external addrspace(10) global {}, align 16 + +declare void 
@julia.write_barrier({}*, ...) + +declare {}*** @julia.get_pgcstack() + +declare token @llvm.julia.gc_preserve_begin(...) + +declare void @llvm.julia.gc_preserve_end(token) + +declare void @mssa_use({} addrspace(10)*) + +; COM: check basic preserve hoist/sink functionality +; CHECK-LABEL: MemorySSA for function: hoist_sink_preserves +; CHECK-LABEL: @hoist_sink_preserves +define void @hoist_sink_preserves({} addrspace(10)* %obj, i1 %ret) { +; CHECK: top: +top: +; CHECK-NEXT: [[PGCSTACK:[0-9]+]] = MemoryDef(liveOnEntry) + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** +; CHECK: br label %preheader + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NEXT: [[PRESERVE_TOKEN:[0-9]+]] = MemoryDef([[PGCSTACK]]) +; CHECK-NEXT: %preserve_token = call token (...) @llvm.julia.gc_preserve_begin +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; CHECK-NOT: call token (...) @llvm.julia.gc_preserve_begin + %preserve_token = call token (...) 
@llvm.julia.gc_preserve_begin({} addrspace(10)* %obj) +; CHECK-NOT: call void @llvm.julia.gc_preserve_end + call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: [[MPHI:[0-9]+]] = MemoryPhi({preheader,[[PRESERVE_TOKEN]]},{loop,[[MPHI]]}) +; CHECK-NEXT: br i1 %ret + br i1 %ret, label %return, label %loop +; CHECK: return: +return: +; CHECK-NEXT: [[PRESERVE_END:[0-9]+]] = MemoryDef([[MPHI]]) +; CHECK-NEXT: call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: [[MSSA_USE:[0-9]+]] = MemoryDef([[PRESERVE_END]]) +; CHECK-NEXT: call void @mssa_use + call void @mssa_use({} addrspace(10)* %obj) +; CHECK-NEXT: ret void + ret void +} + +; COM: check sink functionality when there are multiple loop exit blocks +; CHECK-LABEL: MemorySSA for function: hoist_multisink_preserves +; CHECK-LABEL: @hoist_multisink_preserves +define void @hoist_multisink_preserves({} addrspace(10)* %obj, i1 %ret) { +; CHECK: top: +top: +; CHECK-NEXT: [[PGCSTACK:[0-9]+]] = MemoryDef(liveOnEntry) + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** +; CHECK: br label %preheader + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NEXT: [[PRESERVE_TOKEN:[0-9]+]] = MemoryDef([[PGCSTACK]]) +; CHECK-NEXT: %preserve_token = call token (...) @llvm.julia.gc_preserve_begin +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; CHECK-NOT: call token (...) @llvm.julia.gc_preserve_begin + %preserve_token = call token (...) 
@llvm.julia.gc_preserve_begin({} addrspace(10)* %obj) +; CHECK-NOT: call void @llvm.julia.gc_preserve_end + call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: [[MPHI:[0-9]+]] = MemoryPhi({preheader,[[PRESERVE_TOKEN]]},{loop2,[[MPHI]]}) +; CHECK-NEXT: br i1 %ret + br i1 %ret, label %return, label %loop2 +; CHECK: loop2: +loop2: +; CHECK-NEXT: br i1 %ret + br i1 %ret, label %return2, label %loop +; CHECK: return: +return: +; CHECK-NEXT: [[PRESERVE_END_1:[0-9]+]] = MemoryDef([[MPHI]]) +; CHECK-NEXT: call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: [[MSSA_USE:[0-9]+]] = MemoryDef([[PRESERVE_END_1]]) +; CHECK-NEXT: call void @mssa_use + call void @mssa_use({} addrspace(10)* %obj) +; CHECK-NEXT: ret void + ret void +; CHECK: return2: +return2: +; CHECK-NEXT: [[PRESERVE_END_2:[0-9]+]] = MemoryDef([[MPHI]]) +; CHECK-NEXT: call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: [[MSSA_USE:[0-9]+]] = MemoryDef([[PRESERVE_END_2]]) +; CHECK-NEXT: call void @mssa_use + call void @mssa_use({} addrspace(10)* %obj) +; CHECK-NEXT: ret void + ret void +} diff --git a/test/llvmpasses/julia-licm.ll b/test/llvmpasses/julia-licm.ll index 8a39f5e50aff5..8bedc5db75d96 100644 --- a/test/llvmpasses/julia-licm.ll +++ b/test/llvmpasses/julia-licm.ll @@ -12,6 +12,75 @@ declare void @julia.write_barrier({}*, ...) declare {}*** @julia.get_pgcstack() +declare token @llvm.julia.gc_preserve_begin(...) + +declare void @llvm.julia.gc_preserve_end(token) + +; COM: check basic preserve hoist/sink functionality +; CHECK-LABEL: @hoist_sink_preserves +define void @hoist_sink_preserves({} addrspace(10)* %obj, i1 %ret) { +top: + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** +; CHECK: br label %preheader + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NEXT: %preserve_token = call token (...) 
@llvm.julia.gc_preserve_begin +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; CHECK-NOT: call token (...) @llvm.julia.gc_preserve_begin + %preserve_token = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %obj) +; CHECK-NOT: call void @llvm.julia.gc_preserve_end + call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: br i1 %ret + br i1 %ret, label %return, label %loop +; CHECK: return: +return: +; CHECK-NEXT: call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: ret void + ret void +} + +; COM: check sink functionality when there are multiple loop exit blocks +; CHECK-LABEL: @hoist_multisink_preserves +define void @hoist_multisink_preserves({} addrspace(10)* %obj, i1 %ret) { +top: + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** +; CHECK: br label %preheader + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NEXT: %preserve_token = call token (...) @llvm.julia.gc_preserve_begin +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; CHECK-NOT: call token (...) @llvm.julia.gc_preserve_begin + %preserve_token = call token (...) 
@llvm.julia.gc_preserve_begin({} addrspace(10)* %obj) +; CHECK-NOT: call void @llvm.julia.gc_preserve_end + call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: br i1 %ret + br i1 %ret, label %return, label %loop2 +; CHECK: loop2: +loop2: +; CHECK-NEXT: br i1 %ret + br i1 %ret, label %return2, label %loop +; CHECK: return: +return: +; CHECK-NEXT: call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: ret void + ret void +; CHECK: return2: +return2: +; CHECK-NEXT: call void @llvm.julia.gc_preserve_end(token %preserve_token) +; CHECK-NEXT: ret void + ret void +} + ; COM: check basic allocation hoisting functionality ; CHECK-LABEL: @julia_allocation_hoist define nonnull {} addrspace(10)* @julia_allocation_hoist(i64 signext %0) #0 { From f5faa08e9a506ba752f34e03327907d3b6959540 Mon Sep 17 00:00:00 2001 From: Prem Chintalapudi Date: Tue, 27 Jun 2023 19:16:29 -0400 Subject: [PATCH 246/290] Fix small bug in memorydef for memset --- src/llvm-julia-licm.cpp | 7 +-- test/llvmpasses/julia-licm-memoryssa.ll | 73 ++++++++++++++++++++++++- 2 files changed, 72 insertions(+), 8 deletions(-) diff --git a/src/llvm-julia-licm.cpp b/src/llvm-julia-licm.cpp index da1df1ea8bea7..8e03fe434a79c 100644 --- a/src/llvm-julia-licm.cpp +++ b/src/llvm-julia-licm.cpp @@ -347,11 +347,8 @@ struct JuliaLICM : public JuliaPassContext { auto align = Align(DL.getPointerSize(0)); auto clear_obj = builder.CreateMemSet(obj_i8, ConstantInt::get(Type::getInt8Ty(call->getContext()), 0), call->getArgOperand(1), align); if (MSSAU.getMemorySSA()) { - auto alloc_mdef = MSSAU.getMemorySSA()->getMemoryAccess(call); - assert(isa(alloc_mdef) && "Expected alloc to be associated with a memory def!"); - auto clear_mdef = MSSAU.createMemoryAccessAfter(clear_obj, nullptr, alloc_mdef); - assert(isa(clear_mdef) && "Expected memset to be associated with a memory def!"); - (void) clear_mdef; + auto clear_mdef = MSSAU.createMemoryAccessInBB(clear_obj, nullptr, 
clear_obj->getParent(), MemorySSA::BeforeTerminator); + MSSAU.insertDef(cast(clear_mdef), true); } changed = true; } diff --git a/test/llvmpasses/julia-licm-memoryssa.ll b/test/llvmpasses/julia-licm-memoryssa.ll index fcc53b78b4e09..e1684c7577578 100644 --- a/test/llvmpasses/julia-licm-memoryssa.ll +++ b/test/llvmpasses/julia-licm-memoryssa.ll @@ -1,12 +1,12 @@ ; COM: NewPM-only test, tests that memoryssa is preserved correctly -; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(JuliaLICM),print)' -S -o /dev/null %s 2>&1 | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(JuliaLICM),print)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK,TYPED -; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(JuliaLICM),print)' -S -o /dev/null %s 2>&1 | FileCheck %s +; RUN: opt -enable-new-pm=1 --opaque-pointers=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(loop-mssa(JuliaLICM),print)' -S -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=CHECK,OPAQUE @tag = external addrspace(10) global {}, align 16 -declare void @julia.write_barrier({}*, ...) +declare void @julia.write_barrier({} addrspace(10)*, ...) 
declare {}*** @julia.get_pgcstack() @@ -16,6 +16,8 @@ declare void @llvm.julia.gc_preserve_end(token) declare void @mssa_use({} addrspace(10)*) +declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*) + ; COM: check basic preserve hoist/sink functionality ; CHECK-LABEL: MemorySSA for function: hoist_sink_preserves ; CHECK-LABEL: @hoist_sink_preserves @@ -102,3 +104,68 @@ return2: ; CHECK-NEXT: ret void ret void } + +define void @hoist_allocation({} addrspace(10)* %obj, i1 %ret) { +; CHECK: top: +top: +; CHECK-NEXT: [[PGCSTACK:[0-9]+]] = MemoryDef(liveOnEntry) + %pgcstack = call {}*** @julia.get_pgcstack() + %current_task = bitcast {}*** %pgcstack to {}** + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NEXT: [[ALLOC:[0-9]+]] = MemoryDef([[PGCSTACK]]) + +; TYPED-NEXT: %alloc = call {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 0, {} addrspace(10)* @tag) +; TYPED-NEXT: %[[BCAST:.*]] = bitcast {} addrspace(10)* %alloc to i8 addrspace(10)* + +; OPAQUE-NEXT: %alloc = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %current_task, i64 0, ptr addrspace(10) @tag) + +; CHECK-NEXT: [[MSET:[0-9]+]] = MemoryDef([[ALLOC]]) +; CHECK-NEXT: call void @llvm.memset +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; CHECK-NOT: %alloc +; CHECK-NOT: @julia.gc_alloc_obj + %alloc = call {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 0, {} addrspace(10)* @tag) +; CHECK-NEXT: [[MPHI:[0-9]+]] = MemoryPhi({preheader,[[MSET]]},{loop,[[MPHI]]}) + br i1 %ret, label %return, label %loop +; CHECK: return: +return: +; CHECK-NEXT: [[MSSA_USE:[0-9]+]] = MemoryDef([[MPHI]]) +; CHECK-NEXT: call void @mssa_use + call void @mssa_use({} addrspace(10)* %obj) +; CHECK-NEXT: ret void + ret void +} + +define void @hoist_write_barrier({} addrspace(10)* %obj, i1 %ret) { +; CHECK: top: +top: +; CHECK-NEXT: [[PGCSTACK:[0-9]+]] = MemoryDef(liveOnEntry) + %pgcstack = call {}*** @julia.get_pgcstack() + 
%current_task = bitcast {}*** %pgcstack to {}** + br label %preheader +; CHECK: preheader: +preheader: +; CHECK-NEXT: [[WB:[0-9]+]] = MemoryDef([[PGCSTACK]]) +; CHECK-NEXT: call void +; CHECK-SAME: @julia.write_barrier +; CHECK-NEXT: br label %loop + br label %loop +; CHECK: loop: +loop: +; CHECK-NOT: @julia.write_barrier + call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* %obj) +; CHECK-NEXT: [[MPHI:[0-9]+]] = MemoryPhi({preheader,[[WB]]},{loop,[[MPHI]]}) + br i1 %ret, label %return, label %loop +; CHECK: return: +return: +; CHECK-NEXT: [[MSSA_USE:[0-9]+]] = MemoryDef([[MPHI]]) +; CHECK-NEXT: call void @mssa_use + call void @mssa_use({} addrspace(10)* %obj) +; CHECK-NEXT: ret void + ret void +} From 9dc2991f8229075101c3ce20961367f6e67f7c93 Mon Sep 17 00:00:00 2001 From: Diogo Netto <61364108+d-netto@users.noreply.github.com> Date: Tue, 27 Jun 2023 22:25:51 -0300 Subject: [PATCH 247/290] implement concurrent sweeping (#48969) Implements concurrent sweeping of fully empty pages. Concurrent sweeping is disabled by default and may be enabled through the --gcthreads flag. 
Co-authored-by: Valentin Churavy --- base/options.jl | 3 +- base/threadingconstructs.jl | 1 + src/gc-pages.c | 10 ++++++- src/gc.c | 48 +++++++++++++++++++++++-------- src/gc.h | 2 ++ src/jloptions.c | 23 +++++++++++---- src/jloptions.h | 3 +- src/partr.c | 34 ++++++++++++++++++++-- src/threading.c | 46 ++++++++++++++++++++--------- src/threading.h | 3 +- stdlib/Distributed/src/cluster.jl | 11 ++++++- test/cmdlineargs.jl | 7 +++++ test/gc.jl | 10 ++++--- 13 files changed, 158 insertions(+), 43 deletions(-) diff --git a/base/options.jl b/base/options.jl index fb043672dc19a..a94936391fa8d 100644 --- a/base/options.jl +++ b/base/options.jl @@ -11,7 +11,8 @@ struct JLOptions cpu_target::Ptr{UInt8} nthreadpools::Int16 nthreads::Int16 - ngcthreads::Int16 + nmarkthreads::Int16 + nsweepthreads::Int8 nthreads_per_pool::Ptr{Int16} nprocs::Int32 machine_file::Ptr{UInt8} diff --git a/base/threadingconstructs.jl b/base/threadingconstructs.jl index 0854048e6b96c..b8a522be96d97 100644 --- a/base/threadingconstructs.jl +++ b/base/threadingconstructs.jl @@ -134,6 +134,7 @@ end Threads.ngcthreads() -> Int Returns the number of GC threads currently configured. +This includes both mark threads and concurrent sweep threads. 
""" ngcthreads() = Int(unsafe_load(cglobal(:jl_n_gcthreads, Cint))) + 1 diff --git a/src/gc-pages.c b/src/gc-pages.c index 3e8207460d37b..3cb28d5827b55 100644 --- a/src/gc-pages.c +++ b/src/gc-pages.c @@ -97,6 +97,14 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT #endif jl_gc_pagemeta_t *meta = NULL; + // try to get page from `pool_lazily_freed` + meta = pop_lf_page_metadata_back(&global_page_pool_lazily_freed); + if (meta != NULL) { + gc_alloc_map_set(meta->data, 1); + // page is already mapped + return meta; + } + // try to get page from `pool_clean` meta = pop_lf_page_metadata_back(&global_page_pool_clean); if (meta != NULL) { @@ -112,7 +120,7 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT } uv_mutex_lock(&gc_perm_lock); - // another thread may have allocated a large block while we're waiting... + // another thread may have allocated a large block while we were waiting... meta = pop_lf_page_metadata_back(&global_page_pool_clean); if (meta != NULL) { uv_mutex_unlock(&gc_perm_lock); diff --git a/src/gc.c b/src/gc.c index 02551dd965eb5..9e588c171a676 100644 --- a/src/gc.c +++ b/src/gc.c @@ -11,12 +11,18 @@ extern "C" { #endif +// Number of GC threads that may run parallel marking +int jl_n_markthreads; +// Number of GC threads that may run concurrent sweeping (0 or 1) +int jl_n_sweepthreads; // Number of threads currently running the GC mark-loop _Atomic(int) gc_n_threads_marking; // `tid` of mutator thread that triggered GC _Atomic(int) gc_master_tid; // `tid` of first GC thread int gc_first_tid; +// To indicate whether concurrent sweeping should run +uv_sem_t gc_sweep_assists_needed; // Linked list of callback functions @@ -1356,7 +1362,7 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo int pg_skpd = 1; if (!pg->has_marked) { reuse_page = 0; - #ifdef _P64 + #ifdef _P64 // TODO: re-enable on `_P32`? 
// lazy version: (empty) if the whole page was already unused, free it (return it to the pool) // eager version: (freedall) free page as soon as possible // the eager one uses less memory. @@ -1440,8 +1446,18 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo push_page_metadata_back(lazily_freed, pg); } else { + #ifdef _P64 // only enable concurrent sweeping on 64bit + if (jl_n_sweepthreads == 0) { + jl_gc_free_page(pg); + push_lf_page_metadata_back(&global_page_pool_freed, pg); + } + else { + push_lf_page_metadata_back(&global_page_pool_lazily_freed, pg); + } + #else jl_gc_free_page(pg); push_lf_page_metadata_back(&global_page_pool_freed, pg); + #endif } gc_time_count_page(freedall, pg_skpd); gc_num.freed += (nfree - old_nfree) * osize; @@ -1561,6 +1577,13 @@ static void gc_sweep_pool(int sweep_full) } } +#ifdef _P64 // only enable concurrent sweeping on 64bit + // wake thread up to sweep concurrently + if (jl_n_sweepthreads > 0) { + uv_sem_post(&gc_sweep_assists_needed); + } +#endif + gc_time_pool_end(sweep_full); } @@ -2691,8 +2714,8 @@ void gc_mark_and_steal(jl_ptls_t ptls) // of work for the mark loop steal : { // Try to steal chunk from random GC thread - for (int i = 0; i < 4 * jl_n_gcthreads; i++) { - uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_gcthreads; + for (int i = 0; i < 4 * jl_n_markthreads; i++) { + uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_markthreads; jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue; c = gc_chunkqueue_steal_from(mq2); if (c.cid != GC_empty_chunk) { @@ -2701,7 +2724,7 @@ void gc_mark_and_steal(jl_ptls_t ptls) } } // Sequentially walk GC threads to try to steal chunk - for (int i = gc_first_tid; i < gc_first_tid + jl_n_gcthreads; i++) { + for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) { jl_gc_markqueue_t *mq2 = &gc_all_tls_states[i]->mark_queue; c = gc_chunkqueue_steal_from(mq2); if (c.cid != 
GC_empty_chunk) { @@ -2718,15 +2741,15 @@ void gc_mark_and_steal(jl_ptls_t ptls) } } // Try to steal pointer from random GC thread - for (int i = 0; i < 4 * jl_n_gcthreads; i++) { - uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_gcthreads; + for (int i = 0; i < 4 * jl_n_markthreads; i++) { + uint32_t v = gc_first_tid + cong(UINT64_MAX, UINT64_MAX, &ptls->rngseed) % jl_n_markthreads; jl_gc_markqueue_t *mq2 = &gc_all_tls_states[v]->mark_queue; new_obj = gc_ptr_queue_steal_from(mq2); if (new_obj != NULL) goto mark; } // Sequentially walk GC threads to try to steal pointer - for (int i = gc_first_tid; i < gc_first_tid + jl_n_gcthreads; i++) { + for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) { jl_gc_markqueue_t *mq2 = &gc_all_tls_states[i]->mark_queue; new_obj = gc_ptr_queue_steal_from(mq2); if (new_obj != NULL) @@ -2748,7 +2771,7 @@ void gc_mark_loop_parallel(jl_ptls_t ptls, int master) jl_atomic_store(&gc_master_tid, ptls->tid); // Wake threads up and try to do some work jl_atomic_fetch_add(&gc_n_threads_marking, 1); - for (int i = gc_first_tid; i < gc_first_tid + jl_n_gcthreads; i++) { + for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; uv_mutex_lock(&ptls2->sleep_lock); uv_cond_signal(&ptls2->wake_signal); @@ -2771,7 +2794,7 @@ void gc_mark_loop_parallel(jl_ptls_t ptls, int master) void gc_mark_loop(jl_ptls_t ptls) { - if (jl_n_gcthreads == 0 || gc_heap_snapshot_enabled) { + if (jl_n_markthreads == 0 || gc_heap_snapshot_enabled) { gc_mark_loop_serial(ptls); } else { @@ -3065,13 +3088,13 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) } assert(gc_n_threads); - int single_threaded = (jl_n_gcthreads == 0 || gc_heap_snapshot_enabled); + int single_threaded_mark = (jl_n_markthreads == 0 || gc_heap_snapshot_enabled); for (int t_i = 0; t_i < gc_n_threads; t_i++) { jl_ptls_t ptls2 = gc_all_tls_states[t_i]; jl_ptls_t ptls_dest = 
ptls; jl_gc_markqueue_t *mq_dest = mq; - if (!single_threaded) { - ptls_dest = gc_all_tls_states[gc_first_tid + t_i % jl_n_gcthreads]; + if (!single_threaded_mark) { + ptls_dest = gc_all_tls_states[gc_first_tid + t_i % jl_n_markthreads]; mq_dest = &ptls_dest->mark_queue; } if (ptls2 != NULL) { @@ -3513,6 +3536,7 @@ void jl_gc_init(void) JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock"); uv_mutex_init(&gc_cache_lock); uv_mutex_init(&gc_perm_lock); + uv_sem_init(&gc_sweep_assists_needed, 0); jl_gc_init_page(); jl_gc_debug_init(); diff --git a/src/gc.h b/src/gc.h index bfa2a0fba8f59..8e06e91571b31 100644 --- a/src/gc.h +++ b/src/gc.h @@ -182,6 +182,7 @@ typedef struct { _Atomic(jl_gc_pagemeta_t *) page_metadata_back; } jl_gc_global_page_pool_t; +extern jl_gc_global_page_pool_t global_page_pool_lazily_freed; extern jl_gc_global_page_pool_t global_page_pool_clean; extern jl_gc_global_page_pool_t global_page_pool_freed; @@ -428,6 +429,7 @@ STATIC_INLINE void gc_big_object_link(bigval_t *hdr, bigval_t **list) JL_NOTSAFE *list = hdr; } +extern uv_sem_t gc_sweep_assists_needed; extern _Atomic(int) gc_n_threads_marking; void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq); void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t **fl_end) JL_NOTSAFEPOINT; diff --git a/src/jloptions.c b/src/jloptions.c index 7a622f117f1b1..129ba9df2510e 100644 --- a/src/jloptions.c +++ b/src/jloptions.c @@ -40,7 +40,8 @@ JL_DLLEXPORT void jl_init_options(void) NULL, // cpu_target ("native", "core2", etc...) 
0, // nthreadpools 0, // nthreads - 0, // ngcthreads + 0, // nmarkthreads + 0, // nsweepthreads NULL, // nthreads_per_pool 0, // nprocs NULL, // machine_file @@ -130,7 +131,8 @@ static const char opts[] = " interface if supported (Linux and Windows) or to the number of CPU\n" " threads if not supported (MacOS) or if process affinity is not\n" " configured, and sets M to 1.\n" - " --gcthreads=N Use N threads for GC, set to half of the number of compute threads if unspecified.\n" + " --gcthreads=M[,N] Use M threads for the mark phase of GC and N (0 or 1) threads for the concurrent sweeping phase of GC.\n" + " M is set to half of the number of compute threads and N is set to 0 if unspecified.\n" " -p, --procs {N|auto} Integer value N launches N additional local worker processes\n" " \"auto\" launches as many workers as the number of local CPU threads (logical cores)\n" " --machine-file Run processes on hosts listed in \n\n" @@ -826,10 +828,19 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) break; case opt_gc_threads: errno = 0; - long ngcthreads = strtol(optarg, &endptr, 10); - if (errno != 0 || optarg == endptr || *endptr != 0 || ngcthreads < 1 || ngcthreads >= INT16_MAX) - jl_errorf("julia: --gcthreads=; n must be an integer >= 1"); - jl_options.ngcthreads = (int16_t)ngcthreads; + long nmarkthreads = strtol(optarg, &endptr, 10); + if (errno != 0 || optarg == endptr || nmarkthreads < 1 || nmarkthreads >= INT16_MAX) { + jl_errorf("julia: --gcthreads=[,]; n must be an integer >= 1"); + } + jl_options.nmarkthreads = (int16_t)nmarkthreads; + if (*endptr == ',') { + errno = 0; + char *endptri; + long nsweepthreads = strtol(&endptr[1], &endptri, 10); + if (errno != 0 || endptri == &endptr[1] || *endptri != 0 || nsweepthreads < 0 || nsweepthreads > 1) + jl_errorf("julia: --gcthreads=,; n must be 0 or 1"); + jl_options.nsweepthreads = (int8_t)nsweepthreads; + } break; case opt_permalloc_pkgimg: if (!strcmp(optarg,"yes")) diff --git a/src/jloptions.h 
b/src/jloptions.h index 93f6d321f38d6..8649c405112d7 100644 --- a/src/jloptions.h +++ b/src/jloptions.h @@ -15,7 +15,8 @@ typedef struct { const char *cpu_target; int8_t nthreadpools; int16_t nthreads; - int16_t ngcthreads; + int16_t nmarkthreads; + int8_t nsweepthreads; const int16_t *nthreads_per_pool; int32_t nprocs; const char *machine_file; diff --git a/src/partr.c b/src/partr.c index fb140032aaa1f..5f543565928a1 100644 --- a/src/partr.c +++ b/src/partr.c @@ -109,13 +109,13 @@ void jl_init_threadinginfra(void) void JL_NORETURN jl_finish_task(jl_task_t *t); -static int may_mark(void) JL_NOTSAFEPOINT +static inline int may_mark(void) JL_NOTSAFEPOINT { return (jl_atomic_load(&gc_n_threads_marking) > 0); } -// gc thread function -void jl_gc_threadfun(void *arg) +// gc thread mark function +void jl_gc_mark_threadfun(void *arg) { jl_threadarg_t *targ = (jl_threadarg_t*)arg; @@ -139,6 +139,34 @@ void jl_gc_threadfun(void *arg) } } +// gc thread sweep function +void jl_gc_sweep_threadfun(void *arg) +{ + jl_threadarg_t *targ = (jl_threadarg_t*)arg; + + // initialize this thread (set tid and create heap) + jl_ptls_t ptls = jl_init_threadtls(targ->tid); + + // wait for all threads + jl_gc_state_set(ptls, JL_GC_STATE_WAITING, 0); + uv_barrier_wait(targ->barrier); + + // free the thread argument here + free(targ); + + while (1) { + uv_sem_wait(&gc_sweep_assists_needed); + while (1) { + jl_gc_pagemeta_t *pg = pop_lf_page_metadata_back(&global_page_pool_lazily_freed); + if (pg == NULL) { + break; + } + jl_gc_free_page(pg); + push_lf_page_metadata_back(&global_page_pool_freed, pg); + } + } +} + // thread function: used by all mutator threads except the main thread void jl_threadfun(void *arg) { diff --git a/src/threading.c b/src/threading.c index 691fa931f1a3f..e2eb686e3061a 100644 --- a/src/threading.c +++ b/src/threading.c @@ -599,6 +599,8 @@ static void jl_check_tls(void) JL_DLLEXPORT const int jl_tls_elf_support = 0; #endif +extern int jl_n_markthreads; +extern int 
jl_n_sweepthreads; extern int gc_first_tid; // interface to Julia; sets up to make the runtime thread-safe @@ -653,22 +655,37 @@ void jl_init_threading(void) } } - int16_t ngcthreads = jl_options.ngcthreads - 1; - if (ngcthreads == -1 && - (cp = getenv(NUM_GC_THREADS_NAME))) { // ENV[NUM_GC_THREADS_NAME] specified - - ngcthreads = (uint64_t)strtol(cp, NULL, 10) - 1; - } - if (ngcthreads == -1) { - // if `--gcthreads` was not specified, set the number of GC threads - // to half of compute threads - if (nthreads <= 1) { - ngcthreads = 0; + jl_n_markthreads = jl_options.nmarkthreads - 1; + jl_n_sweepthreads = jl_options.nsweepthreads; + if (jl_n_markthreads == -1) { // --gcthreads not specified + if ((cp = getenv(NUM_GC_THREADS_NAME))) { // ENV[NUM_GC_THREADS_NAME] specified + errno = 0; + jl_n_markthreads = (uint64_t)strtol(cp, &endptr, 10) - 1; + if (errno != 0 || endptr == cp || nthreads <= 0) + jl_n_markthreads = 0; + cp = endptr; + if (*cp == ',') { + cp++; + errno = 0; + jl_n_sweepthreads = strtol(cp, &endptri, 10); + if (errno != 0 || endptri == cp || jl_n_sweepthreads < 0) { + jl_n_sweepthreads = 0; + } + } } else { - ngcthreads = (nthreads / 2) - 1; + // if `--gcthreads` or ENV[NUM_GCTHREADS_NAME] was not specified, + // set the number of mark threads to half of compute threads + // and number of sweep threads to 0 + if (nthreads <= 1) { + jl_n_markthreads = 0; + } + else { + jl_n_markthreads = (nthreads / 2) - 1; + } } } + int16_t ngcthreads = jl_n_markthreads + jl_n_sweepthreads; jl_all_tls_states_size = nthreads + nthreadsi + ngcthreads; jl_n_threads_per_pool = (int*)malloc_s(2 * sizeof(int)); @@ -734,8 +751,11 @@ void jl_start_threads(void) mask[i] = 0; } } + else if (i == nthreads - 1 && jl_n_sweepthreads == 1) { + uv_thread_create(&uvtid, jl_gc_sweep_threadfun, t); + } else { - uv_thread_create(&uvtid, jl_gc_threadfun, t); + uv_thread_create(&uvtid, jl_gc_mark_threadfun, t); } uv_thread_detach(&uvtid); } diff --git a/src/threading.h b/src/threading.h 
index 40792a2889e44..73d2cd73fb70d 100644 --- a/src/threading.h +++ b/src/threading.h @@ -25,7 +25,8 @@ jl_ptls_t jl_init_threadtls(int16_t tid) JL_NOTSAFEPOINT; // provided by a threading infrastructure void jl_init_threadinginfra(void); -void jl_gc_threadfun(void *arg); +void jl_gc_mark_threadfun(void *arg); +void jl_gc_sweep_threadfun(void *arg); void jl_threadfun(void *arg); #ifdef __cplusplus diff --git a/stdlib/Distributed/src/cluster.jl b/stdlib/Distributed/src/cluster.jl index 6dc6bd086df16..d8cc052967d50 100644 --- a/stdlib/Distributed/src/cluster.jl +++ b/stdlib/Distributed/src/cluster.jl @@ -1331,6 +1331,14 @@ function get_threads_spec(opts) end end +function get_gcthreads_spec(opts) + if opts.nmarkthreads > 0 || opts.nsweepthreads > 0 + `--gcthreads=$(opts.nmarkthreads),$(opts.nsweepthreads)` + else + `` + end +end + # Starts workers specified by (-n|--procs) and --machine-file command line options function process_opts(opts) # startup worker. @@ -1346,7 +1354,8 @@ function process_opts(opts) # Propagate --threads to workers threads = get_threads_spec(opts) - gcthreads = opts.ngcthreads > 0 ? 
`--gcthreads=$(opts.ngcthreads)` : `` + # Propagate --gcthreads to workers + gcthreads = get_gcthreads_spec(opts) exeflags = `$threads $gcthreads` diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl index 21567468ffe9e..917031b57fe5f 100644 --- a/test/cmdlineargs.jl +++ b/test/cmdlineargs.jl @@ -362,12 +362,19 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` withenv("JULIA_NUM_GC_THREADS" => nt) do @test read(`$exename --gcthreads=2 -e $code`, String) == "2" end + withenv("JULIA_NUM_GC_THREADS" => nt) do + @test read(`$exename --gcthreads=2,1 -e $code`, String) == "3" + end end withenv("JULIA_NUM_GC_THREADS" => 2) do @test read(`$exename -e $code`, String) == "2" end + withenv("JULIA_NUM_GC_THREADS" => "2,1") do + @test read(`$exename -e $code`, String) == "3" + end + # --machine-file # this does not check that machine file works, # only that the filename gets correctly passed to the option struct diff --git a/test/gc.jl b/test/gc.jl index ecf71fe51f6ad..e085c1d8658e5 100644 --- a/test/gc.jl +++ b/test/gc.jl @@ -5,10 +5,12 @@ using Test function run_gctest(file) let cmd = `$(Base.julia_cmd()) --depwarn=error --rr-detach --startup-file=no $file` @testset for test_nthreads in (1, 2, 4) - new_env = copy(ENV) - new_env["JULIA_NUM_THREADS"] = string(test_nthreads) - new_env["JULIA_NUM_GC_THREADS"] = string(test_nthreads) - @test success(run(pipeline(setenv(cmd, new_env), stdout = stdout, stderr = stderr))) + @testset for concurrent_sweep in (0, 1) + new_env = copy(ENV) + new_env["JULIA_NUM_THREADS"] = string(test_nthreads) + new_env["JULIA_NUM_GC_THREADS"] = "$(test_nthreads),$(concurrent_sweep)" + @test success(run(pipeline(setenv(cmd, new_env), stdout = stdout, stderr = stderr))) + end end end end From 014f8de069a49d913e03c3f9c3913d581037bdf8 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 27 Jun 2023 23:44:50 -0400 Subject: [PATCH 248/290] Align meaning for effects and IR flags (#50313) This fixes a longstanding todo where the 
IR_FLAG_EFFECT_FREE flag actually required both :effect_free and :nothrow. After this PR, it is equivalent to :effect_free only. The mismatch in meaning here caused #50311. `Symbol(::String)` is :effect_free, but not :nothrow. As a result, setting IR_FLAG_EFFECT_FREE on it was not legal. Later, irinterp did discover that it was nothrow and set IR_FLAG_NOTHROW, but did not have sufficient information to know that it was also :effect_free, so it could not set that flag. With this PR, IR_FLAG_EFFECT_FREE is set early in inference, so once irinterp discovers IR_FLAG_NOTHROW, the call becomes DCE-eligible as desired. Fixes #50311. --- base/boot.jl | 2 ++ base/compiler/abstractinterpretation.jl | 37 +++++++++++++++---------- base/compiler/ssair/inlining.jl | 25 +++++++++-------- base/compiler/ssair/ir.jl | 5 ++-- base/compiler/ssair/irinterp.jl | 2 +- base/compiler/ssair/passes.jl | 8 +++--- test/compiler/effects.jl | 5 ++++ 7 files changed, 50 insertions(+), 34 deletions(-) diff --git a/base/boot.jl b/base/boot.jl index 6698d4360cc7d..78b7daaf47d64 100644 --- a/base/boot.jl +++ b/base/boot.jl @@ -510,9 +510,11 @@ end) function Symbol(s::String) @_foldable_meta + @noinline return _Symbol(ccall(:jl_string_ptr, Ptr{UInt8}, (Any,), s), sizeof(s), s) end function Symbol(a::Array{UInt8,1}) + @noinline return _Symbol(ccall(:jl_array_ptr, Ptr{UInt8}, (Any,), a), Intrinsics.arraylen(a), a) end Symbol(s::Symbol) = s diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 3aa2366b48aa3..5c8e8fc22efd6 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -2280,17 +2280,33 @@ struct RTEffects RTEffects(@nospecialize(rt), effects::Effects) = new(rt, effects) end +function mark_curr_effect_flags!(sv::AbsIntState, effects::Effects) + if isa(sv, InferenceState) + if is_effect_free(effects) + add_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE) + else + sub_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE) + end + if 
is_nothrow(effects) + add_curr_ssaflag!(sv, IR_FLAG_NOTHROW) + else + sub_curr_ssaflag!(sv, IR_FLAG_NOTHROW) + end + if is_consistent(effects) + add_curr_ssaflag!(sv, IR_FLAG_CONSISTENT) + else + sub_curr_ssaflag!(sv, IR_FLAG_CONSISTENT) + end + end +end + function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, sv::InferenceState) si = StmtInfo(!call_result_unused(sv, sv.currpc)) (; rt, effects, info) = abstract_call(interp, arginfo, si, sv) sv.stmt_info[sv.currpc] = info # mark this call statement as DCE-elgible # TODO better to do this in a single pass based on the `info` object at the end of abstractinterpret? - if is_removable_if_unused(effects) - add_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE) - else - sub_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE) - end + mark_curr_effect_flags!(sv, effects) return RTEffects(rt, effects) end @@ -2429,14 +2445,7 @@ function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtyp elseif ehead === :foreigncall (; rt, effects) = abstract_eval_foreigncall(interp, e, vtypes, sv) t = rt - if isa(sv, InferenceState) - # mark this call statement as DCE-elgible - if is_removable_if_unused(effects) - add_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE) - else - sub_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE) - end - end + mark_curr_effect_flags!(sv, effects) elseif ehead === :cfunction effects = EFFECTS_UNKNOWN t = e.args[1] @@ -2558,7 +2567,7 @@ end function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), vtypes::VarTable, sv::InferenceState) if !isa(e, Expr) if isa(e, PhiNode) - add_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE) + add_curr_ssaflag!(sv, IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW) return abstract_eval_phi(interp, e, vtypes, sv) end return abstract_eval_special_value(interp, e, vtypes, sv) diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index c3ba033efa356..170725f231761 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -370,7 +370,7 @@ 
function ir_prepare_inlining!(insert_node!::Inserter, inline_target::Union{IRCod if !validate_sparams(sparam_vals) # N.B. This works on the caller-side argexprs, (i.e. before the va fixup below) sp_ssa = insert_node!( - effect_free(NewInstruction(Expr(:call, Core._compute_sparams, def, argexprs...), SimpleVector, topline))) + effect_free_and_nothrow(NewInstruction(Expr(:call, Core._compute_sparams, def, argexprs...), SimpleVector, topline))) end if def.isva nargs_def = Int(def.nargs::Int32) @@ -426,7 +426,7 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector inline_compact.result[idx′][:type] = argextype(val, isa(val, Argument) || isa(val, Expr) ? compact : inline_compact) # Everything legal in value position is guaranteed to be effect free in stmt position - inline_compact.result[idx′][:flag] = IR_FLAG_EFFECT_FREE + inline_compact.result[idx′][:flag] = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW break end inline_compact[idx′] = stmt′ @@ -702,7 +702,7 @@ function batch_inline!(ir::IRCode, todo::Vector{Pair{Int,Any}}, propagate_inboun for aidx in 1:length(argexprs) aexpr = argexprs[aidx] if isa(aexpr, Expr) || isa(aexpr, GlobalRef) - ninst = effect_free(NewInstruction(aexpr, argextype(aexpr, compact), compact.result[idx][:line])) + ninst = effect_free_and_nothrow(NewInstruction(aexpr, argextype(aexpr, compact), compact.result[idx][:line])) argexprs[aidx] = insert_node_here!(compact, ninst) end end @@ -992,9 +992,10 @@ function flags_for_effects(effects::Effects) if is_consistent(effects) flags |= IR_FLAG_CONSISTENT end - if is_removable_if_unused(effects) - flags |= IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW - elseif is_nothrow(effects) + if is_effect_free(effects) + flags |= IR_FLAG_EFFECT_FREE + end + if is_nothrow(effects) flags |= IR_FLAG_NOTHROW end return flags @@ -1650,7 +1651,7 @@ function inline_const_if_inlineable!(inst::Instruction) inst[:inst] = quoted(rt.val) return true end - inst[:flag] |= IR_FLAG_EFFECT_FREE + inst[:flag] |= 
IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW return false end @@ -1773,7 +1774,7 @@ function late_inline_special_case!( return SomeCase(quoted(type.val)) end cmp_call = Expr(:call, GlobalRef(Core, :(===)), stmt.args[2], stmt.args[3]) - cmp_call_ssa = insert_node!(ir, idx, effect_free(NewInstruction(cmp_call, Bool))) + cmp_call_ssa = insert_node!(ir, idx, effect_free_and_nothrow(NewInstruction(cmp_call, Bool))) not_call = Expr(:call, GlobalRef(Core.Intrinsics, :not_int), cmp_call_ssa) return SomeCase(not_call) elseif length(argtypes) == 3 && istopfunction(f, :(>:)) @@ -1816,13 +1817,13 @@ end function insert_spval!(insert_node!::Inserter, spvals_ssa::SSAValue, spidx::Int, do_isdefined::Bool) ret = insert_node!( - effect_free(NewInstruction(Expr(:call, Core._svec_ref, false, spvals_ssa, spidx), Any))) + effect_free_and_nothrow(NewInstruction(Expr(:call, Core._svec_ref, false, spvals_ssa, spidx), Any))) tcheck_not = nothing if do_isdefined tcheck = insert_node!( - effect_free(NewInstruction(Expr(:call, Core.isa, ret, Core.TypeVar), Bool))) + effect_free_and_nothrow(NewInstruction(Expr(:call, Core.isa, ret, Core.TypeVar), Bool))) tcheck_not = insert_node!( - effect_free(NewInstruction(Expr(:call, not_int, tcheck), Bool))) + effect_free_and_nothrow(NewInstruction(Expr(:call, not_int, tcheck), Bool))) end return (ret, tcheck_not) end @@ -1849,7 +1850,7 @@ function ssa_substitute_op!(insert_node!::Inserter, subst_inst::Instruction, (ret, tcheck_not) = insert_spval!(insert_node!, spvals_ssa::SSAValue, spidx, maybe_undef) if maybe_undef insert_node!( - non_effect_free(NewInstruction(Expr(:throw_undef_if_not, val.name, tcheck_not), Nothing))) + NewInstruction(Expr(:throw_undef_if_not, val.name, tcheck_not), Nothing)) end return ret end diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 699ee7ba98091..5c6751c1e1dda 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -316,8 +316,7 @@ function NewInstruction(inst::Instruction; return 
NewInstruction(stmt, type, info, line, flag) end @specialize -effect_free(newinst::NewInstruction) = NewInstruction(newinst; flag=add_flag(newinst, IR_FLAG_EFFECT_FREE)) -non_effect_free(newinst::NewInstruction) = NewInstruction(newinst; flag=sub_flag(newinst, IR_FLAG_EFFECT_FREE)) +effect_free_and_nothrow(newinst::NewInstruction) = NewInstruction(newinst; flag=add_flag(newinst, IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW)) with_flags(newinst::NewInstruction, flags::UInt8) = NewInstruction(newinst; flag=add_flag(newinst, flags)) without_flags(newinst::NewInstruction, flags::UInt8) = NewInstruction(newinst; flag=sub_flag(newinst, flags)) function add_flag(newinst::NewInstruction, newflag::UInt8) @@ -1677,7 +1676,7 @@ function maybe_erase_unused!(callback::Function, compact::IncrementalCompact, id stmt = inst[:inst] stmt === nothing && return false inst[:type] === Bottom && return false - effect_free = (inst[:flag] & IR_FLAG_EFFECT_FREE) ≠ 0 + effect_free = (inst[:flag] & (IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW)) == IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW effect_free || return false foreachssa(stmt) do val::SSAValue if compact.used_ssas[val.id] == 1 diff --git a/base/compiler/ssair/irinterp.jl b/base/compiler/ssair/irinterp.jl index bafa70ab22b03..db3545ed1bbd3 100644 --- a/base/compiler/ssair/irinterp.jl +++ b/base/compiler/ssair/irinterp.jl @@ -162,7 +162,7 @@ function reprocess_instruction!(interp::AbstractInterpreter, idx::Int, bb::Union if rt !== nothing if isa(rt, Const) ir.stmts[idx][:type] = rt - if is_inlineable_constant(rt.val) && !isa(inst, PhiNode) && (ir.stmts[idx][:flag] & IR_FLAG_EFFECT_FREE) != 0 + if is_inlineable_constant(rt.val) && !isa(inst, PhiNode) && (ir.stmts[idx][:flag] & (IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW)) == IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW ir.stmts[idx][:inst] = quoted(rt.val) end return true diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index f2ef2e9d47ee1..dc983f7a53cf4 100644 --- 
a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -472,7 +472,7 @@ function lift_arg!( end end if isa(lifted, GlobalRef) || isa(lifted, Expr) - lifted = insert_node!(compact, leaf, effect_free(NewInstruction(lifted, argextype(lifted, compact)))) + lifted = insert_node!(compact, leaf, effect_free_and_nothrow(NewInstruction(lifted, argextype(lifted, compact)))) compact[leaf] = nothing stmt.args[argidx] = lifted compact[leaf] = stmt @@ -718,7 +718,7 @@ function perform_lifting!(compact::IncrementalCompact, end if isa(old_node, PhiNode) new_node = PhiNode() - ssa = insert_node!(compact, old_ssa, effect_free(NewInstruction(new_node, result_t))) + ssa = insert_node!(compact, old_ssa, effect_free_and_nothrow(NewInstruction(new_node, result_t))) lifted_philikes[i] = LiftedPhilike(ssa, new_node, true) else @assert is_known_call(old_node, Core.ifelse, compact) @@ -1110,8 +1110,8 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing) def_val = perform_lifting!(compact, visited_philikes, field, def_lifting_cache, Bool, lifted_leaves_def, val, lazydomtree).val end - insert_node!(compact, SSAValue(idx), non_effect_free(NewInstruction( - Expr(:throw_undef_if_not, Symbol("##getfield##"), def_val), Nothing))) + insert_node!(compact, SSAValue(idx), NewInstruction( + Expr(:throw_undef_if_not, Symbol("##getfield##"), def_val), Nothing)) else # val must be defined diff --git a/test/compiler/effects.jl b/test/compiler/effects.jl index 65719f4a5f27d..a4b21da523a8e 100644 --- a/test/compiler/effects.jl +++ b/test/compiler/effects.jl @@ -993,3 +993,8 @@ end hf50198(s) = hasfield(typeof((;x=1, y=2)), s) f50198() = (hf50198(Ref(:x)[]); nothing) @test fully_eliminated(f50198) + +# Effects properly applied to flags by irinterp (#50311) +f50311(x, s) = Symbol(s) +g50311(x) = Val{f50311((1.0, x), "foo")}() +@test fully_eliminated(g50311, Tuple{Float64}) From 5c070f4ac3ed3238003d4ecc8065b12685fdd0e8 Mon Sep 17 00:00:00 2001 From: Aravindh 
Krishnamoorthy Date: Wed, 28 Jun 2023 07:29:00 +0200 Subject: [PATCH 249/290] Rework symmetric generalized `eigen`/`eigvals` (#49673) --- NEWS.md | 5 ++ stdlib/LinearAlgebra/src/diagonal.jl | 7 +- stdlib/LinearAlgebra/src/eigen.jl | 4 +- stdlib/LinearAlgebra/src/symmetriceigen.jl | 45 +++++++++---- stdlib/LinearAlgebra/test/symmetriceigen.jl | 72 +++++++++++++++++++++ 5 files changed, 116 insertions(+), 17 deletions(-) create mode 100644 stdlib/LinearAlgebra/test/symmetriceigen.jl diff --git a/NEWS.md b/NEWS.md index d73373d95d26e..5dd9f2999de5c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -92,6 +92,11 @@ Standard library changes (real symmetric) part of a matrix ([#31836]). * The `norm` of the adjoint or transpose of an `AbstractMatrix` now returns the norm of the parent matrix by default, matching the current behaviour for `AbstractVector`s ([#49020]). +* `eigen(A, B)` and `eigvals(A, B)`, where one of `A` or `B` is symmetric or Hermitian, + are now fully supported ([#49533]) +* `eigvals/eigen(A, cholesky(B))` now computes the generalized eigenvalues (`eigen`: and eigenvectors) + of `A` and `B` via Cholesky decomposition for positive definite `B`. Note: The second argument is + the output of `cholesky`. #### Printf * Format specifiers now support dynamic width and precision, e.g. `%*s` and `%*.*g` ([#40105]). 
diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl index fb605a57ab5c6..29c190e87df72 100644 --- a/stdlib/LinearAlgebra/src/diagonal.jl +++ b/stdlib/LinearAlgebra/src/diagonal.jl @@ -796,12 +796,11 @@ function eigen(A::AbstractMatrix, D::Diagonal; sortby::Union{Function,Nothing}=n end if size(A, 1) == size(A, 2) && isdiag(A) return eigen(Diagonal(A), D; sortby) - elseif ishermitian(A) + elseif all(isposdef, D.diag) S = promote_type(eigtype(eltype(A)), eltype(D)) - return eigen!(eigencopy_oftype(Hermitian(A), S), Diagonal{S}(D); sortby) + return eigen(A, cholesky(Diagonal{S}(D)); sortby) else - S = promote_type(eigtype(eltype(A)), eltype(D)) - return eigen!(eigencopy_oftype(A, S), Diagonal{S}(D); sortby) + return eigen!(D \ A; sortby) end end diff --git a/stdlib/LinearAlgebra/src/eigen.jl b/stdlib/LinearAlgebra/src/eigen.jl index 185061b0a3a7d..489bfa4665c7a 100644 --- a/stdlib/LinearAlgebra/src/eigen.jl +++ b/stdlib/LinearAlgebra/src/eigen.jl @@ -524,7 +524,7 @@ true """ function eigen(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}; kws...) where {TA,TB} S = promote_type(eigtype(TA), TB) - eigen!(eigencopy_oftype(A, S), eigencopy_oftype(B, S); kws...) + eigen!(copy_similar(A, S), copy_similar(B, S); kws...) end eigen(A::Number, B::Number) = eigen(fill(A,1,1), fill(B,1,1)) @@ -619,7 +619,7 @@ julia> eigvals(A,B) """ function eigvals(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}; kws...) where {TA,TB} S = promote_type(eigtype(TA), TB) - return eigvals!(eigencopy_oftype(A, S), eigencopy_oftype(B, S); kws...) + return eigvals!(copy_similar(A, S), copy_similar(B, S); kws...) 
end """ diff --git a/stdlib/LinearAlgebra/src/symmetriceigen.jl b/stdlib/LinearAlgebra/src/symmetriceigen.jl index 17371b74bb343..bafeb50f35459 100644 --- a/stdlib/LinearAlgebra/src/symmetriceigen.jl +++ b/stdlib/LinearAlgebra/src/symmetriceigen.jl @@ -156,6 +156,11 @@ end eigmax(A::RealHermSymComplexHerm{<:Real}) = eigvals(A, size(A, 1):size(A, 1))[1] eigmin(A::RealHermSymComplexHerm{<:Real}) = eigvals(A, 1:1)[1] +function eigen(A::HermOrSym{TA}, B::HermOrSym{TB}; kws...) where {TA,TB} + S = promote_type(eigtype(TA), TB) + return eigen!(eigencopy_oftype(A, S), eigencopy_oftype(B, S); kws...) +end + function eigen!(A::HermOrSym{T,S}, B::HermOrSym{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasReal,S<:StridedMatrix} vals, vecs, _ = LAPACK.sygvd!(1, 'V', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data')) GeneralizedEigen(sorteig!(vals, vecs, sortby)...) @@ -164,26 +169,32 @@ function eigen!(A::Hermitian{T,S}, B::Hermitian{T,S}; sortby::Union{Function,Not vals, vecs, _ = LAPACK.sygvd!(1, 'V', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data')) GeneralizedEigen(sorteig!(vals, vecs, sortby)...)
end -function eigen!(A::RealHermSymComplexHerm{T,<:StridedMatrix}, B::AbstractMatrix{T}; sortby::Union{Function,Nothing}=nothing) where {T<:Number} - return _choleigen!(A, B, sortby) -end -function eigen!(A::StridedMatrix{T}, B::Union{RealHermSymComplexHerm{T},Diagonal{T}}; sortby::Union{Function,Nothing}=nothing) where {T<:Number} - return _choleigen!(A, B, sortby) + +function eigen(A::AbstractMatrix, C::Cholesky; sortby::Union{Function,Nothing}=nothing) + if ishermitian(A) + eigen!(eigencopy_oftype(Hermitian(A), eigtype(eltype(A))), C; sortby) + else + eigen!(copy_similar(A, eigtype(eltype(A))), C; sortby) + end end -function _choleigen!(A, B, sortby) - U = cholesky(B).U - vals, w = eigen!(UtiAUi!(A, U)) - vecs = U \ w +function eigen!(A::AbstractMatrix, C::Cholesky; sortby::Union{Function,Nothing}=nothing) + # Cholesky decomposition based eigenvalues and eigenvectors + vals, w = eigen!(UtiAUi!(A, C.U)) + vecs = C.U \ w GeneralizedEigen(sorteig!(vals, vecs, sortby)...) end # Perform U' \ A / U in-place, where U::Union{UpperTriangular,Diagonal} -UtiAUi!(A::StridedMatrix, U) = _UtiAUi!(A, U) +UtiAUi!(A, U) = _UtiAUi!(A, U) UtiAUi!(A::Symmetric, U) = Symmetric(_UtiAUi!(copytri!(parent(A), A.uplo), U), sym_uplo(A.uplo)) UtiAUi!(A::Hermitian, U) = Hermitian(_UtiAUi!(copytri!(parent(A), A.uplo, true), U), sym_uplo(A.uplo)) - _UtiAUi!(A, U) = rdiv!(ldiv!(U', A), U) +function eigvals(A::HermOrSym{TA}, B::HermOrSym{TB}; kws...) where {TA,TB} + S = promote_type(eigtype(TA), TB) + return eigvals!(eigencopy_oftype(A, S), eigencopy_oftype(B, S); kws...) +end + function eigvals!(A::HermOrSym{T,S}, B::HermOrSym{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasReal,S<:StridedMatrix} vals = LAPACK.sygvd!(1, 'N', A.uplo, A.data, B.uplo == A.uplo ?
B.data : copy(B.data'))[1] isnothing(sortby) || sort!(vals, by=sortby) @@ -195,3 +206,15 @@ function eigvals!(A::Hermitian{T,S}, B::Hermitian{T,S}; sortby::Union{Function,N return vals end eigvecs(A::HermOrSym) = eigvecs(eigen(A)) + +function eigvals(A::AbstractMatrix, C::Cholesky; sortby::Union{Function,Nothing}=nothing) + if ishermitian(A) + eigvals!(eigencopy_oftype(Hermitian(A), eigtype(eltype(A))), C; sortby) + else + eigvals!(copy_similar(A, eigtype(eltype(A))), C; sortby) + end +end +function eigvals!(A::AbstractMatrix{T}, C::Cholesky{T, <:AbstractMatrix}; sortby::Union{Function,Nothing}=nothing) where {T<:Number} + # Cholesky decomposition based eigenvalues + return eigvals!(UtiAUi!(A, C.U); sortby) +end diff --git a/stdlib/LinearAlgebra/test/symmetriceigen.jl b/stdlib/LinearAlgebra/test/symmetriceigen.jl new file mode 100644 index 0000000000000..6744db7c477ad --- /dev/null +++ b/stdlib/LinearAlgebra/test/symmetriceigen.jl @@ -0,0 +1,72 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module TestSymmetricEigen + +using Test, LinearAlgebra + +@testset "chol-eigen-eigvals" begin + ## Cholesky decomposition based + + # eigenvalue sorting + sf = x->(real(x),imag(x)) + + ## Real valued + A = Float64[1 1 0 0; 1 2 1 0; 0 1 3 1; 0 0 1 4] + H = (A+A')/2 + B = Float64[2 1 4 3; 0 3 1 3; 3 1 0 0; 0 1 3 1] + BH = (B+B')/2 + # PD matrix + BPD = B*B' + # eigen + C = cholesky(BPD) + e,v = eigen(A, C; sortby=sf) + @test A*v ≈ BPD*v*Diagonal(e) + # eigvals + @test eigvals(A, BPD; sortby=sf) ≈ eigvals(A, C; sortby=sf) + + ## Complex valued + A = [1.0+im 1.0+1.0im 0 0; 1.0+1.0im 2.0+3.0im 1.0+1.0im 0; 0 1.0+2.0im 3.0+4.0im 1.0+5.0im; 0 0 1.0+1.0im 4.0+4.0im] + AH = (A+A')/2 + B = [2.0+2.0im 1.0+1.0im 4.0+4.0im 3.0+3.0im; 0 3.0+2.0im 1.0+1.0im 3.0+4.0im; 3.0+3.0im 1.0+4.0im 0 0; 0 1.0+2.0im 3.0+1.0im 1.0+1.0im] + BH = (B+B')/2 + # PD matrix + BPD = B*B' + # eigen + C = cholesky(BPD) + e,v = eigen(A, C; sortby=sf) + @test A*v ≈ 
BPD*v*Diagonal(e) + # eigvals + @test eigvals(A, BPD; sortby=sf) ≈ eigvals(A, C; sortby=sf) +end + +@testset "issue #49533" begin + ## Real valued + A = Float64[1 1 0 0; 1 2 1 0; 0 1 3 1; 0 0 1 4] + B = Matrix(Diagonal(Float64[1:4;])) + # eigen + e0,v0 = eigen(A, B) + e1,v1 = eigen(A, Symmetric(B)) + e2,v2 = eigen(Symmetric(A), B) + @test e0 ≈ e1 && v0 ≈ v1 + @test e0 ≈ e2 && v0 ≈ v2 + # eigvals + @test eigvals(A, B) ≈ eigvals(A, Symmetric(B)) + @test eigvals(A, B) ≈ eigvals(Symmetric(A), B) + + ## Complex valued + A = [1.0+im 1.0+1.0im 0 0; 1.0+1.0im 2.0+3.0im 1.0+1.0im 0; 0 1.0+2.0im 3.0+4.0im 1.0+5.0im; 0 0 1.0+1.0im 4.0+4.0im] + AH = (A+A')/2 + B = [2.0+2.0im 1.0+1.0im 4.0+4.0im 3.0+3.0im; 0 3.0+2.0im 1.0+1.0im 3.0+4.0im; 3.0+3.0im 1.0+4.0im 0 0; 0 1.0+2.0im 3.0+1.0im 1.0+1.0im] + BH = (B+B')/2 + # eigen + sf = x->(real(x),imag(x)) + e1,v1 = eigen(A, Hermitian(BH)) + e2,v2 = eigen(Hermitian(AH), B) + @test A*v1 ≈ Hermitian(BH)*v1*Diagonal(e1) + @test Hermitian(AH)*v2 ≈ B*v2*Diagonal(e2) + # eigvals + @test eigvals(A, BH; sortby=sf) ≈ eigvals(A, Hermitian(BH); sortby=sf) + @test eigvals(AH, B; sortby=sf) ≈ eigvals(Hermitian(AH), B; sortby=sf) +end + +end # module TestSymmetricEigen From 196956dd879eedafa08a56173408b96e468c83c5 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Wed, 28 Jun 2023 06:08:24 -0500 Subject: [PATCH 250/290] Replace `julia` with Julia in faq when referring to the language. (#50103) --- doc/src/manual/faq.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/src/manual/faq.md b/doc/src/manual/faq.md index 85b31bf20a99f..b2d6bdf5176d1 100644 --- a/doc/src/manual/faq.md +++ b/doc/src/manual/faq.md @@ -22,7 +22,7 @@ On the other hand, language *interoperability* is extremely useful: we want to e ### How does Julia define its public API? 
-The only interfaces that are stable with respect to [SemVer](https://semver.org/) of `julia` +The only interfaces that are stable with respect to [SemVer](https://semver.org/) of Julia version are the Julia `Base` and standard libraries interfaces described in [the documentation](https://docs.julialang.org/) and not marked as unstable (e.g., experimental and internal). Functions, types, and constants are not part of the public @@ -36,8 +36,8 @@ a complex non-public API, especially when using it from a stable package, it is to open an [issue](https://github.com/JuliaLang/julia/issues) or [pull request](https://github.com/JuliaLang/julia/pulls) to start a discussion for turning it into a public API. However, we do not discourage the attempt to create packages that expose -stable public interfaces while relying on non-public implementation details of `julia` and -buffering the differences across different `julia` versions. +stable public interfaces while relying on non-public implementation details of Julia and +buffering the differences across different Julia versions. ### The documentation is not accurate enough. Can I rely on the existing behavior? From 850dd878979aee36b23508b8b69ade8f7bfc5fe9 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Wed, 28 Jun 2023 11:18:31 -0500 Subject: [PATCH 251/290] Reword "how does Julia define its public API" [NFC] (#50324) --- doc/src/manual/faq.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/src/manual/faq.md b/doc/src/manual/faq.md index b2d6bdf5176d1..bdecb5ecf106f 100644 --- a/doc/src/manual/faq.md +++ b/doc/src/manual/faq.md @@ -22,11 +22,11 @@ On the other hand, language *interoperability* is extremely useful: we want to e ### How does Julia define its public API? 
-The only interfaces that are stable with respect to [SemVer](https://semver.org/) of Julia -version are the Julia `Base` and standard libraries interfaces described in -[the documentation](https://docs.julialang.org/) and not marked as unstable (e.g., -experimental and internal). Functions, types, and constants are not part of the public -API if they are not included in the documentation, _even if they have docstrings_. +Julia `Base` and standard library functionality described in +[the documentation](https://docs.julialang.org/) that is not marked as unstable +(e.g. experimental and internal) is covered by [SemVer](https://semver.org/). +Functions, types, and constants are not part of the public API if they are not +included in the documentation, _even if they have docstrings_. ### There is a useful undocumented function/type/constant. Can I use it? From 00191c02dbd2b7e1d9915bcda4f9d628153d0a59 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 28 Jun 2023 22:22:00 +0200 Subject: [PATCH 252/290] Revert "Add mutating `stat!` function for non-allocating filesystem `stat`" (#50323) --- base/stat.jl | 34 +++++++++------------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/base/stat.jl b/base/stat.jl index 84c5c33a5fdf7..81f9dcfd20191 100644 --- a/base/stat.jl +++ b/base/stat.jl @@ -144,13 +144,14 @@ show(io::IO, ::MIME"text/plain", st::StatStruct) = show_statstruct(io, st, false # stat & lstat functions -macro stat_call!(stat_buf, sym, arg1type, arg) +macro stat_call(sym, arg1type, arg) return quote - r = ccall($(Expr(:quote, sym)), Int32, ($(esc(arg1type)), Ptr{UInt8}), $(esc(arg)), $(esc(stat_buf))) + stat_buf = zeros(UInt8, Int(ccall(:jl_sizeof_stat, Int32, ()))) + r = ccall($(Expr(:quote, sym)), Int32, ($(esc(arg1type)), Ptr{UInt8}), $(esc(arg)), stat_buf) if !(r in (0, Base.UV_ENOENT, Base.UV_ENOTDIR, Base.UV_EINVAL)) uv_error(string("stat(", repr($(esc(arg))), ")"), r) end - st = StatStruct($(esc(arg)),
$(esc(stat_buf))) + st = StatStruct($(esc(arg)), stat_buf) if ispath(st) != (r == 0) error("stat returned zero type for a valid path") end @@ -158,30 +159,13 @@ macro stat_call!(stat_buf, sym, arg1type, arg) end end -""" - stat!(stat_buf::Vector{UInt8}, file) - -Like [`stat`](@ref), but avoids internal allocations by using a pre-allocated buffer, -`stat_buf`. For a small performance gain over `stat`, consecutive calls to `stat!` can use -the same `stat_buf`. See also [`Base.Filesystem.get_stat_buf`](@ref). -""" -stat!(stat_buf::Vector{UInt8}, fd::OS_HANDLE) = @stat_call! stat_buf jl_fstat OS_HANDLE fd -stat!(stat_buf::Vector{UInt8}, path::AbstractString) = @stat_call! stat_buf jl_stat Cstring path -lstat!(stat_buf::Vector{UInt8}, path::AbstractString) = @stat_call! stat_buf jl_lstat Cstring path +stat(fd::OS_HANDLE) = @stat_call jl_fstat OS_HANDLE fd +stat(path::AbstractString) = @stat_call jl_stat Cstring path +lstat(path::AbstractString) = @stat_call jl_lstat Cstring path if RawFD !== OS_HANDLE - global stat!(stat_buf::Vector{UInt8}, fd::RawFD) = stat!(stat_buf, Libc._get_osfhandle(fd)) + global stat(fd::RawFD) = stat(Libc._get_osfhandle(fd)) end -stat!(stat_buf::Vector{UInt8}, fd::Integer) = stat!(stat_buf, RawFD(fd)) - -stat(x) = stat!(get_stat_buf(), x) -lstat(x) = lstat!(get_stat_buf(), x) - -""" - get_stat_buf() - -Return a buffer of bytes of the right size for [`stat!`](@ref). 
-""" -get_stat_buf() = zeros(UInt8, Int(ccall(:jl_sizeof_stat, Int32, ()))) +stat(fd::Integer) = stat(RawFD(fd)) """ stat(file) From d67b899158374bc9e69d0bc509b10355c66cfb87 Mon Sep 17 00:00:00 2001 From: Diogo Netto <61364108+d-netto@users.noreply.github.com> Date: Wed, 28 Jun 2023 17:25:27 -0300 Subject: [PATCH 253/290] initialize jl_n_markthreads and jl_n_sweepthreads to be consistent with no parallel GC on bootstrap (#50332) --- src/init.c | 2 ++ src/julia.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/init.c b/src/init.c index 02769e03c668e..7cae40d2a6906 100644 --- a/src/init.c +++ b/src/init.c @@ -872,6 +872,8 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_ if (jl_base_module == NULL) { // nthreads > 1 requires code in Base jl_atomic_store_relaxed(&jl_n_threads, 1); + jl_n_markthreads = 0; + jl_n_sweepthreads = 0; jl_n_gcthreads = 0; } jl_start_threads(); diff --git a/src/julia.h b/src/julia.h index 694a8d81b06e9..d2eb9a98a4a42 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1766,6 +1766,8 @@ JL_DLLIMPORT jl_value_t *jl_get_libllvm(void) JL_NOTSAFEPOINT; extern JL_DLLIMPORT int jl_n_threadpools; extern JL_DLLIMPORT _Atomic(int) jl_n_threads; extern JL_DLLIMPORT int jl_n_gcthreads; +extern int jl_n_markthreads; +extern int jl_n_sweepthreads; extern JL_DLLIMPORT int *jl_n_threads_per_pool; // environment entries From 4e0da0d581035f4ff4521a07cbb67a4dd13b9155 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Thu, 29 Jun 2023 07:09:56 +0900 Subject: [PATCH 254/290] fix `compatible_vatuple` (#50331) Seemingly `vab` has been computed wrongly. 
--- base/compiler/typeutils.jl | 9 ++++----- test/compiler/inference.jl | 7 +++++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/base/compiler/typeutils.jl b/base/compiler/typeutils.jl index cff10b02ceafc..2ecc077228264 100644 --- a/base/compiler/typeutils.jl +++ b/base/compiler/typeutils.jl @@ -139,12 +139,12 @@ valid_tparam(@nospecialize(x)) = valid_typeof_tparam(typeof(x)) function compatible_vatuple(a::DataType, b::DataType) vaa = a.parameters[end] - vab = a.parameters[end] + vab = b.parameters[end] if !(isvarargtype(vaa) && isvarargtype(vab)) return isvarargtype(vaa) == isvarargtype(vab) end - (isdefined(vaa, :N) == isdefined(vab, :N)) || return false - !isdefined(vaa, :N) && return true + isdefined(vaa, :N) || return !isdefined(vab, :N) + isdefined(vab, :N) || return false return vaa.N === vab.N end @@ -163,8 +163,7 @@ function typesubtract(@nospecialize(a), @nospecialize(b), max_union_splitting::I elseif a isa DataType ub = unwrap_unionall(b) if ub isa DataType - if a.name === ub.name === Tuple.name && - length(a.parameters) == length(ub.parameters) + if a.name === ub.name === Tuple.name && length(a.parameters) == length(ub.parameters) if 1 < unionsplitcost(JLTypeLattice(), a.parameters) <= max_union_splitting ta = switchtupleunion(a) return typesubtract(Union{ta...}, b, 0) diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl index 9511abfb6d202..ded9438037733 100644 --- a/test/compiler/inference.jl +++ b/test/compiler/inference.jl @@ -3499,9 +3499,16 @@ end Tuple{Int, Char, Int}, Tuple{Int, Int, Char}, Tuple{Int, Int, Int}} # Test that these don't throw @test Core.Compiler.typesubtract(Tuple{Vararg{Int}}, Tuple{Vararg{Char}}, 0) == Tuple{Vararg{Int}} +@test Core.Compiler.typesubtract(Tuple{Vararg{Int}}, Tuple{Vararg{Int}}, 0) == Union{} +@test Core.Compiler.typesubtract(Tuple{String,Int}, Tuple{String,Vararg{Int}}, 0) == Union{} +@test Core.Compiler.typesubtract(Tuple{String,Vararg{Int}}, Tuple{String,Int}, 0) == 
Tuple{String,Vararg{Int}} @test Core.Compiler.typesubtract(NTuple{3, Real}, NTuple{3, Char}, 0) == NTuple{3, Real} @test Core.Compiler.typesubtract(NTuple{3, Union{Real, Char}}, NTuple{2, Char}, 0) == NTuple{3, Union{Real, Char}} +@test Core.Compiler.compatible_vatuple(Tuple{String,Vararg{Int}}, Tuple{String,Vararg{Int}}) +@test !Core.Compiler.compatible_vatuple(Tuple{String,Int}, Tuple{String,Vararg{Int}}) +@test !Core.Compiler.compatible_vatuple(Tuple{String,Vararg{Int}}, Tuple{String,Int}) + @test Base.return_types(Issue35566.f) == [Val{:expected}] # constant prop through keyword arguments From 663c58d00dc2c16b10f8d7e8210c97bfadd0ee1f Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Thu, 29 Jun 2023 00:12:17 +0200 Subject: [PATCH 255/290] Change SIMD Loop from Fast to only reassoc/contract (#49405) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses #49387 Co-authored-by: Mosè Giordano --- NEWS.md | 4 ++++ base/simdloop.jl | 2 +- src/llvm-muladd.cpp | 8 ++++---- src/llvm-simdloop.cpp | 3 ++- test/llvmpasses/loopinfo.jl | 6 +++--- test/llvmpasses/simdloop.ll | 4 ++-- 6 files changed, 16 insertions(+), 11 deletions(-) diff --git a/NEWS.md b/NEWS.md index 5dd9f2999de5c..50418ffe7309b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -21,6 +21,10 @@ Language changes that significantly improves load and inference times for heavily overloaded methods that dispatch on Types (such as traits and constructors). * The "h bar" `ℏ` (`\hslash` U+210F) character is now treated as equivalent to `ħ` (`\hbar` U+0127). +* The `@simd` macro now has a more limited and clearer semantics, it only enables reordering and contraction + of floating-point operations, instead of turning on all "fastmath" optimizations. + If you observe performance regressions due to this change, you can recover previous behavior with `@fastmath @simd`, + if you are OK with all the optimizations enabled by the `@fastmath` macro. 
([#49405]) * When a method with keyword arguments is displayed in the stack trace view, the textual representation of the keyword arguments' types is simplified using the new `@Kwargs{key1::Type1, ...}` macro syntax ([#49959]). diff --git a/base/simdloop.jl b/base/simdloop.jl index 29e2382cf39aa..797b77ed75a99 100644 --- a/base/simdloop.jl +++ b/base/simdloop.jl @@ -100,7 +100,7 @@ The object iterated over in a `@simd for` loop should be a one-dimensional range By using `@simd`, you are asserting several properties of the loop: * It is safe to execute iterations in arbitrary or overlapping order, with special consideration for reduction variables. -* Floating-point operations on reduction variables can be reordered, possibly causing different results than without `@simd`. +* Floating-point operations on reduction variables can be reordered or contracted, possibly causing different results than without `@simd`. In many cases, Julia is able to automatically vectorize inner for loops without the use of `@simd`. Using `@simd` gives the compiler a little extra leeway to make it possible in more situations. In diff --git a/src/llvm-muladd.cpp b/src/llvm-muladd.cpp index 98e56e344f7af..29c0f7e2b10d6 100644 --- a/src/llvm-muladd.cpp +++ b/src/llvm-muladd.cpp @@ -40,10 +40,10 @@ STATISTIC(TotalContracted, "Total number of multiplies marked for FMA"); * Combine * ``` * %v0 = fmul ... %a, %b - * %v = fadd fast ... %v0, %c + * %v = fadd contract ... %v0, %c * ``` * to - * `%v = call fast @llvm.fmuladd.<...>(... %a, ... %b, ... %c)` + * `%v = call contract @llvm.fmuladd.<...>(... %a, ... %b, ... 
%c)` * when `%v0` has no other use */ @@ -87,13 +87,13 @@ static bool combineMulAdd(Function &F) JL_NOTSAFEPOINT it++; switch (I.getOpcode()) { case Instruction::FAdd: { - if (!I.isFast()) + if (!I.hasAllowContract()) continue; modified |= checkCombine(I.getOperand(0), ORE) || checkCombine(I.getOperand(1), ORE); break; } case Instruction::FSub: { - if (!I.isFast()) + if (!I.hasAllowContract()) continue; modified |= checkCombine(I.getOperand(0), ORE) || checkCombine(I.getOperand(1), ORE); break; diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp index 9a7f61410ba1d..21e2ec574d650 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -149,7 +149,8 @@ static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L, OptimizationRe return OptimizationRemark(DEBUG_TYPE, "MarkedUnsafeAlgebra", *K) << "marked unsafe algebra on " << ore::NV("Instruction", *K); }); - (*K)->setFast(true); + (*K)->setHasAllowReassoc(true); + (*K)->setHasAllowContract(true); ++length; } ReductionChainLength += length; diff --git a/test/llvmpasses/loopinfo.jl b/test/llvmpasses/loopinfo.jl index 18661ea6fde67..b9b388c73d0c5 100644 --- a/test/llvmpasses/loopinfo.jl +++ b/test/llvmpasses/loopinfo.jl @@ -29,10 +29,10 @@ function simdf(X) acc += x # CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO:![0-9]+]] # LOWER-NOT: llvm.mem.parallel_loop_access -# LOWER: fadd fast double +# LOWER: fadd reassoc contract double # LOWER-NOT: call void @julia.loopinfo_marker() # LOWER: br {{.*}}, !llvm.loop [[LOOPID:![0-9]+]] -# FINAL: fadd fast <{{(vscale x )?}}{{[0-9]+}} x double> +# FINAL: fadd reassoc contract <{{(vscale x )?}}{{[0-9]+}} x double> end acc end @@ -46,7 +46,7 @@ function simdf2(X) # CHECK: call void @julia.loopinfo_marker(), {{.*}}, !julia.loopinfo [[LOOPINFO2:![0-9]+]] # LOWER: llvm.mem.parallel_loop_access # LOWER-NOT: call void @julia.loopinfo_marker() -# LOWER: fadd fast double +# LOWER: fadd reassoc contract double # LOWER: br {{.*}}, 
!llvm.loop [[LOOPID2:![0-9]+]] end acc diff --git a/test/llvmpasses/simdloop.ll b/test/llvmpasses/simdloop.ll index bc4b2da007dc2..929fbeea2c3f5 100644 --- a/test/llvmpasses/simdloop.ll +++ b/test/llvmpasses/simdloop.ll @@ -40,7 +40,7 @@ loop: ; CHECK: llvm.mem.parallel_loop_access %aval = load double, double *%aptr %nextv = fsub double %v, %aval -; CHECK: fsub fast double %v, %aval +; CHECK: fsub reassoc contract double %v, %aval %nexti = add i64 %i, 1 call void @julia.loopinfo_marker(), !julia.loopinfo !3 %done = icmp sgt i64 %nexti, 500 @@ -59,7 +59,7 @@ loop: %aptr = getelementptr double, double *%a, i64 %i %aval = load double, double *%aptr %nextv = fsub double %v, %aval -; CHECK: fsub fast double %v, %aval +; CHECK: fsub reassoc contract double %v, %aval %nexti = add i64 %i, 1 call void @julia.loopinfo_marker(), !julia.loopinfo !2 %done = icmp sgt i64 %nexti, 500 From cf34aa28807185bc30e6d38bffcc4e48f8cb134f Mon Sep 17 00:00:00 2001 From: Nicholas Bauer Date: Wed, 28 Jun 2023 22:57:40 -0400 Subject: [PATCH 256/290] Round-trip reinterpret of all isbits types (#47116) Hiding padding bytes in the process, to avoid undefined behavior if those are observed. 
Co-authored-by: Andy Ferris Co-authored-by: Jameson Nash Co-authored-by: Sukera <11753998+Seelengrab@users.noreply.github.com> --- base/reinterpretarray.jl | 140 ++++++++++++++++++++++++++++++++++++--- test/core.jl | 2 +- test/numbers.jl | 8 +-- test/reinterpretarray.jl | 30 +++++++-- 4 files changed, 161 insertions(+), 19 deletions(-) diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl index 830bac90d86e9..d33c127b78c76 100644 --- a/base/reinterpretarray.jl +++ b/base/reinterpretarray.jl @@ -651,8 +651,8 @@ end # Padding struct Padding - offset::Int - size::Int + offset::Int # 0-indexed offset of the next valid byte; sizeof(T) indicates trailing padding + size::Int # bytes of padding before a valid byte end function intersect(p1::Padding, p2::Padding) start = max(p1.offset, p2.offset) @@ -696,20 +696,24 @@ function iterate(cp::CyclePadding, state::Tuple) end """ - Compute the location of padding in a type. + Compute the location of padding in an isbits datatype. Recursive over the fields of that type. """ -function padding(T) - padding = Padding[] - last_end::Int = 0 +@assume_effects :foldable function padding(T::DataType, baseoffset::Int = 0) + pads = Padding[] + last_end::Int = baseoffset for i = 1:fieldcount(T) - offset = fieldoffset(T, i) + offset = baseoffset + Int(fieldoffset(T, i)) fT = fieldtype(T, i) + append!(pads, padding(fT, offset)) if offset != last_end - push!(padding, Padding(offset, offset-last_end)) + push!(pads, Padding(offset, offset-last_end)) end last_end = offset + sizeof(fT) end - padding + if 0 < last_end - baseoffset < sizeof(T) + push!(pads, Padding(baseoffset + sizeof(T), sizeof(T) - last_end + baseoffset)) + end + return Core.svec(pads...) 
end function CyclePadding(T::DataType) @@ -748,6 +752,124 @@ end return true end +@assume_effects :foldable function struct_subpadding(::Type{Out}, ::Type{In}) where {Out, In} + padding(Out) == padding(In) +end + +@assume_effects :foldable function packedsize(::Type{T}) where T + pads = padding(T) + return sizeof(T) - sum((p.size for p ∈ pads), init = 0) +end + +@assume_effects :foldable ispacked(::Type{T}) where T = isempty(padding(T)) + +function _copytopacked!(ptr_out::Ptr{Out}, ptr_in::Ptr{In}) where {Out, In} + writeoffset = 0 + for i ∈ 1:fieldcount(In) + readoffset = fieldoffset(In, i) + fT = fieldtype(In, i) + if ispacked(fT) + readsize = sizeof(fT) + memcpy(ptr_out + writeoffset, ptr_in + readoffset, readsize) + writeoffset += readsize + else # nested padded type + _copytopacked!(ptr_out + writeoffset, Ptr{fT}(ptr_in + readoffset)) + writeoffset += packedsize(fT) + end + end +end + +function _copyfrompacked!(ptr_out::Ptr{Out}, ptr_in::Ptr{In}) where {Out, In} + readoffset = 0 + for i ∈ 1:fieldcount(Out) + writeoffset = fieldoffset(Out, i) + fT = fieldtype(Out, i) + if ispacked(fT) + writesize = sizeof(fT) + memcpy(ptr_out + writeoffset, ptr_in + readoffset, writesize) + readoffset += writesize + else # nested padded type + _copyfrompacked!(Ptr{fT}(ptr_out + writeoffset), ptr_in + readoffset) + readoffset += packedsize(fT) + end + end +end + +""" + reinterpret(::Type{Out}, x::In) + +Reinterpret the valid non-padding bytes of an isbits value `x` as isbits type `Out`. + +Both types must have the same amount of non-padding bytes. This operation is guaranteed +to be reversible. + +```jldoctest +julia> reinterpret(NTuple{2, UInt8}, 0x1234) +(0x34, 0x12) + +julia> reinterpret(UInt16, (0x34, 0x12)) +0x1234 + +julia> reinterpret(Tuple{UInt16, UInt8}, (0x01, 0x0203)) +(0x0301, 0x02) +``` + +!!! warning + + Use caution if some combinations of bits in `Out` are not considered valid and would + otherwise be prevented by the type's constructors and methods. 
Unexpected behavior + may result without additional validation. +""" +@inline function reinterpret(::Type{Out}, x::In) where {Out, In} + isbitstype(Out) || throw(ArgumentError("Target type for `reinterpret` must be isbits")) + isbitstype(In) || throw(ArgumentError("Source type for `reinterpret` must be isbits")) + if isprimitivetype(Out) && isprimitivetype(In) + outsize = sizeof(Out) + insize = sizeof(In) + outsize == insize || + throw(ArgumentError("Sizes of types $Out and $In do not match; got $outsize \ + and $insize, respectively.")) + return bitcast(Out, x) + end + inpackedsize = packedsize(In) + outpackedsize = packedsize(Out) + inpackedsize == outpackedsize || + throw(ArgumentError("Packed sizes of types $Out and $In do not match; got $outpackedsize \ + and $inpackedsize, respectively.")) + in = Ref{In}(x) + out = Ref{Out}() + if struct_subpadding(Out, In) + # if packed the same, just copy + GC.@preserve in out begin + ptr_in = unsafe_convert(Ptr{In}, in) + ptr_out = unsafe_convert(Ptr{Out}, out) + memcpy(ptr_out, ptr_in, sizeof(Out)) + end + return out[] + else + # mismatched padding + GC.@preserve in out begin + ptr_in = unsafe_convert(Ptr{In}, in) + ptr_out = unsafe_convert(Ptr{Out}, out) + + if fieldcount(In) > 0 && ispacked(Out) + _copytopacked!(ptr_out, ptr_in) + elseif fieldcount(Out) > 0 && ispacked(In) + _copyfrompacked!(ptr_out, ptr_in) + else + packed = Ref{NTuple{inpackedsize, UInt8}}() + GC.@preserve packed begin + ptr_packed = unsafe_convert(Ptr{NTuple{inpackedsize, UInt8}}, packed) + _copytopacked!(ptr_packed, ptr_in) + _copyfrompacked!(ptr_out, ptr_packed) + end + end + end + return out[] + end +end + + # Reductions with IndexSCartesian2 function _mapreduce(f::F, op::OP, style::IndexSCartesian2{K}, A::AbstractArrayOrBroadcasted) where {F,OP,K} diff --git a/test/core.jl b/test/core.jl index a87c45b698e49..f0439afeed23c 100644 --- a/test/core.jl +++ b/test/core.jl @@ -1897,7 +1897,7 @@ function f4528(A, B) end end @test f4528(false, Int32(12)) 
=== nothing -@test_throws ErrorException f4528(true, Int32(12)) +@test_throws ArgumentError f4528(true, Int32(12)) # issue #4518 f4518(x, y::Union{Int32,Int64}) = 0 diff --git a/test/numbers.jl b/test/numbers.jl index b1523d690de99..d7fd6531b157d 100644 --- a/test/numbers.jl +++ b/test/numbers.jl @@ -2216,13 +2216,11 @@ end @test round(Int16, -32768.1) === Int16(-32768) end # issue #7508 -@test_throws ErrorException reinterpret(Int, 0x01) +@test_throws ArgumentError reinterpret(Int, 0x01) @testset "issue #12832" begin - @test_throws ErrorException reinterpret(Float64, Complex{Int64}(1)) - @test_throws ErrorException reinterpret(Float64, ComplexF32(1)) - @test_throws ErrorException reinterpret(ComplexF32, Float64(1)) - @test_throws ErrorException reinterpret(Int32, false) + @test_throws ArgumentError reinterpret(Float64, Complex{Int64}(1)) + @test_throws ArgumentError reinterpret(Int32, false) end # issue #41 ndigf(n) = Float64(log(Float32(n))) diff --git a/test/reinterpretarray.jl b/test/reinterpretarray.jl index fae4c6434e00d..501e9f4a9b57f 100644 --- a/test/reinterpretarray.jl +++ b/test/reinterpretarray.jl @@ -450,10 +450,10 @@ end SomeSingleton(x) = new() end - @test_throws ErrorException reinterpret(Int, nothing) - @test_throws ErrorException reinterpret(Missing, 3) - @test_throws ErrorException reinterpret(Missing, NotASingleton()) - @test_throws ErrorException reinterpret(NotASingleton, ()) + @test_throws ArgumentError reinterpret(Int, nothing) + @test_throws ArgumentError reinterpret(Missing, 3) + @test_throws ArgumentError reinterpret(Missing, NotASingleton()) + @test_throws ArgumentError reinterpret(NotASingleton, ()) @test_throws ArgumentError reinterpret(NotASingleton, fill(nothing, ())) @test_throws ArgumentError reinterpret(reshape, NotASingleton, fill(missing, 3)) @@ -513,3 +513,25 @@ end @test setindex!(x, SomeSingleton(:), 3, 5) == x2 @test_throws MethodError x[2,4] = nothing end + +# reinterpret of arbitrary bitstypes +@testset "Reinterpret 
arbitrary bitstypes" begin + struct Bytes15 + a::Int8 + b::Int16 + c::Int32 + d::Int64 + end + + @test reinterpret(Float64, ComplexF32(1, 1)) === 0.007812501848093234 + @test reinterpret(ComplexF32, 0.007812501848093234) === ComplexF32(1, 1) + @test reinterpret(Tuple{Float64, Float64}, ComplexF64(1, 1)) === (1.0, 1.0) + @test reinterpret(ComplexF64, (1.0, 1.0)) === ComplexF64(1, 1) + @test reinterpret(Tuple{Int8, Int16, Int32, Int64}, (Int64(1), Int32(2), Int16(3), Int8(4))) === (Int8(1), Int16(0), Int32(0), 288233674686595584) + @test reinterpret(Tuple{Int8, Int16, Tuple{Int32, Int64}}, (Int64(1), Int32(2), Int16(3), Int8(4))) === (Int8(1), Int16(0), (Int32(0), 288233674686595584)) + @test reinterpret(Tuple{Int64, Int32, Int16, Int8}, (Int8(1), Int16(0), (Int32(0), 288233674686595584))) === (Int64(1), Int32(2), Int16(3), Int8(4)) + @test reinterpret(Tuple{Int8, Int16, Int32, Int64}, Bytes15(Int8(1), Int16(2), Int32(3), Int64(4))) === (Int8(1), Int16(2), Int32(3), Int64(4)) + @test reinterpret(Bytes15, (Int8(1), Int16(2), Int32(3), Int64(4))) == Bytes15(Int8(1), Int16(2), Int32(3), Int64(4)) + + @test_throws ArgumentError reinterpret(Tuple{Int32, Int64}, (Int16(1), Int64(4))) +end From cb6d0f2e53004c4a98988dcb8b89048026eecff9 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Thu, 29 Jun 2023 02:45:08 -0400 Subject: [PATCH 257/290] invokelatest docs should say not exported before 1.9 (#50341) --- base/essentials.jl | 3 +++ base/reflection.jl | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/base/essentials.jl b/base/essentials.jl index 0477ccd5b172a..97f32483a6b14 100644 --- a/base/essentials.jl +++ b/base/essentials.jl @@ -857,6 +857,9 @@ e.g. long-running event loops or callback functions that may call obsolete versions of a function `f`. (The drawback is that `invokelatest` is somewhat slower than calling `f` directly, and the type of the result cannot be inferred by the compiler.) + +!!! 
compat "Julia 1.9" + Prior to Julia 1.9, this function was not exported, and was called as `Base.invokelatest`. """ function invokelatest(@nospecialize(f), @nospecialize args...; kwargs...) kwargs = merge(NamedTuple(), kwargs) diff --git a/base/reflection.jl b/base/reflection.jl index 5268beddeb8eb..02a927ba7b265 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -2157,7 +2157,7 @@ end """ @invokelatest f(args...; kwargs...) -Provides a convenient way to call [`Base.invokelatest`](@ref). +Provides a convenient way to call [`invokelatest`](@ref). `@invokelatest f(args...; kwargs...)` will simply be expanded into `Base.invokelatest(f, args...; kwargs...)`. @@ -2187,6 +2187,9 @@ julia> @macroexpand @invokelatest xs[i] = v !!! compat "Julia 1.7" This macro requires Julia 1.7 or later. +!!! compat "Julia 1.9" + Prior to Julia 1.9, this macro was not exported, and was called as `Base.@invokelatest`. + !!! compat "Julia 1.10" The additional syntax is supported as of Julia 1.10. """ From 7eb358e79967efb9a07b53f592c329139da7706a Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Thu, 29 Jun 2023 02:46:21 -0400 Subject: [PATCH 258/290] doc fix for @invokelatest (#50342) --- base/reflection.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/reflection.jl b/base/reflection.jl index 02a927ba7b265..bbcd6cad27128 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -2164,8 +2164,8 @@ Provides a convenient way to call [`invokelatest`](@ref). 
It also supports the following syntax: - `@invokelatest x.f` expands to `Base.invokelatest(getproperty, x, :f)` - `@invokelatest x.f = v` expands to `Base.invokelatest(setproperty!, x, :f, v)` -- `@invokelatest xs[i]` expands to `invoke(getindex, xs, i)` -- `@invokelatest xs[i] = v` expands to `invoke(setindex!, xs, v, i)` +- `@invokelatest xs[i]` expands to `Base.invokelatest(getindex, xs, i)` +- `@invokelatest xs[i] = v` expands to `Base.invokelatest(setindex!, xs, v, i)` ```jldoctest julia> @macroexpand @invokelatest f(x; kw=kwv) @@ -2191,7 +2191,7 @@ julia> @macroexpand @invokelatest xs[i] = v Prior to Julia 1.9, this macro was not exported, and was called as `Base.@invokelatest`. !!! compat "Julia 1.10" - The additional syntax is supported as of Julia 1.10. + The additional `x.f` and `xs[i]` syntax requires Julia 1.10. """ macro invokelatest(ex) topmod = Core.Compiler._topmod(__module__) # well, except, do not get it via CC but define it locally From e4600c5f5b172963e5394dad831204e69c74d949 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Thu, 29 Jun 2023 09:06:27 -0400 Subject: [PATCH 259/290] Relax constraints on the PHI block (#50308) In #50158, I taught the verifier to reject code that has invalid statements in the original PHI block. In #50235, this required irinterp to stop folding PhiNodes to the respective constants. I said at the time that a subsequent compact would fix it, but it turns out that we don't actually have the logic for that. I might still add that logic, but on the other hand it just seems kinda silly that PhiNodes need to be a special case here. This PR relaxes the semantics of the PHI block, to allow any value-position constant to appear in the PHI block and undoes the irinterp change from #50235. Only the interpreter really cares about the semantics of the phi block, so the primary change is there. Of note, SSAValue forwards are not allowed in the phi block. This is because of the following: ``` loop: %1 = %(...)
%2 = %1 %3 = %(top => %1) ``` The two phi values %1 and %3 have different semantics: %1 gets the *current* iteration of the loop, while %3 gets the *previous* value. As a result, any pass that wants to move SSAValues out of PhiNode uses would have to be aware of these semantics anyway, and there's no simplicity benefit to allowing SSAValues in the middle of a phi block. --- base/compiler/ssair/irinterp.jl | 2 +- base/compiler/ssair/verify.jl | 22 +++++++++++++++----- src/interpreter.c | 36 +++++++++++++++++++++++---------- 3 files changed, 43 insertions(+), 17 deletions(-) diff --git a/base/compiler/ssair/irinterp.jl b/base/compiler/ssair/irinterp.jl index db3545ed1bbd3..f4c04ea4e1380 100644 --- a/base/compiler/ssair/irinterp.jl +++ b/base/compiler/ssair/irinterp.jl @@ -162,7 +162,7 @@ function reprocess_instruction!(interp::AbstractInterpreter, idx::Int, bb::Union if rt !== nothing if isa(rt, Const) ir.stmts[idx][:type] = rt - if is_inlineable_constant(rt.val) && !isa(inst, PhiNode) && (ir.stmts[idx][:flag] & (IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW)) == IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW + if is_inlineable_constant(rt.val) && (ir.stmts[idx][:flag] & (IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW)) == IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW ir.stmts[idx][:inst] = quoted(rt.val) end return true diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl index 316f7a7f6e4b9..39f56a47e1908 100644 --- a/base/compiler/ssair/verify.jl +++ b/base/compiler/ssair/verify.jl @@ -20,6 +20,7 @@ if !isdefined(@__MODULE__, Symbol("@verify_error")) end end +is_value_pos_expr_head(head::Symbol) = head === :boundscheck function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, use_idx::Int, printed_use_idx::Int, print::Bool, isforeigncall::Bool, arg_idx::Int, allow_frontend_forms::Bool) if isa(op, SSAValue) if op.id > length(ir.stmts) @@ -60,7 +61,7 @@ function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, # Allow a tuple in
symbol position for foreigncall - this isn't actually # a real call - it's interpreted in global scope by codegen. However, # we do need to keep this a real use, because it could also be a pointer. - elseif op.head !== :boundscheck + elseif !is_value_pos_expr_head(op.head) if !allow_frontend_forms || op.head !== :opaque_closure_method @verify_error "Expr not allowed in value position" error("") @@ -189,9 +190,12 @@ function verify_ir(ir::IRCode, print::Bool=true, end lastbb = 0 is_phinode_block = false + firstidx = 1 + lastphi = 1 for (bb, idx) in bbidxiter(ir) if bb != lastbb is_phinode_block = true + lastphi = firstidx = idx lastbb = bb end # We allow invalid IR in dead code to avoid passes having to detect when @@ -204,6 +208,7 @@ function verify_ir(ir::IRCode, print::Bool=true, @verify_error "φ node $idx is not at the beginning of the basic block $bb" error("") end + lastphi = idx @assert length(stmt.edges) == length(stmt.values) for i = 1:length(stmt.edges) edge = stmt.edges[i] @@ -244,12 +249,19 @@ function verify_ir(ir::IRCode, print::Bool=true, check_op(ir, domtree, val, Int(edge), last(ir.cfg.blocks[stmt.edges[i]].stmts)+1, idx, print, false, i, allow_frontend_forms) end continue - elseif stmt === nothing - # Nothing to do - continue end - is_phinode_block = false + if is_phinode_block && isa(stmt, Union{Expr, UpsilonNode, PhiCNode, SSAValue}) + if !isa(stmt, Expr) || !is_value_pos_expr_head(stmt.head) + # Go back and check that all non-PhiNodes are valid value-position + for validate_idx in firstidx:(lastphi-1) + validate_stmt = ir.stmts[validate_idx][:inst] + isa(validate_stmt, PhiNode) && continue + check_op(ir, domtree, validate_stmt, bb, idx, idx, print, false, 0, allow_frontend_forms) + end + is_phinode_block = false + end + end if isa(stmt, PhiCNode) for i = 1:length(stmt.values) val = stmt.values[i] diff --git a/src/interpreter.c b/src/interpreter.c index c08496f72ce04..2ad56e76b2549 100644 --- a/src/interpreter.c +++ b/src/interpreter.c @@ -349,20 
+349,34 @@ static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_ { size_t from = s->ip; size_t ip = to; - unsigned nphi = 0; + unsigned nphiblockstmts = 0; for (ip = to; ip < ns; ip++) { jl_value_t *e = jl_array_ptr_ref(stmts, ip); - if (!jl_is_phinode(e)) - break; - nphi += 1; + if (!jl_is_phinode(e)) { + if (jl_is_expr(e) || jl_is_returnnode(e) || jl_is_gotoifnot(e) || + jl_is_gotonode(e) || jl_is_phicnode(e) || jl_is_upsilonnode(e) || + jl_is_ssavalue(e)) { + break; + } + // Everything else is allowed in the phi-block for implementation + // convenience - fall through. + } + nphiblockstmts += 1; } - if (nphi) { + if (nphiblockstmts) { jl_value_t **dest = &s->locals[jl_source_nslots(s->src) + to]; - jl_value_t **phis; // = (jl_value_t**)alloca(sizeof(jl_value_t*) * nphi); - JL_GC_PUSHARGS(phis, nphi); - for (unsigned i = 0; i < nphi; i++) { + jl_value_t **phis; // = (jl_value_t**)alloca(sizeof(jl_value_t*) * nphiblockstmts); + JL_GC_PUSHARGS(phis, nphiblockstmts); + for (unsigned i = 0; i < nphiblockstmts; i++) { jl_value_t *e = jl_array_ptr_ref(stmts, to + i); - assert(jl_is_phinode(e)); + if (!jl_is_phinode(e)) { + // IR verification guarantees that the only thing that gets + // evaluated here are constants, so it doesn't matter if we + // update the locals or the phis, but let's be consistent + // for simplicity. 
+ phis[i] = eval_value(e, s); + continue; + } jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(e, 0); ssize_t edge = -1; size_t closest = to; // implicit edge has `to <= edge - 1 < to + i` @@ -405,7 +419,7 @@ static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_ i -= n_oldphi; dest += n_oldphi; to += n_oldphi; - nphi -= n_oldphi; + nphiblockstmts -= n_oldphi; } if (edge != -1) { // if edges list doesn't contain last branch, or the value is explicitly undefined @@ -418,7 +432,7 @@ static size_t eval_phi(jl_array_t *stmts, interpreter_state *s, size_t ns, size_ phis[i] = val; } // now move all phi values to their position in edges - for (unsigned j = 0; j < nphi; j++) { + for (unsigned j = 0; j < nphiblockstmts; j++) { dest[j] = phis[j]; } JL_GC_POP(); From 6174056c625aeb2182e57674d63a64445f692d07 Mon Sep 17 00:00:00 2001 From: Olivier Benz Date: Thu, 29 Jun 2023 17:52:38 +0200 Subject: [PATCH 260/290] macOS <=10.14: move build flags to OSLIBS (#50017) Replace file src/mach_dyld_atfork.tbd with command line flags Fixes #49976 --- src/Makefile | 2 +- src/mach_dyld_atfork.tbd | 26 -------------------------- 2 files changed, 1 insertion(+), 27 deletions(-) delete mode 100644 src/mach_dyld_atfork.tbd diff --git a/src/Makefile b/src/Makefile index e561aefcdfe04..6a7c2fee844a1 100644 --- a/src/Makefile +++ b/src/Makefile @@ -150,7 +150,7 @@ endif CLANG_LDFLAGS := $(LLVM_LDFLAGS) ifeq ($(OS), Darwin) CLANG_LDFLAGS += -Wl,-undefined,dynamic_lookup -OSLIBS += $(SRCDIR)/mach_dyld_atfork.tbd +OSLIBS += -Wl,-U,__dyld_atfork_parent -Wl,-U,__dyld_atfork_prepare -Wl,-U,__dyld_dlopen_atfork_parent -Wl,-U,__dyld_dlopen_atfork_prepare LIBJULIA_PATH_REL := @rpath/libjulia else LIBJULIA_PATH_REL := libjulia diff --git a/src/mach_dyld_atfork.tbd b/src/mach_dyld_atfork.tbd deleted file mode 100644 index c2cda4417ec38..0000000000000 --- a/src/mach_dyld_atfork.tbd +++ /dev/null @@ -1,26 +0,0 @@ ---- !tapi-tbd -# copied from XCode's libSystem.tbd 
(current-version: 1311) -# to provide weak-linkage info for new symbols on old systems -tbd-version: 4 -targets: [ x86_64-macos, x86_64-maccatalyst, arm64-macos, arm64-maccatalyst, - arm64e-macos, arm64e-maccatalyst ] -uuids: - - target: x86_64-macos - value: AFE6C76A-B47A-35F5-91D0-4E9FC439E90D - - target: x86_64-maccatalyst - value: AFE6C76A-B47A-35F5-91D0-4E9FC439E90D - - target: arm64-macos - value: 2EA09BDB-811B-33AA-BB58-4B53AA2DB522 - - target: arm64-maccatalyst - value: 2EA09BDB-811B-33AA-BB58-4B53AA2DB522 - - target: arm64e-macos - value: 09AB3723-C26D-3762-93BA-98E9C38B89C1 - - target: arm64e-maccatalyst - value: 09AB3723-C26D-3762-93BA-98E9C38B89C1 -install-name: '/usr/lib/libSystem.B.dylib' -exports: - - targets: [ arm64-macos, arm64e-macos, x86_64-macos, x86_64-maccatalyst, - arm64-maccatalyst, arm64e-maccatalyst ] - symbols: [ __dyld_atfork_parent, __dyld_atfork_prepare, - __dyld_dlopen_atfork_parent, __dyld_dlopen_atfork_prepare ] -... From 6d400e4f205db5e010248f1ecf26f90474fd4880 Mon Sep 17 00:00:00 2001 From: Cody Tapscott <84105208+topolarity@users.noreply.github.com> Date: Thu, 29 Jun 2023 12:23:36 -0400 Subject: [PATCH 261/290] Makefile: Add support for symbol versioning (ON by default) (#49012) This enables symbol versioning by default, with the version string `JL_LIBJULIA_1.10` (the version suffix is the SOMAJOR in general). This will configure the linker to assign the specified version string to all Julia-exported symbols. As part of this change, the SOMAJOR has also been changed to be MAJOR.MINOR instead of just MAJOR. This is required to allow multiple minor releases of libjulia to live side-by-side in the same process. `SYMBOL_VERSION_SUFFIX` is provided to allow appending an additional unique "salt" to SOMAJOR, which can be helpful for creating template binaries that will be "version-stamped" after distribution. Correctly compute `LLVM_SHLIB_SYMBOL_VERSION` using `llvm-config`, so USE_SYSTEM_LLVM should continue to work. 
--- .gitignore | 1 + Make.inc | 24 +++++++++++++----------- cli/Makefile | 16 ++++++++++++---- cli/julia.expmap.in | 1 + deps/llvm-ver.make | 1 + src/Makefile | 20 ++++++++++++++------ src/{julia.expmap => julia.expmap.in} | 11 +++++++---- 7 files changed, 49 insertions(+), 25 deletions(-) create mode 100644 cli/julia.expmap.in rename src/{julia.expmap => julia.expmap.in} (92%) diff --git a/.gitignore b/.gitignore index 0368b7d19efa0..f0072fec9c91e 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ /source-dist.tmp /source-dist.tmp1 +*.expmap *.exe *.dll *.dwo diff --git a/Make.inc b/Make.inc index bc8f5b94f259b..96064cb7eac15 100644 --- a/Make.inc +++ b/Make.inc @@ -188,15 +188,17 @@ JULIA_MINOR_VERSION := $(shell echo $(JULIA_VERSION) | cut -d'-' -f 1 | cut -d'. JULIA_PATCH_VERSION := $(shell echo $(JULIA_VERSION) | cut -d'-' -f 1 | cut -d'+' -f 1 | cut -d'.' -f 3) # libjulia's SONAME will follow the format libjulia.so.$(SOMAJOR). Before v1.0.0, -# SOMAJOR will be a two-decimal value, e.g. libjulia.so.0.5, whereas at and beyond -# v1.0.0, SOMAJOR will be simply the major version number, e.g. libjulia.so.1 +# somajor was a two-decimal value (e.g. libjulia.so.0.5). During v1.0.x - v1.9.x, +# somajor was simply the major version number (e.g. libjulia.so.1). Starting in +# v1.10.0, somajor is major.minor again (e.g. libjulia.so.1.10) # The file itself will ultimately symlink to libjulia.so.$(SOMAJOR).$(SOMINOR) -ifeq ($(JULIA_MAJOR_VERSION),0) SOMAJOR := $(JULIA_MAJOR_VERSION).$(JULIA_MINOR_VERSION) SOMINOR := $(JULIA_PATCH_VERSION) -else -SOMAJOR := $(JULIA_MAJOR_VERSION) -SOMINOR := $(JULIA_MINOR_VERSION) + +# This suffix affects libjulia's SONAME and the symbol version associated with +# all of its exported symbols. 
+ifdef SYMBOL_VERSION_SUFFIX +SOMAJOR := $(SOMAJOR)_$(SYMBOL_VERSION_SUFFIX) endif ifneq ($(NO_GIT), 1) @@ -488,7 +490,7 @@ endif JCFLAGS_COMMON := -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 JCFLAGS_CLANG := $(JCFLAGS_COMMON) -JCFLAGS_GCC := $(JCFLAGS_COMMON) +JCFLAGS_GCC := $(JCFLAGS_COMMON) -fno-gnu-unique # AArch64 needs this flag to generate the .eh_frame used by libunwind JCPPFLAGS_COMMON := -fasynchronous-unwind-tables @@ -497,7 +499,7 @@ JCPPFLAGS_GCC := $(JCPPFLAGS_COMMON) JCXXFLAGS_COMMON := -pipe $(fPIC) -fno-rtti -std=c++14 JCXXFLAGS_CLANG := $(JCXXFLAGS_COMMON) -pedantic -JCXXFLAGS_GCC := $(JCXXFLAGS_COMMON) +JCXXFLAGS_GCC := $(JCXXFLAGS_COMMON) -fno-gnu-unique DEBUGFLAGS_COMMON := -O0 -DJL_DEBUG_BUILD -fstack-protector DEBUGFLAGS_CLANG := $(DEBUGFLAGS_COMMON) -g @@ -1317,7 +1319,7 @@ ifeq (supported, $(shell echo $(IFUNC_DETECT_SRC) | $(CC) -Werror -x c - -S -o / JCPPFLAGS += -DJULIA_HAS_IFUNC_SUPPORT=1 endif JLDFLAGS += -Wl,-Bdynamic -OSLIBS += -Wl,--version-script=$(JULIAHOME)/src/julia.expmap +OSLIBS += -Wl,--version-script=$(BUILDROOT)/src/julia.expmap ifneq ($(SANITIZE),1) JLDFLAGS += -Wl,-no-undefined endif @@ -1342,7 +1344,7 @@ OSLIBS += -lelf -lkvm -lrt -lpthread -latomic # See #21788 OSLIBS += -lgcc_s -OSLIBS += -Wl,--export-dynamic -Wl,--version-script=$(JULIAHOME)/src/julia.expmap \ +OSLIBS += -Wl,--export-dynamic -Wl,--version-script=$(BUILDROOT)/src/julia.expmap \ $(NO_WHOLE_ARCHIVE) endif @@ -1357,7 +1359,7 @@ endif ifeq ($(OS), WINNT) HAVE_SSP := 1 -OSLIBS += -Wl,--export-all-symbols -Wl,--version-script=$(JULIAHOME)/src/julia.expmap \ +OSLIBS += -Wl,--export-all-symbols -Wl,--version-script=$(BUILDROOT)/src/julia.expmap \ $(NO_WHOLE_ARCHIVE) -lpsapi -lkernel32 -lws2_32 -liphlpapi -lwinmm -ldbghelp -luserenv -lsecur32 -latomic JLDFLAGS += -Wl,--stack,8388608 ifeq ($(ARCH),i686) diff --git a/cli/Makefile b/cli/Makefile index c2e2bcd568a07..b6a2b48ebf044 100644 --- a/cli/Makefile +++ b/cli/Makefile @@ -104,14 
+104,18 @@ julia-debug: $(build_bindir)/julia-debug$(EXE) libjulia-release: $(build_shlibdir)/libjulia.$(SHLIB_EXT) libjulia-debug: $(build_shlibdir)/libjulia-debug.$(SHLIB_EXT) +ifneq (,$(filter $(OS), Linux FreeBSD)) +VERSIONSCRIPT := -Wl,--version-script=$(BUILDDIR)/julia.expmap +endif + ifeq ($(OS),WINNT) # On Windows we need to strip out exported functions from the generated import library. STRIP_EXPORTED_FUNCS := $(shell $(CPP_STDOUT) -I$(JULIAHOME)/src $(SRCDIR)/list_strip_symbols.h) endif -$(build_shlibdir)/libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_OBJS) $(SRCDIR)/list_strip_symbols.h | $(build_shlibdir) $(build_libdir) +$(build_shlibdir)/libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_OBJS) $(SRCDIR)/list_strip_symbols.h $(BUILDDIR)/julia.expmap | $(build_shlibdir) $(build_libdir) @$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -shared $(SHIPFLAGS) $(LIB_OBJS) -o $@ \ - $(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(call SONAME_FLAGS,libjulia.$(JL_MAJOR_SHLIB_EXT))) + $(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(VERSIONSCRIPT) $(call SONAME_FLAGS,libjulia.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia.$(JL_MAJOR_SHLIB_EXT) $@ @$(DSYMUTIL) $@ ifeq ($(OS), WINNT) @@ -120,9 +124,9 @@ ifeq ($(OS), WINNT) @$(call PRINT_ANALYZE, $(OBJCOPY) $(build_libdir)/$(notdir $@).tmp.a $(STRIP_EXPORTED_FUNCS) $(build_libdir)/$(notdir $@).a && rm $(build_libdir)/$(notdir $@).tmp.a) endif -$(build_shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_DOBJS) $(SRCDIR)/list_strip_symbols.h | $(build_shlibdir) $(build_libdir) +$(build_shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_DOBJS) $(SRCDIR)/list_strip_symbols.h $(BUILDDIR)/julia.expmap | $(build_shlibdir) $(build_libdir) @$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -shared $(DEBUGFLAGS) $(LIB_DOBJS) -o $@ \ - $(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(call SONAME_FLAGS,libjulia-debug.$(JL_MAJOR_SHLIB_EXT))) + $(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(VERSIONSCRIPT) $(call 
SONAME_FLAGS,libjulia-debug.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-debug.$(JL_MAJOR_SHLIB_EXT) $@ @$(DSYMUTIL) $@ ifeq ($(OS), WINNT) @@ -144,8 +148,12 @@ $(build_bindir)/julia$(EXE): $(EXE_OBJS) $(build_shlibdir)/libjulia.$(SHLIB_EXT) $(build_bindir)/julia-debug$(EXE): $(EXE_DOBJS) $(build_shlibdir)/libjulia-debug.$(SHLIB_EXT) | $(build_bindir) @$(call PRINT_LINK, $(CC) $(LOADER_CFLAGS) $(DEBUGFLAGS) $(EXE_DOBJS) -o $@ $(LOADER_LDFLAGS) $(RPATH) -ljulia-debug) +$(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in + sed <'$<' >'$@' -e 's/@JULIA_SHLIB_SYMBOL_VERSION@/JL_LIBJULIA_$(SOMAJOR)/' + clean: | $(CLEAN_TARGETS) rm -f $(BUILDDIR)/*.o $(BUILDDIR)/*.dbg.obj rm -f $(build_bindir)/julia* + rm -f $(BUILDDIR)/julia.expmap .PHONY: clean release debug julia-release julia-debug diff --git a/cli/julia.expmap.in b/cli/julia.expmap.in new file mode 100644 index 0000000000000..b6fa72e9aca03 --- /dev/null +++ b/cli/julia.expmap.in @@ -0,0 +1 @@ +@JULIA_SHLIB_SYMBOL_VERSION@ { global: *; }; diff --git a/deps/llvm-ver.make b/deps/llvm-ver.make index 3c498be6c2363..3777d5b37915a 100644 --- a/deps/llvm-ver.make +++ b/deps/llvm-ver.make @@ -17,3 +17,4 @@ LLVM_SHARED_LIB_VER_SUFFIX := $(LLVM_VER_MAJ)jl # e.g.: "libLLVM-14jl" LLVM_SHARED_LIB_NAME := libLLVM-$(LLVM_SHARED_LIB_VER_SUFFIX) LLVM_SHARED_LINK_FLAG := -lLLVM-$(LLVM_SHARED_LIB_VER_SUFFIX) +LLVM_SHLIB_SYMBOL_VERSION := JL_LLVM_$(LLVM_VER_MAJ).$(LLVM_VER_MIN) diff --git a/src/Makefile b/src/Makefile index 6a7c2fee844a1..9e34dfda1c4ed 100644 --- a/src/Makefile +++ b/src/Makefile @@ -119,6 +119,9 @@ ifeq ($(JULIACODEGEN),LLVM) ifneq ($(USE_SYSTEM_LLVM),0) # USE_SYSTEM_LLVM != 0 CG_LLVMLINK += $(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --libs --system-libs) +LLVM_SHLIB_SYMBOL_VERSION := $(shell nm -D --with-symbol-versions $(shell $(LLVM_CONFIG_HOST) --libfiles --link-shared | awk '{print $1; exit}') | \ + grep _ZN4llvm3Any6TypeId | head -n 1 | sed -e 's/.*@//') + # HACK: llvm-config doesn't correctly point to 
shared libs on all platforms # https://github.com/JuliaLang/julia/issues/29981 else @@ -344,10 +347,10 @@ $(FLISP_EXECUTABLE_release): $(BUILDDIR)/flisp/libflisp.a $(FLISP_EXECUTABLE_debug): $(BUILDDIR)/flisp/libflisp-debug.a $(MAKE) -C $(BUILDDIR)/flisp $(subst $(abspath $(BUILDDIR)/flisp)/,,$(abspath $(FLISP_EXECUTABLE_debug))) -$(BUILDDIR)/flisp/libflisp.a: $(addprefix $(SRCDIR)/flisp/,*.h *.c) $(BUILDDIR)/support/libsupport.a +$(BUILDDIR)/flisp/libflisp.a: $(addprefix $(SRCDIR)/flisp/,*.h *.c) $(BUILDDIR)/support/libsupport.a $(BUILDDIR)/julia.expmap $(MAKE) -C $(SRCDIR)/flisp BUILDDIR='$(abspath $(BUILDDIR)/flisp)' -$(BUILDDIR)/flisp/libflisp-debug.a: $(addprefix $(SRCDIR)/,flisp/*.h flisp/*.c) $(BUILDDIR)/support/libsupport-debug.a +$(BUILDDIR)/flisp/libflisp-debug.a: $(addprefix $(SRCDIR)/,flisp/*.h flisp/*.c) $(BUILDDIR)/support/libsupport-debug.a $(BUILDDIR)/julia.expmap $(MAKE) -C $(SRCDIR)/flisp debug BUILDDIR='$(abspath $(BUILDDIR)/flisp)' $(BUILDDIR)/julia_version.h: $(JULIAHOME)/VERSION @@ -365,13 +368,17 @@ $(BUILDDIR)/julia_version.h: $(JULIAHOME)/VERSION CXXLD = $(CXX) -shared -$(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV) +$(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in + sed <'$<' >'$@' -e "s/@JULIA_SHLIB_SYMBOL_VERSION@/JL_LIBJULIA_$(SOMAJOR)/" \ + -e "s/@LLVM_SHLIB_SYMBOL_VERSION@/$(LLVM_SHLIB_SYMBOL_VERSION)/" + +$(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV) @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(RPATH_LIB) -o $@ \ $(JLDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-internal.$(SHLIB_EXT) $@ $(DSYMUTIL) $@ 
-$(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(DOBJS) $(BUILDDIR)/flisp/libflisp-debug.a $(BUILDDIR)/support/libsupport-debug.a $(LIBUV) +$(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(DOBJS) $(BUILDDIR)/flisp/libflisp-debug.a $(BUILDDIR)/support/libsupport-debug.a $(LIBUV) @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(DOBJS) $(RPATH_LIB) -o $@ \ $(JLDFLAGS) $(JLIBLDFLAGS) $(RT_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-internal-debug.$(SHLIB_EXT) $@ @@ -393,13 +400,13 @@ libjulia-internal-release: $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_ libjulia-internal-debug: $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT) libjulia-internal-debug libjulia-internal-release: $(PUBLIC_HEADER_TARGETS) -$(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(CODEGEN_OBJS) $(BUILDDIR)/support/libsupport.a $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT) +$(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(CODEGEN_OBJS) $(BUILDDIR)/support/libsupport.a $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT) @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(CODEGEN_OBJS) $(RPATH_LIB) -o $@ \ $(JLDFLAGS) $(JLIBLDFLAGS) $(CG_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-codegen.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-codegen.$(SHLIB_EXT) $@ $(DSYMUTIL) $@ -$(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(CODEGEN_DOBJS) $(BUILDDIR)/support/libsupport-debug.a $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT) +$(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): 
$(BUILDDIR)/julia.expmap $(CODEGEN_DOBJS) $(BUILDDIR)/support/libsupport-debug.a $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT) @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(CODEGEN_DOBJS) $(RPATH_LIB) -o $@ \ $(JLDFLAGS) $(JLIBLDFLAGS) $(CG_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-codegen-debug.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-codegen-debug.$(SHLIB_EXT) $@ @@ -429,6 +436,7 @@ clean: -rm -fr $(build_shlibdir)/libjulia-internal* $(build_shlibdir)/libjulia-codegen* $(build_shlibdir)/libccalltest* $(build_shlibdir)/libllvmcalltest* -rm -f $(BUILDDIR)/julia_flisp.boot $(BUILDDIR)/julia_flisp.boot.inc $(BUILDDIR)/jl_internal_funcs.inc -rm -f $(BUILDDIR)/*.dbg.obj $(BUILDDIR)/*.o $(BUILDDIR)/*.dwo $(BUILDDIR)/*.$(SHLIB_EXT) $(BUILDDIR)/*.a $(BUILDDIR)/*.h.gen + -rm -f $(BUILDDIR)/julia.expmap -rm -f $(BUILDDIR)/julia_version.h clean-flisp: diff --git a/src/julia.expmap b/src/julia.expmap.in similarity index 92% rename from src/julia.expmap rename to src/julia.expmap.in index ee35997827221..484c83a4b16b2 100644 --- a/src/julia.expmap +++ b/src/julia.expmap.in @@ -1,4 +1,4 @@ -{ +@JULIA_SHLIB_SYMBOL_VERSION@ { global: pthread*; __stack_chk_guard; @@ -35,9 +35,6 @@ JLJIT*; llvmGetPassPluginInfo; - /* Make visible so that linker will merge duplicate definitions across DSO boundaries */ - _ZN4llvm3Any6TypeId*; - /* freebsd */ environ; __progname; @@ -45,3 +42,9 @@ local: *; }; + +@LLVM_SHLIB_SYMBOL_VERSION@ { + global: + /* Make visible so that linker will merge duplicate definitions across DSO boundaries */ + _ZN4llvm3Any6TypeId*; +}; From 5db930e67a30c4ef75de43c72b1e9d617d55673c Mon Sep 17 00:00:00 2001 From: Cody Tapscott <84105208+topolarity@users.noreply.github.com> Date: Thu, 29 Jun 2023 12:26:51 -0400 Subject: [PATCH 262/290] optimize: Handle path-excluded `Core.ifelse` arguments (#50312) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit It's possible for PiNodes to effectively imply statically the condition of a Core.ifelse. For example: ```julia 23 ─ %60 = Core.ifelse(%47, false, true)::Bool │ %61 = Core.ifelse(%47, %58, false)::Union{Missing, Bool} 25 ─ goto #27 if not %60 26 ─ %65 = π (%61, Bool) └─── ... ``` In basic block #26, the PiNode gives us enough information to conclude that `%47 === false` if control flow reaches that point. The previous code incorrectly assumed that this kind of pruning would only be done for PhiNodes. Resolves #50276 --- base/compiler/ssair/passes.jl | 17 +++++++++++- test/compiler/irpasses.jl | 51 +++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index dc983f7a53cf4..974f0c32ea7ca 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -730,7 +730,7 @@ function perform_lifting!(compact::IncrementalCompact, new_node = Expr(:call, ifelse_func, condition) # Renamed then_result, else_result added below new_inst = NewInstruction(new_node, result_t, NoCallInfo(), old_inst[:line], old_inst[:flag]) - ssa = insert_node!(compact, old_ssa, new_inst) + ssa = insert_node!(compact, old_ssa, new_inst, #= attach_after =# true) lifted_philikes[i] = LiftedPhilike(ssa, IfElseCall(new_node), true) end # lifting_cache[ckey] = ssa @@ -767,6 +767,21 @@ function perform_lifting!(compact::IncrementalCompact, else_result = lifted_value(compact, old_node_ssa, else_result, lifted_philikes, lifted_leaves, reverse_mapping) + # In cases where the Core.ifelse condition is statically-known, e.g., thanks + # to a PiNode from a guarding conditional, replace with the remaining branch. + if then_result === SKIP_TOKEN || else_result === SKIP_TOKEN + only_result = (then_result === SKIP_TOKEN) ? 
else_result : then_result + + # Replace Core.ifelse(%cond, %a, %b) with %a + compact[lf.ssa][:inst] = only_result + should_count && _count_added_node!(compact, only_result) + + # Note: Core.ifelse(%cond, %a, %b) has observable effects (!nothrow), but since + # we have not deleted the preceding statement that this was derived from, this + # replacement is safe, i.e. it will not affect the effects observed. + continue + end + @assert then_result !== SKIP_TOKEN && then_result !== UNDEF_TOKEN @assert else_result !== SKIP_TOKEN && else_result !== UNDEF_TOKEN diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl index f27961c526559..a1738b52161bf 100644 --- a/test/compiler/irpasses.jl +++ b/test/compiler/irpasses.jl @@ -742,6 +742,57 @@ let m = Meta.@lower 1 + 1 @test Core.Compiler.verify_ir(ir) === nothing end +# A lifted Core.ifelse with an eliminated branch (#50276) +let m = Meta.@lower 1 + 1 + @assert Meta.isexpr(m, :thunk) + src = m.args[1]::CodeInfo + src.code = Any[ + # block 1 + #= %1: =# Core.Argument(2), + # block 2 + #= %2: =# Expr(:call, Core.ifelse, SSAValue(1), true, missing), + #= %3: =# GotoIfNot(SSAValue(2), 11), + # block 3 + #= %4: =# PiNode(SSAValue(2), Bool), # <-- This PiNode is the trigger of the bug, since it + # means that only one branch of the Core.ifelse + # is lifted. + #= %5: =# GotoIfNot(false, 8), + # block 2 + #= %6: =# nothing, + #= %7: =# GotoNode(8), + # block 4 + #= %8: =# PhiNode(Int32[5, 7], Any[SSAValue(4), SSAValue(6)]), + # ^-- N.B. This PhiNode also needs to have a Union{ ... } type in order + # for lifting to be performed (it is skipped for e.g. 
`Bool`) + # + #= %9: =# Expr(:call, isa, SSAValue(8), Missing), + #= %10: =# ReturnNode(SSAValue(9)), + # block 5 + #= %11: =# ReturnNode(false), + ] + src.ssavaluetypes = Any[ + Any, + Union{Missing, Bool}, + Any, + Bool, + Any, + Missing, + Any, + Union{Nothing, Bool}, + Bool, + Any, + Any + ] + nstmts = length(src.code) + src.codelocs = fill(one(Int32), nstmts) + src.ssaflags = fill(one(Int32), nstmts) + src.slotflags = fill(zero(UInt8), 3) + ir = Core.Compiler.inflate_ir(src) + @test Core.Compiler.verify_ir(ir) === nothing + ir = @test_nowarn Core.Compiler.sroa_pass!(ir) + @test Core.Compiler.verify_ir(ir) === nothing +end + # Issue #31546 - missing widenconst in SROA function f_31546(x) (a, b) = x == "r" ? (false, false) : From f6f35533f237d55e881276428bef2f091f9cae5b Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Thu, 29 Jun 2023 13:15:16 -0400 Subject: [PATCH 263/290] add `Base.isprecompiled(pkg::PkgId)` (#50218) --- NEWS.md | 1 + base/loading.jl | 59 ++++++++++++++++++++++++++++++++++++++++++++ doc/src/base/base.md | 1 + test/precompile.jl | 5 +++- 4 files changed, 65 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 50418ffe7309b..e9ae12847ed29 100644 --- a/NEWS.md +++ b/NEWS.md @@ -57,6 +57,7 @@ New library functions * `fourthroot(x)` is now defined in `Base.Math` and can be used to compute the fourth root of `x`. It can also be accessed using the unicode character `∜`, which can be typed by `\fourthroot` ([#48899]). * `Libc.memmove`, `Libc.memset`, and `Libc.memcpy` are now defined, whose functionality matches that of their respective C calls. +* `Base.isprecompiled(pkg::PkgId)` to identify whether a package has already been precompiled ([#50218]). 
New library features -------------------- diff --git a/base/loading.jl b/base/loading.jl index 1262872ffaf72..1ea4412ecc68f 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1364,6 +1364,65 @@ end # End extensions +# should sync with the types of arguments of `stale_cachefile` +const StaleCacheKey = Tuple{Base.PkgId, UInt128, String, String} + +""" + Base.isprecompiled(pkg::PkgId; ignore_loaded::Bool=false) + +Returns whether a given PkgId within the active project is precompiled. + +By default this check observes the same approach that code loading takes +with respect to when different versions of dependencies are currently loaded +to that which is expected. To ignore loaded modules and answer as if in a +fresh julia session specify `ignore_loaded=true`. + +!!! compat "Julia 1.10" + This function requires at least Julia 1.10. +""" +function isprecompiled(pkg::PkgId; + ignore_loaded::Bool=false, + stale_cache::Dict{StaleCacheKey,Bool}=Dict{StaleCacheKey, Bool}(), + cachepaths::Vector{String}=Base.find_all_in_cache_path(pkg), + sourcepath::Union{String,Nothing}=Base.locate_package(pkg) + ) + isnothing(sourcepath) && error("Cannot locate source for $(repr(pkg))") + for path_to_try in cachepaths + staledeps = stale_cachefile(sourcepath, path_to_try, ignore_loaded = true) + if staledeps === true + continue + end + staledeps, _ = staledeps::Tuple{Vector{Any}, Union{Nothing, String}} + # finish checking staledeps module graph + for i in 1:length(staledeps) + dep = staledeps[i] + dep isa Module && continue + modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt128} + modpaths = find_all_in_cache_path(modkey) + for modpath_to_try in modpaths::Vector{String} + stale_cache_key = (modkey, modbuild_id, modpath, modpath_to_try)::StaleCacheKey + if get!(() -> stale_cachefile(stale_cache_key...; ignore_loaded) === true, + stale_cache, stale_cache_key) + continue + end + @goto check_next_dep + end + @goto check_next_path + @label check_next_dep + end + try + # update 
timestamp of precompilation file so that it is the first to be tried by code loading + touch(path_to_try) + catch ex + # file might be read-only and then we fail to update timestamp, which is fine + ex isa IOError || rethrow() + end + return true + @label check_next_path + end + return false +end + # loads a precompile cache file, after checking stale_cachefile tests function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt128) assert_havelock(require_lock) diff --git a/doc/src/base/base.md b/doc/src/base/base.md index e540f50ca2cb0..81abc009f2fbc 100644 --- a/doc/src/base/base.md +++ b/doc/src/base/base.md @@ -462,6 +462,7 @@ Base.identify_package Base.locate_package Base.require Base.compilecache +Base.isprecompiled ``` ## Internals diff --git a/test/precompile.jl b/test/precompile.jl index 9cb5a1d52d485..62d862c384040 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -655,7 +655,10 @@ precompile_test_harness("code caching") do dir precompile(getelsize, (Vector{Int32},)) end """) - Base.compilecache(Base.PkgId(string(Cache_module))) + pkgid = Base.PkgId(string(Cache_module)) + @test !Base.isprecompiled(pkgid) + Base.compilecache(pkgid) + @test Base.isprecompiled(pkgid) @eval using $Cache_module M = getfield(@__MODULE__, Cache_module) # Test that this cache file "owns" all the roots From 3ddceeee167d18dc62e56aab7cd71add5f843e40 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Thu, 29 Jun 2023 14:50:53 -0400 Subject: [PATCH 264/290] Fall bad generated functions back to interpreter (#50348) This fixes #49715. The fix itself is pretty simple - just remove the generator expansion that was added in #48766, but the bigger question here is what the correct behavior should be in the first place. # Dynamic Semantics, generally The primary question here are of the semantics of generated functions. Note that this is quite different to how they are implemented. 
In general, the way we think about compiling Julia is that there is a well defined set of *dynamic semantics* that specify what a particular piece of Julia code means. Julia's dynamic semantics are generally quite simple (at every point, call the most specific applicable method). What happens under the hood may be quite different (e.g. lots of inference, compiling constant folding, etc), but the compilation process should mostly preserve the semantics (with a few well defined exceptions around floating point arithmetic, effect assumptions, semantically unobservable side effects, etc.). # The dynamic semantics of generated functions With that diatribe out of the way, let's think about the dynamic semantics of generated functions. We haven't always been particularly clear about this, but I propose it's basically the following: For a generated function: ``` @generated function f(args...) # = generator body =# end ``` this is semantically equivalent to basically the following function: ``` const lno = LineNumberNode(@__FILE__, @__LINE__); function f(args...) generator = @opaque @assume_effects :foldable :generator (args...)->#= generator body =# body = generator(Base.get_world_counter(), lno, Core.Typeof.(args)) execute(body, f, args...) end ``` A couple of notes on this: 1. `@opaque` is used here for the world-age capture semantics of the generator itself 2. There's an effects-assumption `:generator` that doesn't exist but is supposed to capture the special allowance for calling generators. This is discussed more below. ## Implementing `execute` For a long time, we didn't really have a first-class implementation of `execute`. It's almost (some liberties around the way that the arguments work, but you get the idea) ``` execute_eval(body, f, args...) = eval((args...)->$body)(f, args....) ``` but that doesn't have the correct world age semantics (would error as written and even if you used invokelatest, the body would run in the wrong world).
However, with OpaqueClosure we do actually have a mechanism now and we could write: ``` execute(body, f, args...) = OpaqueClosure(body, f)(args...) ``` Again, I'm not proposing this as an implementation, just to give us an idea of what the dynamic semantics of generated functions are. # The particular bug (#49715) The issue in #49715 is that the following happens: 1. A generated function gets called and inference is attempted. 2. Inference attempts to infer the generated function and call the generator. 3. The generator throws an error. 4. Inference fails. 5. The compiler enters a generic inference-failure fallback path 6. The compiler asks for a generator expansion in the generic world (-1) 7. This gives a different error, confusing the user. There is the additional problem that this error gets thrown at compilation time, which is not technically legal (and there was an existing TODO to fix that). In addition to that, I think there is a separate question of whether it should be semantically legal to throw an error for a different world age than the currently running one. Given the semantics proposed above, I would suggest that the answer should be no. This does depend on the exact semantics of :generator, but in general, our existing effects-related notions do not allow particularly strong assumptions on the particular error being thrown (requiring them to be re-evaluated at runtime), and I see no reason to depart from this practice here. Thus, I would suggest that the current behavior should be disallowed and the expected behavior is that the generic fallback implementation of generated functions invokes the generator in the runtime world and exposes the appropriate error. # Should we keep the generic world? That does leave the question of what to do about the generic world (-1). I'm not 100% convinced that this is necessarily a useful concept to have.
It is true that most generated functions do not depend on the world age, but they can already indicate this by returning a value with bounded world range and no backedges (equivalently returning a plain expression). On the other hand, keeping the generic world does risk creating the inverse of the situation that prompted this issue, in that there is no semantically reachable path to calling the generator with the generic world, making it hard to debug. As a result, I am very strongly leaning towards removing this concept, but I am open to being convinced otherwise. # This PR This PR, which is considerably shorter than this commit message, is very simple: The attempt to invoke the generator with the generic world -1 is removed. Instead, we fall back to the interpreter, which already has the precise semantics that I want here - invoking the generator in the dynamic world and interpreting the result. # The semantics of :generator That leaves one issue to be resolved which is the semantics of `:generator`. I don't think it's necessary to be as precise here as we are about the other effects we expose, but I propose it be something like the following: For functions with the :generator effects assumption, :consistent-cy is relaxed as follows: 1. The requisite notion of equality is relaxed to a "same code and metadata" equality of code instances. I don't think we have any predicate for this (and it's not necessarily computable), but the idea should be that the CodeInstance is always computed in the exact same way, but may be mutable and such. Note that this is explicitly not functional extensionality, because we do analyze the structure of the returned code and codegen based on it. 2. The world-age semantics of :consistent are sharpened to require our relaxed notion of consistency for any overlapping min_world:max_world range returned from the generator.
Co-authored-by: Oscar Smith --- src/jitlayers.cpp | 15 ++++++--------- test/compiler/contextual.jl | 19 +++++++++++++++++++ test/staged.jl | 2 +- 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 88e4b0f97927f..ae4a2ed02fb7e 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -547,21 +547,18 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec) jl_method_t *def = unspec->def->def.method; if (jl_is_method(def)) { src = (jl_code_info_t*)def->source; - if (src == NULL) { - // TODO: this is wrong - assert(def->generator); - // TODO: jl_code_for_staged can throw - src = jl_code_for_staged(unspec->def, ~(size_t)0); - } if (src && (jl_value_t*)src != jl_nothing) src = jl_uncompress_ir(def, NULL, (jl_value_t*)src); } else { src = (jl_code_info_t*)jl_atomic_load_relaxed(&unspec->def->uninferred); + assert(src); + } + if (src) { + assert(jl_is_code_info(src)); + ++UnspecFPtrCount; + _jl_compile_codeinst(unspec, src, unspec->min_world, *jl_ExecutionEngine->getContext(), 0); } - assert(src && jl_is_code_info(src)); - ++UnspecFPtrCount; - _jl_compile_codeinst(unspec, src, unspec->min_world, *jl_ExecutionEngine->getContext(), 0); jl_callptr_t null = nullptr; // if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort jl_atomic_cmpswap(&unspec->invoke, &null, jl_fptr_interpret_call_addr); diff --git a/test/compiler/contextual.jl b/test/compiler/contextual.jl index 0e8fe27591a5e..bbcf7b0dfb959 100644 --- a/test/compiler/contextual.jl +++ b/test/compiler/contextual.jl @@ -201,3 +201,22 @@ finally @show err end end + +# Test that writing a bad cassette-style pass gives the expected error (#49715) +function generator49715(world, source, self, f, tt) + tt = tt.parameters[1] + sig = Tuple{f, tt.parameters...} + mi = Base._which(sig; world) + + error("oh no") + + stub = Core.GeneratedFunctionStub(identity, Core.svec(:methodinstance, 
:ctx, :x, :f), Core.svec()) + stub(world, source, :(nothing)) +end + +@eval function doit49715(f, tt) + $(Expr(:meta, :generated, generator49715)) + $(Expr(:meta, :generated_only)) +end + +@test_throws "oh no" doit49715(sin, Tuple{Int}) diff --git a/test/staged.jl b/test/staged.jl index 0fa8ecb182cff..df351d8d47b96 100644 --- a/test/staged.jl +++ b/test/staged.jl @@ -182,7 +182,7 @@ let gf_err, tsk = @async nothing # create a Task for yield to try to run Expected = ErrorException("task switch not allowed from inside staged nor pure functions") @test_throws Expected gf_err() @test_throws Expected gf_err() - @test gf_err_ref[] == 4 + @test gf_err_ref[] < 1000 end gf_err_ref[] = 0 From 3d7aa6ee037f6aa9dbefaef3c893a32694ef9e2c Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Thu, 29 Jun 2023 14:06:00 -0500 Subject: [PATCH 265/290] Remove broken conversion of `@fastmath x[i] += 1` (#50347) Fixes #47241 --- base/fastmath.jl | 15 +++------------ test/fastmath.jl | 9 +++++++++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/base/fastmath.jl b/base/fastmath.jl index 7865736f8a776..44440ebad2050 100644 --- a/base/fastmath.jl +++ b/base/fastmath.jl @@ -112,19 +112,10 @@ function make_fastmath(expr::Expr) if isa(var, Symbol) # simple assignment expr = :($var = $op($var, $rhs)) - elseif isa(var, Expr) && var.head === :ref - var = var::Expr - # array reference - arr = var.args[1] - inds = var.args[2:end] - arrvar = gensym() - indvars = Any[gensym() for _ in inds] - expr = quote - $(Expr(:(=), arrvar, arr)) - $(Expr(:(=), Base.exprarray(:tuple, indvars), Base.exprarray(:tuple, inds))) - $arrvar[$(indvars...)] = $op($arrvar[$(indvars...)], $rhs) - end end + # It is hard to optimize array[i += 1] += 1 + # and array[end] += 1 without bugs. (#47241) + # We settle for not optimizing the op= call. 
end Base.exprarray(make_fastmath(expr.head), Base.mapany(make_fastmath, expr.args)) end diff --git a/test/fastmath.jl b/test/fastmath.jl index 8755e727db092..21f3ebc1e603f 100644 --- a/test/fastmath.jl +++ b/test/fastmath.jl @@ -284,3 +284,12 @@ end end end end + +@testset "+= with indexing (#47241)" begin + i = 0 + x = zeros(2) + @fastmath x[i += 1] += 1 + @fastmath x[end] += 1 + @test x == [1, 1] + @test i == 1 +end From 02f80c6bdf87edd90898f0af5828d68343340afa Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Thu, 29 Jun 2023 20:12:42 -0400 Subject: [PATCH 266/290] Add docs on task-specific buffering using multithreading (#48542) Co-authored-by: Mason Protter --- base/threadingconstructs.jl | 4 +- doc/src/manual/multi-threading.md | 72 +++++++++++++++++++++++++++++-- 2 files changed, 70 insertions(+), 6 deletions(-) diff --git a/base/threadingconstructs.jl b/base/threadingconstructs.jl index b8a522be96d97..e4f14e26ac5a9 100644 --- a/base/threadingconstructs.jl +++ b/base/threadingconstructs.jl @@ -246,8 +246,8 @@ For example, the above conditions imply that: - Communicating between iterations using blocking primitives like `Channel`s is incorrect. - Write only to locations not shared across iterations (unless a lock or atomic operation is used). -- The value of [`threadid()`](@ref Threads.threadid) may change even within a single - iteration. See [`Task Migration`](@ref man-task-migration) +- Unless the `:static` schedule is used, the value of [`threadid()`](@ref Threads.threadid) + may change even within a single iteration. See [`Task Migration`](@ref man-task-migration). ## Schedulers diff --git a/doc/src/manual/multi-threading.md b/doc/src/manual/multi-threading.md index afb1c749f9447..056ceb1363fd7 100644 --- a/doc/src/manual/multi-threading.md +++ b/doc/src/manual/multi-threading.md @@ -239,6 +239,68 @@ julia> a Note that [`Threads.@threads`](@ref) does not have an optional reduction parameter like [`@distributed`](@ref). 
+### Using `@threads` without data races + +Taking the example of a naive sum + +```julia-repl +julia> function sum_single(a) + s = 0 + for i in a + s += i + end + s + end +sum_single (generic function with 1 method) + +julia> sum_single(1:1_000_000) +500000500000 +``` + +Simply adding `@threads` exposes a data race with multiple threads reading and writing `s` at the same time. +```julia-repl +julia> function sum_multi_bad(a) + s = 0 + Threads.@threads for i in a + s += i + end + s + end +sum_multi_bad (generic function with 1 method) + +julia> sum_multi_bad(1:1_000_000) +70140554652 +``` + +Note that the result is not `500000500000` as it should be, and will most likely change each evaluation. + +To fix this, buffers that are specific to the task may be used to segment the sum into chunks that are race-free. +Here `sum_single` is reused, with its own internal buffer `s`, and vector `a` is split into `nthreads()` +chunks for parallel work via `nthreads()` `@spawn`-ed tasks. + +```julia-repl +julia> function sum_multi_good(a) + chunks = Iterators.partition(a, length(a) ÷ Threads.nthreads()) + tasks = map(chunks) do chunk + Threads.@spawn sum_single(chunk) + end + chunk_sums = fetch.(tasks) + return sum_single(chunk_sums) + end +sum_multi_good (generic function with 1 method) + +julia> sum_multi_good(1:1_000_000) +500000500000 +``` +!!! Note + Buffers should not be managed based on `threadid()` i.e. `buffers = zeros(Threads.nthreads())` because concurrent tasks + can yield, meaning multiple concurrent tasks may use the same buffer on a given thread, introducing risk of data races. + Further, when more than one thread is available tasks may change thread at yield points, which is known as + [task migration](@ref man-task-migration). + +Another option is the use of atomic operations on variables shared across tasks/threads, which may be more performant +depending on the characteristics of the operations. 
+ ## Atomic Operations Julia supports accessing and modifying values *atomically*, that is, in a thread-safe way to avoid @@ -390,11 +452,13 @@ threads in Julia: ## [Task Migration](@id man-task-migration) -After a task starts running on a certain thread (e.g. via [`@spawn`](@ref Threads.@spawn) or -[`@threads`](@ref Threads.@threads)), it may move to a different thread if the task yields. +After a task starts running on a certain thread it may move to a different thread if the task yields. + +Such tasks may have been started with [`@spawn`](@ref Threads.@spawn) or [`@threads`](@ref Threads.@threads), +although the `:static` schedule option for `@threads` does freeze the threadid. -This means that [`threadid()`](@ref Threads.threadid) should not be treated as constant within a task, and therefore -should not be used to index into a vector of buffers or stateful objects. +This means that in most cases [`threadid()`](@ref Threads.threadid) should not be treated as constant within a task, +and therefore should not be used to index into a vector of buffers or stateful objects. !!! compat "Julia 1.7" Task migration was introduced in Julia 1.7. Before this tasks always remained on the same thread that they were From 0e8af1c1620cbf5304c8a7cabbc5475ec48a78ec Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Fri, 30 Jun 2023 13:06:06 +0900 Subject: [PATCH 267/290] AbstractInterpreter: Refactor `abstract_call` to be more overload-friendly (#50355) The existing signature of `abstract_call` can lead to ambiguous method errors when it is overloaded for an external `AbstractInterpreter`. This commit mitigates this issue by narrowing the argument type of `max_methods` from `::Union{Int,Nothing}` to `::Int`. Additionally, it introduces `abstract_call_unknown`, providing a hook for static code analyzers like JET.jl to report unknown calls. 
--- base/compiler/abstractinterpretation.jl | 58 ++++++++++++++----------- base/compiler/tfuncs.jl | 6 +-- test/compiler/AbstractInterpreter.jl | 4 +- 3 files changed, 37 insertions(+), 31 deletions(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 5c8e8fc22efd6..0cf3e6c00a1b7 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -1684,7 +1684,7 @@ end end function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs, argtypes)::ArgInfo, - sv::AbsIntState, max_methods::Int) + sv::AbsIntState) @nospecialize f la = length(argtypes) 𝕃ᵢ = typeinf_lattice(interp) @@ -1976,7 +1976,7 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f), elseif f === applicable return abstract_applicable(interp, argtypes, sv, max_methods) end - rt = abstract_call_builtin(interp, f, arginfo, sv, max_methods) + rt = abstract_call_builtin(interp, f, arginfo, sv) effects = builtin_effects(𝕃ᵢ, f, arginfo, rt) if f === getfield && (fargs !== nothing && isexpr(fargs[end], :boundscheck)) && !is_nothrow(effects) && isa(sv, InferenceState) # As a special case, we delayed tainting `noinbounds` for getfield calls in case we can prove @@ -2096,35 +2096,41 @@ function most_general_argtypes(closure::PartialOpaque) return Any[argt.parameters...] 
end +function abstract_call_unknown(interp::AbstractInterpreter, @nospecialize(ft), + arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState, + max_methods::Int) + if isa(ft, PartialOpaque) + newargtypes = copy(arginfo.argtypes) + newargtypes[1] = ft.env + return abstract_call_opaque_closure(interp, + ft, ArgInfo(arginfo.fargs, newargtypes), si, sv, #=check=#true) + end + wft = widenconst(ft) + if hasintersect(wft, Builtin) + add_remark!(interp, sv, "Could not identify method table for call") + return CallMeta(Any, Effects(), NoCallInfo()) + elseif hasintersect(wft, Core.OpaqueClosure) + uft = unwrap_unionall(wft) + if isa(uft, DataType) + return CallMeta(rewrap_unionall(uft.parameters[2], wft), Effects(), NoCallInfo()) + end + return CallMeta(Any, Effects(), NoCallInfo()) + end + # non-constant function, but the number of arguments is known and the `f` is not a builtin or intrinsic + atype = argtypes_to_type(arginfo.argtypes) + return abstract_call_gf_by_type(interp, nothing, arginfo, si, atype, sv, max_methods) +end + # call where the function is any lattice element function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, si::StmtInfo, - sv::AbsIntState, max_methods::Union{Int, Nothing} = nothing) - argtypes = arginfo.argtypes - ft = widenslotwrapper(argtypes[1]) + sv::AbsIntState, max_methods::Int=typemin(Int)) + ft = widenslotwrapper(arginfo.argtypes[1]) f = singleton_type(ft) if f === nothing - if isa(ft, PartialOpaque) - newargtypes = copy(argtypes) - newargtypes[1] = ft.env - return abstract_call_opaque_closure(interp, - ft, ArgInfo(arginfo.fargs, newargtypes), si, sv, #=check=#true) - end - wft = widenconst(ft) - if hasintersect(wft, Builtin) - add_remark!(interp, sv, "Could not identify method table for call") - return CallMeta(Any, Effects(), NoCallInfo()) - elseif hasintersect(wft, Core.OpaqueClosure) - uft = unwrap_unionall(wft) - if isa(uft, DataType) - return CallMeta(rewrap_unionall(uft.parameters[2], wft), Effects(), NoCallInfo()) - end - 
return CallMeta(Any, Effects(), NoCallInfo()) - end - # non-constant function, but the number of arguments is known and the `f` is not a builtin or intrinsic - max_methods = max_methods === nothing ? get_max_methods(interp, sv) : max_methods - return abstract_call_gf_by_type(interp, nothing, arginfo, si, argtypes_to_type(argtypes), sv, max_methods) + max_methods = max_methods == typemin(Int) ? get_max_methods(interp, sv) : max_methods + return abstract_call_unknown(interp, ft, arginfo, si, sv, max_methods) end - max_methods = max_methods === nothing ? get_max_methods(interp, f, sv) : max_methods + max_methods = max_methods == typemin(Int) ? get_max_methods(interp, f, sv) : max_methods return abstract_call_known(interp, f, arginfo, si, sv, max_methods) end diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl index 7ead9dcb7fa6b..f5690f4e5b8d6 100644 --- a/base/compiler/tfuncs.jl +++ b/base/compiler/tfuncs.jl @@ -1382,7 +1382,7 @@ function abstract_modifyfield!(interp::AbstractInterpreter, argtypes::Vector{Any op = unwrapva(argtypes[4]) v = unwrapva(argtypes[5]) TF = getfield_tfunc(𝕃ᵢ, o, f) - callinfo = abstract_call(interp, ArgInfo(nothing, Any[op, TF, v]), StmtInfo(true), sv, #=max_methods=# 1) + callinfo = abstract_call(interp, ArgInfo(nothing, Any[op, TF, v]), StmtInfo(true), sv, #=max_methods=#1) TF2 = tmeet(callinfo.rt, widenconst(TF)) if TF2 === Bottom RT = Bottom @@ -2640,10 +2640,10 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s if isa(sv, InferenceState) old_restrict = sv.restrict_abstract_call_sites sv.restrict_abstract_call_sites = false - call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, -1) + call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1) sv.restrict_abstract_call_sites = old_restrict else - call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, -1) + call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, 
#=max_methods=#-1) end info = verbose_stmt_info(interp) ? MethodResultPure(ReturnTypeCallInfo(call.info)) : MethodResultPure() rt = widenslotwrapper(call.rt) diff --git a/test/compiler/AbstractInterpreter.jl b/test/compiler/AbstractInterpreter.jl index 632f4851cd248..9db0a8903593d 100644 --- a/test/compiler/AbstractInterpreter.jl +++ b/test/compiler/AbstractInterpreter.jl @@ -279,9 +279,9 @@ CC.getsplit_impl(info::NoinlineCallInfo, idx::Int) = CC.getsplit(info.info, idx) CC.getresult_impl(info::NoinlineCallInfo, idx::Int) = CC.getresult(info.info, idx) function CC.abstract_call(interp::NoinlineInterpreter, - arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Union{Int,Nothing}) + arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Int) ret = @invoke CC.abstract_call(interp::CC.AbstractInterpreter, - arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Union{Int,Nothing}) + arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Int) if sv.mod in noinline_modules(interp) return CC.CallMeta(ret.rt, ret.effects, NoinlineCallInfo(ret.info)) end From eeb0b6952a4caf5e9f1b4ae751adbabc2aae8e13 Mon Sep 17 00:00:00 2001 From: Daniel Karrasch Date: Fri, 30 Jun 2023 14:03:38 +0200 Subject: [PATCH 268/290] Fix typos in 2x2 matmatmul (#50362) --- stdlib/LinearAlgebra/src/matmul.jl | 24 ++++++++++++------------ stdlib/LinearAlgebra/test/matmul.jl | 17 ++++++++++++++++- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/stdlib/LinearAlgebra/src/matmul.jl b/stdlib/LinearAlgebra/src/matmul.jl index e9839857f93e6..e375108f6a831 100644 --- a/stdlib/LinearAlgebra/src/matmul.jl +++ b/stdlib/LinearAlgebra/src/matmul.jl @@ -1008,18 +1008,18 @@ function matmul2x2!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat # TODO making these lazy could improve perf B11 = copy(B[1,1]'); B12 = copy(B[2,1]') B21 = copy(B[1,2]'); B22 = copy(B[2,2]') - elseif tA == 'S' - B11 = symmetric(A[1,1], 
:U); B12 = A[1,2] - B21 = copy(transpose(A[1,2])); B22 = symmetric(A[2,2], :U) - elseif tA == 's' - B11 = symmetric(A[1,1], :L); B12 = copy(transpose(A[2,1])) - B21 = A[2,1]; B22 = symmetric(A[2,2], :L) - elseif tA == 'H' - B11 = hermitian(A[1,1], :U); B12 = A[1,2] - B21 = copy(adjoint(A[1,2])); B22 = hermitian(A[2,2], :U) - else # if tA == 'h' - B11 = hermitian(A[1,1], :L); B12 = copy(adjoint(A[2,1])) - B21 = A[2,1]; B22 = hermitian(A[2,2], :L) + elseif tB == 'S' + B11 = symmetric(B[1,1], :U); B12 = B[1,2] + B21 = copy(transpose(B[1,2])); B22 = symmetric(B[2,2], :U) + elseif tB == 's' + B11 = symmetric(B[1,1], :L); B12 = copy(transpose(B[2,1])) + B21 = B[2,1]; B22 = symmetric(B[2,2], :L) + elseif tB == 'H' + B11 = hermitian(B[1,1], :U); B12 = B[1,2] + B21 = copy(adjoint(B[1,2])); B22 = hermitian(B[2,2], :U) + else # if tB == 'h' + B11 = hermitian(B[1,1], :L); B12 = copy(adjoint(B[2,1])) + B21 = B[2,1]; B22 = hermitian(B[2,2], :L) end _modify!(_add, A11*B11 + A12*B21, C, (1,1)) _modify!(_add, A11*B12 + A12*B22, C, (1,2)) diff --git a/stdlib/LinearAlgebra/test/matmul.jl b/stdlib/LinearAlgebra/test/matmul.jl index cc24e2bd6a1bf..e6000a4b24e2d 100644 --- a/stdlib/LinearAlgebra/test/matmul.jl +++ b/stdlib/LinearAlgebra/test/matmul.jl @@ -4,10 +4,19 @@ module TestMatmul using Base: rtoldefault using Test, LinearAlgebra, Random -using LinearAlgebra: mul! 
+using LinearAlgebra: mul!, Symmetric, Hermitian ## Test Julia fallbacks to BLAS routines +mul_wrappers = [ + m -> m, + m -> Symmetric(m, :U), + m -> Symmetric(m, :L), + m -> Hermitian(m, :U), + m -> Hermitian(m, :L), + m -> adjoint(m), + m -> transpose(m)] + @testset "matrices with zero dimensions" begin for (dimsA, dimsB, dimsC) in ( ((0, 5), (5, 3), (0, 3)), @@ -42,6 +51,9 @@ end @test *(adjoint(Ai), adjoint(Bi)) == [-28.25-66im 9.75-58im; -26-89im 21-73im] @test_throws DimensionMismatch [1 2; 0 0; 0 0] * [1 2] end + for wrapper_a in mul_wrappers, wrapper_b in mul_wrappers + @test wrapper_a(AA) * wrapper_b(BB) == Array(wrapper_a(AA)) * Array(wrapper_b(BB)) + end @test_throws DimensionMismatch mul!(Matrix{Float64}(undef, 3, 3), AA, BB) end @testset "3x3 matmul" begin @@ -62,6 +74,9 @@ end @test *(adjoint(Ai), adjoint(Bi)) == [1+2im 20.75+9im -44.75+42im; 19.5+17.5im -54-36.5im 51-14.5im; 13+7.5im 11.25+31.5im -43.25-14.5im] @test_throws DimensionMismatch [1 2 3; 0 0 0; 0 0 0] * [1 2 3] end + for wrapper_a in mul_wrappers, wrapper_b in mul_wrappers + @test wrapper_a(AA) * wrapper_b(BB) == Array(wrapper_a(AA)) * Array(wrapper_b(BB)) + end @test_throws DimensionMismatch mul!(Matrix{Float64}(undef, 4, 4), AA, BB) end From b303d0e4c285bfd190bc4cf759ffa9437f70990d Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Fri, 30 Jun 2023 10:23:15 -0300 Subject: [PATCH 269/290] Also set the nthreads for the threadpools during bootstrap (#50358) --- src/init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/init.c b/src/init.c index 7cae40d2a6906..4a152ed04b13d 100644 --- a/src/init.c +++ b/src/init.c @@ -875,6 +875,8 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_ jl_n_markthreads = 0; jl_n_sweepthreads = 0; jl_n_gcthreads = 0; + jl_n_threads_per_pool[0] = 1; + jl_n_threads_per_pool[1] = 0; } jl_start_threads(); From 734cafabbfb1040aac7a92682ae78e8a6db699cf Mon Sep 17 00:00:00 2001 From: pchintalapudi 
<34727397+pchintalapudi@users.noreply.github.com> Date: Fri, 30 Jun 2023 13:38:59 +0000 Subject: [PATCH 270/290] Remove `new BitCastInst` unless it's in a typed pointer context (#50338) * Add debug_level module flag * Make ptls use IRBuilder * Only use 'new BitCastInst' when we know we are not in opaque pointer mode --- src/codegen.cpp | 1 + src/llvm-alloc-opt.cpp | 2 ++ src/llvm-final-gc-lowering.cpp | 35 +++++----------------- src/llvm-late-gc-lowering.cpp | 19 ++++++++++-- src/llvm-propagate-addrspaces.cpp | 2 ++ src/llvm-ptls.cpp | 48 +++++++++++++++++-------------- 6 files changed, 55 insertions(+), 52 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index a81bb6eb20cc8..122170ae3fa97 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -7271,6 +7271,7 @@ static jl_llvm_functions_t // allocate Function declarations and wrapper objects //Safe because params holds ctx lock Module *M = TSM.getModuleUnlocked(); + M->addModuleFlag(Module::Warning, "julia.debug_level", ctx.emission_context.debug_level); jl_debugcache_t debuginfo; debuginfo.initialize(M); jl_returninfo_t returninfo = {}; diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index 5bcc28e7dad6c..b87a5a6799b0b 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -732,6 +732,8 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref) auto replace_i = new_i; Type *new_t = new_i->getType(); if (cast_t != new_t) { + // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine + assert(cast_t->getContext().supportsTypedPointers()); replace_i = new BitCastInst(replace_i, cast_t, "", user); replace_i->setDebugLoc(user->getDebugLoc()); replace_i->takeName(user); diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index ac7d67cddd6f3..e31bcb21199f5 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -81,37 +81,16 @@ Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F) 
unsigned nRoots = cast(target->getArgOperand(0))->getLimitedValue(INT_MAX); // Create the GC frame. - unsigned allocaAddressSpace = F.getParent()->getDataLayout().getAllocaAddrSpace(); - AllocaInst *gcframe_alloca = new AllocaInst( - T_prjlvalue, - allocaAddressSpace, - ConstantInt::get(Type::getInt32Ty(F.getContext()), nRoots + 2), - Align(16)); - gcframe_alloca->insertAfter(target); - Instruction *gcframe; - if (allocaAddressSpace) { - // addrspacecast as needed for non-0 alloca addrspace - gcframe = new AddrSpaceCastInst(gcframe_alloca, T_prjlvalue->getPointerTo(0)); - gcframe->insertAfter(gcframe_alloca); - } else { - gcframe = gcframe_alloca; - } + IRBuilder<> builder(target->getNextNode()); + auto gcframe_alloca = builder.CreateAlloca(T_prjlvalue, ConstantInt::get(Type::getInt32Ty(F.getContext()), nRoots + 2)); + gcframe_alloca->setAlignment(Align(16)); + // addrspacecast as needed for non-0 alloca addrspace + auto gcframe = cast(builder.CreateAddrSpaceCast(gcframe_alloca, T_prjlvalue->getPointerTo(0))); gcframe->takeName(target); // Zero out the GC frame. 
- BitCastInst *tempSlot_i8 = new BitCastInst(gcframe, Type::getInt8PtrTy(F.getContext()), ""); - tempSlot_i8->insertAfter(gcframe); - Type *argsT[2] = {tempSlot_i8->getType(), Type::getInt32Ty(F.getContext())}; - Function *memset = Intrinsic::getDeclaration(F.getParent(), Intrinsic::memset, makeArrayRef(argsT)); - Value *args[4] = { - tempSlot_i8, // dest - ConstantInt::get(Type::getInt8Ty(F.getContext()), 0), // val - ConstantInt::get(Type::getInt32Ty(F.getContext()), sizeof(jl_value_t*) * (nRoots + 2)), // len - ConstantInt::get(Type::getInt1Ty(F.getContext()), 0)}; // volatile - CallInst *zeroing = CallInst::Create(memset, makeArrayRef(args)); - cast(zeroing)->setDestAlignment(Align(16)); - zeroing->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); - zeroing->insertAfter(tempSlot_i8); + auto ptrsize = F.getParent()->getDataLayout().getPointerSize(); + builder.CreateMemSet(gcframe, Constant::getNullValue(Type::getInt8Ty(F.getContext())), ptrsize * (nRoots + 2), Align(16), tbaa_gcframe); return gcframe; } diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index f1cef798224d2..6d87abd68d7c2 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -702,8 +702,11 @@ void LateLowerGCFrame::LiftSelect(State &S, SelectInst *SI) { ConstantInt::get(Type::getInt32Ty(Cond->getContext()), i), "", SI); } - if (FalseElem->getType() != TrueElem->getType()) + if (FalseElem->getType() != TrueElem->getType()) { + // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine + assert(FalseElem->getContext().supportsTypedPointers()); FalseElem = new BitCastInst(FalseElem, TrueElem->getType(), "", SI); + } SelectInst *SelectBase = SelectInst::Create(Cond, TrueElem, FalseElem, "gclift", SI); int Number = ++S.MaxPtrNumber; S.AllPtrNumbering[SelectBase] = Number; @@ -776,6 +779,8 @@ void LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi) { else BaseElem = IncomingBases[i]; if (BaseElem->getType() != T_prjlvalue) { + // 
Shouldn't get here when using opaque pointers, so the new BitCastInst is fine + assert(BaseElem->getContext().supportsTypedPointers()); auto &remap = CastedRoots[i][BaseElem]; if (!remap) { if (auto constant = dyn_cast(BaseElem)) { @@ -2595,8 +2600,11 @@ void LateLowerGCFrame::PlaceGCFrameStore(State &S, unsigned R, unsigned MinColor // Pointee types don't have semantics, so the optimizer is // free to rewrite them if convenient. We need to change // it back here for the store. - if (Val->getType() != T_prjlvalue) + if (Val->getType() != T_prjlvalue) { + // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine + assert(Val->getContext().supportsTypedPointers()); Val = new BitCastInst(Val, T_prjlvalue, "", InsertBefore); + } new StoreInst(Val, slotAddress, InsertBefore); } @@ -2677,6 +2685,8 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State } if (slotAddress->getType() != AI->getType()) { // If we're replacing an ArrayAlloca, the pointer element type may need to be fixed up + // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine + assert(slotAddress->getContext().supportsTypedPointers()); auto BCI = new BitCastInst(slotAddress, AI->getType()); BCI->insertAfter(slotAddress); slotAddress = BCI; @@ -2705,8 +2715,11 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector &Colors, State slotAddress->insertAfter(gcframe); auto ValExpr = std::make_pair(Base, isa(Base->getType()) ? 
-1 : i); auto Elem = MaybeExtractScalar(S, ValExpr, SI); - if (Elem->getType() != T_prjlvalue) + if (Elem->getType() != T_prjlvalue) { + // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine + assert(Elem->getContext().supportsTypedPointers()); Elem = new BitCastInst(Elem, T_prjlvalue, "", SI); + } //auto Idxs = makeArrayRef(Tracked[i]); //Value *Elem = ExtractScalar(Base, true, Idxs, SI); Value *shadowStore = new StoreInst(Elem, slotAddress, SI); diff --git a/src/llvm-propagate-addrspaces.cpp b/src/llvm-propagate-addrspaces.cpp index 2158109cea120..9f6cfa1beb38e 100644 --- a/src/llvm-propagate-addrspaces.cpp +++ b/src/llvm-propagate-addrspaces.cpp @@ -187,6 +187,8 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruc if (LiftingMap.count(CurrentV)) CurrentV = LiftingMap[CurrentV]; if (CurrentV->getType() != TargetType) { + // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine + assert(CurrentV->getContext().supportsTypedPointers()); auto *BCI = new BitCastInst(CurrentV, TargetType); ToInsert.push_back(std::make_pair(BCI, InsertPt)); CurrentV = BCI; diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp index f69078433941f..840efaebee032 100644 --- a/src/llvm-ptls.cpp +++ b/src/llvm-ptls.cpp @@ -70,6 +70,7 @@ void LowerPTLS::set_pgcstack_attrs(CallInst *pgcstack) const Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefore) const { + IRBuilder<> builder(insertBefore); Value *tls; if (TargetTriple.isX86() && insertBefore->getFunction()->callsFunctionThatReturnsTwice()) { // Workaround LLVM bug by hiding the offset computation @@ -87,15 +88,15 @@ Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefor if (offset) { std::vector args(0); args.push_back(offset->getType()); - auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(insertBefore->getContext()), args, false), + auto tp = 
InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(builder.getContext()), args, false), dyn_asm_str, "=&r,r,~{dirflag},~{fpsr},~{flags}", false); - tls = CallInst::Create(tp, offset, "pgcstack_i8", insertBefore); + tls = builder.CreateCall(tp, {offset}, "pgcstack"); } else { auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(insertBefore->getContext()), false), const_asm_str.c_str(), "=r,~{dirflag},~{fpsr},~{flags}", false); - tls = CallInst::Create(tp, "pgcstack_i8", insertBefore); + tls = builder.CreateCall(tp, {}, "tls_pgcstack"); } } else { // AArch64/ARM doesn't seem to have this issue. @@ -118,12 +119,12 @@ Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefor } if (!offset) offset = ConstantInt::getSigned(T_size, jl_tls_offset); - auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(insertBefore->getContext()), false), asm_str, "=r", false); - tls = CallInst::Create(tp, "thread_ptr", insertBefore); - tls = GetElementPtrInst::Create(Type::getInt8Ty(insertBefore->getContext()), tls, {offset}, "ppgcstack_i8", insertBefore); + auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(builder.getContext()), false), asm_str, "=r", false); + tls = builder.CreateCall(tp, {}, "thread_ptr"); + tls = builder.CreateGEP(Type::getInt8Ty(builder.getContext()), tls, {offset}, "tls_ppgcstack"); } - tls = new BitCastInst(tls, T_pppjlvalue->getPointerTo(), "ppgcstack", insertBefore); - return new LoadInst(T_pppjlvalue, tls, "pgcstack", false, insertBefore); + tls = builder.CreateBitCast(tls, T_pppjlvalue->getPointerTo()); + return builder.CreateLoad(T_pppjlvalue, tls, "tls_pgcstack"); } GlobalVariable *LowerPTLS::create_hidden_global(Type *T, StringRef name) const @@ -153,15 +154,16 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, // if (!retboxed) // foreach(retinst) // emit_gc_unsafe_leave(ctx, last_gc_state); - auto phi = PHINode::Create(pgcstack->getType(), 2, ""); - 
phi->insertAfter(pgcstack); + IRBuilder<> builder(pgcstack->getNextNode()); + auto phi = builder.CreatePHI(pgcstack->getType(), 2, "pgcstack"); pgcstack->replaceAllUsesWith(phi); MDBuilder MDB(pgcstack->getContext()); SmallVector Weights{9, 1}; TerminatorInst *fastTerm; TerminatorInst *slowTerm; assert(pgcstack->getType()); // Static analyzer - auto cmp = new ICmpInst(phi, CmpInst::ICMP_NE, pgcstack, Constant::getNullValue(pgcstack->getType())); + builder.SetInsertPoint(phi); + auto cmp = builder.CreateICmpNE(pgcstack, Constant::getNullValue(pgcstack->getType())); SplitBlockAndInsertIfThenElse(cmp, phi, &fastTerm, &slowTerm, MDB.createBranchWeights(Weights)); if (CFGModified) @@ -180,7 +182,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, adopt->insertBefore(slowTerm); phi->addIncoming(adopt, slowTerm->getParent()); // emit fast branch code - IRBuilder<> builder(fastTerm->getParent()); + builder.SetInsertPoint(fastTerm->getParent()); fastTerm->removeFromParent(); MDNode *tbaa = tbaa_gcframe; Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_ptls_from_task(builder, T_size, get_current_task_from_pgcstack(builder, T_size, pgcstack), tbaa), true); @@ -194,7 +196,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, last_gc_state->addIncoming(prior, fastTerm->getParent()); for (auto &BB : *pgcstack->getParent()->getParent()) { if (isa(BB.getTerminator())) { - IRBuilder<> builder(BB.getTerminator()); + builder.SetInsertPoint(BB.getTerminator()); emit_gc_unsafe_leave(builder, T_size, get_current_ptls_from_task(builder, T_size, get_current_task_from_pgcstack(builder, T_size, phi), tbaa), last_gc_state, true); } } @@ -202,16 +204,16 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, } if (imaging_mode) { + IRBuilder<> builder(pgcstack); if (jl_tls_elf_support) { // if (offset != 0) // pgcstack = tp + offset; // fast // else // pgcstack = getter(); // slow - auto 
offset = new LoadInst(T_size, pgcstack_offset, "", false, pgcstack); + auto offset = builder.CreateLoad(T_size, pgcstack_offset); offset->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); offset->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None)); - auto cmp = new ICmpInst(pgcstack, CmpInst::ICMP_NE, offset, - Constant::getNullValue(offset->getType())); + auto cmp = builder.CreateICmpNE(offset, Constant::getNullValue(offset->getType())); MDBuilder MDB(pgcstack->getContext()); SmallVector Weights{9, 1}; TerminatorInst *fastTerm; @@ -222,10 +224,14 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, *CFGModified = true; auto fastTLS = emit_pgcstack_tp(offset, fastTerm); - auto phi = PHINode::Create(T_pppjlvalue, 2, "", pgcstack); + // refresh the basic block in the builder + builder.SetInsertPoint(pgcstack); + auto phi = builder.CreatePHI(T_pppjlvalue, 2, "pgcstack"); pgcstack->replaceAllUsesWith(phi); pgcstack->moveBefore(slowTerm); - auto getter = new LoadInst(T_pgcstack_getter, pgcstack_func_slot, "", false, pgcstack); + // refresh the basic block in the builder + builder.SetInsertPoint(pgcstack); + auto getter = builder.CreateLoad(T_pgcstack_getter, pgcstack_func_slot); getter->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); getter->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None)); pgcstack->setCalledFunction(pgcstack->getFunctionType(), getter); @@ -240,14 +246,14 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, // variable to be filled (in `staticdata.c`) at initialization time of the sysimg. // This way we can bypass the extra indirection in `jl_get_pgcstack` // since we may not know which getter function to use ahead of time. 
- auto getter = new LoadInst(T_pgcstack_getter, pgcstack_func_slot, "", false, pgcstack); + auto getter = builder.CreateLoad(T_pgcstack_getter, pgcstack_func_slot); getter->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); getter->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None)); if (TargetTriple.isOSDarwin()) { - auto key = new LoadInst(T_size, pgcstack_key_slot, "", false, pgcstack); + auto key = builder.CreateLoad(T_size, pgcstack_key_slot); key->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); key->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None)); - auto new_pgcstack = CallInst::Create(FT_pgcstack_getter, getter, {key}, "", pgcstack); + auto new_pgcstack = builder.CreateCall(FT_pgcstack_getter, getter, {key}); new_pgcstack->takeName(pgcstack); pgcstack->replaceAllUsesWith(new_pgcstack); pgcstack->eraseFromParent(); From 934cab6a18e5f37f8427f12767e5fa2e460d7fe1 Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Fri, 30 Jun 2023 16:04:35 +0000 Subject: [PATCH 271/290] Add escape hatch to avoid demoting float16 operations for unknown architectures (#50343) --- src/llvm-demote-float16.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/llvm-demote-float16.cpp b/src/llvm-demote-float16.cpp index 41ccdca327f8c..b2428860c2882 100644 --- a/src/llvm-demote-float16.cpp +++ b/src/llvm-demote-float16.cpp @@ -66,6 +66,9 @@ static bool have_fp16(Function &caller, const Triple &TT) { return true; } } + if (caller.hasFnAttribute("julia.hasfp16")) { + return true; + } return false; } From 530113f519fd5b1edc9cb458a9fe2c89e6848559 Mon Sep 17 00:00:00 2001 From: Diogo Netto <61364108+d-netto@users.noreply.github.com> Date: Fri, 30 Jun 2023 14:28:25 -0300 Subject: [PATCH 272/290] ensure objects beyond bump allocated region are inserted into the object pool freelist * addresses follow-up comments from #50137, particularly 
https://github.com/JuliaLang/julia/pull/50137#discussion_r1246964490 --- src/gc.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/src/gc.c b/src/gc.c index 9e588c171a676..e1de715857e53 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1238,7 +1238,10 @@ static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT // Do not pass in `ptls` as argument. This slows down the fast path // in pool_alloc significantly jl_ptls_t ptls = jl_current_task->ptls; - jl_gc_pagemeta_t *pg = jl_gc_alloc_page(); + jl_gc_pagemeta_t *pg = pop_page_metadata_back(&ptls->page_metadata_lazily_freed); + if (pg == NULL) { + pg = jl_gc_alloc_page(); + } pg->osize = p->osize; pg->thread_n = ptls->tid; set_page_metadata(pg); @@ -1295,20 +1298,8 @@ STATIC_INLINE jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset assert(pg->osize == p->osize); pg->nfree = 0; pg->has_young = 1; - pg = pop_page_metadata_back(&ptls->page_metadata_lazily_freed); - if (pg != NULL) { - v = gc_reset_page(ptls, p, pg); - pg->osize = p->osize; - push_page_metadata_back(&ptls->page_metadata_allocd, pg); - } - else { - v = NULL; - } - } - // Not an else!! - if (v == NULL) { - v = gc_add_page(p); } + v = gc_add_page(p); next = (jl_taggedvalue_t*)((char*)v + osize); } p->newpages = next; @@ -1353,15 +1344,19 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo char *data = pg->data; jl_taggedvalue_t *v = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET); char *lim = data + GC_PAGE_SZ - osize; + char *lim_newpages = data + GC_PAGE_SZ; + if (gc_page_data((char*)p->newpages - 1) == data) { + lim_newpages = (char*)p->newpages; + } size_t old_nfree = pg->nfree; size_t nfree; - int reuse_page = 1; + int re_use_page = 1; int freed_lazily = 0; int freedall = 1; int pg_skpd = 1; if (!pg->has_marked) { - reuse_page = 0; + re_use_page = 0; #ifdef _P64 // TODO: re-enable on `_P32`? 
// lazy version: (empty) if the whole page was already unused, free it (return it to the pool) // eager version: (freedall) free page as soon as possible @@ -1402,7 +1397,8 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo jl_taggedvalue_t **pfl_begin = NULL; while ((char*)v <= lim) { int bits = v->bits.gc; - if (!gc_marked(bits)) { + // if an object is past `lim_newpages` then we can guarantee it's garbage + if (!gc_marked(bits) || (char*)v >= lim_newpages) { *pfl = v; pfl = &v->next; pfl_begin = (pfl_begin != NULL) ? pfl_begin : pfl; @@ -1439,7 +1435,7 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo nfree = pg->nfree; done: - if (reuse_page) { + if (re_use_page) { push_page_metadata_back(allocd, pg); } else if (freed_lazily) { From 8fc641ccc19f54ada513b9c8997499064a8d0ba2 Mon Sep 17 00:00:00 2001 From: Daniel Karrasch Date: Fri, 30 Jun 2023 19:58:15 +0200 Subject: [PATCH 273/290] Fix typos in symmetric eigendecomposition code (#50366) --- stdlib/LinearAlgebra/src/symmetriceigen.jl | 4 ++-- stdlib/LinearAlgebra/test/symmetriceigen.jl | 12 +++++++++--- stdlib/LinearAlgebra/test/testgroups | 1 + 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/stdlib/LinearAlgebra/src/symmetriceigen.jl b/stdlib/LinearAlgebra/src/symmetriceigen.jl index bafeb50f35459..279577c31d664 100644 --- a/stdlib/LinearAlgebra/src/symmetriceigen.jl +++ b/stdlib/LinearAlgebra/src/symmetriceigen.jl @@ -158,7 +158,7 @@ eigmin(A::RealHermSymComplexHerm{<:Real}) = eigvals(A, 1:1)[1] function eigen(A::HermOrSym{TA}, B::HermOrSym{TB}; kws...) where {TA,TB} S = promote_type(eigtype(TA), TB) - return eigen!(eigencopy_oftype{S}(A), eigencopy_oftype(B, S); kws...) + return eigen!(eigencopy_oftype(A, S), eigencopy_oftype(B, S); kws...) 
end function eigen!(A::HermOrSym{T,S}, B::HermOrSym{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasReal,S<:StridedMatrix} @@ -192,7 +192,7 @@ _UtiAUi!(A, U) = rdiv!(ldiv!(U', A), U) function eigvals(A::HermOrSym{TA}, B::HermOrSym{TB}; kws...) where {TA,TB} S = promote_type(eigtype(TA), TB) - return eigen!(eigencopy_oftype{S}(A), eigencopy_oftype(B, S); kws...) + return eigvals!(eigencopy_oftype(A, S), eigencopy_oftype(B, S); kws...) end function eigvals!(A::HermOrSym{T,S}, B::HermOrSym{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasReal,S<:StridedMatrix} diff --git a/stdlib/LinearAlgebra/test/symmetriceigen.jl b/stdlib/LinearAlgebra/test/symmetriceigen.jl index 6744db7c477ad..c28c17255c222 100644 --- a/stdlib/LinearAlgebra/test/symmetriceigen.jl +++ b/stdlib/LinearAlgebra/test/symmetriceigen.jl @@ -47,26 +47,32 @@ end e0,v0 = eigen(A, B) e1,v1 = eigen(A, Symmetric(B)) e2,v2 = eigen(Symmetric(A), B) + e3,v3 = eigen(Symmetric(A), Symmetric(B)) @test e0 ≈ e1 && v0 ≈ v1 @test e0 ≈ e2 && v0 ≈ v2 + @test e0 ≈ e3 && v0 ≈ v3 # eigvals @test eigvals(A, B) ≈ eigvals(A, Symmetric(B)) @test eigvals(A, B) ≈ eigvals(Symmetric(A), B) + @test eigvals(A, B) ≈ eigvals(Symmetric(A), Symmetric(B)) ## Complex valued A = [1.0+im 1.0+1.0im 0 0; 1.0+1.0im 2.0+3.0im 1.0+1.0im 0; 0 1.0+2.0im 3.0+4.0im 1.0+5.0im; 0 0 1.0+1.0im 4.0+4.0im] - AH = (A+A')/2 + AH = A'A B = [2.0+2.0im 1.0+1.0im 4.0+4.0im 3.0+3.0im; 0 3.0+2.0im 1.0+1.0im 3.0+4.0im; 3.0+3.0im 1.0+4.0im 0 0; 0 1.0+2.0im 3.0+1.0im 1.0+1.0im] - BH = (B+B')/2 + BH = B'B # eigen sf = x->(real(x),imag(x)) e1,v1 = eigen(A, Hermitian(BH)) - e2,v2 = eigen(Hermitian(AH), B) @test A*v1 ≈ Hermitian(BH)*v1*Diagonal(e1) + e2,v2 = eigen(Hermitian(AH), B) @test Hermitian(AH)*v2 ≈ B*v2*Diagonal(e2) + e3,v3 = eigen(Hermitian(AH), Hermitian(BH)) + @test Hermitian(AH)*v3 ≈ Hermitian(BH)*v3*Diagonal(e3) # eigvals @test eigvals(A, BH; sortby=sf) ≈ eigvals(A, Hermitian(BH); sortby=sf) @test eigvals(AH, B; sortby=sf) ≈ 
eigvals(Hermitian(AH), B; sortby=sf) + @test eigvals(AH, BH; sortby=sf) ≈ eigvals(Hermitian(AH), Hermitian(BH); sortby=sf) end end # module TestSymmetricEigen diff --git a/stdlib/LinearAlgebra/test/testgroups b/stdlib/LinearAlgebra/test/testgroups index e281203bf3fa3..0f2f4f4af8708 100644 --- a/stdlib/LinearAlgebra/test/testgroups +++ b/stdlib/LinearAlgebra/test/testgroups @@ -27,3 +27,4 @@ pinv factorization abstractq ldlt +symmetriceigen From 92437c2a989e7efe9994b2277183222a0f2273c7 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Fri, 30 Jun 2023 15:42:26 -0300 Subject: [PATCH 274/290] Move ccall tests to node 1 (#50350) The ccall tests seem to be a very common failure point, so move them to node 1 because it usually has less maxrss. --- test/runtests.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/runtests.jl b/test/runtests.jl index 16f60ddcf6764..1264acae985b0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -71,6 +71,7 @@ function move_to_node1(t) end # Base.compilecache only works from node 1, so precompile test is handled specially +move_to_node1("ccall") move_to_node1("precompile") move_to_node1("SharedArrays") move_to_node1("threads") From 196a5b4a91d216eb3ca08fbc8788b41a111d7af5 Mon Sep 17 00:00:00 2001 From: Diogo Netto <61364108+d-netto@users.noreply.github.com> Date: Fri, 30 Jun 2023 15:45:09 -0300 Subject: [PATCH 275/290] use a single uv_cond_t to coordinate wakeup of GC threads (#50339) * Should avoid going to the kernel multiple times to wake GC threads up. 
--- src/gc.c | 14 ++++++++------ src/gc.h | 2 ++ src/partr.c | 6 +++--- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/gc.c b/src/gc.c index e1de715857e53..db4cc16112e46 100644 --- a/src/gc.c +++ b/src/gc.c @@ -21,6 +21,9 @@ _Atomic(int) gc_n_threads_marking; _Atomic(int) gc_master_tid; // `tid` of first GC thread int gc_first_tid; +// Mutex/cond used to synchronize wakeup of GC threads on parallel marking +uv_mutex_t gc_threads_lock; +uv_cond_t gc_threads_cond; // To indicate whether concurrent sweeping should run uv_sem_t gc_sweep_assists_needed; @@ -2766,13 +2769,10 @@ void gc_mark_loop_parallel(jl_ptls_t ptls, int master) if (master) { jl_atomic_store(&gc_master_tid, ptls->tid); // Wake threads up and try to do some work + uv_mutex_lock(&gc_threads_lock); jl_atomic_fetch_add(&gc_n_threads_marking, 1); - for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) { - jl_ptls_t ptls2 = gc_all_tls_states[i]; - uv_mutex_lock(&ptls2->sleep_lock); - uv_cond_signal(&ptls2->wake_signal); - uv_mutex_unlock(&ptls2->sleep_lock); - } + uv_cond_broadcast(&gc_threads_cond); + uv_mutex_unlock(&gc_threads_lock); gc_mark_and_steal(ptls); jl_atomic_fetch_add(&gc_n_threads_marking, -1); } @@ -3532,6 +3532,8 @@ void jl_gc_init(void) JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock"); uv_mutex_init(&gc_cache_lock); uv_mutex_init(&gc_perm_lock); + uv_mutex_init(&gc_threads_lock); + uv_cond_init(&gc_threads_cond); uv_sem_init(&gc_sweep_assists_needed, 0); jl_gc_init_page(); diff --git a/src/gc.h b/src/gc.h index 8e06e91571b31..891ecbc3dd2f3 100644 --- a/src/gc.h +++ b/src/gc.h @@ -429,6 +429,8 @@ STATIC_INLINE void gc_big_object_link(bigval_t *hdr, bigval_t **list) JL_NOTSAFE *list = hdr; } +extern uv_mutex_t gc_threads_lock; +extern uv_cond_t gc_threads_cond; extern uv_sem_t gc_sweep_assists_needed; extern _Atomic(int) gc_n_threads_marking; void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq); diff --git a/src/partr.c b/src/partr.c index 
5f543565928a1..428389db7f218 100644 --- a/src/partr.c +++ b/src/partr.c @@ -130,11 +130,11 @@ void jl_gc_mark_threadfun(void *arg) free(targ); while (1) { - uv_mutex_lock(&ptls->sleep_lock); + uv_mutex_lock(&gc_threads_lock); while (!may_mark()) { - uv_cond_wait(&ptls->wake_signal, &ptls->sleep_lock); + uv_cond_wait(&gc_threads_cond, &gc_threads_lock); } - uv_mutex_unlock(&ptls->sleep_lock); + uv_mutex_unlock(&gc_threads_lock); gc_mark_loop_parallel(ptls, 0); } } From a40dec12b3fab123a08dd54db817e13279deb61f Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Fri, 30 Jun 2023 17:37:06 -0400 Subject: [PATCH 276/290] sroa: Mark dead setfields as EFFECT_FREE (#50373) sroa tries to delete any `setfield!` call for allocations that it knows it can remove. However, if it does not know that the type is correct for the allocation, it may not be able to remove the setfield!. If the type later gets improved (e.g. by irinterp), the statement becomes eligible for removal, but it currently requires another sroa pass to actually remove it. Improve that situation by marking such a statement that is known-dead as IR_FLAG_EFFECT_FREE, so if we later also prove it nothrow, it (and the corresponding allocation) immediately become DCE-eligible. 
--- base/compiler/ssair/passes.jl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 974f0c32ea7ca..9a312bec8f202 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -1525,8 +1525,16 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse idx == newidx && continue # this is allocation # verify this statement won't throw, otherwise it can't be eliminated safely ssa = SSAValue(idx) - is_nothrow(ir, ssa) || continue - ir[ssa][:inst] = nothing + if is_nothrow(ir, ssa) + ir[ssa][:inst] = nothing + else + # We can't eliminate this statement, because it might still + # throw an error, but we can mark it as effect-free since we + # know we have removed all uses of the mutable allocation. + # As a result, if we ever do prove nothrow, we can delete + # this statement then. + ir[ssa][:flag] |= IR_FLAG_EFFECT_FREE + end end end preserve_uses === nothing && continue From 36a39b0fed2927964c98b66fc45300b70b05d567 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Sat, 1 Jul 2023 06:08:38 -0400 Subject: [PATCH 277/290] add note to CONTRIBUTING.md about making issues and PR names self explanatory (#50370) --- CONTRIBUTING.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f08b71e2f3c30..0131dcbc4a278 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -185,7 +185,14 @@ At the moment, this should always be done with the following `compat` admonition *By contributing code to Julia, you are agreeing to release it under the [MIT License](https://github.com/JuliaLang/julia/tree/master/LICENSE.md).* -The Julia community uses [GitHub issues](https://github.com/JuliaLang/julia/issues) to track and discuss problems, feature requests, and pull requests (PR). You can make pull requests for incomplete features to get code review. 
The convention is to prefix the pull request title with "WIP:" for Work In Progress, or "RFC:" for Request for Comments when work is completed and ready for merging. This will prevent accidental merging of work that is in progress. +The Julia community uses [GitHub issues](https://github.com/JuliaLang/julia/issues) to track and discuss problems, feature requests, and pull requests (PR). + +Issues and pull requests should have self-explanatory titles such that they can be understood from the list of PRs and Issues. +e.g. `Add {feature}` and `Fix {bug}` are good, `Fix #12345. Corrects the bug.` is bad. + +You can make pull requests for incomplete features to get code review. The convention is to open these as draft PRs and prefix +the pull request title with "WIP:" for Work In Progress, or "RFC:" for Request for Comments when work is completed and ready +for merging. This will prevent accidental merging of work that is in progress. Note: These instructions are for adding to or improving functionality in the base library. Before getting started, it can be helpful to discuss the proposed changes or additions on the [Julia Discourse forum](https://discourse.julialang.org) or in a GitHub issue---it's possible your proposed change belongs in a package rather than the core language. Also, keep in mind that changing stuff in the base can potentially break a lot of things. Finally, because of the time required to build Julia, note that it's usually faster to develop your code in stand-alone files, get it working, and then migrate it into the base libraries. 
From 27e21c8865ffc0d8620d3b5a18b5002162d3c0b7 Mon Sep 17 00:00:00 2001 From: Nicholas Bauer Date: Sat, 1 Jul 2023 08:01:49 -0400 Subject: [PATCH 278/290] `hvncat`: Added inbounds annotations that improve performance (#41200) * Added judicious inbounds/inline decorations * add inline to other one * bump * grammar Co-authored-by: Jeff Bezanson * Remove `@inline` * bump CI * bump CI 2 * Merge fix * Ensure `hvncat_fill!` can't execute when N < 2 * Bounds check in three-arg `hvncat_fill!` * Narrow inbounds * Moved bounds check up --------- Co-authored-by: Jeff Bezanson --- base/abstractarray.jl | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index 20566265b0c76..1417987847ec4 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -2404,18 +2404,22 @@ function _typed_hvncat(::Type{T}, dims::NTuple{N, Int}, row_first::Bool, xs::Num end function hvncat_fill!(A::Array, row_first::Bool, xs::Tuple) + nr, nc = size(A, 1), size(A, 2) + na = prod(size(A)[3:end]) + len = length(xs) + nrc = nr * nc + if nrc * na != len + throw(ArgumentError("argument count $(len) does not match specified shape $(size(A))")) + end # putting these in separate functions leads to unnecessary allocations if row_first - nr, nc = size(A, 1), size(A, 2) - nrc = nr * nc - na = prod(size(A)[3:end]) k = 1 for d ∈ 1:na dd = nrc * (d - 1) for i ∈ 1:nr Ai = dd + i for j ∈ 1:nc - A[Ai] = xs[k] + @inbounds A[Ai] = xs[k] k += 1 Ai += nr end @@ -2423,7 +2427,7 @@ function hvncat_fill!(A::Array, row_first::Bool, xs::Tuple) end else for k ∈ eachindex(xs) - A[k] = xs[k] + @inbounds A[k] = xs[k] end end end @@ -2609,28 +2613,36 @@ function _typed_hvncat_shape(::Type{T}, shape::NTuple{N, Tuple}, row_first, as:: return A end -function hvncat_fill!(A::AbstractArray{T, N}, scratch1::Vector{Int}, scratch2::Vector{Int}, d1::Int, d2::Int, as::Tuple{Vararg}) where {T, N} +function hvncat_fill!(A::AbstractArray{T, N}, 
scratch1::Vector{Int}, scratch2::Vector{Int}, + d1::Int, d2::Int, as::Tuple) where {T, N} + N > 1 || throw(ArgumentError("dimensions of the destination array must be at least 2")) + length(scratch1) == length(scratch2) == N || + throw(ArgumentError("scratch vectors must have as many elements as the destination array has dimensions")) + 0 < d1 < 3 && + 0 < d2 < 3 && + d1 != d2 || + throw(ArgumentError("d1 and d2 must be either 1 or 2, exclusive.")) outdims = size(A) offsets = scratch1 inneroffsets = scratch2 for a ∈ as if isa(a, AbstractArray) for ai ∈ a - Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N) + @inbounds Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N) A[Ai] = ai - for j ∈ 1:N + @inbounds for j ∈ 1:N inneroffsets[j] += 1 inneroffsets[j] < cat_size(a, j) && break inneroffsets[j] = 0 end end else - Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N) + @inbounds Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N) A[Ai] = a end - for j ∈ (d1, d2, 3:N...) + @inbounds for j ∈ (d1, d2, 3:N...) 
offsets[j] += cat_size(a, j) offsets[j] < outdims[j] && break offsets[j] = 0 From 36e188f9dcd99555af74dadcfcfdda6a41e348b4 Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Sat, 1 Jul 2023 22:40:56 +0000 Subject: [PATCH 279/290] Parse pass options in opt/PassBuilder (#50383) --- src/pipeline.cpp | 39 ++++++++++++++++++++++++++++++++++++++ test/llvmpasses/parsing.ll | 7 +++++++ 2 files changed, 46 insertions(+) create mode 100644 test/llvmpasses/parsing.ll diff --git a/src/pipeline.cpp b/src/pipeline.cpp index 06d5aa2d3bfa8..3b434a33a4ca4 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -830,6 +830,19 @@ void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { #define FUNCTION_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } #include "llvm-julia-passes.inc" #undef FUNCTION_PASS + if (Name.consume_front("GCInvariantVerifier")) { + if (Name.consume_front("<") && Name.consume_back(">")) { + bool strong = true; + if (Name.consume_front("no-")) { + strong = false; + } + if (Name == "strong") { + PM.addPass(GCInvariantVerifierPass(strong)); + return true; + } + } + return false; + } return false; }); @@ -839,6 +852,32 @@ void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { #define MODULE_PASS(NAME, CLASS, CREATE_PASS) if (Name == NAME) { PM.addPass(CREATE_PASS); return true; } #include "llvm-julia-passes.inc" #undef MODULE_PASS + if (Name.consume_front("LowerPTLSPass")) { + if (Name.consume_front("<") && Name.consume_back(">")) { + bool imaging_mode = true; + if (Name.consume_front("no-")) { + imaging_mode = false; + } + if (Name == "imaging") { + PM.addPass(LowerPTLSPass(imaging_mode)); + return true; + } + } + return false; + } + if (Name.consume_front("JuliaMultiVersioning")) { + if (Name.consume_front("<") && Name.consume_back(">")) { + bool external_use = true; + if (Name.consume_front("no-")) { + external_use = false; + } + if (Name == "external") { + 
PM.addPass(MultiVersioningPass(external_use)); + return true; + } + } + return false; + } //Add full pipelines here auto julia_options = parseJuliaPipelineOptions(Name); if (julia_options) { diff --git a/test/llvmpasses/parsing.ll b/test/llvmpasses/parsing.ll new file mode 100644 index 0000000000000..434ffbb26c95f --- /dev/null +++ b/test/llvmpasses/parsing.ll @@ -0,0 +1,7 @@ +; COM: NewPM-only test, tests for ability to parse Julia passes + +; RUN: opt --opaque-pointers=0 --load-pass-plugin=libjulia-codegen%shlibext -passes='module(CPUFeatures,RemoveNI,LowerSIMDLoop,FinalLowerGC,JuliaMultiVersioning,RemoveJuliaAddrspaces,LowerPTLSPass,function(DemoteFloat16,CombineMulAdd,LateLowerGCFrame,AllocOpt,PropagateJuliaAddrspaces,LowerExcHandlers,GCInvariantVerifier,loop(JuliaLICM),GCInvariantVerifier,GCInvariantVerifier),LowerPTLSPass,LowerPTLSPass,JuliaMultiVersioning,JuliaMultiVersioning)' -S %s -o /dev/null + +define void @test() { + ret void +} From 6336f681996cfc18cab7006a7c17bbbb725049c1 Mon Sep 17 00:00:00 2001 From: DilumAluthgeBot <43731525+DilumAluthgeBot@users.noreply.github.com> Date: Sun, 2 Jul 2023 07:00:42 -0400 Subject: [PATCH 280/290] =?UTF-8?q?=F0=9F=A4=96=20[master]=20Bump=20the=20?= =?UTF-8?q?Pkg=20stdlib=20from=204de1826bc=20to=20e8197dd0e=20(#50388)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Pkg-4de1826bcb49b8576603ba6a22ed1c6fecc66a10.tar.gz/md5 | 1 - .../Pkg-4de1826bcb49b8576603ba6a22ed1c6fecc66a10.tar.gz/sha512 | 1 - .../Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/md5 | 1 + .../Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/sha512 | 1 + stdlib/Pkg.version | 2 +- 5 files changed, 3 insertions(+), 3 deletions(-) delete mode 100644 deps/checksums/Pkg-4de1826bcb49b8576603ba6a22ed1c6fecc66a10.tar.gz/md5 delete mode 100644 deps/checksums/Pkg-4de1826bcb49b8576603ba6a22ed1c6fecc66a10.tar.gz/sha512 create mode 100644 deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/md5 
create mode 100644 deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/sha512 diff --git a/deps/checksums/Pkg-4de1826bcb49b8576603ba6a22ed1c6fecc66a10.tar.gz/md5 b/deps/checksums/Pkg-4de1826bcb49b8576603ba6a22ed1c6fecc66a10.tar.gz/md5 deleted file mode 100644 index 5bcff2bb6fe1a..0000000000000 --- a/deps/checksums/Pkg-4de1826bcb49b8576603ba6a22ed1c6fecc66a10.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -44deb23c240d210544eea31317ef56ab diff --git a/deps/checksums/Pkg-4de1826bcb49b8576603ba6a22ed1c6fecc66a10.tar.gz/sha512 b/deps/checksums/Pkg-4de1826bcb49b8576603ba6a22ed1c6fecc66a10.tar.gz/sha512 deleted file mode 100644 index b6407afe49ea4..0000000000000 --- a/deps/checksums/Pkg-4de1826bcb49b8576603ba6a22ed1c6fecc66a10.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -9d043a647efad34be0fd68778f7ae9643c2620ec3b69e80868dee4d3a889aab90b9fdd1932de043f65ea29348d65ceee331aaba041a9f9773d90d0eb30d920af diff --git a/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/md5 b/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/md5 new file mode 100644 index 0000000000000..8710722b5409c --- /dev/null +++ b/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/md5 @@ -0,0 +1 @@ +f0e62f7b63dc9400caa2fec1b91b7889 diff --git a/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/sha512 b/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/sha512 new file mode 100644 index 0000000000000..c92e62d861633 --- /dev/null +++ b/deps/checksums/Pkg-e8197dd0ed8132d4a7619f3657363c8415249c47.tar.gz/sha512 @@ -0,0 +1 @@ +e48ee01791f58d41715fd44e16238d835315e930d3ef529dd3f3b5660935f7f0ca2c5163ec9c4e4d90e4ead5328f39e0bfffa88223c2094c8727460eac022cc1 diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version index dd65fe3340b77..6551c7e24049f 100644 --- a/stdlib/Pkg.version +++ b/stdlib/Pkg.version @@ -1,4 +1,4 @@ PKG_BRANCH = master -PKG_SHA1 = 4de1826bcb49b8576603ba6a22ed1c6fecc66a10 +PKG_SHA1 = 
e8197dd0ed8132d4a7619f3657363c8415249c47 PKG_GIT_URL := https://github.com/JuliaLang/Pkg.jl.git PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1 From ecca2c5a2f0719a306c44628736c8f6c8ac6453f Mon Sep 17 00:00:00 2001 From: pchintalapudi <34727397+pchintalapudi@users.noreply.github.com> Date: Sun, 2 Jul 2023 13:10:38 +0000 Subject: [PATCH 281/290] Expose PassBuilder callback registration via C api (#50390) --- src/codegen-stubs.c | 2 ++ src/jl_exported_funcs.inc | 1 + src/pipeline.cpp | 17 +++++++++++------ 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c index 9f0250d1f14f9..de5f2a2770c04 100644 --- a/src/codegen-stubs.c +++ b/src/codegen-stubs.c @@ -112,6 +112,8 @@ JL_DLLEXPORT void jl_add_optimization_passes_fallback(void *PM, int opt_level, i JL_DLLEXPORT void jl_build_newpm_pipeline_fallback(void *MPM, void *PB, int Speedup, int Size, int lower_intrinsics, int dump_native, int external_use, int llvm_only) UNAVAILABLE +JL_DLLEXPORT void jl_register_passbuilder_callbacks_fallback(void *PB) { } + JL_DLLEXPORT void LLVMExtraAddLowerSimdLoopPass_fallback(void *PM) UNAVAILABLE JL_DLLEXPORT void LLVMExtraAddFinalLowerGCPass_fallback(void *PM) UNAVAILABLE diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 33b431fe12a76..fd824131bdbda 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -559,6 +559,7 @@ YY(jl_get_libllvm) \ YY(jl_add_optimization_passes) \ YY(jl_build_newpm_pipeline) \ + YY(jl_register_passbuilder_callbacks) \ YY(LLVMExtraAddLowerSimdLoopPass) \ YY(LLVMExtraAddFinalLowerGCPass) \ YY(LLVMExtraAddPropagateJuliaAddrspaces) \ diff --git a/src/pipeline.cpp b/src/pipeline.cpp index 3b434a33a4ca4..ca5992b6f3135 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -680,7 +680,7 @@ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); return PIC; } - FunctionAnalysisManager createFAM(OptimizationLevel O, TargetIRAnalysis analysis, 
const Triple &triple) JL_NOTSAFEPOINT { + FunctionAnalysisManager createFAM(OptimizationLevel O, TargetMachine &TM) JL_NOTSAFEPOINT { FunctionAnalysisManager FAM; // Register the AA manager first so that our version is the one used. @@ -691,12 +691,12 @@ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); AA.registerFunctionAnalysis(); AA.registerFunctionAnalysis(); } - // TM->registerDefaultAliasAnalyses(AA); + TM.registerDefaultAliasAnalyses(AA); return AA; }); // Register our TargetLibraryInfoImpl. - FAM.registerPass([&] JL_NOTSAFEPOINT { return llvm::TargetIRAnalysis(analysis); }); - FAM.registerPass([&] JL_NOTSAFEPOINT { return llvm::TargetLibraryAnalysis(llvm::TargetLibraryInfoImpl(triple)); }); + FAM.registerPass([&] JL_NOTSAFEPOINT { return llvm::TargetIRAnalysis(TM.getTargetIRAnalysis()); }); + FAM.registerPass([&] JL_NOTSAFEPOINT { return llvm::TargetLibraryAnalysis(llvm::TargetLibraryInfoImpl(TM.getTargetTriple())); }); return FAM; } @@ -714,7 +714,7 @@ NewPM::NewPM(std::unique_ptr TM, OptimizationLevel O, Optimizatio NewPM::~NewPM() = default; -AnalysisManagers::AnalysisManagers(TargetMachine &TM, PassBuilder &PB, OptimizationLevel O) : LAM(), FAM(createFAM(O, TM.getTargetIRAnalysis(), TM.getTargetTriple())), CGAM(), MAM() { +AnalysisManagers::AnalysisManagers(TargetMachine &TM, PassBuilder &PB, OptimizationLevel O) : LAM(), FAM(createFAM(O, TM)), CGAM(), MAM() { PB.registerLoopAnalyses(LAM); PB.registerFunctionAnalyses(FAM); PB.registerCGSCCAnalyses(CGAM); @@ -819,7 +819,7 @@ static llvm::Optional> parseJu // NOTE: Instead of exporting all the constructors in passes.h we could // forward the callbacks to the respective passes. LLVM seems to prefer this, // and when we add the full pass builder having them directly will be helpful. 
-void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { +static void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { auto PIC = PB.getPassInstrumentationCallbacks(); if (PIC) { adjustPIC(*PIC); @@ -899,6 +899,11 @@ void registerCallbacks(PassBuilder &PB) JL_NOTSAFEPOINT { }); } +extern "C" JL_DLLEXPORT_CODEGEN +void jl_register_passbuilder_callbacks_impl(void *PB) JL_NOTSAFEPOINT { + registerCallbacks(*static_cast(PB)); +} + extern "C" JL_DLLEXPORT_CODEGEN ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() JL_NOTSAFEPOINT { return {LLVM_PLUGIN_API_VERSION, "Julia", "1", registerCallbacks}; From 43bf2c8ac7f2964fed77acb26b0a632d46e61a1b Mon Sep 17 00:00:00 2001 From: Diogo Netto <61364108+d-netto@users.noreply.github.com> Date: Sun, 2 Jul 2023 19:27:41 -0300 Subject: [PATCH 282/290] ensure GC_FINAL_STATS is consistent with new page metadata layout (#50374) * ensure GC_FINAL_STATS is consistent with new page metadata layout --- src/gc-debug.c | 48 ++++++++++++++++++++---------------------------- src/gc-pages.c | 8 ++++---- src/gc.c | 1 + src/gc.h | 7 ++++++- 4 files changed, 31 insertions(+), 33 deletions(-) diff --git a/src/gc-debug.c b/src/gc-debug.c index d6cb89f574022..bab2c5b0fa607 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -744,45 +744,37 @@ void gc_final_pause_end(int64_t t0, int64_t tend) static void gc_stats_pagetable0(pagetable0_t *pagetable0, unsigned *p0) { - for (int pg_i = 0; pg_i < REGION0_PG_COUNT / 32; pg_i++) { - uint32_t line = pagetable0->allocmap[pg_i] | pagetable0->freemap[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - (*p0)++; - } - } + for (int pg_i = 0; pg_i < REGION0_PG_COUNT; pg_i++) { + uint8_t meta = pagetable0->meta[pg_i]; + assert(meta == GC_PAGE_UNMAPPED || meta == GC_PAGE_ALLOCATED || + meta == GC_PAGE_LAZILY_FREED || meta == GC_PAGE_FREED); + if (meta != GC_PAGE_UNMAPPED) { + (*p0)++; } } } static void gc_stats_pagetable1(pagetable1_t *pagetable1, unsigned *p1, unsigned *p0) { - 
for (int pg_i = 0; pg_i < REGION1_PG_COUNT / 32; pg_i++) { - uint32_t line = pagetable1->allocmap0[pg_i] | pagetable1->freemap0[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - (*p1)++; - gc_stats_pagetable0(pagetable1->meta0[pg_i * 32 + j], p0); - } - } + for (int pg_i = 0; pg_i < REGION1_PG_COUNT; pg_i++) { + pagetable0_t *pagetable0 = pagetable1->meta0[pg_i]; + if (pagetable0 == NULL) { + continue; } + (*p1)++; + gc_stats_pagetable0(pagetable0, p0); } } static void gc_stats_pagetable(unsigned *p2, unsigned *p1, unsigned *p0) { - for (int pg_i = 0; pg_i < (REGION2_PG_COUNT + 31) / 32; pg_i++) { - uint32_t line = memory_map.allocmap1[pg_i] | memory_map.freemap1[pg_i]; - if (line) { - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - (*p2)++; - gc_stats_pagetable1(memory_map.meta1[pg_i * 32 + j], p1, p0); - } - } + for (int pg_i = 0; pg_i < REGION2_PG_COUNT; pg_i++) { + pagetable1_t *pagetable1 = alloc_map.meta1[pg_i]; + if (pagetable1 == NULL) { + continue; } + (*p2)++; + gc_stats_pagetable1(pagetable1, p1, p0); } } @@ -791,7 +783,7 @@ void jl_print_gc_stats(JL_STREAM *s) #ifdef _OS_LINUX_ malloc_stats(); #endif - double ptime = jl_clock_now() - process_t0; + double ptime = jl_hrtime() - process_t0; jl_safe_printf("exec time\t%.5f sec\n", ptime); if (gc_num.pause > 0) { jl_safe_printf("gc time \t%.5f sec (%2.1f%%) in %d (%d full) collections\n", @@ -1012,7 +1004,7 @@ void jl_gc_debug_init(void) #endif #ifdef GC_FINAL_STATS - process_t0 = jl_clock_now(); + process_t0 = jl_hrtime(); #endif } diff --git a/src/gc-pages.c b/src/gc-pages.c index 3cb28d5827b55..682e76611f5d9 100644 --- a/src/gc-pages.c +++ b/src/gc-pages.c @@ -100,7 +100,7 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT // try to get page from `pool_lazily_freed` meta = pop_lf_page_metadata_back(&global_page_pool_lazily_freed); if (meta != NULL) { - gc_alloc_map_set(meta->data, 1); + gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED); // page is 
already mapped return meta; } @@ -108,14 +108,14 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT // try to get page from `pool_clean` meta = pop_lf_page_metadata_back(&global_page_pool_clean); if (meta != NULL) { - gc_alloc_map_set(meta->data, 1); + gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED); goto exit; } // try to get page from `pool_freed` meta = pop_lf_page_metadata_back(&global_page_pool_freed); if (meta != NULL) { - gc_alloc_map_set(meta->data, 1); + gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED); goto exit; } @@ -155,7 +155,7 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT void jl_gc_free_page(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT { void *p = pg->data; - gc_alloc_map_set((char*)p, 0); + gc_alloc_map_set((char*)p, GC_PAGE_FREED); // tell the OS we don't need these pages right now size_t decommit_size = GC_PAGE_SZ; if (GC_PAGE_SZ < jl_page_size) { diff --git a/src/gc.c b/src/gc.c index db4cc16112e46..9fd93b7340d56 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1451,6 +1451,7 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo push_lf_page_metadata_back(&global_page_pool_freed, pg); } else { + gc_alloc_map_set(pg->data, GC_PAGE_LAZILY_FREED); push_lf_page_metadata_back(&global_page_pool_lazily_freed, pg); } #else diff --git a/src/gc.h b/src/gc.h index 891ecbc3dd2f3..b1eee5a1d5bda 100644 --- a/src/gc.h +++ b/src/gc.h @@ -257,6 +257,11 @@ typedef struct { pagetable1_t *meta1[REGION2_PG_COUNT]; } pagetable_t; +#define GC_PAGE_UNMAPPED 0 +#define GC_PAGE_ALLOCATED 1 +#define GC_PAGE_LAZILY_FREED 2 +#define GC_PAGE_FREED 3 + extern pagetable_t alloc_map; STATIC_INLINE uint8_t gc_alloc_map_is_set(char *_data) JL_NOTSAFEPOINT @@ -272,7 +277,7 @@ STATIC_INLINE uint8_t gc_alloc_map_is_set(char *_data) JL_NOTSAFEPOINT if (r0 == NULL) return 0; i = REGION0_INDEX(data); - return r0->meta[i]; + return (r0->meta[i] == GC_PAGE_ALLOCATED); } STATIC_INLINE void gc_alloc_map_set(char *_data, uint8_t v) 
JL_NOTSAFEPOINT From 05c42ddc757a0aa2b308f4edb2aa01f9c905ca14 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Mon, 3 Jul 2023 05:09:40 +0000 Subject: [PATCH 283/290] Notify GC when loading pkg image --- src/staticdata.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/staticdata.c b/src/staticdata.c index 6b21b2f80437d..452e4380deb02 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -3540,6 +3540,7 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1); size_t *plen; jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1); + jl_gc_notify_image_load(pkgimg_data, *plen); jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle); From fdada6c65a0c53de447abb2f331dae081dcb77cf Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Mon, 21 Aug 2023 01:49:01 +0000 Subject: [PATCH 284/290] Fix some build issues --- src/mmtk-gc.c | 7 +++---- src/partr.c | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index e87e7f0e1449f..a390de3ddffd9 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -350,8 +350,6 @@ void jl_gc_init(void) max_heap_size = uv_get_free_memory() * 70 / 100; } - // If the two values are the same, we can use either. Otherwise, we need to be careful. - assert(jl_n_gcthreads == jl_options.ngcthreads); // Check that the julia_copy_stack rust feature has been defined when the COPY_STACK has been defined int copy_stacks; @@ -366,11 +364,12 @@ void jl_gc_init(void) // if only max size is specified initialize MMTk with a fixed size heap // TODO: We just assume mark threads means GC threads, and ignore the number of concurrent sweep threads. + // If the two values are the same, we can use either. Otherwise, we need to be careful. 
uintptr_t gcthreads = jl_options.nmarkthreads; if (max_size_def != NULL || (max_size_gb != NULL && (min_size_def == NULL && min_size_gb == NULL))) { - mmtk_gc_init(0, max_heap_size, jl_options.ngcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag); + mmtk_gc_init(0, max_heap_size, gcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag); } else { - mmtk_gc_init(min_heap_size, max_heap_size, jl_options.ngcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag); + mmtk_gc_init(min_heap_size, max_heap_size, gcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag); } } diff --git a/src/partr.c b/src/partr.c index bfdc4ed727973..37cf9ca310d24 100644 --- a/src/partr.c +++ b/src/partr.c @@ -168,6 +168,20 @@ void jl_gc_sweep_threadfun(void *arg) } } +#else + +// gc thread mark function +void jl_gc_mark_threadfun(void *arg) +{ + mmtk_unreachable(); +} + +// gc thread sweep function +void jl_gc_sweep_threadfun(void *arg) +{ + mmtk_unreachable(); +} + #endif // thread function: used by all mutator threads except the main thread From 27fc1013a130f7da2ae7f47b69763c4455bb405c Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Tue, 22 Aug 2023 02:26:48 +0000 Subject: [PATCH 285/290] Apply lock before schedule finalizers --- src/gc-common.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/gc-common.c b/src/gc-common.c index 38f737ada576f..80365ec5e4a97 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -339,12 +339,18 @@ void jl_gc_run_all_finalizers(jl_task_t *ct) jl_ptls_t* gc_all_tls_states; gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + // this is called from `jl_atexit_hook`; threads could still be running + // so we have to guard the finalizers' lists + JL_LOCK_NOGC(&finalizers_lock); schedule_all_finalizers(&finalizer_list_marked); for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; if (ptls2 != NULL) 
schedule_all_finalizers(&ptls2->finalizers); } + // this is called from `jl_atexit_hook`; threads could still be running + // so we have to guard the finalizers' lists + JL_LOCK_NOGC(&finalizers_lock); gc_n_threads = 0; gc_all_tls_states = NULL; run_finalizers(ct); From e591ad86d475323b2079fc71f99a74ba0750a0cc Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 22 Aug 2023 01:40:26 +0000 Subject: [PATCH 286/290] Fixing issue when setting up the number of stock GC threads - it should be 0 when using MMTk --- src/mmtk-gc.c | 3 +++ src/threading.c | 11 ++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index a390de3ddffd9..8e87860c7b6ab 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -350,6 +350,9 @@ void jl_gc_init(void) max_heap_size = uv_get_free_memory() * 70 / 100; } + // when using mmtk, we don't spawn any stock GC thread + // and mmtk should use jl_options.ngcthreads to set the number of workers + assert(jl_n_gcthreads == 0); // Check that the julia_copy_stack rust feature has been defined when the COPY_STACK has been defined int copy_stacks; diff --git a/src/threading.c b/src/threading.c index 78ecdcc98ae21..d1157a02dada0 100644 --- a/src/threading.c +++ b/src/threading.c @@ -694,6 +694,12 @@ void jl_init_threading(void) } int16_t ngcthreads = jl_n_markthreads + jl_n_sweepthreads; +#ifdef MMTK_GC + // MMTk gets the number of GC threads from jl_options.ngcthreads, and spawn its GC threads. + // So we just set ngcthreads to 0 here to avoid spawning any GC threads in Julia. + ngcthreads = 0; +#endif + jl_all_tls_states_size = nthreads + nthreadsi + ngcthreads; jl_n_threads_per_pool = (int*)malloc_s(2 * sizeof(int)); jl_n_threads_per_pool[0] = nthreadsi; @@ -711,11 +717,6 @@ void jl_start_threads(void) { int nthreads = jl_atomic_load_relaxed(&jl_n_threads); int ngcthreads = jl_n_gcthreads; -#ifdef MMTK_GC - // MMTk gets the number of GC threads from jl_options.ngcthreads, and spawn its GC threads. 
- // So we just set ngcthreads to 0 here to avoid spawning any GC threads in Julia. - ngcthreads = 0; -#endif int cpumasksize = uv_cpumask_size(); char *cp; int i, exclusive; From 00bab46345dd2fc8dd73d9a94ff7aa57ddd90e62 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 9 Aug 2023 01:15:56 +0000 Subject: [PATCH 287/290] Fixing issue that prevented building Julia or running tests with Julia's debug build --- Makefile | 2 +- src/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 895dbe8100b82..d5cce165dc596 100644 --- a/Makefile +++ b/Makefile @@ -119,7 +119,7 @@ check-whitespace: ifneq ($(NO_GIT), 1) @# Append the directory containing the julia we just built to the end of `PATH`, @# to give us the best chance of being able to run this check. - @PATH="$(PATH):$(dir $(JULIA_EXECUTABLE))" julia $(call cygpath_w,$(JULIAHOME)/contrib/check-whitespace.jl) + @PATH="$(PATH):$(dir $(JULIA_EXECUTABLE))" $(JULIA_EXECUTABLE) $(call cygpath_w,$(JULIAHOME)/contrib/check-whitespace.jl) else $(warn "Skipping whitespace check because git is unavailable") endif diff --git a/src/Makefile b/src/Makefile index 41629e5a686bf..5ea0a3d5cf76b 100644 --- a/src/Makefile +++ b/src/Makefile @@ -179,7 +179,7 @@ DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj) ifeq ($(WITH_MMTK), 1) MMTK_SRCS := mmtk_julia MMTK_OBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.o) $(MMTK_LIB_DST) -MMTK_DOBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.dbg.obj) +MMTK_DOBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.dbg.obj) $(MMTK_LIB_DST) else MMTK_OBJS := MMTK_DOBJS := From 8eab37df935555155a919fcbf0a55b9b4a0fa9f0 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Mon, 28 Aug 2023 02:04:39 +0000 Subject: [PATCH 288/290] Add write barrier for excstack update --- src/julia_internal.h | 2 +- src/rtutils.c | 7 ++++--- src/task.c | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/julia_internal.h b/src/julia_internal.h index ed8e40bca4b01..737553ec98845 100644 --- 
a/src/julia_internal.h +++ b/src/julia_internal.h @@ -1264,7 +1264,7 @@ STATIC_INLINE size_t jl_excstack_next(jl_excstack_t *stack, size_t itr) JL_NOTSA return itr-2 - jl_excstack_bt_size(stack, itr); } // Exception stack manipulation -void jl_push_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT, +void jl_push_excstack(jl_task_t* task, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT, jl_value_t *exception JL_ROOTED_ARGUMENT, jl_bt_element_t *bt_data, size_t bt_size); diff --git a/src/rtutils.c b/src/rtutils.c index 01ea11014a6db..7a31d37e4175c 100644 --- a/src/rtutils.c +++ b/src/rtutils.c @@ -320,7 +320,7 @@ static void jl_copy_excstack(jl_excstack_t *dest, jl_excstack_t *src) JL_NOTSAFE dest->top = src->top; } -static void jl_reserve_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT, +static void jl_reserve_excstack(jl_task_t* task, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT, size_t reserved_size) { jl_excstack_t *s = *stack; @@ -334,13 +334,14 @@ static void jl_reserve_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT, if (s) jl_copy_excstack(new_s, s); *stack = new_s; + jl_gc_wb(task, new_s); } -void jl_push_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT, +void jl_push_excstack(jl_task_t* task, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT, jl_value_t *exception JL_ROOTED_ARGUMENT, jl_bt_element_t *bt_data, size_t bt_size) { - jl_reserve_excstack(stack, (*stack ? (*stack)->top : 0) + bt_size + 2); + jl_reserve_excstack(task, stack, (*stack ? 
(*stack)->top : 0) + bt_size + 2); jl_excstack_t *s = *stack; jl_bt_element_t *rawstack = jl_excstack_raw(s); memcpy(rawstack + s->top, bt_data, sizeof(jl_bt_element_t)*bt_size); diff --git a/src/task.c b/src/task.c index 1dab8688cb079..73d9033f0cb50 100644 --- a/src/task.c +++ b/src/task.c @@ -721,7 +721,7 @@ JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e, jl_task_t *ct) /* The temporary ptls->bt_data is rooted by special purpose code in the\ GC. This exists only for the purpose of preserving bt_data until we \ set ptls->bt_size=0 below. */ \ - jl_push_excstack(&ct->excstack, exception, \ + jl_push_excstack(ct, &ct->excstack, exception, \ ptls->bt_data, ptls->bt_size); \ ptls->bt_size = 0; \ } \ @@ -1224,7 +1224,7 @@ CFI_NORETURN jl_timing_block_task_enter(ct, ptls, NULL); if (jl_atomic_load_relaxed(&ct->_isexception)) { record_backtrace(ptls, 0); - jl_push_excstack(&ct->excstack, ct->result, + jl_push_excstack(ct, &ct->excstack, ct->result, ptls->bt_data, ptls->bt_size); res = ct->result; } From d0cbd133727fb0135826ba09128f259aaf34d403 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Tue, 29 Aug 2023 02:30:35 +0000 Subject: [PATCH 289/290] Revert "Apply lock before schedule finalizers" This reverts commit 27fc1013a130f7da2ae7f47b69763c4455bb405c. 
--- src/gc-common.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 80365ec5e4a97..38f737ada576f 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -339,18 +339,12 @@ void jl_gc_run_all_finalizers(jl_task_t *ct) jl_ptls_t* gc_all_tls_states; gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); - // this is called from `jl_atexit_hook`; threads could still be running - // so we have to guard the finalizers' lists - JL_LOCK_NOGC(&finalizers_lock); schedule_all_finalizers(&finalizer_list_marked); for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; if (ptls2 != NULL) schedule_all_finalizers(&ptls2->finalizers); } - // this is called from `jl_atexit_hook`; threads could still be running - // so we have to guard the finalizers' lists - JL_LOCK_NOGC(&finalizers_lock); gc_n_threads = 0; gc_all_tls_states = NULL; run_finalizers(ct); From 8d0d8b5db22469d2d3f4a0a65af635f67d7701e3 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Tue, 29 Aug 2023 05:51:04 +0000 Subject: [PATCH 290/290] Revert "Fixing issue when setting up the number of stock GC threads - it should be 0 when using MMTk" This reverts commit e591ad86d475323b2079fc71f99a74ba0750a0cc. 
--- src/mmtk-gc.c | 3 --- src/threading.c | 11 +++++------ 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c index 8e87860c7b6ab..a390de3ddffd9 100644 --- a/src/mmtk-gc.c +++ b/src/mmtk-gc.c @@ -350,9 +350,6 @@ void jl_gc_init(void) max_heap_size = uv_get_free_memory() * 70 / 100; } - // when using mmtk, we don't spawn any stock GC thread - // and mmtk should use jl_options.ngcthreads to set the number of workers - assert(jl_n_gcthreads == 0); // Check that the julia_copy_stack rust feature has been defined when the COPY_STACK has been defined int copy_stacks; diff --git a/src/threading.c b/src/threading.c index d1157a02dada0..78ecdcc98ae21 100644 --- a/src/threading.c +++ b/src/threading.c @@ -694,12 +694,6 @@ void jl_init_threading(void) } int16_t ngcthreads = jl_n_markthreads + jl_n_sweepthreads; -#ifdef MMTK_GC - // MMTk gets the number of GC threads from jl_options.ngcthreads, and spawn its GC threads. - // So we just set ngcthreads to 0 here to avoid spawning any GC threads in Julia. - ngcthreads = 0; -#endif - jl_all_tls_states_size = nthreads + nthreadsi + ngcthreads; jl_n_threads_per_pool = (int*)malloc_s(2 * sizeof(int)); jl_n_threads_per_pool[0] = nthreadsi; @@ -717,6 +711,11 @@ void jl_start_threads(void) { int nthreads = jl_atomic_load_relaxed(&jl_n_threads); int ngcthreads = jl_n_gcthreads; +#ifdef MMTK_GC + // MMTk gets the number of GC threads from jl_options.ngcthreads, and spawn its GC threads. + // So we just set ngcthreads to 0 here to avoid spawning any GC threads in Julia. + ngcthreads = 0; +#endif int cpumasksize = uv_cpumask_size(); char *cp; int i, exclusive;