From bce3d4d1a1cc2da737eb18dc1efe24256d6fe4cb Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 6 Nov 2024 10:29:01 -0500 Subject: [PATCH] Allow taking Matrix slices without an extra allocation (#56236) Since changing Array to use Memory as the backing, we had the option of making non-Vector arrays more flexible, but had instead preserved the restriction that they must be zero offset and equal in length to the Memory. This results in extra complexity, restrictions, and allocations however, but doesn't gain many known benefits. Nanosoldier shows a decrease in performance on linear eachindex loops, which we theorize is due to a minor failure to CSE before SCEV or a lack of NUW/NSW on the length multiplication calculation. --- base/Base.jl | 8 ++++---- base/abstractarray.jl | 2 +- base/array.jl | 4 ---- base/essentials.jl | 3 ++- base/reshapedarray.jl | 32 +++++++++++++++++--------------- src/codegen.cpp | 15 +-------------- src/genericmemory.c | 35 +---------------------------------- src/julia.h | 6 +----- src/staticdata.c | 24 +++++++++--------------- 9 files changed, 36 insertions(+), 93 deletions(-) diff --git a/base/Base.jl b/base/Base.jl index 3b56dca166cee..874cec56329d1 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -354,14 +354,14 @@ include("set.jl") include("char.jl") function array_new_memory(mem::Memory{UInt8}, newlen::Int) # add an optimization to array_new_memory for StringVector - if (@assume_effects :total @ccall jl_genericmemory_owner(mem::Any,)::Any) isa String + if (@assume_effects :total @ccall jl_genericmemory_owner(mem::Any,)::Any) === mem + # TODO: when implemented, this should use a memory growing call + return typeof(mem)(undef, newlen) + else # If data is in a String, keep it that way. # When implemented, this could use jl_gc_expand_string(oldstr, newlen) as an optimization str = _string_n(newlen) return (@assume_effects :total !:consistent @ccall jl_string_to_genericmemory(str::Any,)::Memory{UInt8}) - else - # TODO: when implemented, this should use a memory growing call - return typeof(mem)(undef, newlen) end end include("strings/basic.jl") diff --git a/base/abstractarray.jl b/base/abstractarray.jl index cbbae8e852b2e..5413f4e177518 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1584,7 +1584,7 @@ their component parts. A typical definition for an array that wraps a parent is `Base.dataids(C::CustomArray) = dataids(C.parent)`. """ dataids(A::AbstractArray) = (UInt(objectid(A)),) -dataids(A::Memory) = (B = ccall(:jl_genericmemory_owner, Any, (Any,), A); (UInt(pointer(B isa typeof(A) ? B : A)),)) +dataids(A::Memory) = (UInt(A.ptr),) dataids(A::Array) = dataids(A.ref.mem) dataids(::AbstractRange) = () dataids(x) = () diff --git a/base/array.jl b/base/array.jl index 40907b2b00317..7a9649f20dded 100644 --- a/base/array.jl +++ b/base/array.jl @@ -3141,10 +3141,6 @@ function _wrap(ref::MemoryRef{T}, dims::NTuple{N, Int}) where {T, N} mem_len = length(mem) + 1 - memoryrefoffset(ref) len = Core.checked_dims(dims...) @boundscheck mem_len >= len || invalid_wrap_err(mem_len, dims, len) - if N != 1 && !(ref === GenericMemoryRef(mem) && len === mem_len) - mem = ccall(:jl_genericmemory_slice, Memory{T}, (Any, Ptr{Cvoid}, Int), mem, ref.ptr_or_offset, len) - ref = memoryref(mem) - end return ref end diff --git a/base/essentials.jl b/base/essentials.jl index 750ee0f9c434c..64fbaea95d4e7 100644 --- a/base/essentials.jl +++ b/base/essentials.jl @@ -9,7 +9,8 @@ const Bottom = Union{} # Define minimal array interface here to help code used in macros: length(a::Array{T, 0}) where {T} = 1 length(a::Array{T, 1}) where {T} = getfield(a, :size)[1] -length(a::Array) = getfield(getfield(getfield(a, :ref), :mem), :length) +length(a::Array{T, 2}) where {T} = (sz = getfield(a, :size); sz[1] * sz[2]) +# other sizes are handled by generic prod definition for AbstractArray length(a::GenericMemory) = getfield(a, :length) throw_boundserror(A, I) = (@noinline; throw(BoundsError(A, I))) diff --git a/base/reshapedarray.jl b/base/reshapedarray.jl index 019f1d30a25c2..07f608588837b 100644 --- a/base/reshapedarray.jl +++ b/base/reshapedarray.jl @@ -35,31 +35,33 @@ end length(R::ReshapedArrayIterator) = length(R.iter) eltype(::Type{<:ReshapedArrayIterator{I}}) where {I} = @isdefined(I) ? ReshapedIndex{eltype(I)} : Any -## reshape(::Array, ::Dims) returns an Array, except for isbitsunion eltypes (issue #28611) +@noinline throw_dmrsa(dims, len) = + throw(DimensionMismatch("new dimensions $(dims) must be consistent with array length $len")) + +## reshape(::Array, ::Dims) returns a new Array (to avoid conditionally aliasing the structure, only the data) # reshaping to same # of dimensions @eval function reshape(a::Array{T,M}, dims::NTuple{N,Int}) where {T,N,M} - throw_dmrsa(dims, len) = - throw(DimensionMismatch("new dimensions $(dims) must be consistent with array length $len")) len = Core.checked_dims(dims...) # make sure prod(dims) doesn't overflow (and because of the comparison to length(a)) if len != length(a) throw_dmrsa(dims, length(a)) end - isbitsunion(T) && return ReshapedArray(a, dims, ()) - if N == M && dims == size(a) - return a - end ref = a.ref - if M == 1 && N !== 1 - mem = ref.mem::Memory{T} - if !(ref === memoryref(mem) && len === mem.length) - mem = ccall(:jl_genericmemory_slice, Memory{T}, (Any, Ptr{Cvoid}, Int), mem, ref.ptr_or_offset, len) - ref = memoryref(mem)::typeof(ref) - end - end - # or we could use `a = Array{T,N}(undef, ntuple(0, Val(N))); a.ref = ref; a.size = dims; return a` here + # or we could use `a = Array{T,N}(undef, ntuple(i->0, Val(N))); a.ref = ref; a.size = dims; return a` here to avoid the eval return $(Expr(:new, :(Array{T,N}), :ref, :dims)) end +## reshape!(::Array, ::Dims) returns the original array, but must have the same dimensions and length as the original +# see also resize! for a similar operation that can change the length +function reshape!(a::Array{T,N}, dims::NTuple{N,Int}) where {T,N} + len = Core.checked_dims(dims...) # make sure prod(dims) doesn't overflow (and because of the comparison to length(a)) + if len != length(a) + throw_dmrsa(dims, length(a)) + end + setfield!(a, :dims, dims) + return a +end + + """ reshape(A, dims...) -> AbstractArray diff --git a/src/codegen.cpp b/src/codegen.cpp index e2cccafd42e5f..e2bc8fe6e43d1 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1435,18 +1435,6 @@ static const auto jl_allocgenericmemory = new JuliaFunction{ - XSTR(jl_array_data_owner), - [](LLVMContext &C) { - auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C); - return FunctionType::get(T_prjlvalue, - {T_prjlvalue}, false); - }, - [](LLVMContext &C) { return AttributeList::get(C, - Attributes(C, {Attribute::ReadOnly, Attribute::NoUnwind}), - Attributes(C, {Attribute::NonNull}), - None); }, -}; #define BOX_FUNC(ct,at,attrs,nbytes) \ static const auto box_##ct##_func = new JuliaFunction<>{ \ XSTR(jl_box_##ct), \ @@ -4341,8 +4329,7 @@ static bool emit_f_opmemory(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, } Value *data_owner = NULL; // owner object against which the write barrier must check if (isboxed || layout->first_ptr >= 0) { // if elements are just bits, don't need a write barrier - Value *V = emit_memoryref_FCA(ctx, ref, layout); - data_owner = emit_genericmemoryowner(ctx, CreateSimplifiedExtractValue(ctx, V, 1)); + data_owner = emit_memoryref_mem(ctx, ref, layout); } *ret = typed_store(ctx, ptr, diff --git a/src/genericmemory.c b/src/genericmemory.c index c310eb829e198..0bf2cf46edaae 100644 --- a/src/genericmemory.c +++ b/src/genericmemory.c @@ -197,7 +197,7 @@ JL_DLLEXPORT jl_value_t *jl_genericmemory_to_string(jl_genericmemory_t *m, size_ if (how != 0) { jl_value_t *o = jl_genericmemory_data_owner_field(m); jl_genericmemory_data_owner_field(m) = NULL; - if (how == 3 && jl_is_string(o) && + if (how == 3 && // implies jl_is_string(o) ((mlength + sizeof(void*) + 1 <= GC_MAX_SZCLASS) == (len + sizeof(void*) + 1 <= GC_MAX_SZCLASS))) { if (jl_string_data(o)[len] != '\0') jl_string_data(o)[len] = '\0'; @@ -221,39 +221,6 @@ JL_DLLEXPORT jl_genericmemory_t *jl_alloc_memory_any(size_t n) return jl_alloc_genericmemory(jl_memory_any_type, n); } -JL_DLLEXPORT jl_genericmemory_t *jl_genericmemory_slice(jl_genericmemory_t *mem, void *data, size_t len) -{ - // Given a GenericMemoryRef represented as `jl_genericmemory_ref ref = {data, mem}`, - // return a new GenericMemory that only accesses the slice from the given GenericMemoryRef to - // the given length if this is possible to return. This allows us to make - // `length(Array)==length(Array.ref.mem)`, for simplification of this. - jl_datatype_t *dt = (jl_datatype_t*)jl_typetagof(mem); - const jl_datatype_layout_t *layout = dt->layout; - // repeated checks here ensure the values cannot overflow, since we know mem->length is a reasonable value - if (len > mem->length) - jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory slice"); // TODO: make a BoundsError - if (layout->flags.arrayelem_isunion) { - if (!((size_t)data == 0 && mem->length == len)) - jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory slice"); // only exact slices are supported - data = mem->ptr; - } - else if (layout->size == 0) { - if ((size_t)data > mem->length || (size_t)data + len > mem->length) - jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory slice"); // TODO: make a BoundsError - data = mem->ptr; - } - else { - if (data < mem->ptr || (char*)data > (char*)mem->ptr + mem->length * layout->size || (char*)data + len * layout->size > (char*)mem->ptr + mem->length * layout->size) - jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory slice"); // TODO: make a BoundsError - } - jl_task_t *ct = jl_current_task; - jl_genericmemory_t *newmem = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, sizeof(jl_genericmemory_t) + sizeof(void*), dt); - newmem->length = len; - newmem->ptr = data; - jl_genericmemory_data_owner_field(newmem) = jl_genericmemory_owner(mem); - return newmem; -} - JL_DLLEXPORT void jl_genericmemory_copyto(jl_genericmemory_t *dest, char* destdata, jl_genericmemory_t *src, char* srcdata, size_t n) JL_NOTSAFEPOINT diff --git a/src/julia.h b/src/julia.h index 5b9986a5e68ee..301650540a15c 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1233,7 +1233,7 @@ STATIC_INLINE jl_value_t *jl_svecset( 0 = data is inlined 1 = owns the gc-managed data, exclusively (will free it) 2 = malloc-allocated pointer (does not own it) - 3 = has a pointer to the object that owns the data pointer + 3 = has a pointer to the String object that owns the data pointer (m must be isbits) */ STATIC_INLINE int jl_genericmemory_how(jl_genericmemory_t *m) JL_NOTSAFEPOINT { @@ -1249,8 +1249,6 @@ STATIC_INLINE int jl_genericmemory_how(jl_genericmemory_t *m) JL_NOTSAFEPOINT STATIC_INLINE jl_value_t *jl_genericmemory_owner(jl_genericmemory_t *m JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT { - if (jl_genericmemory_how(m) == 3) - return jl_genericmemory_data_owner_field(m); return (jl_value_t*)m; } @@ -1280,8 +1278,6 @@ STATIC_INLINE jl_value_t *jl_genericmemory_ptr_set( assert(i < m_->length); jl_atomic_store_release(((_Atomic(jl_value_t*)*)(m_->ptr)) + i, (jl_value_t*)x); if (x) { - if (jl_genericmemory_how(m_) == 3) - m = (void*)jl_genericmemory_data_owner_field(m_); jl_gc_wb(m, x); } return (jl_value_t*)x; diff --git a/src/staticdata.c b/src/staticdata.c index 0d609db03aebc..decc0ce6570aa 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -932,7 +932,7 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ jl_genericmemory_t *m = (jl_genericmemory_t*)v; const char *data = (const char*)m->ptr; if (jl_genericmemory_how(m) == 3) { - jl_queue_for_serialization_(s, jl_genericmemory_data_owner_field(v), 1, immediate); + assert(jl_is_string(jl_genericmemory_data_owner_field(m))); } else if (layout->flags.arrayelem_isboxed) { size_t i, l = m->length; @@ -1472,17 +1472,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED jl_genericmemory_t *m = (jl_genericmemory_t*)v; const jl_datatype_layout_t *layout = t->layout; size_t len = m->length; - if (jl_genericmemory_how(m) == 3 && jl_is_genericmemory(jl_genericmemory_data_owner_field(m))) { - jl_genericmemory_t *owner = (jl_genericmemory_t*)jl_genericmemory_data_owner_field(m); - size_t data = ((char*)m->ptr - (char*)owner->ptr); // relocation offset (bytes) - write_uint(f, len); - write_uint(f, data); - write_pointerfield(s, (jl_value_t*)owner); - // similar to record_memoryref, but the field is always an (offset) pointer - arraylist_push(&s->memowner_list, (void*)(reloc_offset + offsetof(jl_genericmemory_t, ptr))); // relocation location - arraylist_push(&s->memowner_list, NULL); // relocation target (ignored) - } - // else if (jl_genericmemory_how(m) == 3) { + // if (jl_genericmemory_how(m) == 3) { // jl_value_t *owner = jl_genericmemory_data_owner_field(m); // write_uint(f, len); // write_pointerfield(s, owner); @@ -1491,7 +1481,8 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED // assert(new_mem->ptr == NULL); // new_mem->ptr = (void*)((char*)m->ptr - (char*)owner); // relocation offset // } - else { + // else + { size_t datasize = len * layout->size; size_t tot = datasize; int isbitsunion = layout->flags.arrayelem_isunion; @@ -1538,10 +1529,13 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED ios_write(s->const_data, (char*)m->ptr, tot); } } - if (len == 0) // TODO: should we have a zero-page, instead of writing each type's fragment separately? + if (len == 0) { // TODO: should we have a zero-page, instead of writing each type's fragment separately? write_padding(s->const_data, layout->size ? layout->size : isbitsunion); - else if (jl_genericmemory_how(m) == 3 && jl_is_string(jl_genericmemory_data_owner_field(m))) + } + else if (jl_genericmemory_how(m) == 3) { + assert(jl_is_string(jl_genericmemory_data_owner_field(m))); write_padding(s->const_data, 1); + } } else { // Pointer eltypes are encoded in the mutable data section