-
Notifications
You must be signed in to change notification settings - Fork 10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
copyto!(::Field, ::Field)
performance is 2x slower than parent array
#1403
Labels
bug
Something isn't working
Milestone
Comments
Same is true for using Revise
import ClimaCore;
import ClimaCore.Fields as Fields
using BenchmarkTools;
@isdefined(TU) || include(joinpath(pkgdir(ClimaCore), "test", "TestUtilities", "TestUtilities.jl"));
import .TestUtilities as TU;
FT = Float64;
space = TU.CenterExtrudedFiniteDifferenceSpace(FT;zelem=25, helem=10);
Base.@kwdef struct BigStruct{T}
x1::T = 0
x2::T = 0
end;
x = fill((;s=BigStruct{FT}()), space);
y = fill((;s=BigStruct{FT}()), space);
function bc_arr!(x, y)
Base.copyto!(x, Base.Broadcast.broadcasted(identity, y))
return nothing
end;
function copy_arr_parent!(x, y)
Base.copyto!(parent(x), parent(y))
return nothing
end;
trial = BenchmarkTools.@benchmark bc_arr!($x, $y);
show(stdout, MIME("text/plain"), trial);
trial = BenchmarkTools.@benchmark copy_arr_parent!($x, $y);
show(stdout, MIME("text/plain"), trial);
nothing |
This is related to cartesian indexing: using Revise
import ClimaCore;
import ClimaCore.Fields as Fields
import ClimaCore.DataLayouts as DataLayouts
using BenchmarkTools;
@isdefined(TU) || include(joinpath(pkgdir(ClimaCore), "test", "TestUtilities", "TestUtilities.jl"));
import .TestUtilities as TU;
FT = Float64;
space = TU.CenterExtrudedFiniteDifferenceSpace(FT;zelem=25, helem=10);
Base.@kwdef struct BigStruct{T}
x1::T = 0
x2::T = 0
end;
x = fill((;s=BigStruct{FT}()), space);
y = fill((;s=BigStruct{FT}()), space);
function bc_arr!(x, y)
Base.copyto!(x, Base.Broadcast.broadcasted(identity, y))
return nothing
end;
function copy_arr_parent!(x, y)
Base.copyto!(parent(x), parent(y))
return nothing
end;
function cart_copyto!(x, y)
xp = parent(x)
yp = parent(y)
(Nij, _, _, Nv, Nh) = size(xp)
@inbounds for h in 1:Nh, v in 1:Nv, j in 1:Nij, i in 1:Nij
idx = CartesianIndex((i, j, 1, v, h))
xp[idx] = yp[idx]
end
return nothing
end;
function linear_index_copyto!(x, y)
xp = parent(x)
yp = parent(y)
@inbounds for i in eachindex(xp, yp)
xp[i] = yp[i]
end
return nothing
end;
function reshape_loop!(x, y)
xp = parent(x)
yp = parent(y)
xr = reshape(xp, length(xp))
yr = reshape(yp, length(yp))
@inbounds for idx in 1:length(xr)
xp[idx] = yp[idx]
end
return nothing
end;
trial = BenchmarkTools.@benchmark bc_arr!($x, $y);
show(stdout, MIME("text/plain"), trial);
trial = BenchmarkTools.@benchmark copy_arr_parent!($x, $y);
show(stdout, MIME("text/plain"), trial);
trial = BenchmarkTools.@benchmark cart_copyto!($x, $y);
show(stdout, MIME("text/plain"), trial);
trial = BenchmarkTools.@benchmark linear_index_copyto!($x, $y);
show(stdout, MIME("text/plain"), trial);
trial = BenchmarkTools.@benchmark reshape_loop!($x, $y);
show(stdout, MIME("text/plain"), trial);
nothing |
More notes: using Revise
import ClimaCore;
import ClimaCore.Fields as Fields
import ClimaCore.DataLayouts as DataLayouts
using BenchmarkTools;
@isdefined(TU) || include(joinpath(pkgdir(ClimaCore), "test", "TestUtilities", "TestUtilities.jl"));
import .TestUtilities as TU;
FT = Float64;
has_fast_indexing(::SubArray{T,N,P,I,L}) where {T,N,P,I,L} = L
space = TU.CenterExtrudedFiniteDifferenceSpace(FT;zelem=25, helem=10);
Base.@kwdef struct BigStruct{T}
x1::T = 0
x2::T = 0
x3::T = 0
x4::T = 0
x5::T = 0
x6::T = 0
x7::T = 0
x8::T = 0
x9::T = 0
x10::T = 0
x11::T = 0
x12::T = 0
x13::T = 0
x14::T = 0
x15::T = 0
end;
x = fill((;s=BigStruct{FT}()), space);
y = fill((;s=BigStruct{FT}()), space);
function oranges(a, i)
pa = parent(a)
return ( # Specific to VIJFH
Base.Slice(Base.OneTo(size(pa, 1))),
Base.Slice(Base.OneTo(size(pa, 2))),
Base.Slice(Base.OneTo(size(pa, 3))),
i,
Base.Slice(Base.OneTo(size(pa, 5))),
);
end
px = parent(x);
py = parent(y);
perm = (
1, # Nv
2, # Nij
3, # Nij
5, # Nh
4, # fields
)
fi = findfirst(j->j==4, perm)
pda(a, perm) = permutedims(parent(a), perm)
vpda(a, perm, i) = view(pda(a, perm), ntuple(j->oranges(a, i)[perm[j]], Val(5))...)
pdx = pda(x, perm)
pdy = pda(y, perm)
# @show has_fast_indexing(vpda(x, perm, 10))
# error("Done")
function bc_arr_unfused!(x, y) # Unfused Cartesian
Base.copyto!(x.s.x1, Base.Broadcast.broadcasted(identity, y.s.x1))
Base.copyto!(x.s.x2, Base.Broadcast.broadcasted(identity, y.s.x2))
Base.copyto!(x.s.x3, Base.Broadcast.broadcasted(identity, y.s.x3))
Base.copyto!(x.s.x4, Base.Broadcast.broadcasted(identity, y.s.x4))
Base.copyto!(x.s.x5, Base.Broadcast.broadcasted(identity, y.s.x5))
Base.copyto!(x.s.x6, Base.Broadcast.broadcasted(identity, y.s.x6))
Base.copyto!(x.s.x7, Base.Broadcast.broadcasted(identity, y.s.x7))
Base.copyto!(x.s.x8, Base.Broadcast.broadcasted(identity, y.s.x8))
Base.copyto!(x.s.x9, Base.Broadcast.broadcasted(identity, y.s.x9))
Base.copyto!(x.s.x10, Base.Broadcast.broadcasted(identity, y.s.x10))
Base.copyto!(x.s.x11, Base.Broadcast.broadcasted(identity, y.s.x11))
Base.copyto!(x.s.x12, Base.Broadcast.broadcasted(identity, y.s.x12))
Base.copyto!(x.s.x13, Base.Broadcast.broadcasted(identity, y.s.x13))
Base.copyto!(x.s.x14, Base.Broadcast.broadcasted(identity, y.s.x14))
Base.copyto!(x.s.x15, Base.Broadcast.broadcasted(identity, y.s.x15))
return nothing
end;
function bc_arr_fused!(x, y) # Fused Cartesian
Base.copyto!(x, Base.Broadcast.broadcasted(identity, y))
return nothing
end;
reshaped_sa(a) = reshape(parent(a), length(parent(a)))
function copy_arr_parent_unfused_reshaped_sa!(x, y) # Unfused reshaped subarray (linear, but slow)
Base.copyto!(reshaped_sa(x.s.x1), reshaped_sa(y.s.x1))
Base.copyto!(reshaped_sa(x.s.x2), reshaped_sa(y.s.x2))
Base.copyto!(reshaped_sa(x.s.x3), reshaped_sa(y.s.x3))
Base.copyto!(reshaped_sa(x.s.x4), reshaped_sa(y.s.x4))
Base.copyto!(reshaped_sa(x.s.x5), reshaped_sa(y.s.x5))
Base.copyto!(reshaped_sa(x.s.x6), reshaped_sa(y.s.x6))
Base.copyto!(reshaped_sa(x.s.x7), reshaped_sa(y.s.x7))
Base.copyto!(reshaped_sa(x.s.x8), reshaped_sa(y.s.x8))
Base.copyto!(reshaped_sa(x.s.x9), reshaped_sa(y.s.x9))
Base.copyto!(reshaped_sa(x.s.x10), reshaped_sa(y.s.x10))
Base.copyto!(reshaped_sa(x.s.x11), reshaped_sa(y.s.x11))
Base.copyto!(reshaped_sa(x.s.x12), reshaped_sa(y.s.x12))
Base.copyto!(reshaped_sa(x.s.x13), reshaped_sa(y.s.x13))
Base.copyto!(reshaped_sa(x.s.x14), reshaped_sa(y.s.x14))
Base.copyto!(reshaped_sa(x.s.x15), reshaped_sa(y.s.x15))
return nothing
end;
function copy_arr_parent_unfused_sa!(x, y) # Unfused SubArray
Base.copyto!(parent(x.s.x1), parent(y.s.x1))
Base.copyto!(parent(x.s.x2), parent(y.s.x2))
Base.copyto!(parent(x.s.x3), parent(y.s.x3))
Base.copyto!(parent(x.s.x4), parent(y.s.x4))
Base.copyto!(parent(x.s.x5), parent(y.s.x5))
Base.copyto!(parent(x.s.x6), parent(y.s.x6))
Base.copyto!(parent(x.s.x7), parent(y.s.x7))
Base.copyto!(parent(x.s.x8), parent(y.s.x8))
Base.copyto!(parent(x.s.x9), parent(y.s.x9))
Base.copyto!(parent(x.s.x10), parent(y.s.x10))
Base.copyto!(parent(x.s.x11), parent(y.s.x11))
Base.copyto!(parent(x.s.x12), parent(y.s.x12))
Base.copyto!(parent(x.s.x13), parent(y.s.x13))
Base.copyto!(parent(x.s.x14), parent(y.s.x14))
Base.copyto!(parent(x.s.x15), parent(y.s.x15))
return nothing
end;
function sa_permuted_dims(a, i, perm, pda)
o = oranges(a, i)
return view(pda, ntuple(i->o[perm[i]], Val(5))...)
end
function reshaped_sa_permuted_dims(a, i, perm, pda)
pd = sa_permuted_dims(a, i, perm, pda)
reshape(pd, length(pd))
end
function copy_arr_parent_unfused_sa_permuted_dims!(x, y, perm, pdx, pdy) # Unfused linear
# subarray of reshaped
Base.copyto!(sa_permuted_dims(x, 1, perm, pdx), sa_permuted_dims(y, 1, perm, pdy))
Base.copyto!(sa_permuted_dims(x, 2, perm, pdx), sa_permuted_dims(y, 2, perm, pdy))
Base.copyto!(sa_permuted_dims(x, 3, perm, pdx), sa_permuted_dims(y, 3, perm, pdy))
Base.copyto!(sa_permuted_dims(x, 4, perm, pdx), sa_permuted_dims(y, 4, perm, pdy))
Base.copyto!(sa_permuted_dims(x, 5, perm, pdx), sa_permuted_dims(y, 5, perm, pdy))
Base.copyto!(sa_permuted_dims(x, 6, perm, pdx), sa_permuted_dims(y, 6, perm, pdy))
Base.copyto!(sa_permuted_dims(x, 7, perm, pdx), sa_permuted_dims(y, 7, perm, pdy))
Base.copyto!(sa_permuted_dims(x, 8, perm, pdx), sa_permuted_dims(y, 8, perm, pdy))
Base.copyto!(sa_permuted_dims(x, 9, perm, pdx), sa_permuted_dims(y, 9, perm, pdy))
Base.copyto!(sa_permuted_dims(x, 10, perm, pdx), sa_permuted_dims(y, 10, perm, pdy))
Base.copyto!(sa_permuted_dims(x, 11, perm, pdx), sa_permuted_dims(y, 11, perm, pdy))
Base.copyto!(sa_permuted_dims(x, 12, perm, pdx), sa_permuted_dims(y, 12, perm, pdy))
Base.copyto!(sa_permuted_dims(x, 13, perm, pdx), sa_permuted_dims(y, 13, perm, pdy))
Base.copyto!(sa_permuted_dims(x, 14, perm, pdx), sa_permuted_dims(y, 14, perm, pdy))
Base.copyto!(sa_permuted_dims(x, 15, perm, pdx), sa_permuted_dims(y, 15, perm, pdy))
return nothing
end;
function copy_arr_parent_unfused_reshaped_sa_permuted_dims!(x, y, perm, pdx, pdy) # Unfused linear
# subarray of reshaped
Base.copyto!(reshaped_sa_permuted_dims(x, 1, perm, pdx), reshaped_sa_permuted_dims(y, 1, perm, pdy))
Base.copyto!(reshaped_sa_permuted_dims(x, 2, perm, pdx), reshaped_sa_permuted_dims(y, 2, perm, pdy))
Base.copyto!(reshaped_sa_permuted_dims(x, 3, perm, pdx), reshaped_sa_permuted_dims(y, 3, perm, pdy))
Base.copyto!(reshaped_sa_permuted_dims(x, 4, perm, pdx), reshaped_sa_permuted_dims(y, 4, perm, pdy))
Base.copyto!(reshaped_sa_permuted_dims(x, 5, perm, pdx), reshaped_sa_permuted_dims(y, 5, perm, pdy))
Base.copyto!(reshaped_sa_permuted_dims(x, 6, perm, pdx), reshaped_sa_permuted_dims(y, 6, perm, pdy))
Base.copyto!(reshaped_sa_permuted_dims(x, 7, perm, pdx), reshaped_sa_permuted_dims(y, 7, perm, pdy))
Base.copyto!(reshaped_sa_permuted_dims(x, 8, perm, pdx), reshaped_sa_permuted_dims(y, 8, perm, pdy))
Base.copyto!(reshaped_sa_permuted_dims(x, 9, perm, pdx), reshaped_sa_permuted_dims(y, 9, perm, pdy))
Base.copyto!(reshaped_sa_permuted_dims(x, 10, perm, pdx), reshaped_sa_permuted_dims(y, 10, perm, pdy))
Base.copyto!(reshaped_sa_permuted_dims(x, 11, perm, pdx), reshaped_sa_permuted_dims(y, 11, perm, pdy))
Base.copyto!(reshaped_sa_permuted_dims(x, 12, perm, pdx), reshaped_sa_permuted_dims(y, 12, perm, pdy))
Base.copyto!(reshaped_sa_permuted_dims(x, 13, perm, pdx), reshaped_sa_permuted_dims(y, 13, perm, pdy))
Base.copyto!(reshaped_sa_permuted_dims(x, 14, perm, pdx), reshaped_sa_permuted_dims(y, 14, perm, pdy))
Base.copyto!(reshaped_sa_permuted_dims(x, 15, perm, pdx), reshaped_sa_permuted_dims(y, 15, perm, pdy))
return nothing
end;
reshaped(a) = reshape(parent(a), length(parent(a)))
function copy_arr_parent_fused!(x, y) # Fused linear
Base.copyto!(reshaped(x), reshaped(y))
return nothing
end;
bc_arr_unfused!(x, y)
bc_arr_fused!(x, y)
copy_arr_parent_unfused_reshaped_sa!(x, y)
copy_arr_parent_unfused_sa!(x, y)
copy_arr_parent_unfused_sa_permuted_dims!(x, y, perm, pdx, pdy)
copy_arr_parent_unfused_reshaped_sa_permuted_dims!(x, y, perm, pdx, pdy)
copy_arr_parent_fused!(x, y)
# println("\n******************* bc_arr_unfused")
# trial = BenchmarkTools.@benchmark bc_arr_unfused!($x, $y);
# show(stdout, MIME("text/plain"), trial);
# println("\n******************* bc_arr_fused")
# trial = BenchmarkTools.@benchmark bc_arr_fused!($x, $y);
# show(stdout, MIME("text/plain"), trial);
println("\n******************* copy_arr_parent_unfused_reshaped_sa")
trial = BenchmarkTools.@benchmark copy_arr_parent_unfused_reshaped_sa!($x, $y);
show(stdout, MIME("text/plain"), trial);
println("\n******************* copy_arr_parent_unfused_sa")
trial = BenchmarkTools.@benchmark copy_arr_parent_unfused_sa!($x, $y);
show(stdout, MIME("text/plain"), trial);
println("\n******************* copy_arr_parent_unfused_sa_permuted_dims")
trial = BenchmarkTools.@benchmark copy_arr_parent_unfused_sa_permuted_dims!($x, $y, $perm, $pdx, $pdy);
show(stdout, MIME("text/plain"), trial);
println("\n******************* copy_arr_parent_unfused_reshaped_sa_permuted_dims")
trial = BenchmarkTools.@benchmark copy_arr_parent_unfused_reshaped_sa_permuted_dims!($x, $y, $perm, $pdx, $pdy);
show(stdout, MIME("text/plain"), trial);
println("\n******************* copy_arr_parent_fused")
trial = BenchmarkTools.@benchmark copy_arr_parent_fused!($x, $y);
show(stdout, MIME("text/plain"), trial);
# nothing
#=
@code_typed bc_arr!(x, y)
copy_arr_parent!(x, y)
@code_typed DataLayouts._serial_copyto!(Fields.field_values(x), Base.Broadcast.broadcasted(identity, Fields.field_values(y)))
@code_typed Base.copyto!(parent(x), parent(y))
=# |
For printing permutations: using Revise
import ClimaCore;
import ClimaCore.Fields as Fields
import ClimaCore.DataLayouts as DataLayouts
using BenchmarkTools;
@isdefined(TU) || include(joinpath(pkgdir(ClimaCore), "test", "TestUtilities", "TestUtilities.jl"));
import .TestUtilities as TU;
FT = Float64;
space = TU.CenterExtrudedFiniteDifferenceSpace(FT;zelem=25, helem=10);
Base.@kwdef struct BigStruct{T}
x1::T = 0
x2::T = 0
end;
x = fill((;s=BigStruct{FT}()), space);
y = fill((;s=BigStruct{FT}()), space);
function oranges(a, i)
pa = parent(a)
return ( # Specific to VIJFH
Base.Slice(Base.OneTo(size(pa, 1))),
Base.Slice(Base.OneTo(size(pa, 2))),
Base.Slice(Base.OneTo(size(pa, 3))),
i,
Base.Slice(Base.OneTo(size(pa, 5))),
);
end
px = parent(x);
py = parent(y);
has_fast_indexing(::SubArray{T,N,P,I,L}) where {T,N,P,I,L} = L
pda(a, perm) = permutedims(parent(a), perm);
vpda(a, perm, i) = view(pda(a, perm), ntuple(j->oranges(a, i)[perm[j]], Val(5))...);
##########################################################
##########################################################
##########################################################
# Move dimensions around to see which order supports fast indexing
# perm = (
# 1, # Nv
# 2, # Nij
# 3, # Nij
# 5, # Nh
# 4, # fields
# )
perm = (1,2,3,4,5); println(has_fast_indexing(vpda(x, perm, 10 #=field index 10=#))); # false (what we use)
perm = (1,2,3,5,4); println(has_fast_indexing(vpda(x, perm, 10 #=field index 10=#))); # true (swap field and Nh dims)
perm = (4,1,2,3,5); println(has_fast_indexing(vpda(x, perm, 10 #=field index 10=#))); # true (put field first)
# All other permutations are false
##########################################################
##########################################################
########################################################## This ( |
Solving this was only possible for datalayouts whose field indices are first or last. We fixed this issue for all such (supported) datalayouts in #2055. So I'm going to close this. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
MWE:
The text was updated successfully, but these errors were encountered: