From 35d2f7967cdf21dbb36577dec5505d276450d9f7 Mon Sep 17 00:00:00 2001 From: Moelf Date: Sun, 27 Oct 2024 22:53:56 +0100 Subject: [PATCH 1/5] use SIMD.jl directly instead of LV.jl for `fast_findmin()` --- Project.toml | 8 ++++---- src/JetReconstruction.jl | 1 + src/PlainAlgo.jl | 2 -- src/TiledAlgoLL.jl | 1 - src/Utils.jl | 41 ++++++++++++++++++++++++++++++---------- 5 files changed, 36 insertions(+), 17 deletions(-) diff --git a/Project.toml b/Project.toml index 3b2bab7..6499f19 100644 --- a/Project.toml +++ b/Project.toml @@ -9,19 +9,19 @@ CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193" EnumX = "4e289a0a-7415-4d19-859d-a7e5c4648b56" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" -LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" LorentzVectorHEP = "f612022c-142a-473f-8cfd-a09cf3793c6c" LorentzVectors = "3f54b04b-17fc-5cd4-9758-90c048d965e3" MuladdMacro = "46d2c3a1-f734-5fdb-9937-b9b9aeba4221" +SIMD = "fdea26ae-647d-5447-a871-4b548cad5224" StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" [weakdeps] -Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" EDM4hep = "eb32b910-dde9-4347-8fce-cd6be3498f0c" +Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" [extensions] -JetVisualisation = "Makie" EDM4hepJets = "EDM4hep" +JetVisualisation = "Makie" [compat] Accessors = "0.1.36" @@ -29,11 +29,11 @@ CodecZlib = "0.7.4" EDM4hep = "0.4.0" EnumX = "1.0.4" JSON = "0.21.4" -LoopVectorization = "0.12.170" LorentzVectorHEP = "0.1.6" LorentzVectors = "0.4.3" Makie = "0.20, 0.21" MuladdMacro = "0.2.4" +SIMD = "3.6" StructArrays = "0.6.18" julia = "1.9" diff --git a/src/JetReconstruction.jl b/src/JetReconstruction.jl index 9929187..8f1495f 100644 --- a/src/JetReconstruction.jl +++ b/src/JetReconstruction.jl @@ -18,6 +18,7 @@ module JetReconstruction using LorentzVectorHEP using MuladdMacro using StructArrays +using SIMD # Import from LorentzVectorHEP methods for those 4-vector types pt2(p::LorentzVector) = LorentzVectorHEP.pt2(p) diff --git a/src/PlainAlgo.jl b/src/PlainAlgo.jl index 4c8b9e3..291e347 100644 --- a/src/PlainAlgo.jl +++ b/src/PlainAlgo.jl @@ -1,5 +1,3 @@ -using LoopVectorization - """ dist(i, j, rapidity_array, phi_array) diff --git a/src/TiledAlgoLL.jl b/src/TiledAlgoLL.jl index b908109..1f8431c 100644 --- a/src/TiledAlgoLL.jl +++ b/src/TiledAlgoLL.jl @@ -5,7 +5,6 @@ using Logging using Accessors -using LoopVectorization # Include struct definitions and basic operations include("TiledAlgoLLStructs.jl") diff --git a/src/Utils.jl b/src/Utils.jl index 2be1a68..b9ef5a8 100644 --- a/src/Utils.jl +++ b/src/Utils.jl @@ -123,7 +123,7 @@ end fast_findmin(dij, n) Find the minimum value and its index in the first `n` elements of the `dij` -array. The use of `@turbo` macro gives a significiant performance boost. +array. # Arguments - `dij`: An array of values. @@ -133,14 +133,35 @@ array. The use of `@turbo` macro gives a significiant performance boost. - `dij_min`: The minimum value in the first `n` elements of the `dij` array. - `best`: The index of the minimum value in the `dij` array. """ -fast_findmin(dij, n) = begin - # findmin(@inbounds @view dij[1:n]) - best = 1 - @inbounds dij_min = dij[1] - @turbo for here in 2:n - newmin = dij[here] < dij_min - best = newmin ? here : best - dij_min = newmin ? dij[here] : dij_min +function fast_findmin(x, n) + laneIndices = SIMD.Vec{8, Int64}((1, 2, 3, 4, 5, 6, 7, 8)) + minvals = SIMD.Vec{8, Float64}(Inf) + min_indices = SIMD.Vec{8, Int64}(0) + + n_batches, remainder = divrem(n, 8) + lane = VecRange{8}(0) + i = 1 + @inbounds @fastmath for _ in 1:n_batches + predicate = x[lane + i] < minvals + minvals = vifelse(predicate, x[lane + i], minvals) + min_indices = vifelse(predicate, laneIndices, min_indices) + + i += 8 + laneIndices += 8 end - dij_min, best + + min_value = SIMD.minimum(minvals) + min_index = min_value == minvals[1] ? min_indices[1] : min_value == minvals[2] ? min_indices[2] : + min_value == minvals[3] ? min_indices[3] : min_value == minvals[4] ? min_indices[4] : + min_value == minvals[5] ? min_indices[5] : min_value == minvals[6] ? min_indices[6] : + min_value == minvals[7] ? min_indices[7] : min_indices[8] + + @inbounds @fastmath for _ in 1:remainder + xi = x[i] + pred = x[i] < min_value + min_value = ifelse(pred, xi, min_value) + min_index = ifelse(pred, i, min_index) + i += 1 + end + return min_value, min_index end From 6659fe186e0aebc0e900fa54fe26555e7d3cfe6c Mon Sep 17 00:00:00 2001 From: Moelf Date: Sun, 27 Oct 2024 22:57:42 +0100 Subject: [PATCH 2/5] clean up --- src/Utils.jl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Utils.jl b/src/Utils.jl index b9ef5a8..49be546 100644 --- a/src/Utils.jl +++ b/src/Utils.jl @@ -133,17 +133,17 @@ array. - `dij_min`: The minimum value in the first `n` elements of the `dij` array. - `best`: The index of the minimum value in the `dij` array. """ -function fast_findmin(x, n) - laneIndices = SIMD.Vec{8, Int64}((1, 2, 3, 4, 5, 6, 7, 8)) - minvals = SIMD.Vec{8, Float64}(Inf) - min_indices = SIMD.Vec{8, Int64}(0) +function fast_findmin(dij::DenseVector{T}, n) where T + laneIndices = SIMD.Vec{8, Int}((1, 2, 3, 4, 5, 6, 7, 8)) + minvals = SIMD.Vec{8, T}(Inf) + min_indices = SIMD.Vec{8, Int}(0) n_batches, remainder = divrem(n, 8) lane = VecRange{8}(0) i = 1 @inbounds @fastmath for _ in 1:n_batches - predicate = x[lane + i] < minvals - minvals = vifelse(predicate, x[lane + i], minvals) + predicate = dij[lane + i] < minvals + minvals = vifelse(predicate, dij[lane + i], minvals) min_indices = vifelse(predicate, laneIndices, min_indices) i += 8 @@ -157,9 +157,9 @@ function fast_findmin(x, n) min_value == minvals[7] ? min_indices[7] : min_indices[8] @inbounds @fastmath for _ in 1:remainder - xi = x[i] - pred = x[i] < min_value - min_value = ifelse(pred, xi, min_value) + xi = dij[i] + pred = dij[i] < min_value + min_value= ifelse(pred, xi, min_value) min_index = ifelse(pred, i, min_index) i += 1 end From a508028bd0e5908baccc769b6db1e6cb4c2ef629 Mon Sep 17 00:00:00 2001 From: Moelf Date: Sun, 27 Oct 2024 23:02:39 +0100 Subject: [PATCH 3/5] more inbounds just for safety --- src/Utils.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Utils.jl b/src/Utils.jl index 49be546..731fc57 100644 --- a/src/Utils.jl +++ b/src/Utils.jl @@ -151,7 +151,7 @@ function fast_findmin(dij::DenseVector{T}, n) where T end min_value = SIMD.minimum(minvals) - min_index = min_value == minvals[1] ? min_indices[1] : min_value == minvals[2] ? min_indices[2] : + min_index = @inbounds min_value == minvals[1] ? min_indices[1] : min_value == minvals[2] ? min_indices[2] : min_value == minvals[3] ? min_indices[3] : min_value == minvals[4] ? min_indices[4] : min_value == minvals[5] ? min_indices[5] : min_value == minvals[6] ? min_indices[6] : min_value == minvals[7] ? min_indices[7] : min_indices[8] From 8f980753cc3da36395f2378f661f081bda60373c Mon Sep 17 00:00:00 2001 From: Moelf Date: Sun, 27 Oct 2024 23:57:41 +0100 Subject: [PATCH 4/5] clean up --- src/Utils.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Utils.jl b/src/Utils.jl index 731fc57..01cb119 100644 --- a/src/Utils.jl +++ b/src/Utils.jl @@ -142,8 +142,9 @@ function fast_findmin(dij::DenseVector{T}, n) where T lane = VecRange{8}(0) i = 1 @inbounds @fastmath for _ in 1:n_batches - predicate = dij[lane + i] < minvals - minvals = vifelse(predicate, dij[lane + i], minvals) + dijs = dij[lane + i] + predicate = dijs < minvals + minvals = vifelse(predicate, dijs, minvals) min_indices = vifelse(predicate, laneIndices, min_indices) i += 8 From 395a28d24f55e22400f128e8efbacf636a8caa4c Mon Sep 17 00:00:00 2001 From: Moelf Date: Mon, 28 Oct 2024 00:20:39 +0100 Subject: [PATCH 5/5] format --- src/ClusterSequence.jl | 2 +- src/Utils.jl | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/ClusterSequence.jl b/src/ClusterSequence.jl index 4bf76f9..77fca14 100644 --- a/src/ClusterSequence.jl +++ b/src/ClusterSequence.jl @@ -117,7 +117,7 @@ final jets. to get the physical PseudoJet. - `Qtot::Any`: The total energy of the event. """ -struct ClusterSequence{T<:FourMomentum} +struct ClusterSequence{T <: FourMomentum} algorithm::JetAlgorithm.Algorithm power::Float64 R::Float64 diff --git a/src/Utils.jl b/src/Utils.jl index 01cb119..e2e6191 100644 --- a/src/Utils.jl +++ b/src/Utils.jl @@ -133,7 +133,7 @@ array. - `dij_min`: The minimum value in the first `n` elements of the `dij` array. - `best`: The index of the minimum value in the `dij` array. """ -function fast_findmin(dij::DenseVector{T}, n) where T +function fast_findmin(dij::DenseVector{T}, n) where {T} laneIndices = SIMD.Vec{8, Int}((1, 2, 3, 4, 5, 6, 7, 8)) minvals = SIMD.Vec{8, T}(Inf) min_indices = SIMD.Vec{8, Int}(0) @@ -152,15 +152,18 @@ function fast_findmin(dij::DenseVector{T}, n) where T end min_value = SIMD.minimum(minvals) - min_index = @inbounds min_value == minvals[1] ? min_indices[1] : min_value == minvals[2] ? min_indices[2] : - min_value == minvals[3] ? min_indices[3] : min_value == minvals[4] ? min_indices[4] : - min_value == minvals[5] ? min_indices[5] : min_value == minvals[6] ? min_indices[6] : - min_value == minvals[7] ? min_indices[7] : min_indices[8] + min_index = @inbounds min_value == minvals[1] ? min_indices[1] : + min_value == minvals[2] ? min_indices[2] : + min_value == minvals[3] ? min_indices[3] : + min_value == minvals[4] ? min_indices[4] : + min_value == minvals[5] ? min_indices[5] : + min_value == minvals[6] ? min_indices[6] : + min_value == minvals[7] ? min_indices[7] : min_indices[8] @inbounds @fastmath for _ in 1:remainder xi = dij[i] pred = dij[i] < min_value - min_value= ifelse(pred, xi, min_value) + min_value = ifelse(pred, xi, min_value) min_index = ifelse(pred, i, min_index) i += 1 end