From f0862014497756c4f58f92410f095ca7fcc602f5 Mon Sep 17 00:00:00 2001 From: Eric Cano Date: Tue, 13 Feb 2024 11:18:05 +0100 Subject: [PATCH] Splits ZVertexSoA into 2 layouts and wraps those in a multi layout collection. Removes versioning info from class description for dictionaries. Ports comments from CUDA version. --- .../plugins/SiPixelCompareVertexSoAAlpaka.cc | 4 +- .../plugins/SiPixelMonitorVertexSoAAlpaka.cc | 16 ++-- .../VertexSoA/interface/ZVertexDevice.h | 12 +-- DataFormats/VertexSoA/interface/ZVertexHost.h | 15 ++-- DataFormats/VertexSoA/interface/ZVertexSoA.h | 25 ++++-- DataFormats/VertexSoA/src/classes.cc | 4 +- DataFormats/VertexSoA/src/classes_def.xml | 25 ++++-- .../VertexSoA/test/alpaka/ZVertexSoA_test.cc | 12 +-- .../test/alpaka/ZVertexSoA_test.dev.cc | 72 +++++++++-------- .../VertexSoA/test/alpaka/ZVertexSoA_test.h | 2 +- .../src/L2TauTagNNProducerAlpaka.cc | 4 +- .../plugins/PixelTrackDumpAlpaka.cc | 4 +- .../PixelVertexProducerFromSoAAlpaka.cc | 5 +- .../plugins/alpaka/clusterTracksByDensity.h | 8 +- .../plugins/alpaka/clusterTracksDBSCAN.h | 5 +- .../plugins/alpaka/clusterTracksIterative.h | 4 +- .../plugins/alpaka/fitVertices.h | 7 +- .../plugins/alpaka/sortByPt2.h | 9 ++- .../plugins/alpaka/splitVertices.h | 15 ++-- .../plugins/alpaka/vertexFinder.dev.cc | 50 ++++++++---- .../plugins/alpaka/vertexFinder.h | 1 + .../test/alpaka/VertexFinder_t.dev.cc | 79 +++++++++++++------ 22 files changed, 242 insertions(+), 136 deletions(-) diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareVertexSoAAlpaka.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareVertexSoAAlpaka.cc index 2eea6a980d9c5..4bcdd2a1bb1cb 100644 --- a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareVertexSoAAlpaka.cc +++ b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareVertexSoAAlpaka.cc @@ -104,7 +104,7 @@ void SiPixelCompareVertexSoAAlpaka::analyze(const edm::Event& iEvent, const edm: auto yc = y0 + dydz * zc; zc += z0; - auto ndofHost = vsoaHost.view()[sic].ndof(); + 
auto ndofHost = vsoaHost.view()[sic].ndof(); auto chi2Host = vsoaHost.view()[sic].chi2(); const int32_t notFound = -1; @@ -130,7 +130,7 @@ void SiPixelCompareVertexSoAAlpaka::analyze(const edm::Event& iEvent, const edm: auto xg = x0 + dxdz * zg; auto yg = y0 + dydz * zg; zg += z0; - auto ndofDevice = vsoaDevice.view()[closestVtxidx].ndof(); + auto ndofDevice = vsoaDevice.view()[closestVtxidx].ndof(); auto chi2Device = vsoaDevice.view()[closestVtxidx].chi2(); hx_->Fill(xc - x0, xg - x0); diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorVertexSoAAlpaka.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorVertexSoAAlpaka.cc index d3121f77bccb8..7b488553626b8 100644 --- a/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorVertexSoAAlpaka.cc +++ b/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorVertexSoAAlpaka.cc @@ -67,7 +67,9 @@ void SiPixelMonitorVertexSoAAlpaka::analyze(const edm::Event& iEvent, const edm: } auto const& vsoa = *vsoaHandle; - int nVertices = vsoa.view().nvFinal(); + auto vtx_view = vsoa.view(); + auto trk_view = vsoa.view(); + int nVertices = vtx_view.nvFinal(); auto bsHandle = iEvent.getHandle(tokenBeamSpot_); float x0 = 0., y0 = 0., z0 = 0., dxdz = 0., dydz = 0.; if (!bsHandle.isValid()) { @@ -82,8 +84,8 @@ void SiPixelMonitorVertexSoAAlpaka::analyze(const edm::Event& iEvent, const edm: } for (int iv = 0; iv < nVertices; iv++) { - auto si = vsoa.view()[iv].sortInd(); - auto z = vsoa.view()[si].zv(); + auto si = vtx_view[iv].sortInd(); + auto z = vtx_view[si].zv(); auto x = x0 + dxdz * z; auto y = y0 + dydz * z; @@ -91,10 +93,10 @@ void SiPixelMonitorVertexSoAAlpaka::analyze(const edm::Event& iEvent, const edm: hx->Fill(x); hy->Fill(y); hz->Fill(z); - auto ndof = vsoa.view()[si].ndof(); - hchi2->Fill(vsoa.view()[si].chi2()); - hchi2oNdof->Fill(vsoa.view()[si].chi2() / ndof); - hptv2->Fill(vsoa.view()[si].ptv2()); + auto ndof = trk_view[si].ndof(); + hchi2->Fill(vtx_view[si].chi2()); + hchi2oNdof->Fill(vtx_view[si].chi2() / ndof); + 
hptv2->Fill(vtx_view[si].ptv2()); hntrks->Fill(ndof + 1); } hnVertex->Fill(nVertices); diff --git a/DataFormats/VertexSoA/interface/ZVertexDevice.h b/DataFormats/VertexSoA/interface/ZVertexDevice.h index 8d120ae190f3c..832fac9d9ee0f 100644 --- a/DataFormats/VertexSoA/interface/ZVertexDevice.h +++ b/DataFormats/VertexSoA/interface/ZVertexDevice.h @@ -9,18 +9,18 @@ #include "DataFormats/VertexSoA/interface/ZVertexHost.h" #include "DataFormats/Portable/interface/PortableDeviceCollection.h" -template -class ZVertexDeviceSoA : public PortableDeviceCollection, TDev> { +template +class ZVertexDeviceSoA : public PortableDeviceMultiCollection { public: ZVertexDeviceSoA() = default; // necessary for ROOT dictionaries - // Constructor which specifies the SoA size + // Constructor which specifies the queue template - explicit ZVertexDeviceSoA(TQueue queue) : PortableDeviceCollection, TDev>(S, queue) {} + explicit ZVertexDeviceSoA(TQueue queue) + : PortableDeviceMultiCollection({{NVTX, NTRK}}, queue) {} }; -using namespace ::zVertex; template -using ZVertexDevice = ZVertexDeviceSoA; +using ZVertexDevice = ZVertexDeviceSoA; #endif // DataFormats_VertexSoA_interface_ZVertexDevice_h diff --git a/DataFormats/VertexSoA/interface/ZVertexHost.h b/DataFormats/VertexSoA/interface/ZVertexHost.h index 2d72b83bfe385..b0abd1c2798a2 100644 --- a/DataFormats/VertexSoA/interface/ZVertexHost.h +++ b/DataFormats/VertexSoA/interface/ZVertexHost.h @@ -10,20 +10,25 @@ #include "DataFormats/VertexSoA/interface/ZVertexDefinitions.h" #include "DataFormats/Portable/interface/PortableHostCollection.h" -template -class ZVertexHostSoA : public PortableHostCollection { +// This alias is needed to feed the SET_PORTABLEHOSTMULTICOLLECTION_READ_RULES macro without commas. 
+using ZVertexHostSoABase = PortableHostCollection2; + +template +class ZVertexHostSoA : public ZVertexHostSoABase { public: ZVertexHostSoA() = default; // Constructor which specifies the queue template - explicit ZVertexHostSoA(TQueue queue) : PortableHostCollection(S, queue) {} + explicit ZVertexHostSoA(TQueue queue) + : PortableHostCollection2({{NVTX, NTRK}}, queue) {} // Constructor which specifies the DevHost - explicit ZVertexHostSoA(alpaka_common::DevHost const& host) : PortableHostCollection(S, host) {} + explicit ZVertexHostSoA(alpaka_common::DevHost const& host) + : PortableHostCollection2({{NVTX, NTRK}}, host) {} }; //using namespace ::zVertex; -using ZVertexHost = ZVertexHostSoA; +using ZVertexHost = ZVertexHostSoA; #endif // DataFormats_VertexSoA_ZVertexHost_H diff --git a/DataFormats/VertexSoA/interface/ZVertexSoA.h b/DataFormats/VertexSoA/interface/ZVertexSoA.h index 045603618acd7..8c2ff9c5c1db1 100644 --- a/DataFormats/VertexSoA/interface/ZVertexSoA.h +++ b/DataFormats/VertexSoA/interface/ZVertexSoA.h @@ -10,20 +10,29 @@ namespace reco { GENERATE_SOA_LAYOUT(ZVertexLayout, - SOA_COLUMN(int16_t, idv), - SOA_COLUMN(float, zv), - SOA_COLUMN(float, wv), - SOA_COLUMN(float, chi2), - SOA_COLUMN(float, ptv2), - SOA_COLUMN(int32_t, ndof), - SOA_COLUMN(uint16_t, sortInd), - SOA_SCALAR(uint32_t, nvFinal)) + SOA_COLUMN(float, zv), // output z-position of found vertices + SOA_COLUMN(float, wv), // output weight (1/error^2) on the above + SOA_COLUMN(float, chi2), // vertices chi2 + SOA_COLUMN(float, ptv2), // vertices pt^2 + SOA_COLUMN(uint16_t, sortInd), // sorted index (by pt2) ascending + SOA_SCALAR(uint32_t, nvFinal)) // the number of vertices + + GENERATE_SOA_LAYOUT( + ZVertexTracksLayout, + SOA_COLUMN(int16_t, idv), // vertex index for each associated (original) track (-1 == not associated) + SOA_COLUMN(int32_t, + ndof)) // vertices number of dof (reused as workspace for the number of nearest neighbours FIXME) // Common types for both Host and Device code 
using ZVertexSoA = ZVertexLayout<>; using ZVertexSoAView = ZVertexSoA::View; using ZVertexSoAConstView = ZVertexSoA::ConstView; + // Common types for both Host and Device code + using ZVertexTracksSoA = ZVertexTracksLayout<>; + using ZVertexTracksSoAView = ZVertexTracksSoA::View; + using ZVertexTracksSoAConstView = ZVertexTracksSoA::ConstView; + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void init(ZVertexSoAView &vertices) { vertices.nvFinal() = 0; } } // namespace reco diff --git a/DataFormats/VertexSoA/src/classes.cc b/DataFormats/VertexSoA/src/classes.cc index edffb6e08a9e5..66b3da7ff77d4 100644 --- a/DataFormats/VertexSoA/src/classes.cc +++ b/DataFormats/VertexSoA/src/classes.cc @@ -1,4 +1,4 @@ #include "DataFormats/Portable/interface/PortableHostCollectionReadRules.h" -#include "DataFormats/VertexSoA/interface/ZVertexSoA.h" +#include "DataFormats/VertexSoA/interface/ZVertexHost.h" -SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection); +SET_PORTABLEHOSTMULTICOLLECTION_READ_RULES(ZVertexHostSoABase); diff --git a/DataFormats/VertexSoA/src/classes_def.xml b/DataFormats/VertexSoA/src/classes_def.xml index 820d28ecc3493..f06e8e25e946c 100644 --- a/DataFormats/VertexSoA/src/classes_def.xml +++ b/DataFormats/VertexSoA/src/classes_def.xml @@ -1,8 +1,21 @@ - - - - - + + + + + + + + + + + + + + + + + - + \ No newline at end of file diff --git a/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.cc b/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.cc index 7c9d17a767682..4f70649d2990c 100644 --- a/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.cc +++ b/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.cc @@ -49,7 +49,7 @@ int main() { // Instantiate vertices on device. PortableCollection allocates // SoA on device automatically. ZVertexSoACollection zvertex_d(queue); - testZVertexSoAT::runKernels(zvertex_d.view(), queue); + testZVertexSoAT::runKernels(zvertex_d.view(), zvertex_d.view(), queue); // Instantate vertices on host. 
This is where the data will be // copied to from device. @@ -68,11 +68,13 @@ int main() { << "sortInd\t" << "nvFinal\n"; + auto vtx_v = zvertex_h.view(); + auto trk_v = zvertex_h.view(); for (int i = 0; i < 10; ++i) { - std::cout << (int)zvertex_h.view()[i].idv() << '\t' << zvertex_h.view()[i].zv() << '\t' - << zvertex_h.view()[i].wv() << '\t' << zvertex_h.view()[i].chi2() << '\t' - << zvertex_h.view()[i].ptv2() << '\t' << (int)zvertex_h.view()[i].ndof() << '\t' - << (int)zvertex_h.view()[i].sortInd() << '\t' << (int)zvertex_h.view().nvFinal() << '\n'; + auto vi = vtx_v[i]; + auto ti = trk_v[i]; + std::cout << (int)ti.idv() << "\t" << vi.zv() << "\t" << vi.wv() << "\t" << vi.chi2() << "\t" << vi.ptv2() << "\t" + << (int)ti.ndof() << "\t" << vi.sortInd() << "\t" << (int)vtx_v.nvFinal() << std::endl; } } } diff --git a/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc b/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc index 75a1ebf6b9269..4abd5255285dd 100644 --- a/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc +++ b/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc @@ -11,49 +11,57 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::testZVertexSoAT { class TestFillKernel { public: template >> - ALPAKA_FN_ACC void operator()(TAcc const& acc, reco::ZVertexSoAView zvertex_view) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, + reco::ZVertexSoAView zvertex_view, + reco::ZVertexTracksSoAView ztracks_view) const { if (cms::alpakatools::once_per_grid(acc)) { zvertex_view.nvFinal() = 420; - } + } - for (int32_t j : cms::alpakatools::uniform_elements(acc, zvertex_view.metadata().size())) { - zvertex_view[j].idv() = (int16_t)j; - zvertex_view[j].zv() = (float)j; - zvertex_view[j].wv() = (float)j; - zvertex_view[j].chi2() = (float)j; - zvertex_view[j].ptv2() = (float)j; - zvertex_view[j].ndof() = (int32_t)j; - zvertex_view[j].sortInd() = (uint16_t)j; + for (int32_t j : cms::alpakatools::uniform_elements(acc, zvertex_view.metadata().size())) { + 
zvertex_view[j].zv() = (float)j; + zvertex_view[j].wv() = (float)j; + zvertex_view[j].chi2() = (float)j; + zvertex_view[j].ptv2() = (float)j; + zvertex_view[j].sortInd() = (uint16_t)j; + } + for (int32_t j : cms::alpakatools::uniform_elements(acc, ztracks_view.metadata().size())) { + ztracks_view[j].idv() = (int16_t)j; + ztracks_view[j].ndof() = (int32_t)j; + } } - } - }; + }; - class TestVerifyKernel { - public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const& acc, reco::ZVertexSoAView zvertex_view) const { - if (cms::alpakatools::once_per_grid(acc)) { - ALPAKA_ASSERT_ACC(zvertex_view.nvFinal() == 420); - } + class TestVerifyKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, + reco::ZVertexSoAView zvertex_view, + reco::ZVertexTracksSoAView ztracks_view) const { + if (cms::alpakatools::once_per_grid(acc)) { + ALPAKA_ASSERT_ACC(zvertex_view.nvFinal() == 420); + } - for (int32_t j : cms::alpakatools::uniform_elements(acc, zvertex_view.nvFinal())) { - assert(zvertex_view[j].idv() == j); - assert(zvertex_view[j].zv() - (float)j < 0.0001); - assert(zvertex_view[j].wv() - (float)j < 0.0001); - assert(zvertex_view[j].chi2() - (float)j < 0.0001); - assert(zvertex_view[j].ptv2() - (float)j < 0.0001); - assert(zvertex_view[j].ndof() == j); - assert(zvertex_view[j].sortInd() == uint32_t(j)); + for (int32_t j : cms::alpakatools::uniform_elements(acc, zvertex_view.nvFinal())) { + assert(zvertex_view[j].zv() - (float)j < 0.0001); + assert(zvertex_view[j].wv() - (float)j < 0.0001); + assert(zvertex_view[j].chi2() - (float)j < 0.0001); + assert(zvertex_view[j].ptv2() - (float)j < 0.0001); + assert(zvertex_view[j].sortInd() == uint32_t(j)); + } + for (int32_t j : cms::alpakatools::uniform_elements(acc, ztracks_view.metadata().size())) { + assert(ztracks_view[j].idv() == j); + assert(ztracks_view[j].ndof() == j); + } } - } - }; + }; - void runKernels(reco::ZVertexSoAView zvertex_view, Queue& queue) { + void runKernels(reco::ZVertexSoAView 
zvertex_view, reco::ZVertexTracksSoAView ztracks_view, Queue& queue) { uint32_t items = 64; uint32_t groups = cms::alpakatools::divide_up_by(zvertex_view.metadata().size(), items); auto workDiv = cms::alpakatools::make_workdiv(groups, items); - alpaka::exec(queue, workDiv, TestFillKernel{}, zvertex_view); - alpaka::exec(queue, workDiv, TestVerifyKernel{}, zvertex_view); + alpaka::exec(queue, workDiv, TestFillKernel{}, zvertex_view, ztracks_view); + alpaka::exec(queue, workDiv, TestVerifyKernel{}, zvertex_view, ztracks_view); } } // namespace ALPAKA_ACCELERATOR_NAMESPACE::testZVertexSoAT diff --git a/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.h b/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.h index bad69a4d92bb5..fa54fffa8ce38 100644 --- a/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.h +++ b/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.h @@ -6,7 +6,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::testZVertexSoAT { - void runKernels(reco::ZVertexSoAView zvertex_view, Queue& queue); + void runKernels(reco::ZVertexSoAView zvertex_view, reco::ZVertexTracksSoAView ztracks_view, Queue& queue); } // namespace ALPAKA_ACCELERATOR_NAMESPACE::testZVertexSoAT diff --git a/RecoTauTag/HLTProducers/src/L2TauTagNNProducerAlpaka.cc b/RecoTauTag/HLTProducers/src/L2TauTagNNProducerAlpaka.cc index 9772366c6b22e..8bc685a311832 100644 --- a/RecoTauTag/HLTProducers/src/L2TauTagNNProducerAlpaka.cc +++ b/RecoTauTag/HLTProducers/src/L2TauTagNNProducerAlpaka.cc @@ -595,7 +595,7 @@ void L2TauNNProducerAlpaka::selectGoodTracksAndVertices(const ZVertexHost& patav if (nHits == 0) { break; } - int vtx_ass_to_track = patavtx_soa.view()[trk_idx].idv(); + int vtx_ass_to_track = patavtx_soa.view()[trk_idx].idv(); if (vtx_ass_to_track >= 0 && vtx_ass_to_track < nv) { auto patatrackPt = patatracks_tsoa.view()[trk_idx].pt(); ++nTrkAssociated[vtx_ass_to_track]; @@ -692,7 +692,7 @@ void L2TauNNProducerAlpaka::fillPatatracks(tensorflow::Tensor& cellGridMatrix, continue; const int 
patatrackNdof = 2 * std::min(6, nHits) - 5; - const int vtx_idx_assTrk = patavtx_soa.view()[it].idv(); + const int vtx_idx_assTrk = patavtx_soa.view()[it].idv(); if (reco::deltaR2(patatrackEta, patatrackPhi, tauEta, tauPhi) < dR2_max) { std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(patatrackEta, patatrackPhi, allTaus[tau_idx]->polarP4()); diff --git a/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpAlpaka.cc b/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpAlpaka.cc index c4f0b97dba8a9..6ccb7789fc098 100644 --- a/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpAlpaka.cc +++ b/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpAlpaka.cc @@ -59,12 +59,12 @@ void PixelTrackDumpAlpakaT::analyze(edm::StreamID streamID, assert(tracks.view().nTracks()); auto const& vertices = iEvent.get(tokenSoAVertex_); - assert(vertices.view().idv()); + assert(vertices.view().idv()); assert(vertices.view().zv()); assert(vertices.view().wv()); assert(vertices.view().chi2()); assert(vertices.view().ptv2()); - assert(vertices.view().ndof()); + assert(vertices.view().ndof()); assert(vertices.view().sortInd()); assert(vertices.view().nvFinal()); } diff --git a/RecoTracker/PixelVertexFinding/plugins/PixelVertexProducerFromSoAAlpaka.cc b/RecoTracker/PixelVertexFinding/plugins/PixelVertexProducerFromSoAAlpaka.cc index 6e542f7870c2e..86561db386303 100644 --- a/RecoTracker/PixelVertexFinding/plugins/PixelVertexProducerFromSoAAlpaka.cc +++ b/RecoTracker/PixelVertexFinding/plugins/PixelVertexProducerFromSoAAlpaka.cc @@ -103,7 +103,7 @@ void PixelVertexProducerFromSoAAlpaka::produce(edm::StreamID streamID, err(2, 2) *= 2.; // artifically inflate error //Copy also the tracks (no intention to be efficient....) 
for (auto k = 0U; k < indToEdm.size(); ++k) { - if (soa.view()[k].idv() == int16_t(i)) + if (soa.view()[k].idv() == int16_t(i)) itrk.push_back(k); } auto nt = itrk.size(); @@ -117,7 +117,8 @@ void PixelVertexProducerFromSoAAlpaka::produce(edm::StreamID streamID, itrk.clear(); continue; } // remove outliers - (*vertexes).emplace_back(reco::Vertex::Point(x, y, z), err, soa.view()[i].chi2(), soa.view()[i].ndof(), nt); + (*vertexes).emplace_back( + reco::Vertex::Point(x, y, z), err, soa.view()[i].chi2(), soa.view()[i].ndof(), nt); auto &v = (*vertexes).back(); v.reserve(itrk.size()); for (auto it : itrk) { diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h index 122457a7d05d2..f0b4993c60474 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksByDensity.h @@ -18,6 +18,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { using VtxSoAView = ::reco::ZVertexSoAView; + using TrkSoAView = ::reco::ZVertexTracksSoAView; using WsSoAView = ::vertexFinder::PixelVertexWorkSpaceSoAView; // this algo does not really scale as it works in a single block... 
// enough for <10K tracks we have @@ -28,6 +29,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline)) clusterTracksByDensity(const TAcc& acc, VtxSoAView& pdata, + TrkSoAView ptrkdata, WsSoAView& pws, int minT, // min number of neighbours to be "seed" float eps, // max absolute distance to cluster @@ -45,6 +47,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { auto er2mx = errmax * errmax; auto& __restrict__ data = pdata; + auto& __restrict__ trkdata = ptrkdata; auto& __restrict__ ws = pws; auto nt = ws.ntrks(); float const* __restrict__ zt = ws.zt(); @@ -54,7 +57,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { uint32_t& nvIntermediate = ws.nvIntermediate(); uint8_t* __restrict__ izt = ws.izt(); - int32_t* __restrict__ nn = data.ndof(); + int32_t* __restrict__ nn = trkdata.ndof(); int32_t* __restrict__ iv = ws.iv(); ALPAKA_ASSERT_ACC(zt); @@ -238,13 +241,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { template ALPAKA_FN_ACC void operator()(const TAcc& acc, VtxSoAView pdata, + TrkSoAView ptrkdata, WsSoAView pws, int minT, // min number of neighbours to be "seed" float eps, // max absolute distance to cluster float errmax, // max error to be "seed" float chi2max // max normalized distance to cluster ) const { - clusterTracksByDensity(acc, pdata, pws, minT, eps, errmax, chi2max); + clusterTracksByDensity(acc, pdata, ptrkdata, pws, minT, eps, errmax, chi2max); } }; diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h index 7090599dcfdb0..9c779665211ca 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksDBSCAN.h @@ -18,6 +18,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { using VtxSoAView = ::reco::ZVertexSoAView; + using TrkSoAView = ::reco::ZVertexTracksSoAView; 
using WsSoAView = ::vertexFinder::PixelVertexWorkSpaceSoAView; // this algo does not really scale as it works in a single block... // enough for <10K tracks we have @@ -26,6 +27,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { template ALPAKA_FN_ACC void operator()(const TAcc& acc, VtxSoAView pdata, + TrkSoAView ptrkdata, WsSoAView pws, int minT, // min number of neighbours to be "core" float eps, // max absolute distance to cluster @@ -41,6 +43,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { auto er2mx = errmax * errmax; auto& __restrict__ data = pdata; + auto& __restrict__ trkdata = ptrkdata; auto& __restrict__ ws = pws; auto nt = ws.ntrks(); float const* __restrict__ zt = ws.zt(); @@ -50,7 +53,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { uint32_t& nvIntermediate = ws.nvIntermediate(); uint8_t* __restrict__ izt = ws.izt(); - int32_t* __restrict__ nn = data.ndof(); + int32_t* __restrict__ nn = trkdata.ndof(); int32_t* __restrict__ iv = ws.iv(); ALPAKA_ASSERT_ACC(zt); diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h index 38e8429c0d28f..d520af74374bb 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/clusterTracksIterative.h @@ -25,6 +25,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template ALPAKA_FN_ACC void operator()(const TAcc& acc, VtxSoAView pdata, + TrkSoAView ptrkdata, WsSoAView pws, int minT, // min number of neighbours to be "core" float eps, // max absolute distance to cluster @@ -40,6 +41,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { auto er2mx = errmax * errmax; auto& __restrict__ data = pdata; + auto& __restrict__ trkdata = ptrkdata; auto& __restrict__ ws = pws; auto nt = ws.ntrks(); float const* __restrict__ zt = ws.zt(); @@ -49,7 +51,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { uint32_t& nvIntermediate = 
ws.nvIntermediate(); uint8_t* __restrict__ izt = ws.izt(); - int32_t* __restrict__ nn = data.ndof(); + int32_t* __restrict__ nn = trkdata.ndof(); int32_t* __restrict__ iv = ws.iv(); ALPAKA_ASSERT_ACC(zt); diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h index a8c428e2f5a00..420232d42b043 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/fitVertices.h @@ -18,12 +18,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { template ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) void fitVertices(const TAcc& acc, VtxSoAView& pdata, + TrkSoAView& ptrkdata, WsSoAView& pws, float chi2Max // for outlier rejection ) { constexpr bool verbose = false; // in principle the compiler should optmize out if false auto& __restrict__ data = pdata; + auto& __restrict__ trkdata = ptrkdata; auto& __restrict__ ws = pws; auto nt = ws.ntrks(); float const* __restrict__ zt = ws.zt(); @@ -34,7 +36,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { uint32_t& nvFinal = data.nvFinal(); uint32_t& nvIntermediate = ws.nvIntermediate(); - int32_t* __restrict__ nn = data.ndof(); + int32_t* __restrict__ nn = trkdata.ndof(); int32_t* __restrict__ iv = ws.iv(); ALPAKA_ASSERT_ACC(nvFinal <= nvIntermediate); @@ -114,10 +116,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { template ALPAKA_FN_ACC void operator()(const TAcc& acc, VtxSoAView pdata, + TrkSoAView ptrkdata, WsSoAView pws, float chi2Max // for outlier rejection ) const { - fitVertices(acc, pdata, pws, chi2Max); + fitVertices(acc, pdata, ptrkdata, pws, chi2Max); } }; diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/sortByPt2.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/sortByPt2.h index ff8fab8ab635f..41c1bf39a9aac 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/sortByPt2.h +++ 
b/RecoTracker/PixelVertexFinding/plugins/alpaka/sortByPt2.h @@ -20,10 +20,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { using VtxSoAView = ::reco::ZVertexSoAView; + using TrkSoAView = ::reco::ZVertexTracksSoAView; using WsSoAView = ::vertexFinder::PixelVertexWorkSpaceSoAView; template - ALPAKA_FN_ACC ALPAKA_FN_INLINE void sortByPt2(const TAcc& acc, VtxSoAView& data, WsSoAView& ws) { + ALPAKA_FN_ACC ALPAKA_FN_INLINE void sortByPt2(const TAcc& acc, VtxSoAView& data, TrkSoAView& trkdata, WsSoAView& ws) { auto nt = ws.ntrks(); float const* __restrict__ ptt2 = ws.ptt2(); uint32_t const& nvFinal = data.nvFinal(); @@ -37,7 +38,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { // fill indexing for (auto i : cms::alpakatools::uniform_elements(acc, nt)) { - data.idv()[ws.itrk()[i]] = iv[i]; + trkdata.idv()[ws.itrk()[i]] = iv[i]; }; // can be done asynchronously at the end of previous event @@ -74,8 +75,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { class SortByPt2Kernel { public: template - ALPAKA_FN_ACC void operator()(const TAcc& acc, VtxSoAView pdata, WsSoAView pws) const { - sortByPt2(acc, pdata, pws); + ALPAKA_FN_ACC void operator()(const TAcc& acc, VtxSoAView pdata, TrkSoAView ptrkdata, WsSoAView pws) const { + sortByPt2(acc, pdata, ptrkdata, pws); } }; diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h index e2ba0b46b8be4..380cfb478fe97 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/splitVertices.h @@ -16,16 +16,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { using VtxSoAView = ::reco::ZVertexSoAView; + using TrkSoAView = ::reco::ZVertexTracksSoAView; using WsSoAView = ::vertexFinder::PixelVertexWorkSpaceSoAView; template - ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) void splitVertices(const TAcc& acc, - VtxSoAView& pdata, - WsSoAView& 
pws, - float maxChi2) { + ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) void splitVertices( + const TAcc& acc, VtxSoAView& pdata, TrkSoAView ptrkdata, WsSoAView& pws, float maxChi2) { constexpr bool verbose = false; // in principle the compiler should optmize out if false const uint32_t threadIdxLocal(alpaka::getIdx(acc)[0u]); auto& __restrict__ data = pdata; + auto& __restrict__ trkdata = ptrkdata; auto& __restrict__ ws = pws; auto nt = ws.ntrks(); float const* __restrict__ zt = ws.zt(); @@ -35,7 +35,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { float const* __restrict__ chi2 = data.chi2(); uint32_t& nvFinal = data.nvFinal(); - int32_t const* __restrict__ nn = data.ndof(); + int32_t const* __restrict__ nn = trkdata.ndof(); int32_t* __restrict__ iv = ws.iv(); ALPAKA_ASSERT_ACC(zt); @@ -156,8 +156,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { class SplitVerticesKernel { public: template - ALPAKA_FN_ACC void operator()(const TAcc& acc, VtxSoAView pdata, WsSoAView pws, float maxChi2) const { - splitVertices(acc, pdata, pws, maxChi2); + ALPAKA_FN_ACC void operator()( + const TAcc& acc, VtxSoAView pdata, TrkSoAView ptrkdata, WsSoAView pws, float maxChi2) const { + splitVertices(acc, pdata, ptrkdata, pws, maxChi2); } }; diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc b/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc index b41e07aff56d5..bf736cf8b1373 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc @@ -33,6 +33,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { ALPAKA_FN_ACC void operator()(const TAcc& acc, reco::TrackSoAConstView tracks_view, VtxSoAView soa, + TrkSoAView trksoa, WsSoAView pws, float ptMin, float ptMax) const { @@ -44,7 +45,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { ALPAKA_ASSERT_ACC(nHits >= 3); // initialize soa... 
- soa[idx].idv() = -1; + trksoa[idx].idv() = -1; if (reco::isTriplet(tracks_view, idx)) continue; // no triplets @@ -75,6 +76,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template >> ALPAKA_FN_ACC void operator()(const TAcc& acc, VtxSoAView pdata, + TrkSoAView ptrkdata, WsSoAView pws, bool doSplit, int minT, // min number of neighbours to be "seed" @@ -82,17 +84,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { float errmax, // max error to be "seed" float chi2max // max normalized distance to cluster, ) const { - clusterTracksByDensity(acc, pdata, pws, minT, eps, errmax, chi2max); + clusterTracksByDensity(acc, pdata, ptrkdata, pws, minT, eps, errmax, chi2max); alpaka::syncBlockThreads(acc); - fitVertices(acc, pdata, pws, maxChi2ForFirstFit); + fitVertices(acc, pdata, ptrkdata, pws, maxChi2ForFirstFit); alpaka::syncBlockThreads(acc); if (doSplit) { - splitVertices(acc, pdata, pws, maxChi2ForSplit); + splitVertices(acc, pdata, ptrkdata, pws, maxChi2ForSplit); alpaka::syncBlockThreads(acc); - fitVertices(acc, pdata, pws, maxChi2ForFinalFit); + fitVertices(acc, pdata, ptrkdata, pws, maxChi2ForFinalFit); alpaka::syncBlockThreads(acc); } - sortByPt2(acc, pdata, pws); + sortByPt2(acc, pdata, ptrkdata, pws); } }; #else @@ -134,6 +136,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { ZVertexSoACollection vertices(queue); auto soa = vertices.view(); + auto trksoa = vertices.view(); auto ws_d = PixelVertexWorkSpaceSoADevice(::zVertex::MAXTRACKS, queue); @@ -147,7 +150,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { cms::alpakatools::divide_up_by(tracks_view.metadata().size() + blockSize - 1, blockSize); const auto loadTracksWorkDiv = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); alpaka::exec( - queue, loadTracksWorkDiv, LoadTracks{}, tracks_view, soa, ws_d.view(), ptMin, ptMax); + queue, loadTracksWorkDiv, LoadTracks{}, tracks_view, soa, trksoa, ws_d.view(), ptMin, ptMax); // Running too many thread lead to problems when printf is enabled. 
const auto finderSorterWorkDiv = cms::alpakatools::make_workdiv(1, 1024 - 128); @@ -160,6 +163,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { finderSorterWorkDiv, VertexFinderOneKernel{}, soa, + trksoa, ws_d.view(), doSplitting_, minT, @@ -168,34 +172,46 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { chi2max); #else alpaka::exec( - queue, finderSorterWorkDiv, VertexFinderOneKernel{}, soa, ws_d.view(), minT, eps, errmax, chi2max); + queue, finderSorterWorkDiv, VertexFinderOneKernel{}, soa, trksoa, ws_d.view(), minT, eps, errmax, chi2max); // one block per vertex... if (doSplitting_) - alpaka::exec(queue, splitterFitterWorkDiv, SplitVerticesKernel{}, soa, ws_d.view(), maxChi2ForSplit); + alpaka::exec( + queue, splitterFitterWorkDiv, SplitVerticesKernel{}, soa, trksoa, ws_d.view(), maxChi2ForSplit); alpaka::exec(queue, finderSorterWorkDiv{}, soa, ws_d.view()); #endif } else { // five kernels if (useDensity_) { - alpaka::exec( - queue, finderSorterWorkDiv, ClusterTracksByDensityKernel{}, soa, ws_d.view(), minT, eps, errmax, chi2max); + alpaka::exec(queue, + finderSorterWorkDiv, + ClusterTracksByDensityKernel{}, + soa, + trksoa, + ws_d.view(), + minT, + eps, + errmax, + chi2max); } else if (useDBSCAN_) { alpaka::exec( - queue, finderSorterWorkDiv, ClusterTracksDBSCAN{}, soa, ws_d.view(), minT, eps, errmax, chi2max); + queue, finderSorterWorkDiv, ClusterTracksDBSCAN{}, soa, trksoa, ws_d.view(), minT, eps, errmax, chi2max); } else if (useIterative_) { alpaka::exec( - queue, finderSorterWorkDiv, ClusterTracksIterative{}, soa, ws_d.view(), minT, eps, errmax, chi2max); + queue, finderSorterWorkDiv, ClusterTracksIterative{}, soa, trksoa, ws_d.view(), minT, eps, errmax, chi2max); } - alpaka::exec(queue, finderSorterWorkDiv, FitVerticesKernel{}, soa, ws_d.view(), maxChi2ForFirstFit); + alpaka::exec( + queue, finderSorterWorkDiv, FitVerticesKernel{}, soa, trksoa, ws_d.view(), maxChi2ForFirstFit); // one block per vertex... 
if (doSplitting_) { - alpaka::exec(queue, splitterFitterWorkDiv, SplitVerticesKernel{}, soa, ws_d.view(), maxChi2ForSplit); + alpaka::exec( + queue, splitterFitterWorkDiv, SplitVerticesKernel{}, soa, trksoa, ws_d.view(), maxChi2ForSplit); - alpaka::exec(queue, finderSorterWorkDiv, FitVerticesKernel{}, soa, ws_d.view(), maxChi2ForFinalFit); + alpaka::exec( + queue, finderSorterWorkDiv, FitVerticesKernel{}, soa, trksoa, ws_d.view(), maxChi2ForFinalFit); } - alpaka::exec(queue, finderSorterWorkDiv, SortByPt2Kernel{}, soa, ws_d.view()); + alpaka::exec(queue, finderSorterWorkDiv, SortByPt2Kernel{}, soa, trksoa, ws_d.view()); } return vertices; diff --git a/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.h b/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.h index 92890b89bb9c4..1cee81efe83a4 100644 --- a/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.h +++ b/RecoTracker/PixelVertexFinding/plugins/alpaka/vertexFinder.h @@ -19,6 +19,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { using namespace cms::alpakatools; using VtxSoAView = ::reco::ZVertexSoAView; + using TrkSoAView = ::reco::ZVertexTracksSoAView; using WsSoAView = ::vertexFinder::PixelVertexWorkSpaceSoAView; class Init { diff --git a/RecoTracker/PixelVertexFinding/test/alpaka/VertexFinder_t.dev.cc b/RecoTracker/PixelVertexFinding/test/alpaka/VertexFinder_t.dev.cc index bab64ea7a357a..963f5b99ce4bf 100644 --- a/RecoTracker/PixelVertexFinding/test/alpaka/VertexFinder_t.dev.cc +++ b/RecoTracker/PixelVertexFinding/test/alpaka/VertexFinder_t.dev.cc @@ -87,21 +87,22 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template ALPAKA_FN_ACC void operator()(const TAcc& acc, vertexFinder::VtxSoAView pdata, + vertexFinder::TrkSoAView ptrkdata, vertexFinder::WsSoAView pws, int minT, // min number of neighbours to be "seed" float eps, // max absolute distance to cluster float errmax, // max error to be "seed" float chi2max // max normalized distance to cluster, ) const { - 
vertexFinder::clusterTracksByDensity(acc, pdata, pws, minT, eps, errmax, chi2max); + vertexFinder::clusterTracksByDensity(acc, pdata, ptrkdata, pws, minT, eps, errmax, chi2max); alpaka::syncBlockThreads(acc); - vertexFinder::fitVertices(acc, pdata, pws, 50.); + vertexFinder::fitVertices(acc, pdata, ptrkdata, pws, 50.); alpaka::syncBlockThreads(acc); - vertexFinder::splitVertices(acc, pdata, pws, 9.f); + vertexFinder::splitVertices(acc, pdata, ptrkdata, pws, 9.f); alpaka::syncBlockThreads(acc); - vertexFinder::fitVertices(acc, pdata, pws, 5000.); + vertexFinder::fitVertices(acc, pdata, ptrkdata, pws, 5000.); alpaka::syncBlockThreads(acc); - vertexFinder::sortByPt2(acc, pdata, pws); + vertexFinder::sortByPt2(acc, pdata, ptrkdata, pws); alpaka::syncBlockThreads(acc); } }; @@ -157,14 +158,23 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { workDivClusterizer, VertexFinderOneKernel{}, vertices_d.view(), + vertices_d.view(), ws_d.view(), kk, par[0], par[1], par[2]); #else - alpaka::exec( - queue, workDivClusterizer, CLUSTERIZE{}, vertices_d.view(), ws_d.view(), kk, par[0], par[1], par[2]); + alpaka::exec(queue, + workDivClusterizer, + CLUSTERIZE{}, + vertices_d.view(), + vertices_d.view(), + ws_d.view(), + kk, + par[0], + par[1], + par[2]); #endif alpaka::wait(queue); alpaka::exec(queue, workDiv1D, Kernel_print{}, vertices_d.view(), ws_d.view()); @@ -172,8 +182,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { auto workDivFitter = make_workdiv(1, 1024 - 256); - alpaka::exec( - queue, workDivFitter, vertexFinder::FitVerticesKernel{}, vertices_d.view(), ws_d.view(), 50.f); + alpaka::exec(queue, + workDivFitter, + vertexFinder::FitVerticesKernel{}, + vertices_d.view(), + vertices_d.view(), + ws_d.view(), + 50.f); alpaka::memcpy(queue, vertices_h.buffer(), vertices_d.buffer()); alpaka::wait(queue); @@ -184,8 +199,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } for (auto j = 0U; j < vertices_h.view().nvFinal(); ++j) - if (vertices_h.view().ndof()[j] > 0) - vertices_h.view().chi2()[j] 
/= float(vertices_h.view().ndof()[j]); + if (vertices_h.view().ndof()[j] > 0) + vertices_h.view().chi2()[j] /= float(vertices_h.view().ndof()[j]); { auto mx = std::minmax_element(vertices_h.view().chi2(), vertices_h.view().chi2() + vertices_h.view().nvFinal()); @@ -193,14 +208,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { << *mx.second << std::endl; } - alpaka::exec( - queue, workDivFitter, vertexFinder::FitVerticesKernel{}, vertices_d.view(), ws_d.view(), 50.f); + alpaka::exec(queue, + workDivFitter, + vertexFinder::FitVerticesKernel{}, + vertices_d.view(), + vertices_d.view(), + ws_d.view(), + 50.f); alpaka::memcpy(queue, vertices_h.buffer(), vertices_d.buffer()); alpaka::wait(queue); for (auto j = 0U; j < vertices_h.view().nvFinal(); ++j) - if (vertices_h.view().ndof()[j] > 0) - vertices_h.view().chi2()[j] /= float(vertices_h.view().ndof()[j]); + if (vertices_h.view().ndof()[j] > 0) + vertices_h.view().chi2()[j] /= float(vertices_h.view().ndof()[j]); { auto mx = std::minmax_element(vertices_h.view().chi2(), vertices_h.view().chi2() + vertices_h.view().nvFinal()); @@ -211,17 +231,32 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { auto workDivSplitter = make_workdiv(1024, 64); // one vertex per block!!! 
- alpaka::exec( - queue, workDivSplitter, vertexFinder::SplitVerticesKernel{}, vertices_d.view(), ws_d.view(), 9.f); + alpaka::exec(queue, + workDivSplitter, + vertexFinder::SplitVerticesKernel{}, + vertices_d.view(), + vertices_d.view(), + ws_d.view(), + 9.f); alpaka::memcpy(queue, ws_h.buffer(), ws_d.buffer()); alpaka::wait(queue); std::cout << "after split " << ws_h.view().nvIntermediate() << std::endl; - alpaka::exec( - queue, workDivFitter, vertexFinder::FitVerticesKernel{}, vertices_d.view(), ws_d.view(), 5000.f); + alpaka::exec(queue, + workDivFitter, + vertexFinder::FitVerticesKernel{}, + vertices_d.view(), + vertices_d.view(), + ws_d.view(), + 5000.f); auto workDivSorter = make_workdiv(1, 256); - alpaka::exec(queue, workDivSorter, vertexFinder::SortByPt2Kernel{}, vertices_d.view(), ws_d.view()); + alpaka::exec(queue, + workDivSorter, + vertexFinder::SortByPt2Kernel{}, + vertices_d.view(), + vertices_d.view(), + ws_d.view()); alpaka::memcpy(queue, vertices_h.buffer(), vertices_d.buffer()); alpaka::wait(queue); @@ -231,8 +266,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } for (auto j = 0U; j < vertices_h.view().nvFinal(); ++j) - if (vertices_h.view().ndof()[j] > 0) - vertices_h.view().chi2()[j] /= float(vertices_h.view().ndof()[j]); + if (vertices_h.view().ndof()[j] > 0) + vertices_h.view().chi2()[j] /= float(vertices_h.view().ndof()[j]); { auto mx = std::minmax_element(vertices_h.view().chi2(), vertices_h.view().chi2() + vertices_h.view().nvFinal());