Skip to content

Commit

Permalink
migrate TrackCandidate to SoA from DataFormats/SoATemplate: kernels i…
Browse files Browse the repository at this point in the history
…nternally are still using the POD TrackCandidates SoA (to be migrated later)
  • Loading branch information
slava77devel committed Sep 25, 2024
1 parent 3858cf3 commit 94df11b
Show file tree
Hide file tree
Showing 13 changed files with 238 additions and 254 deletions.
25 changes: 25 additions & 0 deletions RecoTracker/LSTCore/interface/Constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@
#define RecoTracker_LSTCore_interface_Constants_h

#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
#include "DataFormats/Common/interface/StdArray.h"

#if defined(FP16_Base)
#if defined ALPAKA_ACC_GPU_CUDA_ENABLED
#include <cuda_fp16.h>
#elif defined ALPAKA_ACC_GPU_HIP_ENABLED
#include <hip/hip_fp16.h>
#endif
#endif

#ifdef CACHE_ALLOC
#include "HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h"
Expand Down Expand Up @@ -55,6 +64,17 @@ namespace lst {

constexpr unsigned int size_superbins = 45000;

// Half precision wrapper functions.
#if defined(FP16_Base)
#define __F2H __float2half
#define __H2F __half2float
typedef __half float FPX;
#else
#define __F2H
#define __H2F
typedef float FPX;
#endif

// Defining the constant host device variables right up here
// Currently pixel tracks treated as LSs with 2 double layers (IT layers 1+2 and 3+4) and 4 hits. To be potentially handled better in the future.
struct Params_pLS {
Expand All @@ -74,8 +94,13 @@ namespace lst {
};
struct Params_pT5 {
static constexpr int kLayers = 7, kHits = 14;
using ArrayU8xLayers = edm::StdArray<uint8_t, kLayers>;
using ArrayU16xLayers = edm::StdArray<uint16_t, kLayers>;
using ArrayUxHits = edm::StdArray<unsigned int, kHits>;
};

using ArrayUx2 = edm::StdArray<unsigned int, 2>;

} //namespace lst

#endif
10 changes: 10 additions & 0 deletions RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#ifndef RecoTracker_LSTCore_interface_TrackCandidatesHostCollection_h
#define RecoTracker_LSTCore_interface_TrackCandidatesHostCollection_h

#include "RecoTracker/LSTCore/interface/TrackCandidatesSoA.h"
#include "DataFormats/Portable/interface/PortableHostCollection.h"

namespace lst {
using TrackCandidatesHostCollection = PortableHostCollection<TrackCandidatesSoA>;
} // namespace lst
#endif
39 changes: 39 additions & 0 deletions RecoTracker/LSTCore/interface/TrackCandidatesSoA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#ifndef RecoTracker_LSTCore_interface_TrackCandidatesSoA_h
#define RecoTracker_LSTCore_interface_TrackCandidatesSoA_h

#include <alpaka/alpaka.hpp>
#include "DataFormats/Common/interface/StdArray.h"
#include "DataFormats/SoATemplate/interface/SoALayout.h"

#include "RecoTracker/LSTCore/interface/Constants.h"

namespace lst {
GENERATE_SOA_LAYOUT(TrackCandidatesSoALayout,
SOA_COLUMN(short, trackCandidateType), // 4-T5 5-pT3 7-pT5 8-pLS
SOA_COLUMN(unsigned int, directObjectIndices), // direct indices to each type containers
SOA_COLUMN(ArrayUx2, objectIndices), // tracklet and triplet indices
SOA_COLUMN(Params_pT5::ArrayU8xLayers, logicalLayers), //
SOA_COLUMN(Params_pT5::ArrayUxHits, hitIndices), //
SOA_COLUMN(int, pixelSeedIndex), //
SOA_COLUMN(Params_pT5::ArrayU16xLayers, lowerModuleIndices), //
SOA_COLUMN(FPX, centerX), //
SOA_COLUMN(FPX, centerY), //
SOA_COLUMN(FPX, radius), //
SOA_SCALAR(unsigned int, nTrackCandidates), //
SOA_SCALAR(unsigned int, nTrackCandidatespT3), //
SOA_SCALAR(unsigned int, nTrackCandidatespT5), //
SOA_SCALAR(unsigned int, nTrackCandidatespLS), //
SOA_SCALAR(unsigned int, nTrackCandidatesT5)) //

using TrackCandidatesSoA = TrackCandidatesSoALayout<>;

ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void initScalars(TrackCandidatesSoA::View& v) {
v.nTrackCandidates() = 0;
v.nTrackCandidatesT5() = 0;
v.nTrackCandidatespT3() = 0;
v.nTrackCandidatespT5() = 0;
v.nTrackCandidatespLS() = 0;
}

} // namespace lst
#endif
17 changes: 0 additions & 17 deletions RecoTracker/LSTCore/interface/alpaka/Constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,10 @@

#include "RecoTracker/LSTCore/interface/Constants.h"

#if defined ALPAKA_ACC_GPU_CUDA_ENABLED
#include <cuda_fp16.h>
#elif defined ALPAKA_ACC_GPU_HIP_ENABLED
#include <hip/hip_fp16.h>
#endif

namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {

using namespace ::lst;

// Half precision wrapper functions.
#if defined(FP16_Base)
#define __F2H __float2half
#define __H2F __half2float
typedef __half float FPX;
#else
#define __F2H
#define __H2F
typedef float FPX;
#endif

Vec3D constexpr elementsPerThread(Vec3D::all(static_cast<Idx>(1)));

// Needed for files that are compiled by g++ to not throw an error.
Expand Down
4 changes: 0 additions & 4 deletions RecoTracker/LSTCore/interface/alpaka/LST.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
std::vector<float> const& ph2_z);

void getOutput(Event& event);
std::vector<unsigned int> getHitIdxs(short trackCandidateType,
unsigned int TCIdx,
unsigned int const* TCHitIndices,
unsigned int const* hitIndices);

// Input and output vectors
std::vector<float> in_trkX_;
Expand Down
131 changes: 69 additions & 62 deletions RecoTracker/LSTCore/src/alpaka/Event.dev.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ void Event::resetEventSync() {
quintupletsInGPU_.reset();
quintupletsBuffers_.reset();
trackCandidatesInGPU_.reset();
trackCandidatesBuffers_.reset();
trackCandidatesDC_.reset();
pixelTripletsInGPU_.reset();
pixelTripletsBuffers_.reset();
pixelQuintupletsInGPU_.reset();
Expand All @@ -78,7 +78,7 @@ void Event::resetEventSync() {
quintupletsInCPU_.reset();
pixelTripletsInCPU_.reset();
pixelQuintupletsInCPU_.reset();
trackCandidatesInCPU_.reset();
trackCandidatesHC_.reset();
modulesInCPU_.reset();
}

Expand Down Expand Up @@ -478,8 +478,10 @@ void Event::createTriplets() {
void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) {
if (!trackCandidatesInGPU_) {
trackCandidatesInGPU_.emplace();
trackCandidatesBuffers_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc_, queue_);
trackCandidatesInGPU_->setData(*trackCandidatesBuffers_);
trackCandidatesDC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_);
auto buf = trackCandidatesDC_->buffer();
alpaka::memset(queue, buf, 0u);
trackCandidatesInGPU_->setData(trackCandidatesDC_->view());
}

Vec3D const threadsPerBlock_crossCleanpT3{1, 16, 64};
Expand Down Expand Up @@ -595,10 +597,13 @@ void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) {
auto nTrackCanpT3Host_buf = allocBufWrapper<unsigned int>(cms::alpakatools::host(), 1, queue_);
auto nTrackCanpLSHost_buf = allocBufWrapper<unsigned int>(cms::alpakatools::host(), 1, queue_);
auto nTrackCanT5Host_buf = allocBufWrapper<unsigned int>(cms::alpakatools::host(), 1, queue_);
alpaka::memcpy(queue_, nTrackCanpT5Host_buf, trackCandidatesBuffers_->nTrackCandidatespT5_buf);
alpaka::memcpy(queue_, nTrackCanpT3Host_buf, trackCandidatesBuffers_->nTrackCandidatespT3_buf);
alpaka::memcpy(queue_, nTrackCanpLSHost_buf, trackCandidatesBuffers_->nTrackCandidatespLS_buf);
alpaka::memcpy(queue_, nTrackCanT5Host_buf, trackCandidatesBuffers_->nTrackCandidatesT5_buf);
alpaka::memcpy(
queue_, nTrackCanpT5Host_buf, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatespT5, 1u));
alpaka::memcpy(
queue_, nTrackCanpT3Host_buf, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatespT3, 1u));
alpaka::memcpy(
queue_, nTrackCanpLSHost_buf, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatespLS, 1u));
alpaka::memcpy(queue_, nTrackCanT5Host_buf, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatesT5, 1u));
alpaka::wait(queue_); // wait to get the values before using them

auto nTrackCandidatespT5 = *nTrackCanpT5Host_buf.data();
Expand Down Expand Up @@ -821,8 +826,10 @@ void Event::createPixelQuintuplets() {
}
if (!trackCandidatesInGPU_) {
trackCandidatesInGPU_.emplace();
trackCandidatesBuffers_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc_, queue_);
trackCandidatesInGPU_->setData(*trackCandidatesBuffers_);
trackCandidatesDC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_);
auto buf = trackCandidatesDC_->buffer();
alpaka::memset(queue, buf, 0u);
trackCandidatesInGPU_->setData(trackCandidatesDC_->view());
}

auto superbins_buf = allocBufWrapper<int>(cms::alpakatools::host(), n_max_pixel_segments_per_module, queue_);
Expand Down Expand Up @@ -1209,7 +1216,7 @@ unsigned int Event::getNumberOfQuintupletsByLayerEndcap(unsigned int layer) {
int Event::getNumberOfTrackCandidates() {
auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue_, 1u);

alpaka::memcpy(queue_, nTrackCandidates_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf);
alpaka::memcpy(queue_, nTrackCandidates_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidates, 1u));
alpaka::wait(queue_);

return *nTrackCandidates_buf_h.data();
Expand All @@ -1218,7 +1225,8 @@ int Event::getNumberOfTrackCandidates() {
int Event::getNumberOfPT5TrackCandidates() {
auto nTrackCandidatesPT5_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue_, 1u);

alpaka::memcpy(queue_, nTrackCandidatesPT5_buf_h, trackCandidatesBuffers_->nTrackCandidatespT5_buf);
alpaka::memcpy(
queue_, nTrackCandidatesPT5_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatespT5, 1u));
alpaka::wait(queue_);

return *nTrackCandidatesPT5_buf_h.data();
Expand All @@ -1227,7 +1235,8 @@ int Event::getNumberOfPT5TrackCandidates() {
int Event::getNumberOfPT3TrackCandidates() {
auto nTrackCandidatesPT3_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue_, 1u);

alpaka::memcpy(queue_, nTrackCandidatesPT3_buf_h, trackCandidatesBuffers_->nTrackCandidatespT3_buf);
alpaka::memcpy(
queue_, nTrackCandidatesPT3_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatespT3, 1u));
alpaka::wait(queue_);

return *nTrackCandidatesPT3_buf_h.data();
Expand All @@ -1236,7 +1245,8 @@ int Event::getNumberOfPT3TrackCandidates() {
int Event::getNumberOfPLSTrackCandidates() {
auto nTrackCandidatesPLS_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue_, 1u);

alpaka::memcpy(queue_, nTrackCandidatesPLS_buf_h, trackCandidatesBuffers_->nTrackCandidatespLS_buf);
alpaka::memcpy(
queue_, nTrackCandidatesPLS_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatespLS, 1u));
alpaka::wait(queue_);

return *nTrackCandidatesPLS_buf_h.data();
Expand All @@ -1246,8 +1256,9 @@ int Event::getNumberOfPixelTrackCandidates() {
auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue_, 1u);
auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue_, 1u);

alpaka::memcpy(queue_, nTrackCandidates_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf);
alpaka::memcpy(queue_, nTrackCandidatesT5_buf_h, trackCandidatesBuffers_->nTrackCandidatesT5_buf);
alpaka::memcpy(queue_, nTrackCandidates_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidates, 1u));
alpaka::memcpy(
queue_, nTrackCandidatesT5_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatesT5, 1u));
alpaka::wait(queue_);

return (*nTrackCandidates_buf_h.data()) - (*nTrackCandidatesT5_buf_h.data());
Expand All @@ -1256,7 +1267,8 @@ int Event::getNumberOfPixelTrackCandidates() {
int Event::getNumberOfT5TrackCandidates() {
auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue_, 1u);

alpaka::memcpy(queue_, nTrackCandidatesT5_buf_h, trackCandidatesBuffers_->nTrackCandidatesT5_buf);
alpaka::memcpy(
queue_, nTrackCandidatesT5_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidatesT5, 1u));
alpaka::wait(queue_);

return *nTrackCandidatesT5_buf_h.data();
Expand Down Expand Up @@ -1540,74 +1552,69 @@ PixelQuintupletsBuffer<alpaka_common::DevHost>& Event::getPixelQuintuplets(bool
return pixelQuintupletsInCPU_.value();
}

TrackCandidatesBuffer<alpaka_common::DevHost>& Event::getTrackCandidates(bool sync) {
if (!trackCandidatesInCPU_) {
// Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU_
const TrackCandidatesHostCollection& Event::getTrackCandidates(bool sync) {
if (!trackCandidatesHC_) {
// Get nTrackCanHost parameter to initialize host based instance
auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue_, 1u);
alpaka::memcpy(queue_, nTrackCanHost_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf);
trackCandidatesInCPU_.emplace(
n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, cms::alpakatools::host(), queue_);
trackCandidatesInCPU_->setData(*trackCandidatesInCPU_);
alpaka::wait(queue_); // wait here before we get nTrackCanHost and trackCandidatesInCPU_ becomes usable
alpaka::memcpy(queue_, nTrackCanHost_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidates, 1u));
trackCandidatesHC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_);
alpaka::wait(queue_); // wait here before we get nTrackCanHost and trackCandidatesInCPU becomes usable

auto const nTrackCanHost = *nTrackCanHost_buf_h.data();

*trackCandidatesInCPU_->nTrackCandidates_buf.data() = nTrackCanHost;
alpaka::memcpy(queue_,
trackCandidatesInCPU_->hitIndices_buf,
trackCandidatesBuffers_->hitIndices_buf,
Params_pT5::kHits * nTrackCanHost);
trackCandidatesHC_->view().nTrackCandidates() = nTrackCanHost;
alpaka::memcpy(
queue_, trackCandidatesInCPU_->pixelSeedIndex_buf, trackCandidatesBuffers_->pixelSeedIndex_buf, nTrackCanHost);
queue_,
alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().hitIndices()->data(), Params_pT5::kHits * nTrackCanHost),
alpaka::createView(devAcc, trackCandidatesInGPU_->hitIndices, Params_pT5::kHits * nTrackCanHost));
alpaka::memcpy(queue_,
trackCandidatesInCPU_->logicalLayers_buf,
trackCandidatesBuffers_->logicalLayers_buf,
Params_pT5::kLayers * nTrackCanHost);
alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().pixelSeedIndex(), nTrackCanHost),
alpaka::createView(devAcc_, trackCandidatesInGPU_->pixelSeedIndex, nTrackCanHost));
alpaka::memcpy(
queue_,
alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().logicalLayers()->data(), Params_pT5::kLayers * nTrackCanHost),
alpaka::createView(devAcc_, trackCandidatesInGPU_->logicalLayers, Params_pT5::kLayers * nTrackCanHost));
alpaka::memcpy(queue_,
trackCandidatesInCPU_->directObjectIndices_buf,
trackCandidatesBuffers_->directObjectIndices_buf,
nTrackCanHost);
alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().directObjectIndices(), nTrackCanHost),
alpaka::createView(devAcc_, trackCandidatesInGPU_->directObjectIndices, nTrackCanHost));
alpaka::memcpy(queue_,
trackCandidatesInCPU_->objectIndices_buf,
trackCandidatesBuffers_->objectIndices_buf,
2 * nTrackCanHost);
alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().objectIndices()->data(), 2 * nTrackCanHost),
alpaka::createView(devAcc_, trackCandidatesInGPU_->objectIndices, 2 * nTrackCanHost));
alpaka::memcpy(queue_,
trackCandidatesInCPU_->trackCandidateType_buf,
trackCandidatesBuffers_->trackCandidateType_buf,
nTrackCanHost);
alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().trackCandidateType(), nTrackCanHost),
alpaka::createView(devAcc_, trackCandidatesInGPU_->trackCandidateType, nTrackCanHost));
if (sync)
alpaka::wait(queue_); // host consumers expect filled data
}
return trackCandidatesInCPU_.value();
return trackCandidatesHC_.value();
}

TrackCandidatesBuffer<alpaka_common::DevHost>& Event::getTrackCandidatesInCMSSW(bool sync) {
if (!trackCandidatesInCPU_) {
// Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU_
const TrackCandidatesHostCollection& Event::getTrackCandidatesInCMSSW(bool sync) {
if (!trackCandidatesHC_) {
// Get nTrackCanHost parameter to initialize host based instance
auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer<unsigned int[]>(queue_, 1u);
alpaka::memcpy(queue_, nTrackCanHost_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf);
trackCandidatesInCPU_.emplace(
n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, cms::alpakatools::host(), queue_);
trackCandidatesInCPU_->setData(*trackCandidatesInCPU_);
alpaka::wait(queue_); // wait for the value before using and trackCandidatesInCPU_ becomes usable
alpaka::memcpy(queue_, nTrackCanHost_buf_h, alpaka::createView(devAcc_, trackCandidatesInGPU_->nTrackCandidates, 1u));
trackCandidatesHC_ =
new ::lst::TrackCandidatesHostCollection(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_);
alpaka::wait(queue_); // wait for the value before using and trackCandidatesInCPU becomes usable

auto const nTrackCanHost = *nTrackCanHost_buf_h.data();

*trackCandidatesInCPU_->nTrackCandidates_buf.data() = nTrackCanHost;
alpaka::memcpy(queue_,
trackCandidatesInCPU_->hitIndices_buf,
trackCandidatesBuffers_->hitIndices_buf,
Params_pT5::kHits * nTrackCanHost);
trackCandidatesHC_->view().nTrackCandidates() = nTrackCanHost;
alpaka::memcpy(
queue_, trackCandidatesInCPU_->pixelSeedIndex_buf, trackCandidatesBuffers_->pixelSeedIndex_buf, nTrackCanHost);
queue_,
alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().hitIndices()->data(), Params_pT5::kHits * nTrackCanHost),
alpaka::createView(devAcc_, trackCandidatesInGPU_->hitIndices, Params_pT5::kHits * nTrackCanHost));
alpaka::memcpy(queue_,
alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().pixelSeedIndex(), nTrackCanHost),
alpaka::createView(devAcc_, trackCandidatesInGPU_->pixelSeedIndex, nTrackCanHost));
alpaka::memcpy(queue_,
trackCandidatesInCPU_->trackCandidateType_buf,
trackCandidatesBuffers_->trackCandidateType_buf,
nTrackCanHost);
alpaka::createView(cms::alpakatools::host(), trackCandidatesHC_->view().trackCandidateType(), nTrackCanHost),
alpaka::createView(devAcc_, trackCandidatesInGPU_->trackCandidateType, nTrackCanHost));
if (sync)
alpaka::wait(queue_); // host consumers expect filled data
}
return trackCandidatesInCPU_.value();
return trackCandidatesHC_.value();
}

ModulesBuffer<alpaka_common::DevHost>& Event::getModules(bool isFull, bool sync) {
Expand Down
Loading

0 comments on commit 94df11b

Please sign in to comment.