From 9624eba6ede0ee069d43306be6847e4a75574bc9 Mon Sep 17 00:00:00 2001 From: Kyungjoo Kim Date: Fri, 28 May 2021 18:10:54 -0600 Subject: [PATCH] Tacho - specialization for cuda 11 --- .../src/impl/Tacho_NumericTools_LevelSet.hpp | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp index ef87eaf00942..331d2fe4c8ad 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp @@ -1726,7 +1726,18 @@ namespace Tacho { const ordinal_type half_level = _nlevel/2; //const ordinal_type team_size_factor[2] = { 64, 16 }, vector_size_factor[2] = { 8, 8}; //const ordinal_type team_size_factor[2] = { 16, 16 }, vector_size_factor[2] = { 32, 32}; +#if defined (CUDA_VERSION) +#if (11000 > CUDA_VERSION) + /// cuda below 11.0 + const ordinal_type team_size_factor[2] = { 32, 64 }, vector_size_factor[2] = { 8, 4}; +#else + /// cuda 11.0 and higher const ordinal_type team_size_factor[2] = { 64, 64 }, vector_size_factor[2] = { 8, 4}; +#endif +#else + /// not cuda (CUDA_VERSION is not defined) + const ordinal_type team_size_factor[2] = { 64, 64 }, vector_size_factor[2] = { 8, 4}; +#endif const ordinal_type team_size_update[2] = { 16, 8 }, vector_size_update[2] = { 32, 32}; { typedef TeamFunctor_FactorizeLDL functor_type; @@ -1848,7 +1859,18 @@ namespace Tacho { #endif // this should be considered with average problem sizes in levels const ordinal_type half_level = _nlevel/2; +#if defined (CUDA_VERSION) +#if (11000 > CUDA_VERSION) + /// cuda below 11.0 + const ordinal_type team_size_solve[2] = { 32, 16 }, vector_size_solve[2] = { 8, 8}; +#else + /// cuda 11.0 and higher + const ordinal_type team_size_solve[2] = { 32, 16 }, vector_size_solve[2] = { 8, 8}; +#endif +#else + /// not cuda (CUDA_VERSION is not defined) 
const ordinal_type team_size_solve[2] = { 64, 16 }, vector_size_solve[2] = { 8, 8}; +#endif const ordinal_type team_size_update[2] = { 128, 32}, vector_size_update[2] = { 1, 1}; { typedef TeamFunctor_SolveLowerLDL functor_type;