Skip to content

Commit

Permalink
Phalanx: utilities for virtual functions on device (#8840)
Browse files Browse the repository at this point in the history
* Phalanx: utilities for virtual functions on device

* Phalanx: fix compiler warning
  • Loading branch information
rppawlo authored Mar 4, 2021
1 parent 216b961 commit 0f69cc4
Show file tree
Hide file tree
Showing 4 changed files with 340 additions and 1 deletion.
46 changes: 46 additions & 0 deletions packages/phalanx/src/Phalanx_VirtualFunctionOnDevice.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#ifndef PHALANX_VIRTUAL_FUNCTION_ON_DEVICE_HPP
#define PHALANX_VIRTUAL_FUNCTION_ON_DEVICE_HPP

#include "Phalanx_KokkosDeviceTypes.hpp"
#include <memory>

namespace PHX {

/// Struct for deleting device instantiation
template<typename Device>
struct DeviceDeleter {
template<typename T>
void operator()(T* ptr) {
Kokkos::parallel_for(Kokkos::RangePolicy<typename Device::execution_space>(0,1),
KOKKOS_LAMBDA (const int i) { ptr->~T(); });
typename Device::execution_space().fence();
Kokkos::kokkos_free<typename Device::memory_space>(ptr);
}
};

/// Creates a copy of @p host_source in device memory so that its
/// vtable is set up on device. The copy is made by running Derived's
/// copy constructor (which must therefore exist) via placement new
/// inside a single-iteration device kernel.
///
/// Allocates device memory and must be called from host.
///
/// @tparam Device  Type providing execution_space and memory_space.
/// @tparam Derived Concrete type of the object to copy.
/// @param host_source Object to copy; it is captured by value by the kernel lambda.
/// @return Owning pointer whose DeviceDeleter destroys the object on
///         device and frees the allocation.
template<typename Device,typename Derived>
std::unique_ptr<Derived,DeviceDeleter<Device>>
copy_virtual_class_to_device(const Derived& host_source)
{
  using exec_space = typename Device::execution_space;
  using mem_space = typename Device::memory_space;
  using device_owner = std::unique_ptr<Derived,DeviceDeleter<Device>>;

  // Uninitialized storage for exactly one Derived in the device memory space.
  void* raw_storage = Kokkos::kokkos_malloc<mem_space>(sizeof(Derived));
  auto* p = static_cast<Derived*>(raw_storage);

  // Construct in place from device code.
  const Kokkos::RangePolicy<exec_space> single_iteration(0,1);
  Kokkos::parallel_for(single_iteration,
                       KOKKOS_LAMBDA (const int /* iteration */) { new (p) Derived(host_source); });

  // Construction must be complete before the pointer is handed out.
  exec_space().fence();
  return device_owner(p);
}

/// Thin aggregate that carries a raw pointer so that pointers can be
/// stored as the element type of a Kokkos::View. A bare pointer type
/// cannot be used as the View scalar type because "*" in the View
/// data type denotes rank, so the pointer is wrapped in a struct.
/// Used for putting virtual functions on device.
template<typename T>
struct DevicePtrWrapper
{
  /// The wrapped pointer; this struct does nothing to manage it.
  T* ptr;
};

}

#endif
7 changes: 7 additions & 0 deletions packages/phalanx/test/Kokkos/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,10 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST(
TESTONLYLIBS phalanx_unit_test_main phalanx_test_utilities
NUM_MPI_PROCS 1
)

# Registers the tKokkosVirtualFunctionOnDevice executable and test, built
# from tKokkosVirtualFunctionOnDevice.cpp with the same test-only libraries
# as the preceding test in this file, and run with a single MPI process.
TRIBITS_ADD_EXECUTABLE_AND_TEST(
tKokkosVirtualFunctionOnDevice
SOURCES tKokkosVirtualFunctionOnDevice.cpp
TESTONLYLIBS phalanx_unit_test_main phalanx_test_utilities
NUM_MPI_PROCS 1
)
2 changes: 1 addition & 1 deletion packages/phalanx/test/Kokkos/tKokkos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ namespace phalanx_test {
TEST_EQUALITY(f3.get(),6);
}

// yes we could do thsi with deep copy, but want to experiment with
// yes we could do this with deep copy, but want to experiment with
// wrapping tasks to insert functions into all PHX nodes
template <typename Scalar,typename Device>
class InitializeView {
Expand Down
286 changes: 286 additions & 0 deletions packages/phalanx/test/Kokkos/tKokkosVirtualFunctionOnDevice.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,286 @@
// @HEADER
// ************************************************************************
//
// Phalanx: A Partial Differential Equation Field Evaluation
// Kernel for Flexible Management of Complex Dependency Chains
// Copyright 2008 Sandia Corporation
//
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
// license for use of this work by or on behalf of the U.S. Government.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Roger Pawlowski ([email protected]), Sandia
// National Laboratories.
//
// ************************************************************************
// @HEADER

#include "Teuchos_Assert.hpp"
#include "Teuchos_UnitTestHarness.hpp"
#include "Teuchos_TimeMonitor.hpp"
#include "Teuchos_StackedTimer.hpp"
#include "Phalanx_VirtualFunctionOnDevice.hpp"
#include <limits>

namespace phalanx_test {

// ******************************
// This test is a performance test to compare using templates vs
// inheritance for a function on device. Instantiate vtable on device
// so that we can use runtime polymorphism in device kernels.
// ******************************

// Abstract equation-of-state (EoS) interface. Both operations are pure
// virtual and callable on device, so a concrete EoS can be selected at
// runtime inside a kernel through a base-class pointer.
class EquationOfState {
public:
  KOKKOS_DEFAULTED_FUNCTION
  virtual ~EquationOfState() = default;

  // Returns a scalar computed from rho and P. The IdealGasLaw
  // implementation in this file returns sqrt(gamma * P / rho).
  KOKKOS_FUNCTION
  virtual double a(const double& rho, const double& P) const = 0;

  // Computes the outputs P, ux, uy, uz from the inputs rho, (px,py,pz)
  // and rho_e. The first five arguments are inputs; the last four are
  // written by the implementation.
  KOKKOS_FUNCTION
  virtual void to_primative(const double& rho,
                            const double& px, const double& py, const double& pz,
                            const double& rho_e,
                            double& P,
                            double& ux, double& uy, double& uz) const = 0;
};

// Derived class
class IdealGasLaw : public EquationOfState {
double mass_; // mass
double gamma_; // ratio of specific heats
double r_; // Boltzmann constant
public:
KOKKOS_FUNCTION
IdealGasLaw() : mass_(28.0), gamma_(5./3.), r_(1.38066e-23) {}

KOKKOS_FUNCTION
double a(const double& rho,
const double& P) const override
{
return std::sqrt(gamma_ * P / rho);
}

KOKKOS_FUNCTION
void to_primative(const double& rho,
const double& px,
const double& py,
const double& pz,
const double& rho_e,
double& P,
double& ux,
double& uy,
double& uz) const override
{
ux = px / rho;
uy = py / rho;
uz = pz / rho;
P = (gamma_ - 1.) * (rho_e - 0.5 * rho * (ux*ux+uy*uy+uz*uz));
}
};

// Kernel with the equation of state fixed at compile time: a
// default-constructed EOS object is captured by value in the kernel
// lambda and called directly at every (cell, point) of rho. The
// computed values are discarded and the residual view is not written.
template<typename EOS>
void evaluateResidualTemplated(const PHX::View<double**> rho,
                               const PHX::View<double***> p,
                               const PHX::View<double**> rho_e,
                               const PHX::View<double**> /* residual */) {
  EOS eos;
  auto cell_point_range = Kokkos::MDRangePolicy<Kokkos::Rank<2>>({0,0},{rho.extent(0),rho.extent(1)});

  Kokkos::parallel_for(cell_point_range,KOKKOS_LAMBDA (const int cell_idx, const int pt_idx) {
    double pressure = 0.0;
    double velocity[3] = {0.0,0.0,0.0};

    eos.to_primative(rho(cell_idx,pt_idx),
                     p(cell_idx,pt_idx,0), p(cell_idx,pt_idx,1), p(cell_idx,pt_idx,2),
                     rho_e(cell_idx,pt_idx),
                     pressure,
                     velocity[0], velocity[1], velocity[2]);

    auto sound_speed = eos.a(rho(cell_idx,pt_idx),pressure);

    (void)sound_speed; // suppress unused variable warning
  });
}

// Kernel with the equation of state selected at runtime: eos_ptr is
// dereferenced inside the kernel lambda and the virtual functions are
// called through the EquationOfState base class at every (cell, point)
// of rho. The computed values are discarded and the residual view is
// not written. The test in this file passes a pointer obtained from
// PHX::copy_virtual_class_to_device.
void evaluateResidualInheritance(const PHX::View<double**> rho,
                                 const PHX::View<double***> p,
                                 const PHX::View<double**> rho_e,
                                 const PHX::View<double**> /* residual */,
                                 EquationOfState* eos_ptr) {
  auto cell_point_range = Kokkos::MDRangePolicy<Kokkos::Rank<2>>({0,0},{rho.extent(0),rho.extent(1)});

  Kokkos::parallel_for(cell_point_range,KOKKOS_LAMBDA (const int cell_idx, const int pt_idx) {
    double pressure = 0.0;
    double velocity[3] = {0.0,0.0,0.0};

    eos_ptr->to_primative(rho(cell_idx,pt_idx),
                          p(cell_idx,pt_idx,0), p(cell_idx,pt_idx,1), p(cell_idx,pt_idx,2),
                          rho_e(cell_idx,pt_idx),
                          pressure,
                          velocity[0], velocity[1], velocity[2]);

    auto sound_speed = eos_ptr->a(rho(cell_idx,pt_idx),pressure);

    (void)sound_speed; // suppress unused variable warning
  });
}

// Times the same kernel twice -- once with the equation of state as a
// template parameter and once through a base-class pointer to an
// object constructed on device -- and prints the timer summary. The
// test makes no assertions; it only reports timings.
TEUCHOS_UNIT_TEST(kokkos, SingleFunctionPerformanceTest)
{
  // For true performance measurements on gpu, make num_cells much
  // larger. We set it small for fast turn around in unit testing.
  // const size_t num_cells = 10000000;
  const size_t num_cells = 1000;
  const size_t num_points = 27;
  const size_t dim = 3;

  // DOFs. Each view's label matches its variable name.
  PHX::View<double**> rho("rho",num_cells,num_points);
  PHX::View<double***> p("p",num_cells,num_points,dim);
  PHX::View<double**> rho_e("rho_e",num_cells,num_points);
  Kokkos::deep_copy(rho,1.0);
  Kokkos::deep_copy(p,2.0);
  Kokkos::deep_copy(rho_e,3.0);

  // Residual
  PHX::View<double**> residual("residual",num_cells,num_points);

  // Templated
  {
    Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::getNewTimer("Templates");
    Teuchos::TimeMonitor tm(*timer);
    evaluateResidualTemplated<IdealGasLaw>(rho,p,rho_e,residual);
    Kokkos::fence();
  }

  // Inheritance
  // NOTE(review): eos_host and eos_device are created after the timer
  // starts and destroyed before it stops, so this timer also covers
  // creating and destroying the device object -- confirm this is intended.
  {
    Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::getNewTimer("Inheritance");
    Teuchos::TimeMonitor tm(*timer);
    auto eos_host = Teuchos::rcp(new IdealGasLaw);
    auto eos_device = PHX::copy_virtual_class_to_device<Kokkos::Device<PHX::ExecSpace,PHX::MemSpace>,IdealGasLaw>(*eos_host);
    evaluateResidualInheritance(rho,p,rho_e,residual,eos_device.get());
    Kokkos::fence();
  }

  std::cout << std::endl;
  Teuchos::TimeMonitor::summarize();
  std::cout << std::endl;
}

// ******************************
// Test a View of functors
// ******************************

// Abstract base for the "view of functors" test: a single pure virtual,
// device-callable operation that modifies its argument in place.
class BaseSum {
public:
  KOKKOS_DEFAULTED_FUNCTION
  virtual ~BaseSum() = default;

  // Modifies target in place; DerivedSum<N> in this file adds N to it.
  KOKKOS_FUNCTION
  virtual void sumInto(double& target) = 0;
};

// Concrete BaseSum whose contribution is the compile-time constant N.
template<int N>
class DerivedSum : public BaseSum {
public:
  KOKKOS_DEFAULTED_FUNCTION
  DerivedSum() = default;

  // Adds N, converted to double, to target.
  KOKKOS_FUNCTION
  void sumInto(double& target) override
  {
    const double increment = static_cast<double>(N);
    target += increment;
  }
};

// Stores pointers to four device-resident DerivedSum<N> objects
// (N = 1..4) in a Kokkos::View, calls each one through the BaseSum
// interface for every entry of a view initialized to 1.0, and checks
// that every entry ends up as 1 + 1 + 2 + 3 + 4 = 11.
TEUCHOS_UNIT_TEST(kokkos, ViewOfVirtualFunctions)
{
  using device_type = Kokkos::Device<PHX::ExecSpace,PHX::MemSpace>;

  const size_t num_cells = 100;
  PHX::View<double*> a("a",num_cells);
  Kokkos::deep_copy(a,1.0);

  // Create vector of virtual base class objects. The
  // device_functors vector must exist while in use on device. The
  // destructor here uses DeviceDeleter to clean up the memory
  // correctly by calling dtor on device.
  const int num_functors = 4;
  std::vector<std::shared_ptr<BaseSum>> device_functors(num_functors);
  {
    DerivedSum<1> df1;
    device_functors[0] = PHX::copy_virtual_class_to_device<device_type,DerivedSum<1>>(df1);
    DerivedSum<2> df2;
    device_functors[1] = PHX::copy_virtual_class_to_device<device_type,DerivedSum<2>>(df2);
    DerivedSum<3> df3;
    device_functors[2] = PHX::copy_virtual_class_to_device<device_type,DerivedSum<3>>(df3);
    DerivedSum<4> df4;
    device_functors[3] = PHX::copy_virtual_class_to_device<device_type,DerivedSum<4>>(df4);
  }

  // Create a view of virtual base class pointers. The pointers are
  // written into a host mirror and then copied to the device view.
  Kokkos::View<PHX::DevicePtrWrapper<BaseSum>*,PHX::Device> sum_into_functors("sum into functors",num_functors);
  auto host_sum_into_functors = Kokkos::create_mirror_view(sum_into_functors);
  for (int i=0; i < num_functors; ++i)
    host_sum_into_functors(i).ptr = device_functors[i].get();
  Kokkos::deep_copy(sum_into_functors,host_sum_into_functors);

  // Run the functors on device. The kernel label is passed first,
  // which is the documented argument order for Kokkos::parallel_for.
  Kokkos::parallel_for("do sum into functors",
                       Kokkos::RangePolicy<PHX::Device>(0,a.extent(0)),
                       KOKKOS_LAMBDA (const int i) {
    for (int functor=0; functor < num_functors; ++functor)
      sum_into_functors(functor).ptr->sumInto(a(i));
  });

  // Check the values
  auto host_a = Kokkos::create_mirror_view(a);
  Kokkos::deep_copy(host_a,a);
  const auto tol = 100.0 * std::numeric_limits<double>::epsilon();
  for (std::size_t i=0; i < host_a.extent(0); ++i) {
    TEST_FLOATING_EQUALITY(host_a(i),11.0,tol);
  }
}

}

0 comments on commit 0f69cc4

Please sign in to comment.