added clone as an operator. This allows operators for the filter in convolution. (#233)

Co-authored-by: jluitjens <[email protected]>
luitjens and luitjens authored Jul 28, 2022
1 parent a134c3d commit 33a0937
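For context, here is a minimal usage sketch of what this change enables: the filter passed to conv1d may now be an operator expression, and a lower-rank filter is cloned across the batch dimensions internally. The tensor sizes, the MATX_C_MODE_SAME enumerator, and the stream argument are assumptions based on the MatX API around this commit, not code taken from it.

// Hypothetical sketch (not part of this commit): batched convolution where
// the filter is an operator expression rather than a plain tensor.
#include "matx.h"

int main() {
  using namespace matx;

  auto sig  = make_tensor<float>({16, 1024});  // rank-2 batched input
  auto filt = make_tensor<float>({31});        // single rank-1 filter
  auto out  = make_tensor<float>({16, 1024});

  // The filter (here a scaled filter, i.e. an operator) is cloned across the
  // leading batch dimension of sig before the batched convolution runs.
  conv1d(out, sig, filt * 0.5f, MATX_C_MODE_SAME, 0);
  cudaDeviceSynchronize();

  return 0;
}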
Showing 4 changed files with 311 additions and 9 deletions.
1 change: 1 addition & 0 deletions docs_input/api/tensorops.rst
@@ -91,3 +91,4 @@ Advanced Operators
.. doxygenfunction:: remap
.. doxygenfunction:: rcollapse
.. doxygenfunction:: lcollapse
.. doxygenfunction:: clone
53 changes: 45 additions & 8 deletions include/matx_conv.h
@@ -40,6 +40,7 @@
#include "kernels/matx_conv_kernels.cuh"
#include "matx_error.h"
#include "matx_tensor.h"
#include "matx_tensor_ops.h"

namespace matx {
namespace detail {
@@ -165,25 +166,61 @@ inline void conv1d_impl(OutputType &o, const In1Type &i1, const In2Type &i2,
template <typename OutputType, typename In1Type, typename In2Type>
inline void conv1d(OutputType &o, const In1Type &i1, const In2Type &i2,
matxConvCorrMode_t mode, cudaStream_t stream) {
if constexpr ( In2Type::Rank()==1 && In1Type::Rank() > In2Type::Rank() ) {
//broadcast path. clone In2 across entire batch
if constexpr ( In1Type::Rank() > In2Type::Rank() ) {
// broadcast i2 path. clone i2 across batches

const int Rank = In1Type::Rank();
typename In2Type::shape_type shape[Rank];
constexpr int LRank = In1Type::Rank();
constexpr int SRank = In2Type::Rank();
constexpr int DRank = LRank - SRank;

index_t shape[LRank];

// copy left-most dimensions from i1
#pragma unroll
for(int i = 0; i < Rank-1; i++) {
for(int i = 0; i < DRank; i++) {
shape[i] = i1.Size(i);
}

// set right most dimensions as matxKeepDim
#pragma unroll
for(int i = 0; i < SRank; i++) {
shape[DRank+i] = matxKeepDim;
}

shape[Rank-1] = matxKeepDim;

auto ci2 = (i2.template Clone<Rank>(shape));
// clone i2
auto ci2 = clone<LRank>(i2, shape);

static_assert(i1.Rank() == ci2.Rank());

conv1d_impl(o, i1, ci2, mode, stream);

} else if constexpr ( In2Type::Rank() > In1Type::Rank()) {
// broadcast i1 path. clone i1 across batches

constexpr int LRank = In2Type::Rank();
constexpr int SRank = In1Type::Rank();
constexpr int DRank = LRank - SRank;
index_t shape[LRank];

// copy left-most dimensions from i2
#pragma unroll
for(int i = 0; i < DRank; i++) {
shape[i] = i2.Size(i);
}

// set right most dimensions as matxKeepDim
#pragma unroll
for(int i = 0; i < SRank; i++) {
shape[DRank+i] = matxKeepDim;
}

// clone i1
auto ci1 = clone<LRank>(i1, shape);

static_assert(ci1.Rank() == i2.Rank());

conv1d_impl(o, ci1, i2, mode, stream);

} else {
static_assert(In1Type::Rank() == In2Type::Rank());
// batched pass outer dims must match
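To make the broadcast path above concrete, this hand-unrolled sketch (illustrative only, with assumed sizes; not code from the commit) shows what the two loops build for a rank-3 input and a rank-1 filter, i.e. LRank == 3, SRank == 1, DRank == 2:

// Illustrative hand-unrolling of the broadcast path, not part of the commit.
#include "matx.h"

int main() {
  using namespace matx;

  auto i1 = make_tensor<float>({4, 8, 1024});  // batched rank-3 input
  auto i2 = make_tensor<float>({31});          // rank-1 filter

  index_t shape[3];
  shape[0] = i1.Size(0);    // leading (batch) dimensions copied from i1
  shape[1] = i1.Size(1);
  shape[2] = matxKeepDim;   // trailing dimension kept from i2

  auto ci2 = clone<3>(i2, shape);  // ci2.Rank() == i1.Rank() == 3
  // conv1d_impl(o, i1, ci2, mode, stream) would then run the batched path.
  return 0;
}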
80 changes: 79 additions & 1 deletion include/matx_tensor_ops.h
@@ -565,6 +565,84 @@ __MATX_INLINE__
}
};
}

namespace detail {
template <int CRank, typename T, typename Ind>
class CloneOp : public BaseOp<CloneOp<CRank, T, Ind>>
{
private:
mutable typename base_type<T>::type op_;
std::array<index_t, CRank> sizes_; // size of each dimension after cloning
std::array<index_t, T::Rank()> dims_; // gather map for computing operator() indices
public:
using matxop = bool;

using scalar_type = typename T::scalar_type;

__MATX_INLINE__ CloneOp(T op, std::array<index_t, CRank> shape) : op_(op) {
// create gather list
int d = 0;
for(int i = 0; i < Rank(); i++) {
if(shape[i]==matxKeepDim) {
sizes_[i] = op_.Size(d);
dims_[d++] = i;
} else {
sizes_[i] = shape[i];
}
}
MATX_ASSERT(d == T::Rank(), matxInvalidDim);

};

template <typename... Is>
__MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ auto operator()(Is... indices) const
{

// convert variadic type to tuple so we can read/update
std::array<index_t, Rank()> sind{indices...};
std::array<index_t, T::Rank()> gind;

// gather indices
for(int i = 0; i < T::Rank(); i++) {
auto idx = dims_[i];
gind[i] = sind[idx];
}

return mapply(op_, gind);
}

static __MATX_INLINE__ constexpr __MATX_HOST__ __MATX_DEVICE__ int32_t Rank()
{
return CRank;
}
constexpr __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ index_t Size(int dim) const
{
return sizes_[dim];
}

};
}


/**
* @brief Operator to clone an operator or tensor across dimensions
*
* @tparam Rank the rank of the cloned operator
* @tparam T source operator/tensor type
* @param t source operator/tensor
* @param shape the shape of the cloned operator/tensor.
* Each element is either the size of the cloned dimension or matxKeepDim to keep that dimension from the source operator/tensor
* @return operator to compute the cloned value
*/
template <int Rank, typename Op>
auto __MATX_INLINE__ clone(Op t, const index_t (&shape)[Rank])
{
std::array<index_t, Rank> lshape;
for(int i = 0; i < Rank ; i++) {
lshape[i]=shape[i];
}
return detail::CloneOp<Rank, Op, index_t>(t, lshape);
};

/**
* Remaps elements an operator according to an index array/operator.
@@ -595,7 +673,7 @@ __MATX_INLINE__
static_assert(sizeof...(Is)==Rank());
static_assert((std::is_convertible_v<Is, index_t> && ... ));

// convert variadic type to tupple so we can read/update
// convert variadic type to tuple so we can read/update
std::array<index_t, Rank()> ind{indices...};
// get current index for dim
auto i = ind[DIM];
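As a quick illustration of the index mapping CloneOp implements (hypothetical sizes, not from the commit): cloned dimensions ignore their index, while matxKeepDim dimensions forward theirs to the wrapped operator.

// For a 2D source cloned to 3D with {N, matxKeepDim, matxKeepDim}, the gather
// map dims_ is {1, 2}, so the cloned operator reads op(n, m, k) -> t(m, k).
auto t  = make_tensor<float>({8, 4});
auto op = clone<3>(t, {16, matxKeepDim, matxKeepDim});
// op.Size(0) == 16, op.Size(1) == 8, op.Size(2) == 4
// op(n, m, k) == t(m, k) for every n in [0, 16)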
186 changes: 186 additions & 0 deletions test/00_operators/OperatorTests.cu
@@ -197,6 +197,192 @@ TYPED_TEST(OperatorTestsComplex, AngleOp)

MATX_EXIT_HANDLER();
}
TYPED_TEST(OperatorTestsNumericNonComplex, CloneOp)
{
int N = 10;
int M = 12;
int K = 14;

MATX_ENTER_HANDLER();
{ // clone from 0D
auto tiv = make_tensor<TypeParam>();
auto tov = make_tensor<TypeParam>({N,M,K});

tiv() = 3;

auto op = clone<3>(tiv, {N, M, K});

ASSERT_EQ(op.Size(0), N);
ASSERT_EQ(op.Size(1), M);
ASSERT_EQ(op.Size(2), K);


for(int n = 0; n < N; n++) {
for(int m = 0; m < M; m++) {
for(int k = 0; k < K; k++) {
ASSERT_EQ(op(n,m,k) , tiv());
}
}
}

(tov = op).run();
cudaDeviceSynchronize();

for(int n = 0; n < N; n++) {
for(int m = 0; m < M; m++) {
for(int k = 0; k < K; k++) {
ASSERT_EQ(tov(n,m,k) , tiv());
}
}
}
}

{ // clone from 1D, keeping the last dimension
auto tiv = make_tensor<TypeParam>({K});
auto tov = make_tensor<TypeParam>({N,M,K});

for(int k = 0; k < K; k++) {
tiv(k) = TypeParam(k);
}

auto op = clone<3>(tiv, {N, M, matxKeepDim});

ASSERT_EQ(op.Size(0), N);
ASSERT_EQ(op.Size(1), M);
ASSERT_EQ(op.Size(2), K);


for(int n = 0; n < N; n++) {
for(int m = 0; m < M; m++) {
for(int k = 0; k < K; k++) {
ASSERT_EQ(op(n,m,k) , tiv(k));
}
}
}

(tov = op).run();
cudaDeviceSynchronize();

for(int n = 0; n < N; n++) {
for(int m = 0; m < M; m++) {
for(int k = 0; k < K; k++) {
ASSERT_EQ(tov(n,m,k) , tiv(k));
}
}
}
}

{ // clone from 1D, keeping the middle dimension
auto tiv = make_tensor<TypeParam>({M});
auto tov = make_tensor<TypeParam>({N,M,K});

for(int m = 0; m < M; m++) {
tiv(m) = TypeParam(m);
}

auto op = clone<3>(tiv, {N, matxKeepDim, K});

ASSERT_EQ(op.Size(0), N);
ASSERT_EQ(op.Size(1), M);
ASSERT_EQ(op.Size(2), K);


for(int n = 0; n < N; n++) {
for(int m = 0; m < M; m++) {
for(int k = 0; k < K; k++) {
ASSERT_EQ(op(n,m,k) , tiv(m));
}
}
}

(tov = op).run();
cudaDeviceSynchronize();

for(int n = 0; n < N; n++) {
for(int m = 0; m < M; m++) {
for(int k = 0; k < K; k++) {
ASSERT_EQ(tov(n,m,k) , tiv(m));
}
}
}
}

{ // clone from 2D
auto tiv = make_tensor<TypeParam>({M,K});
auto tov = make_tensor<TypeParam>({N,M,K});

for(int m = 0; m < M; m++) {
for(int k = 0; k < K; k++) {
tiv(m,k) = TypeParam(m*K)+TypeParam(k);
}
}

auto op = clone<3>(tiv, {N, matxKeepDim, matxKeepDim});

ASSERT_EQ(op.Size(0), N);
ASSERT_EQ(op.Size(1), M);
ASSERT_EQ(op.Size(2), K);


for(int n = 0; n < N; n++) {
for(int m = 0; m < M; m++) {
for(int k = 0; k < K; k++) {
ASSERT_EQ(op(n,m,k) , tiv(m,k));
}
}
}

(tov = op).run();
cudaDeviceSynchronize();

for(int n = 0; n < N; n++) {
for(int m = 0; m < M; m++) {
for(int k = 0; k < K; k++) {
ASSERT_EQ(tov(n,m,k) , tiv(m,k));
}
}
}
}

{ // clone from 2D and operator
auto tiv = make_tensor<TypeParam>({M,K});
auto tov = make_tensor<TypeParam>({N,M,K});

for(int m = 0; m < M; m++) {
for(int k = 0; k < K; k++) {
tiv(m,k) = TypeParam(m*K)+TypeParam(k);
}
}

auto op = clone<3>(TypeParam(2)*tiv, {N, matxKeepDim, matxKeepDim});

ASSERT_EQ(op.Size(0), N);
ASSERT_EQ(op.Size(1), M);
ASSERT_EQ(op.Size(2), K);


for(int n = 0; n < N; n++) {
for(int m = 0; m < M; m++) {
for(int k = 0; k < K; k++) {
ASSERT_EQ(op(n,m,k) , TypeParam(2)*tiv(m,k));
}
}
}

(tov = op).run();
cudaDeviceSynchronize();

for(int n = 0; n < N; n++) {
for(int m = 0; m < M; m++) {
for(int k = 0; k < K; k++) {
ASSERT_EQ(tov(n,m,k) , TypeParam(2)*tiv(m,k));
}
}
}
}

MATX_EXIT_HANDLER();
}

TYPED_TEST(OperatorTestsNumericNonComplex, CollapseOp)
{
