From b3ed5a60e4d402ce731a9969c5824bf190ecc3f1 Mon Sep 17 00:00:00 2001 From: Gabriele Merlo Date: Thu, 8 Feb 2024 22:04:32 -0800 Subject: [PATCH 1/7] add support for 7D arrays --- include/gtensor/assign.h | 68 +++++++++++++++++++++++++++++++++++- include/gtensor/expression.h | 6 ++++ include/gtensor/gtensor.h | 62 ++++++++++++++++++++++++++++++++ include/gtensor/operator.h | 32 +++++++++++++++++ 4 files changed, 167 insertions(+), 1 deletion(-) diff --git a/include/gtensor/assign.h b/include/gtensor/assign.h index d8f56398..c834ff43 100644 --- a/include/gtensor/assign.h +++ b/include/gtensor/assign.h @@ -134,6 +134,32 @@ struct assigner<6, space::host> } }; + +template <> +struct assigner<7, space::host> +{ + template + static void run(E1& lhs, const E2& rhs, stream_view stream) + { + // printf("assigner<7, host>\n"); + for (int o = 0; o < lhs.shape(6); o++) { + for (int n = 0; n < lhs.shape(5); n++) { + for (int m = 0; m < lhs.shape(4); m++) { + for (int l = 0; l < lhs.shape(3); l++) { + for (int k = 0; k < lhs.shape(2); k++) { + for (int j = 0; j < lhs.shape(1); j++) { + for (int i = 0; i < lhs.shape(0); i++) { + lhs(i, j, k, l, m, n, o) = rhs(i, j, k, l, m, n, o); + } + } + } + } + } + } + } + } +}; + #if defined(GTENSOR_DEVICE_CUDA) || defined(GTENSOR_DEVICE_HIP) #ifdef GTENSOR_PER_DIM_KERNELS @@ -219,6 +245,25 @@ __global__ void kernel_assign_6(Elhs lhs, Erhs _rhs) } } +template +__global__ void kernel_assign_7(Elhs lhs, Erhs _rhs) +{ + auto rhs = _rhs; + int tidx = threadIdx.x + blockIdx.x * blockDim.x; + int tidy = threadIdx.y + blockIdx.y * blockDim.y; + int tidz = blockIdx.z; + if (tidx < lhs.shape(0) * lhs.shape(1) && + tidy < lhs.shape(2) * lhs.shape(3)) { + int j = tidx / lhs.shape(0), i = tidx % lhs.shape(0); + int l = tidy / lhs.shape(2), k = tidy % lhs.shape(2); + int m = tidz % lhs.shape(5) % lhs.shape(4); + int n = tidz % lhs.shape(5) / lhs.shape(4), + int o = tidz / lhs.shape(5); + + lhs(i, j, k, l, m, n, o) = rhs(i, j, k, l, m, o); + } +} + template <> struct assigner<1, space::device> { @@ -305,7 +350,7 @@ struct assigner<5, space::device> template static void run(E1& lhs, const E2& rhs, stream_view stream) { - // printf("assigner<6, device>\n"); + // printf("assigner<5, device>\n"); dim3 numThreads(BS_X, BS_Y); dim3 numBlocks((lhs.shape(0) * lhs.shape(1) + BS_X - 1) / BS_X, (lhs.shape(2) * lhs.shape(3) + BS_Y - 1) / BS_Y, @@ -341,6 +386,27 @@ struct assigner<6, space::device> } }; +template <> +struct assigner<7, space::device> +{ + template + static void run(E1& lhs, const E2& rhs, stream_view stream) + { + // printf("assigner<7, device>\n"); + dim3 numThreads(BS_X, BS_Y); + dim3 numBlocks((lhs.shape(0) * lhs.shape(1) + BS_X - 1) / BS_X, + (lhs.shape(2) * lhs.shape(3) + BS_Y - 1) / BS_Y, + lhs.shape(4) * lhs.shape(5) * lhs.shape(6)); + + gpuSyncIfEnabledStream(stream); + // std::cout << "rhs " << typeid(rhs.to_kernel()).name() << "\n"; + gtLaunchKernel(kernel_assign_7, numBlocks, numThreads, 0, + stream.get_backend_stream(), lhs.to_kernel(), + rhs.to_kernel()); + gpuSyncIfEnabledStream(stream); + } +}; + #else // not defined GTENSOR_PER_DIM_KERNELS template diff --git a/include/gtensor/expression.h b/include/gtensor/expression.h index dc72b036..0c867fc0 100644 --- a/include/gtensor/expression.h +++ b/include/gtensor/expression.h @@ -124,6 +124,12 @@ GT_INLINE decltype(auto) index_expression(E&& expr, shape_type<6> idx) return expr(idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]); } + template +GT_INLINE decltype(auto) index_expression(E&& expr, shape_type<7> idx) +{ + return expr(idx[0], idx[1], idx[2], idx[3], idx[4], idx[5], idx[6]); +} + } // namespace gt #endif diff --git a/include/gtensor/gtensor.h b/include/gtensor/gtensor.h index 507f0d20..00aca410 100644 --- a/include/gtensor/gtensor.h +++ b/include/gtensor/gtensor.h @@ -293,6 +293,28 @@ __global__ void kernel_launch(gt::shape_type<6> shape, F f) } } + +template +__global__ void kernel_launch(gt::shape_type<7> shape, F f) +{ + int i = threadIdx.x + blockIdx.x * BS_X; + int j = threadIdx.y + blockIdx.y * BS_Y; + int b = blockIdx.z; + int o = b / (shape[2] * shape[3] * shape[4] * shape[5]); + b -= o * (shape[2] * shape[3] * shape[4] * hspae[5]); + int n = b / (shape[2] * shape[3] * shape[4]); + b -= n * (shape[2] * shape[3] * shape[4]); + int m = b / (shape[2] * shape[3]); + b -= m * (shape[2] * shape[3]); + int l = b / shape[2]; + b -= l * shape[2]; + int k = b; + + if (i < shape[0] && j < shape[1]) { + f(i, j, k, l, m, n, o); + } +} + #else // not GTENSOR_PER_DIM_KERNELS template @@ -418,6 +440,31 @@ struct launch<6, space::host> } }; + +template <> +struct launch<7, space::host> +{ + template + static void run(const gt::shape_type<7>& shape, F&& f, gt::stream_view stream) + { + for (int o = 0; o < shape[6]; o++) { + for (int n = 0; n < shape[5]; n++) { + for (int m = 0; m < shape[4]; m++) { + for (int l = 0; l < shape[3]; l++) { + for (int k = 0; k < shape[2]; k++) { + for (int j = 0; j < shape[1]; j++) { + for (int i = 0; i < shape[0]; i++) { + std::forward(f)(i, j, k, l, m, n, o); + } + } + } + } + } + } + } + } +}; + #if defined(GTENSOR_DEVICE_CUDA) || defined(GTENSOR_DEVICE_HIP) #ifdef GTENSOR_PER_DIM_KERNELS @@ -519,6 +566,21 @@ struct launch<6, space::device> } }; +template <> +struct launch<7, space::device> +{ + template + static void run(const gt::shape_type<7>& shape, F&& f, gt::stream_view stream) + { + dim3 numThreads(BS_X, BS_Y); + dim3 numBlocks((shape[0] + BS_X - 1) / BS_X, (shape[1] + BS_Y - 1) / BS_Y, + shape[2] * shape[3] * shape[4] * shape[5] * shape[6]); + + gtLaunchKernel(kernel_launch, numBlocks, numThreads, 0, + stream.get_backend_stream(), shape, std::forward(f)); + } +}; + #else // not GTENSOR_PER_DIM_KERNELS template diff --git a/include/gtensor/operator.h b/include/gtensor/operator.h index 774dc6d6..df0acea2 100644 --- a/include/gtensor/operator.h +++ b/include/gtensor/operator.h @@ -355,6 +355,38 @@ struct equals<6, 6, space::host, space::host> } }; + +template <> +struct equals<7, 7, space::host, space::host> +{ + template + static bool run(const E1& e1, const E2& e2) + { + if (e1.shape() != e2.shape()) { + return false; + } + + for (int v = 0; v < e1.shape(6); v++) { + for (int z = 0; z < e1.shape(5); z++) { + for (int y = 0; y < e1.shape(4); y++) { + for (int x = 0; x < e1.shape(3); x++) { + for (int k = 0; k < e1.shape(2); k++) { + for (int j = 0; j < e1.shape(1); j++) { + for (int i = 0; i < e1.shape(0); i++) { + if (e1(i, j, k, x, y, z, v) != e2(i, j, k, x, y, z, v)) { + return false; + } + } + } + } + } + } + } + } + return true; + } +}; + #ifdef GTENSOR_HAVE_DEVICE template From 14388cc882b03f1884d68e7ff837574c477a012a Mon Sep 17 00:00:00 2001 From: gabriele merlo Date: Sat, 31 Aug 2024 16:07:16 +0200 Subject: [PATCH 2/7] run clang-format --- include/gtensor/assign.h | 20 +++++++++----------- include/gtensor/expression.h | 2 +- include/gtensor/gtensor.h | 18 ++++++++---------- include/gtensor/operator.h | 17 ++++++++--------- tests/test_launch.cxx | 11 ++++++----- 5 files changed, 32 insertions(+), 36 deletions(-) diff --git a/include/gtensor/assign.h b/include/gtensor/assign.h index c834ff43..947c9b72 100644 --- a/include/gtensor/assign.h +++ b/include/gtensor/assign.h @@ -134,7 +134,6 @@ struct assigner<6, space::host> } }; - template <> struct assigner<7, space::host> { @@ -144,14 +143,14 @@ struct assigner<7, space::host> // printf("assigner<7, host>\n"); for (int o = 0; o < lhs.shape(6); o++) { for (int n = 0; n < lhs.shape(5); n++) { - for (int m = 0; m < lhs.shape(4); m++) { - for (int l = 0; l < lhs.shape(3); l++) { - for (int k = 0; k < lhs.shape(2); k++) { - for (int j = 0; j < lhs.shape(1); j++) { - for (int i = 0; i < lhs.shape(0); i++) { - lhs(i, j, k, l, m, n, o) = rhs(i, j, k, l, m, n, o); - } - } + for (int m = 0; m < lhs.shape(4); m++) { + for (int l = 0; l < lhs.shape(3); l++) { + for (int k = 0; k < lhs.shape(2); k++) { + for (int j = 0; j < lhs.shape(1); j++) { + for (int i = 0; i < lhs.shape(0); i++) { + lhs(i, j, k, l, m, n, o) = rhs(i, j, k, l, m, n, o); + } + } } } } @@ -257,8 +256,7 @@ __global__ void kernel_assign_7(Elhs lhs, Erhs _rhs) int j = tidx / lhs.shape(0), i = tidx % lhs.shape(0); int l = tidy / lhs.shape(2), k = tidy % lhs.shape(2); int m = tidz % lhs.shape(5) % lhs.shape(4); - int n = tidz % lhs.shape(5) / lhs.shape(4), - int o = tidz / lhs.shape(5); + int n = tidz % lhs.shape(5) / lhs.shape(4), int o = tidz / lhs.shape(5); lhs(i, j, k, l, m, n, o) = rhs(i, j, k, l, m, o); } diff --git a/include/gtensor/expression.h b/include/gtensor/expression.h index 0c867fc0..c3580254 100644 --- a/include/gtensor/expression.h +++ b/include/gtensor/expression.h @@ -124,7 +124,7 @@ GT_INLINE decltype(auto) index_expression(E&& expr, shape_type<6> idx) return expr(idx[0], idx[1], idx[2], idx[3], idx[4], idx[5]); } - template +template GT_INLINE decltype(auto) index_expression(E&& expr, shape_type<7> idx) { return expr(idx[0], idx[1], idx[2], idx[3], idx[4], idx[5], idx[6]); diff --git a/include/gtensor/gtensor.h b/include/gtensor/gtensor.h index 970736d5..a7d5637d 100644 --- a/include/gtensor/gtensor.h +++ b/include/gtensor/gtensor.h @@ -303,7 +303,6 @@ __global__ void kernel_launch(gt::shape_type<6> shape, F f) } } - template __global__ void kernel_launch(gt::shape_type<7> shape, F f) { @@ -450,7 +449,6 @@ struct launch<6, space::host> } }; - template <> struct launch<7, space::host> { @@ -459,14 +457,14 @@ struct launch<7, space::host> { for (int o = 0; o < shape[6]; o++) { for (int n = 0; n < shape[5]; n++) { - for (int m = 0; m < shape[4]; m++) { - for (int l = 0; l < shape[3]; l++) { - for (int k = 0; k < shape[2]; k++) { - for (int j = 0; j < shape[1]; j++) { - for (int i = 0; i < shape[0]; i++) { - std::forward(f)(i, j, k, l, m, n, o); - } - } + for (int m = 0; m < shape[4]; m++) { + for (int l = 0; l < shape[3]; l++) { + for (int k = 0; k < shape[2]; k++) { + for (int j = 0; j < shape[1]; j++) { + for (int i = 0; i < shape[0]; i++) { + std::forward(f)(i, j, k, l, m, n, o); + } + } } } } diff --git a/include/gtensor/operator.h b/include/gtensor/operator.h index df0acea2..3b8bb5f7 100644 --- a/include/gtensor/operator.h +++ b/include/gtensor/operator.h @@ -355,7 +355,6 @@ struct equals<6, 6, space::host, space::host> } }; - template <> struct equals<7, 7, space::host, space::host> { @@ -368,14 +367,14 @@ struct equals<7, 7, space::host, space::host> for (int v = 0; v < e1.shape(6); v++) { for (int z = 0; z < e1.shape(5); z++) { - for (int y = 0; y < e1.shape(4); y++) { - for (int x = 0; x < e1.shape(3); x++) { - for (int k = 0; k < e1.shape(2); k++) { - for (int j = 0; j < e1.shape(1); j++) { - for (int i = 0; i < e1.shape(0); i++) { - if (e1(i, j, k, x, y, z, v) != e2(i, j, k, x, y, z, v)) { - return false; - } + for (int y = 0; y < e1.shape(4); y++) { + for (int x = 0; x < e1.shape(3); x++) { + for (int k = 0; k < e1.shape(2); k++) { + for (int j = 0; j < e1.shape(1); j++) { + for (int i = 0; i < e1.shape(0); i++) { + if (e1(i, j, k, x, y, z, v) != e2(i, j, k, x, y, z, v)) { + return false; + } } } } diff --git a/tests/test_launch.cxx b/tests/test_launch.cxx index 928c1b3a..0b86cb95 100644 --- a/tests/test_launch.cxx +++ b/tests/test_launch.cxx @@ -193,8 +193,9 @@ void device_double_add_7d(gt::gtensor_device& a, auto k_b = b.to_kernel(); gt::launch<7>( - a.shape(), GT_LAMBDA(int i, int j, int k, int l, int m, int n, int o) { - k_b(i, j, k, l, m, n, o) = k_a(i, j, k, l, m, n, o) + k_a(i, j, k, l, m, n, o); + a.shape(), GT_LAMBDA(int i, int j, int k, int l, int m, int n, int o) { + k_b(i, j, k, l, m, n, o) = + k_a(i, j, k, l, m, n, o) + k_a(i, j, k, l, m, n, o); }); gt::copy(b, out); } @@ -270,9 +271,9 @@ TEST(gtensor, device_launch_7d) for (int l = 0; l < h_a.shape(3); l++) { for (int m = 0; m < h_a.shape(4); m++) { for (int n = 0; n < h_a.shape(5); n++) { - for (int o = 0; n < h_a.shape(6); o++) { - h_a(i, j, k, l, m, n, o) = i + j + k + l + m + n +o; - } + for (int o = 0; n < h_a.shape(6); o++) { + h_a(i, j, k, l, m, n, o) = i + j + k + l + m + n + o; + } } } } From 66a58a7efceed89d219f7659e522e329becb06c1 Mon Sep 17 00:00:00 2001 From: gabriele merlo Date: Mon, 2 Sep 2024 08:43:12 +0200 Subject: [PATCH 3/7] use newline instead of commas --- include/gtensor/assign.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/include/gtensor/assign.h b/include/gtensor/assign.h index 947c9b72..cc622468 100644 --- a/include/gtensor/assign.h +++ b/include/gtensor/assign.h @@ -253,10 +253,13 @@ __global__ void kernel_assign_7(Elhs lhs, Erhs _rhs) int tidz = blockIdx.z; if (tidx < lhs.shape(0) * lhs.shape(1) && tidy < lhs.shape(2) * lhs.shape(3)) { - int j = tidx / lhs.shape(0), i = tidx % lhs.shape(0); - int l = tidy / lhs.shape(2), k = tidy % lhs.shape(2); + int j = tidx / lhs.shape(0); + int i = tidx % lhs.shape(0); + int l = tidy / lhs.shape(2); + int k = tidy % lhs.shape(2); + int n = tidz % lhs.shape(5) / lhs.shape(4); int m = tidz % lhs.shape(5) % lhs.shape(4); - int n = tidz % lhs.shape(5) / lhs.shape(4), int o = tidz / lhs.shape(5); + int o = tidz / lhs.shape(5); lhs(i, j, k, l, m, n, o) = rhs(i, j, k, l, m, o); } From 5757f63e268a5155166a103de230b91a488152da Mon Sep 17 00:00:00 2001 From: gabriele merlo Date: Mon, 2 Sep 2024 08:43:32 +0200 Subject: [PATCH 4/7] Add tests for assign 7d --- tests/test_assign.cxx | 600 ++++++++++++++++++++++++------------------ 1 file changed, 338 insertions(+), 262 deletions(-) diff --git a/tests/test_assign.cxx b/tests/test_assign.cxx index 6d43111d..ff63ac5f 100644 --- a/tests/test_assign.cxx +++ b/tests/test_assign.cxx @@ -23,6 +23,22 @@ TEST(assign, gtensor_6d) EXPECT_EQ(a, b); } +TEST(assign, gtensor_7d) +{ + gt::gtensor a(gt::shape(2, 3, 4, 5, 6, 7, 8)); + gt::gtensor b(a.shape()); + + int* adata = a.data(); + + for (int i = 0; i < a.size(); i++) { + adata[i] = i; + } + + EXPECT_NE(a, b); + b = a; + EXPECT_EQ(a, b); +} + TEST(assign, gview_1d_scalar) { auto a = gt::empty(gt::shape(5)); @@ -81,326 +97,386 @@ TEST(assign, broadcast_6d) } } -#ifdef GTENSOR_HAVE_DEVICE - -TEST(assign, device_gtensor_6d) +TEST(assign, broadcast_7d) { - gt::gtensor_device a(gt::shape(2, 3, 4, 5, 6, 7)); - gt::gtensor_device b(a.shape()); - gt::gtensor h_a(a.shape()); - gt::gtensor h_b(a.shape()); + gt::gtensor a(gt::shape(8, 1, 2, 4, 1, 1, 6), 0); + gt::gtensor b(gt::shape(8, 1, 2, 1, 1, 1, 6), -7); - int* adata = h_a.data(); + gt::assign(a, b); - for (int i = 0; i < a.size(); i++) { - adata[i] = i; + for (int i = 0; i < a.shape(0); i++) { + for (int j = 0; j < a.shape(2); j++) { + for (int k = 0; k < a.shape(3); k++) { + for (int l = 0; l < a.shape(6); l++) { + EXPECT_EQ(a(i, 0, j, k, 0, 0, l), -7); + } + } + } } - gt::copy(h_a, a); - b = a; - gt::copy(b, h_b); +#ifdef GTENSOR_HAVE_DEVICE - EXPECT_EQ(h_a, h_b); -} + TEST(assign, device_gtensor_6d) + { + gt::gtensor_device a(gt::shape(2, 3, 4, 5, 6, 7)); + gt::gtensor_device b(a.shape()); + gt::gtensor h_a(a.shape()); + gt::gtensor h_b(a.shape()); -TEST(assign, device_gtensor_fill) -{ - gt::gtensor_device a(gt::shape(2, 3)); - gt::gtensor h_a(a.shape()); + int* adata = h_a.data(); - a.fill(9001); + for (int i = 0; i < a.size(); i++) { + adata[i] = i; + } - gt::copy(a, h_a); - EXPECT_EQ(h_a, (gt::gtensor_device{ - {9001, 9001}, {9001, 9001}, {9001, 9001}})); -} + gt::copy(h_a, a); + b = a; + gt::copy(b, h_b); -TEST(assign, device_gview_fill) -{ - gt::gtensor_device a(gt::shape(2, 3)); - gt::gtensor h_a(a.shape()); - auto av = a.view(gt::all, gt::all); + EXPECT_EQ(h_a, h_b); + } - av.fill(9001); + TEST(assign, device_gtensor_7d) + { + gt::gtensor_device a(gt::shape(2, 3, 4, 5, 6, 7, 8)); + gt::gtensor_device b(a.shape()); + gt::gtensor h_a(a.shape()); + gt::gtensor h_b(a.shape()); - gt::copy(a, h_a); - EXPECT_EQ(h_a, - (gt::gtensor{{9001, 9001}, {9001, 9001}, {9001, 9001}})); -} + int* adata = h_a.data(); -TEST(assign, device_span_fill) -{ - gt::gtensor_device a(gt::shape(2, 3)); - gt::gtensor h_a(a.shape()); - auto as = a.to_kernel(); + for (int i = 0; i < a.size(); i++) { + adata[i] = i; + } - as.fill(9001); + gt::copy(h_a, a); + b = a; + gt::copy(b, h_b); - gt::copy(a, h_a); - EXPECT_EQ(h_a, - (gt::gtensor{{9001, 9001}, {9001, 9001}, {9001, 9001}})); -} + EXPECT_EQ(h_a, h_b); + } -TEST(assign, device_gview_1d_scalar) -{ - auto a = gt::empty_device(gt::shape(5)); - auto h_a = gt::empty_like(a); - auto aview = a.view(gt::all); + TEST(assign, device_gtensor_fill) + { + gt::gtensor_device a(gt::shape(2, 3)); + gt::gtensor h_a(a.shape()); - aview = gt::scalar(5); + a.fill(9001); + + gt::copy(a, h_a); + EXPECT_EQ(h_a, (gt::gtensor_device{ + {9001, 9001}, {9001, 9001}, {9001, 9001}})); + } - gt::copy(a, h_a); + TEST(assign, device_gview_fill) + { + gt::gtensor_device a(gt::shape(2, 3)); + gt::gtensor h_a(a.shape()); + auto av = a.view(gt::all, gt::all); - EXPECT_EQ(h_a, (gt::gtensor{5, 5, 5, 5, 5})); -} + av.fill(9001); -TEST(assign, device_gtensor_large_2d) -{ - gt::gtensor_device a(gt::shape(2, 17920000)); - gt::gtensor_device b(a.shape()); - gt::gtensor h_a(a.shape()); - gt::gtensor h_b(a.shape()); + gt::copy(a, h_a); + EXPECT_EQ( + h_a, (gt::gtensor{{9001, 9001}, {9001, 9001}, {9001, 9001}})); + } - int* adata = h_a.data(); + TEST(assign, device_span_fill) + { + gt::gtensor_device a(gt::shape(2, 3)); + gt::gtensor h_a(a.shape()); + auto as = a.to_kernel(); - for (int i = 0; i < a.size(); i++) { - adata[i] = i; + as.fill(9001); + + gt::copy(a, h_a); + EXPECT_EQ( + h_a, (gt::gtensor{{9001, 9001}, {9001, 9001}, {9001, 9001}})); } - gt::copy(h_a, a); - // NB: b = a calls the default operator= which ends up triggering - // and underlying storage vector copy, usually a device memcpy, so - // it doesn't launch the gtensor assign kernel. Call assign directly - // to exercise the code - assign(b, a); - gt::copy(b, h_b); + TEST(assign, device_gview_1d_scalar) + { + auto a = gt::empty_device(gt::shape(5)); + auto h_a = gt::empty_like(a); + auto aview = a.view(gt::all); - EXPECT_EQ(h_a, h_b); -} + aview = gt::scalar(5); -TEST(assign, device_view_noncontiguous_6d) -{ - using T = gt::complex; - - int nzb = 2; - int nvb = 2; - int nwb = 2; - - // ijklmn, no ghost - auto g_shape = gt::shape(32, 4, 48, 40, 30, 2); - - // ijklmn, ghost in z, v, w - auto f_shape = - gt::shape(g_shape[0], g_shape[1], g_shape[2] + 2 * nzb, - g_shape[3] + 2 * nvb, g_shape[4] + 2 * nwb, g_shape[5]); - // i klmn, no ghost - auto papbar_shape = - gt::shape(g_shape[0], g_shape[2], g_shape[3], g_shape[4], g_shape[5]); - // ijz mn, ghost in z - auto bar_apar_shape = - gt::shape(g_shape[0], g_shape[1], f_shape[2], g_shape[4], g_shape[5]); - auto h_g = gt::full(g_shape, T(2.0)); - auto d_g = gt::empty_device(g_shape); - auto h_papbar = gt::full(papbar_shape, T(1.5)); - auto d_papbar = gt::empty_device(papbar_shape); - auto h_bar_apar = gt::full(bar_apar_shape, T(0.0, -1.0)); - auto d_bar_apar = gt::empty_device(bar_apar_shape); - auto h_f = gt::full(f_shape, T(100.0)); - auto d_f = gt::empty_device(f_shape); - - gt::copy(h_g, d_g); - gt::copy(h_papbar, d_papbar); - gt::copy(h_bar_apar, d_bar_apar); - gt::copy(h_f, d_f); - - auto lhs_view = d_f.view(gt::all, gt::all, gt::slice(nzb, -nzb), - gt::slice(nvb, -nvb), gt::slice(nwb, -nwb), gt::all); - auto d_papbar_view = - d_papbar.view(gt::all, gt::newaxis, gt::all, gt::all, gt::all, gt::all); - auto d_bar_apar_view = d_bar_apar.view(gt::all, gt::all, gt::slice(nzb, -nzb), - gt::newaxis, gt::all, gt::all); - auto rhs_view = d_g + d_papbar_view * d_bar_apar_view; - - GT_DEBUG_VAR(d_g.shape()); - GT_DEBUG_VAR(d_papbar_view.shape()); - GT_DEBUG_VAR(d_bar_apar_view.shape()); - GT_DEBUG_VAR(lhs_view.shape()); - GT_DEBUG_VAR(rhs_view.shape()); - - lhs_view = rhs_view; - - /* - d_f.view(gt::all, gt::all, gt::slice(nzb, -nzb), gt::slice(nvb, -nvb), - gt::slice(nwb, -nwb), gt::all) = d_g + d_papbar.view(gt::all, gt::newaxis, - gt::all, gt::all, gt::all, gt::all) * d_bar_apar.view(gt::all, gt::all, - gt::slice(nzb, -nzb), gt::newaxis, gt::all, gt::all); - */ - - gt::copy(d_f, h_f); - - // spot check boundary, not changed - EXPECT_EQ(h_f(3, 3, nzb - 1, nvb, nwb, 1), T(100.0)); - EXPECT_EQ(h_f(3, 3, nzb, nvb - 1, nwb, 1), T(100.0)); - EXPECT_EQ(h_f(3, 3, nzb, nvb, nwb - 1, 1), T(100.0)); - EXPECT_EQ( - h_f(0, 1, f_shape[2] - nzb, f_shape[3] - nvb - 1, f_shape[4] - nwb - 1, 1), - T(100.0)); - EXPECT_EQ( - h_f(0, 1, f_shape[2] - nzb - 1, f_shape[3] - nvb, f_shape[4] - nwb - 1, 1), - T(100.0)); - EXPECT_EQ( - h_f(0, 1, f_shape[2] - nzb - 1, f_shape[3] - nvb - 1, f_shape[4] - nwb, 1), - T(100.0)); - - // spot check inside that was changed - EXPECT_EQ(h_f(0, 1, f_shape[2] - nzb - 1, f_shape[3] - nvb - 1, - f_shape[4] - nwb - 1, 1), - T(2.0, -1.5)); - EXPECT_EQ(h_f(3, 3, nzb, nvb, nwb, 1), T(2.0, -1.5)); - - gt::synchronize(); -} + gt::copy(a, h_a); -TEST(assign, device_view_noncontiguous_6d_scalar) -{ - using T = gt::complex; + EXPECT_EQ(h_a, (gt::gtensor{5, 5, 5, 5, 5})); + } - int nzb = 2; - int nvb = 2; - int nwb = 0; + TEST(assign, device_gtensor_large_2d) + { + gt::gtensor_device a(gt::shape(2, 17920000)); + gt::gtensor_device b(a.shape()); + gt::gtensor h_a(a.shape()); + gt::gtensor h_b(a.shape()); - // ijklmn, ghost in z, v, w - auto f_shape = gt::shape(5, 7, 9, 11, 13, 2); + int* adata = h_a.data(); - auto h_f = gt::full(f_shape, T(100.0)); - auto d_f = gt::empty_device(f_shape); + for (int i = 0; i < a.size(); i++) { + adata[i] = i; + } - gt::copy(h_f, d_f); + gt::copy(h_a, a); + // NB: b = a calls the default operator= which ends up triggering + // and underlying storage vector copy, usually a device memcpy, so + // it doesn't launch the gtensor assign kernel. Call assign directly + // to exercise the code + assign(b, a); + gt::copy(b, h_b); - auto f_size = d_f.size(); - GT_DEBUG_VAR(d_f.shape()); - GT_DEBUG_VAR(f_size); + EXPECT_EQ(h_a, h_b); + } - auto d_f_noghost = + TEST(assign, device_view_noncontiguous_6d) + { + using T = gt::complex; + + int nzb = 2; + int nvb = 2; + int nwb = 2; + + // ijklmn, no ghost + auto g_shape = gt::shape(32, 4, 48, 40, 30, 2); + + // ijklmn, ghost in z, v, w + auto f_shape = + gt::shape(g_shape[0], g_shape[1], g_shape[2] + 2 * nzb, + g_shape[3] + 2 * nvb, g_shape[4] + 2 * nwb, g_shape[5]); + // i klmn, no ghost + auto papbar_shape = + gt::shape(g_shape[0], g_shape[2], g_shape[3], g_shape[4], g_shape[5]); + // ijz mn, ghost in z + auto bar_apar_shape = + gt::shape(g_shape[0], g_shape[1], f_shape[2], g_shape[4], g_shape[5]); + auto h_g = gt::full(g_shape, T(2.0)); + auto d_g = gt::empty_device(g_shape); + auto h_papbar = gt::full(papbar_shape, T(1.5)); + auto d_papbar = gt::empty_device(papbar_shape); + auto h_bar_apar = gt::full(bar_apar_shape, T(0.0, -1.0)); + auto d_bar_apar = gt::empty_device(bar_apar_shape); + auto h_f = gt::full(f_shape, T(100.0)); + auto d_f = gt::empty_device(f_shape); + + gt::copy(h_g, d_g); + gt::copy(h_papbar, d_papbar); + gt::copy(h_bar_apar, d_bar_apar); + gt::copy(h_f, d_f); + + auto lhs_view = + d_f.view(gt::all, gt::all, gt::slice(nzb, -nzb), gt::slice(nvb, -nvb), + gt::slice(nwb, -nwb), gt::all); + auto d_papbar_view = + d_papbar.view(gt::all, gt::newaxis, gt::all, gt::all, gt::all, gt::all); + auto d_bar_apar_view = d_bar_apar.view( + gt::all, gt::all, gt::slice(nzb, -nzb), gt::newaxis, gt::all, gt::all); + auto rhs_view = d_g + d_papbar_view * d_bar_apar_view; + + GT_DEBUG_VAR(d_g.shape()); + GT_DEBUG_VAR(d_papbar_view.shape()); + GT_DEBUG_VAR(d_bar_apar_view.shape()); + GT_DEBUG_VAR(lhs_view.shape()); + GT_DEBUG_VAR(rhs_view.shape()); + + lhs_view = rhs_view; + + /* d_f.view(gt::all, gt::all, gt::slice(nzb, -nzb), gt::slice(nvb, -nvb), - gt::slice(nwb, -nwb), gt::all); - auto f_noghost_size = d_f_noghost.size(); - GT_DEBUG_TYPE(d_f_noghost); - GT_DEBUG_VAR(d_f_noghost.shape()); - GT_DEBUG_VAR(f_noghost_size); - - d_f_noghost = 1.0; - - gt::copy(d_f, h_f); - - // spot check boundary, not changed - EXPECT_EQ(h_f(3, 3, nzb - 1, nvb, nwb, 1), T(100.0)); - EXPECT_EQ(h_f(3, 3, nzb, nvb - 1, nwb, 1), T(100.0)); - EXPECT_EQ( - h_f(0, 1, f_shape[2] - nzb, f_shape[3] - nvb - 1, f_shape[4] - nwb - 1, 1), - T(100.0)); - EXPECT_EQ( - h_f(0, 1, f_shape[2] - nzb - 1, f_shape[3] - nvb, f_shape[4] - nwb - 1, 1), - T(100.0)); - - // note: interior, since nwb == 0 - EXPECT_EQ(h_f(3, 3, nzb, nvb, nwb, 1), T(1.0)); - EXPECT_EQ( - h_f(0, 1, f_shape[2] - nzb - 1, f_shape[3] - nvb - 1, f_shape[4] - 1, 1), - T(1.0)); - - // spot check inside that was changed - EXPECT_EQ(h_f(3, 3, nzb, nvb, nwb, 1), T(1.0)); - - gt::synchronize(); -} + gt::slice(nwb, -nwb), gt::all) = d_g + d_papbar.view(gt::all, gt::newaxis, + gt::all, gt::all, gt::all, gt::all) * d_bar_apar.view(gt::all, gt::all, + gt::slice(nzb, -nzb), gt::newaxis, gt::all, gt::all); + */ + + gt::copy(d_f, h_f); + + // spot check boundary, not changed + EXPECT_EQ(h_f(3, 3, nzb - 1, nvb, nwb, 1), T(100.0)); + EXPECT_EQ(h_f(3, 3, nzb, nvb - 1, nwb, 1), T(100.0)); + EXPECT_EQ(h_f(3, 3, nzb, nvb, nwb - 1, 1), T(100.0)); + EXPECT_EQ(h_f(0, 1, f_shape[2] - nzb, f_shape[3] - nvb - 1, + f_shape[4] - nwb - 1, 1), + T(100.0)); + EXPECT_EQ(h_f(0, 1, f_shape[2] - nzb - 1, f_shape[3] - nvb, + f_shape[4] - nwb - 1, 1), + T(100.0)); + EXPECT_EQ(h_f(0, 1, f_shape[2] - nzb - 1, f_shape[3] - nvb - 1, + f_shape[4] - nwb, 1), + T(100.0)); + + // spot check inside that was changed + EXPECT_EQ(h_f(0, 1, f_shape[2] - nzb - 1, f_shape[3] - nvb - 1, + f_shape[4] - nwb - 1, 1), + T(2.0, -1.5)); + EXPECT_EQ(h_f(3, 3, nzb, nvb, nwb, 1), T(2.0, -1.5)); + + gt::synchronize(); + } -TEST(assign, device_gfunction_mismatch_throw) -{ - using T = gt::complex; + TEST(assign, device_view_noncontiguous_6d_scalar) + { + using T = gt::complex; - int nzb = 2; - int nvb = 2; - int nwb = 0; + int nzb = 2; + int nvb = 2; + int nwb = 0; - // ijklmn, ghost in z, v, w - auto f_shape = gt::shape(5, 7, 9, 11, 13, 2); + // ijklmn, ghost in z, v, w + auto f_shape = gt::shape(5, 7, 9, 11, 13, 2); - auto g_shape = - gt::shape(f_shape[0], f_shape[1], f_shape[2] - 2 * nzb, - f_shape[3] - 2 * nvb, f_shape[4] - 2 * nwb, f_shape[5]); + auto h_f = gt::full(f_shape, T(100.0)); + auto d_f = gt::empty_device(f_shape); - auto h_f = gt::full(f_shape, T(100.0)); - auto d_f = gt::empty_device(f_shape); - auto h_g = gt::full(g_shape, T(2.0)); - auto d_g = gt::empty_device(g_shape); + gt::copy(h_f, d_f); - gt::copy(h_f, d_f); - gt::copy(h_g, d_g); + auto f_size = d_f.size(); + GT_DEBUG_VAR(d_f.shape()); + GT_DEBUG_VAR(f_size); - EXPECT_THROW(h_g + h_f, std::runtime_error); - EXPECT_THROW(d_g + d_f, std::runtime_error); -} + auto d_f_noghost = + d_f.view(gt::all, gt::all, gt::slice(nzb, -nzb), gt::slice(nvb, -nvb), + gt::slice(nwb, -nwb), gt::all); + auto f_noghost_size = d_f_noghost.size(); + GT_DEBUG_TYPE(d_f_noghost); + GT_DEBUG_VAR(d_f_noghost.shape()); + GT_DEBUG_VAR(f_noghost_size); -namespace test -{ -template -using gtensor_managed = gt::gtensor_container, N>; -} // end namespace test + d_f_noghost = 1.0; -TEST(assign, device_gene_h_from_f) -{ - using T = gt::complex; + gt::copy(d_f, h_f); - const int nwb = 0; + // spot check boundary, not changed + EXPECT_EQ(h_f(3, 3, nzb - 1, nvb, nwb, 1), T(100.0)); + EXPECT_EQ(h_f(3, 3, nzb, nvb - 1, nwb, 1), T(100.0)); + EXPECT_EQ(h_f(0, 1, f_shape[2] - nzb, f_shape[3] - nvb - 1, + f_shape[4] - nwb - 1, 1), + T(100.0)); + EXPECT_EQ(h_f(0, 1, f_shape[2] - nzb - 1, f_shape[3] - nvb, + f_shape[4] - nwb - 1, 1), + T(100.0)); - // ijklmn, ghost in w - auto hdist_shape = gt::shape(32, 4, 48, 40, 30 + 2 * nwb, 2); - auto fdist_shape = gt::shape(32, 4, 48, 40, 30 + 2 * nwb, 2); + // note: interior, since nwb == 0 + EXPECT_EQ(h_f(3, 3, nzb, nvb, nwb, 1), T(1.0)); + EXPECT_EQ( + h_f(0, 1, f_shape[2] - nzb - 1, f_shape[3] - nvb - 1, f_shape[4] - 1, 1), + T(1.0)); - // ijklmn, no ghost - auto prefac_shape = gt::shape(32, 4, 48, 40, 30, 2); + // spot check inside that was changed + EXPECT_EQ(h_f(3, 3, nzb, nvb, nwb, 1), T(1.0)); - // ijk + ? axis - auto phi_shape = gt::shape(32, 4, 48, 2); + gt::synchronize(); + } - test::gtensor_managed hdist(hdist_shape, T(2.0)); - test::gtensor_managed fdist(fdist_shape, T(1.0)); - test::gtensor_managed prefac(prefac_shape, T(-1.0)); - test::gtensor_managed phi(phi_shape, T(-2.0)); - gt::gtensor expected(hdist_shape, T(3.0)); + TEST(assign, device_gfunction_mismatch_throw) + { + using T = gt::complex; - hdist.view(gt::all, gt::all, gt::all, gt::all, gt::slice(nwb, -nwb), - gt::all) = - fdist.view(gt::all, gt::all, gt::all, gt::all, gt::slice(nwb, -nwb), - gt::all) + - prefac * phi.view(gt::all, gt::all, gt::all, 0, gt::newaxis, gt::newaxis, - gt::newaxis); + int nzb = 2; + int nvb = 2; + int nwb = 0; - gt::synchronize(); + // ijklmn, ghost in z, v, w + auto f_shape = gt::shape(5, 7, 9, 11, 13, 2); - // spot check - EXPECT_EQ(hdist, expected); -} + auto g_shape = + gt::shape(f_shape[0], f_shape[1], f_shape[2] - 2 * nzb, + f_shape[3] - 2 * nvb, f_shape[4] - 2 * nwb, f_shape[5]); -TEST(assign, device_broadcast_6d) -{ - gt::gtensor_device a(gt::shape(8, 1, 2, 4, 1, 1), 0); - gt::gtensor_device b(gt::shape(8, 1, 2, 1, 1, 1), -7); + auto h_f = gt::full(f_shape, T(100.0)); + auto d_f = gt::empty_device(f_shape); + auto h_g = gt::full(g_shape, T(2.0)); + auto d_g = gt::empty_device(g_shape); - gt::gtensor h_a(a.shape()); + gt::copy(h_f, d_f); + gt::copy(h_g, d_g); - gt::assign(a, b); + EXPECT_THROW(h_g + h_f, std::runtime_error); + EXPECT_THROW(d_g + d_f, std::runtime_error); + } + + namespace test + { + template + using gtensor_managed = + gt::gtensor_container, N>; + } // end namespace test + + TEST(assign, device_gene_h_from_f) + { + using T = gt::complex; + + const int nwb = 0; + + // ijklmn, ghost in w + auto hdist_shape = gt::shape(32, 4, 48, 40, 30 + 2 * nwb, 2); + auto fdist_shape = gt::shape(32, 4, 48, 40, 30 + 2 * nwb, 2); + + // ijklmn, no ghost + auto prefac_shape = gt::shape(32, 4, 48, 40, 30, 2); + + // ijk + ? axis + auto phi_shape = gt::shape(32, 4, 48, 2); + + test::gtensor_managed hdist(hdist_shape, T(2.0)); + test::gtensor_managed fdist(fdist_shape, T(1.0)); + test::gtensor_managed prefac(prefac_shape, T(-1.0)); + test::gtensor_managed phi(phi_shape, T(-2.0)); + gt::gtensor expected(hdist_shape, T(3.0)); + + hdist.view(gt::all, gt::all, gt::all, gt::all, gt::slice(nwb, -nwb), + gt::all) = + fdist.view(gt::all, gt::all, gt::all, gt::all, gt::slice(nwb, -nwb), + gt::all) + + prefac * phi.view(gt::all, gt::all, gt::all, 0, gt::newaxis, gt::newaxis, + gt::newaxis); + + gt::synchronize(); + + // spot check + EXPECT_EQ(hdist, expected); + } - gt::copy(a, h_a); + TEST(assign, device_broadcast_6d) + { + gt::gtensor_device a(gt::shape(8, 1, 2, 4, 1, 1), 0); + gt::gtensor_device b(gt::shape(8, 1, 2, 1, 1, 1), -7); - for (int i = 0; i < h_a.shape(0); i++) { - for (int j = 0; j < h_a.shape(2); j++) { - for (int k = 0; k < h_a.shape(3); k++) { - EXPECT_EQ(h_a(i, 0, j, k, 0, 0), -7); + gt::gtensor h_a(a.shape()); + + gt::assign(a, b); + + gt::copy(a, h_a); + + for (int i = 0; i < h_a.shape(0); i++) { + for (int j = 0; j < h_a.shape(2); j++) { + for (int k = 0; k < h_a.shape(3); k++) { + EXPECT_EQ(h_a(i, 0, j, k, 0, 0), -7); + } } } } -} + + TEST(assign, device_broadcast_7d) + { + gt::gtensor_device a(gt::shape(8, 1, 2, 4, 1, 1, 4), 0); + gt::gtensor_device b(gt::shape(8, 1, 2, 1, 1, 1, 4), -7); + + gt::gtensor h_a(a.shape()); + + gt::assign(a, b); + + gt::copy(a, h_a); + + for (int i = 0; i < h_a.shape(0); i++) { + for (int j = 0; j < h_a.shape(2); j++) { + for (int k = 0; k < h_a.shape(3); k++) { + for (int l = 0; l < h_a.shape(6); l++) { + EXPECT_EQ(h_a(i, 0, j, k, 0, 0, l), -7); + } + } + } + } #endif // GTENSOR_HAVE_DEVICE From 774360ebd60d0ed7cf89223fa2e4931096afc79b Mon Sep 17 00:00:00 2001 From: gabriele merlo Date: Mon, 2 Sep 2024 15:02:57 +0200 Subject: [PATCH 5/7] Bugfix, forgot parenthesis --- tests/test_assign.cxx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_assign.cxx b/tests/test_assign.cxx index ff63ac5f..1b1d6b5b 100644 --- a/tests/test_assign.cxx +++ b/tests/test_assign.cxx @@ -113,7 +113,7 @@ TEST(assign, broadcast_7d) } } } - +} #ifdef GTENSOR_HAVE_DEVICE TEST(assign, device_gtensor_6d) @@ -478,5 +478,6 @@ TEST(assign, broadcast_7d) } } } + } #endif // GTENSOR_HAVE_DEVICE From 9217325515e6c8819d0525fc6bda214f11f67a78 Mon Sep 17 00:00:00 2001 From: gabriele merlo Date: Mon, 2 Sep 2024 16:44:51 +0200 Subject: [PATCH 6/7] run clang-format --- tests/test_assign.cxx | 580 +++++++++++++++++++++--------------------- 1 file changed, 289 insertions(+), 291 deletions(-) diff --git a/tests/test_assign.cxx b/tests/test_assign.cxx index 1b1d6b5b..06f76660 100644 --- a/tests/test_assign.cxx +++ b/tests/test_assign.cxx @@ -116,368 +116,366 @@ TEST(assign, broadcast_7d) } #ifdef GTENSOR_HAVE_DEVICE - TEST(assign, device_gtensor_6d) - { - gt::gtensor_device a(gt::shape(2, 3, 4, 5, 6, 7)); - gt::gtensor_device b(a.shape()); - gt::gtensor h_a(a.shape()); - gt::gtensor h_b(a.shape()); - - int* adata = h_a.data(); - - for (int i = 0; i < a.size(); i++) { - adata[i] = i; - } +TEST(assign, device_gtensor_6d) +{ + gt::gtensor_device a(gt::shape(2, 3, 4, 5, 6, 7)); + gt::gtensor_device b(a.shape()); + gt::gtensor h_a(a.shape()); + gt::gtensor h_b(a.shape()); - gt::copy(h_a, a); - b = a; - gt::copy(b, h_b); + int* adata = h_a.data(); - EXPECT_EQ(h_a, h_b); + for (int i = 0; i < a.size(); i++) { + adata[i] = i; } - TEST(assign, device_gtensor_7d) - { - gt::gtensor_device a(gt::shape(2, 3, 4, 5, 6, 7, 8)); - gt::gtensor_device b(a.shape()); - gt::gtensor h_a(a.shape()); - gt::gtensor h_b(a.shape()); + gt::copy(h_a, a); + b = a; + gt::copy(b, h_b); - int* adata = h_a.data(); + EXPECT_EQ(h_a, h_b); +} - for (int i = 0; i < a.size(); i++) { - adata[i] = i; - } +TEST(assign, device_gtensor_7d) +{ + gt::gtensor_device a(gt::shape(2, 3, 4, 5, 6, 7, 8)); + gt::gtensor_device b(a.shape()); + gt::gtensor h_a(a.shape()); + gt::gtensor h_b(a.shape()); - gt::copy(h_a, a); - b = a; - gt::copy(b, h_b); + int* adata = h_a.data(); - EXPECT_EQ(h_a, h_b); + for (int i = 0; i < a.size(); i++) { + adata[i] = i; } - TEST(assign, device_gtensor_fill) - { - gt::gtensor_device a(gt::shape(2, 3)); - gt::gtensor h_a(a.shape()); - - a.fill(9001); + gt::copy(h_a, a); + b = a; + gt::copy(b, h_b); - gt::copy(a, h_a); - EXPECT_EQ(h_a, (gt::gtensor_device{ - {9001, 9001}, {9001, 9001}, {9001, 9001}})); - } + EXPECT_EQ(h_a, h_b); +} - TEST(assign, device_gview_fill) - { - gt::gtensor_device a(gt::shape(2, 3)); - gt::gtensor h_a(a.shape()); - auto av = a.view(gt::all, gt::all); +TEST(assign, device_gtensor_fill) +{ + gt::gtensor_device a(gt::shape(2, 3)); + gt::gtensor h_a(a.shape()); - av.fill(9001); + a.fill(9001); - gt::copy(a, h_a); - EXPECT_EQ( - h_a, (gt::gtensor{{9001, 9001}, {9001, 9001}, {9001, 9001}})); - } + gt::copy(a, h_a); + EXPECT_EQ(h_a, (gt::gtensor_device{ + {9001, 9001}, {9001, 9001}, {9001, 9001}})); +} - TEST(assign, device_span_fill) - { - gt::gtensor_device a(gt::shape(2, 3)); - gt::gtensor h_a(a.shape()); - auto as = a.to_kernel(); +TEST(assign, device_gview_fill) +{ + gt::gtensor_device a(gt::shape(2, 3)); + gt::gtensor h_a(a.shape()); + auto av = a.view(gt::all, gt::all); - as.fill(9001); + av.fill(9001); - gt::copy(a, h_a); - EXPECT_EQ( - h_a, (gt::gtensor{{9001, 9001}, {9001, 9001}, {9001, 9001}})); - } + gt::copy(a, h_a); + EXPECT_EQ(h_a, + (gt::gtensor{{9001, 9001}, {9001, 9001}, {9001, 9001}})); +} - TEST(assign, device_gview_1d_scalar) - { - auto a = gt::empty_device(gt::shape(5)); - auto h_a = gt::empty_like(a); - auto aview = a.view(gt::all); +TEST(assign, device_span_fill) +{ + gt::gtensor_device a(gt::shape(2, 3)); + gt::gtensor h_a(a.shape()); + auto as = a.to_kernel(); - aview = gt::scalar(5); + as.fill(9001); - gt::copy(a, h_a); + gt::copy(a, h_a); + EXPECT_EQ(h_a, + (gt::gtensor{{9001, 9001}, {9001, 9001}, {9001, 9001}})); +} - EXPECT_EQ(h_a, (gt::gtensor{5, 5, 5, 5, 5})); - } +TEST(assign, device_gview_1d_scalar) +{ + auto a = gt::empty_device(gt::shape(5)); + auto h_a = gt::empty_like(a); + auto aview = a.view(gt::all); - TEST(assign, device_gtensor_large_2d) - { - gt::gtensor_device a(gt::shape(2, 17920000)); - gt::gtensor_device b(a.shape()); - gt::gtensor h_a(a.shape()); - gt::gtensor h_b(a.shape()); + aview = gt::scalar(5); - int* adata = h_a.data(); + gt::copy(a, h_a); - for (int i = 0; i < a.size(); i++) { - adata[i] = i; - } + EXPECT_EQ(h_a, (gt::gtensor{5, 5, 5, 5, 5})); +} - gt::copy(h_a, a); - // NB: b = a calls the default operator= which ends up triggering - // and underlying storage vector copy, usually a device memcpy, so - // it doesn't launch the gtensor assign kernel. Call assign directly - // to exercise the code - assign(b, a); - gt::copy(b, h_b); +TEST(assign, device_gtensor_large_2d) +{ + gt::gtensor_device a(gt::shape(2, 17920000)); + gt::gtensor_device b(a.shape()); + gt::gtensor h_a(a.shape()); + gt::gtensor h_b(a.shape()); - EXPECT_EQ(h_a, h_b); - } + int* adata = h_a.data(); - TEST(assign, device_view_noncontiguous_6d) - { - using T = gt::complex; - - int nzb = 2; - int nvb = 2; - int nwb = 2; - - // ijklmn, no ghost - auto g_shape = gt::shape(32, 4, 48, 40, 30, 2); - - // ijklmn, ghost in z, v, w - auto f_shape = - gt::shape(g_shape[0], g_shape[1], g_shape[2] + 2 * nzb, - g_shape[3] + 2 * nvb, g_shape[4] + 2 * nwb, g_shape[5]); - // i klmn, no ghost - auto papbar_shape = - gt::shape(g_shape[0], g_shape[2], g_shape[3], g_shape[4], g_shape[5]); - // ijz mn, ghost in z - auto bar_apar_shape = - gt::shape(g_shape[0], g_shape[1], f_shape[2], g_shape[4], g_shape[5]); - auto h_g = gt::full(g_shape, T(2.0)); - auto d_g = gt::empty_device(g_shape); - auto h_papbar = gt::full(papbar_shape, T(1.5)); - auto d_papbar = gt::empty_device(papbar_shape); - auto h_bar_apar = gt::full(bar_apar_shape, T(0.0, -1.0)); - auto d_bar_apar = gt::empty_device(bar_apar_shape); - auto h_f = gt::full(f_shape, T(100.0)); - auto d_f = gt::empty_device(f_shape); - - gt::copy(h_g, d_g); - gt::copy(h_papbar, d_papbar); - gt::copy(h_bar_apar, d_bar_apar); - gt::copy(h_f, d_f); - - auto lhs_view = - d_f.view(gt::all, gt::all, gt::slice(nzb, -nzb), gt::slice(nvb, -nvb), - gt::slice(nwb, -nwb), gt::all); - auto d_papbar_view = - d_papbar.view(gt::all, gt::newaxis, gt::all, gt::all, gt::all, gt::all); - auto d_bar_apar_view = d_bar_apar.view( - gt::all, gt::all, gt::slice(nzb, -nzb), gt::newaxis, gt::all, gt::all); - auto rhs_view = d_g + d_papbar_view * d_bar_apar_view; - - GT_DEBUG_VAR(d_g.shape()); - GT_DEBUG_VAR(d_papbar_view.shape()); - GT_DEBUG_VAR(d_bar_apar_view.shape()); - GT_DEBUG_VAR(lhs_view.shape()); - GT_DEBUG_VAR(rhs_view.shape()); - - lhs_view = rhs_view; - - /* - d_f.view(gt::all, gt::all, gt::slice(nzb, -nzb), gt::slice(nvb, -nvb), - gt::slice(nwb, -nwb), gt::all) = d_g + d_papbar.view(gt::all, gt::newaxis, - gt::all, gt::all, gt::all, gt::all) * d_bar_apar.view(gt::all, gt::all, - gt::slice(nzb, -nzb), gt::newaxis, gt::all, gt::all); - */ - - gt::copy(d_f, h_f); - - // spot check boundary, not changed - EXPECT_EQ(h_f(3, 3, nzb - 1, nvb, nwb, 1), T(100.0)); - EXPECT_EQ(h_f(3, 3, nzb, nvb - 1, nwb, 1), T(100.0)); - EXPECT_EQ(h_f(3, 3, nzb, nvb, nwb - 1, 1), T(100.0)); - EXPECT_EQ(h_f(0, 1, f_shape[2] - nzb, f_shape[3] - nvb - 1, - f_shape[4] - nwb - 1, 1), - T(100.0)); - EXPECT_EQ(h_f(0, 1, f_shape[2] - nzb - 1, f_shape[3] - nvb, - f_shape[4] - nwb - 1, 1), - T(100.0)); - EXPECT_EQ(h_f(0, 1, f_shape[2] - nzb - 1, f_shape[3] - nvb - 1, - f_shape[4] - nwb, 1), - T(100.0)); - - // spot check inside that was changed - EXPECT_EQ(h_f(0, 1, f_shape[2] - nzb - 1, f_shape[3] - nvb - 1, - f_shape[4] - nwb - 1, 1), - T(2.0, -1.5)); - EXPECT_EQ(h_f(3, 3, nzb, nvb, nwb, 1), T(2.0, -1.5)); - - gt::synchronize(); + for (int i = 0; i < a.size(); i++) { + adata[i] = i; } - TEST(assign, device_view_noncontiguous_6d_scalar) - { - using T = gt::complex; + gt::copy(h_a, a); + // NB: b = a calls the default operator= which ends up triggering + // and underlying storage vector copy, usually a device memcpy, so + // it doesn't launch the gtensor assign kernel. Call assign directly + // to exercise the code + assign(b, a); + gt::copy(b, h_b); - int nzb = 2; - int nvb = 2; - int nwb = 0; - - // ijklmn, ghost in z, v, w - auto f_shape = gt::shape(5, 7, 9, 11, 13, 2); - - auto h_f = gt::full(f_shape, T(100.0)); - auto d_f = gt::empty_device(f_shape); - - gt::copy(h_f, d_f); + EXPECT_EQ(h_a, h_b); +} - auto f_size = d_f.size(); - GT_DEBUG_VAR(d_f.shape()); - GT_DEBUG_VAR(f_size); +TEST(assign, device_view_noncontiguous_6d) +{ + using T = gt::complex; + + int nzb = 2; + int nvb = 2; + int nwb = 2; + + // ijklmn, no ghost + auto g_shape = gt::shape(32, 4, 48, 40, 30, 2); + + // ijklmn, ghost in z, v, w + auto f_shape = + gt::shape(g_shape[0], g_shape[1], g_shape[2] + 2 * nzb, + g_shape[3] + 2 * nvb, g_shape[4] + 2 * nwb, g_shape[5]); + // i klmn, no ghost + auto papbar_shape = + gt::shape(g_shape[0], g_shape[2], g_shape[3], g_shape[4], g_shape[5]); + // ijz mn, ghost in z + auto bar_apar_shape = + gt::shape(g_shape[0], g_shape[1], f_shape[2], g_shape[4], g_shape[5]); + auto h_g = gt::full(g_shape, T(2.0)); + auto d_g = gt::empty_device(g_shape); + auto h_papbar = gt::full(papbar_shape, T(1.5)); + auto d_papbar = gt::empty_device(papbar_shape); + auto h_bar_apar = gt::full(bar_apar_shape, T(0.0, -1.0)); + auto d_bar_apar = gt::empty_device(bar_apar_shape); + auto h_f = gt::full(f_shape, T(100.0)); + auto d_f = gt::empty_device(f_shape); + + gt::copy(h_g, d_g); + gt::copy(h_papbar, d_papbar); + gt::copy(h_bar_apar, d_bar_apar); + gt::copy(h_f, d_f); + + auto lhs_view = d_f.view(gt::all, gt::all, gt::slice(nzb, -nzb), + gt::slice(nvb, -nvb), gt::slice(nwb, -nwb), gt::all); + auto d_papbar_view = + d_papbar.view(gt::all, gt::newaxis, gt::all, gt::all, gt::all, gt::all); + auto d_bar_apar_view = d_bar_apar.view(gt::all, gt::all, gt::slice(nzb, -nzb), + gt::newaxis, gt::all, gt::all); + auto rhs_view = d_g + d_papbar_view * d_bar_apar_view; + + GT_DEBUG_VAR(d_g.shape()); + GT_DEBUG_VAR(d_papbar_view.shape()); + GT_DEBUG_VAR(d_bar_apar_view.shape()); + GT_DEBUG_VAR(lhs_view.shape()); + GT_DEBUG_VAR(rhs_view.shape()); + + lhs_view = rhs_view; + + /* + d_f.view(gt::all, gt::all, gt::slice(nzb, -nzb), gt::slice(nvb, -nvb), + gt::slice(nwb, -nwb), gt::all) = d_g + d_papbar.view(gt::all, gt::newaxis, + gt::all, gt::all, gt::all, gt::all) * d_bar_apar.view(gt::all, gt::all, + gt::slice(nzb, -nzb), gt::newaxis, gt::all, gt::all); + */ + + gt::copy(d_f, h_f); + + // spot check boundary, not changed + EXPECT_EQ(h_f(3, 3, nzb - 1, nvb, nwb, 1), T(100.0)); + EXPECT_EQ(h_f(3, 3, nzb, nvb - 1, nwb, 1), T(100.0)); + EXPECT_EQ(h_f(3, 3, nzb, nvb, nwb - 1, 1), T(100.0)); + EXPECT_EQ( + h_f(0, 1, f_shape[2] - nzb, f_shape[3] - nvb - 1, f_shape[4] - nwb - 1, 1), + T(100.0)); + EXPECT_EQ( + h_f(0, 1, f_shape[2] - nzb - 1, f_shape[3] - nvb, f_shape[4] - nwb - 1, 1), + T(100.0)); + EXPECT_EQ( + h_f(0, 1, f_shape[2] - nzb - 1, f_shape[3] - nvb - 1, f_shape[4] - nwb, 1), + T(100.0)); + + // spot check inside that was changed + EXPECT_EQ(h_f(0, 1, f_shape[2] - nzb - 1, f_shape[3] - nvb - 1, + f_shape[4] - nwb - 1, 1), + T(2.0, -1.5)); + EXPECT_EQ(h_f(3, 3, nzb, nvb, nwb, 1), T(2.0, -1.5)); + + gt::synchronize(); +} - auto d_f_noghost = - d_f.view(gt::all, gt::all, gt::slice(nzb, -nzb), gt::slice(nvb, -nvb), - gt::slice(nwb, -nwb), gt::all); - auto f_noghost_size = d_f_noghost.size(); - GT_DEBUG_TYPE(d_f_noghost); - GT_DEBUG_VAR(d_f_noghost.shape()); - GT_DEBUG_VAR(f_noghost_size); +TEST(assign, device_view_noncontiguous_6d_scalar) +{ + using T = gt::complex; - d_f_noghost = 1.0; + int nzb = 2; + int nvb = 2; + int nwb = 0; - gt::copy(d_f, h_f); + // ijklmn, ghost in z, v, w + auto f_shape = gt::shape(5, 7, 9, 11, 13, 2); - // spot check boundary, not changed - EXPECT_EQ(h_f(3, 3, nzb - 1, nvb, nwb, 1), T(100.0)); - EXPECT_EQ(h_f(3, 3, nzb, nvb - 1, nwb, 1), T(100.0)); - EXPECT_EQ(h_f(0, 1, f_shape[2] - nzb, f_shape[3] - nvb - 1, - f_shape[4] - nwb - 1, 1), - T(100.0)); - EXPECT_EQ(h_f(0, 1, f_shape[2] - nzb - 1, f_shape[3] - nvb, - f_shape[4] - nwb - 1, 1), - T(100.0)); + auto h_f = gt::full(f_shape, T(100.0)); + auto d_f = gt::empty_device(f_shape); - // note: interior, since nwb == 0 - EXPECT_EQ(h_f(3, 3, nzb, nvb, nwb, 1), T(1.0)); - EXPECT_EQ( - h_f(0, 1, f_shape[2] - nzb - 1, f_shape[3] - nvb - 1, f_shape[4] - 1, 1), - T(1.0)); + gt::copy(h_f, d_f); - // spot check inside that was changed - EXPECT_EQ(h_f(3, 3, nzb, nvb, nwb, 1), T(1.0)); + auto f_size = d_f.size(); + GT_DEBUG_VAR(d_f.shape()); + GT_DEBUG_VAR(f_size); - gt::synchronize(); - } + auto d_f_noghost = + d_f.view(gt::all, gt::all, gt::slice(nzb, -nzb), gt::slice(nvb, -nvb), + gt::slice(nwb, -nwb), gt::all); + auto f_noghost_size = d_f_noghost.size(); + GT_DEBUG_TYPE(d_f_noghost); + GT_DEBUG_VAR(d_f_noghost.shape()); + GT_DEBUG_VAR(f_noghost_size); + + d_f_noghost = 1.0; + + gt::copy(d_f, h_f); + + // spot check boundary, not changed + EXPECT_EQ(h_f(3, 3, nzb - 1, nvb, nwb, 1), T(100.0)); + EXPECT_EQ(h_f(3, 3, nzb, nvb - 1, nwb, 1), T(100.0)); + EXPECT_EQ( + h_f(0, 1, f_shape[2] - nzb, f_shape[3] - nvb - 1, f_shape[4] - nwb - 1, 1), + T(100.0)); + EXPECT_EQ( + h_f(0, 1, f_shape[2] - nzb - 1, f_shape[3] - nvb, f_shape[4] - nwb - 1, 1), + T(100.0)); + + // note: interior, since nwb == 0 + EXPECT_EQ(h_f(3, 3, nzb, nvb, nwb, 1), T(1.0)); + EXPECT_EQ( + h_f(0, 1, f_shape[2] - nzb - 1, f_shape[3] - nvb - 1, f_shape[4] - 1, 1), + T(1.0)); + + // spot check inside that was changed + EXPECT_EQ(h_f(3, 3, nzb, nvb, nwb, 1), T(1.0)); + + gt::synchronize(); +} - TEST(assign, device_gfunction_mismatch_throw) - { - using T = gt::complex; +TEST(assign, device_gfunction_mismatch_throw) +{ + using T = gt::complex; - int nzb = 2; - int nvb = 2; - int nwb = 0; + int nzb = 2; + int nvb = 2; + int nwb = 0; - // ijklmn, ghost in z, v, w - auto f_shape = gt::shape(5, 7, 9, 11, 13, 2); + // ijklmn, ghost in z, v, w + auto f_shape = gt::shape(5, 7, 9, 11, 13, 2); - auto g_shape = - gt::shape(f_shape[0], f_shape[1], f_shape[2] - 2 * nzb, - f_shape[3] - 2 * nvb, f_shape[4] - 2 * nwb, f_shape[5]); + auto g_shape = + gt::shape(f_shape[0], f_shape[1], f_shape[2] - 2 * nzb, + f_shape[3] - 2 * nvb, f_shape[4] - 2 * nwb, f_shape[5]); - auto h_f = gt::full(f_shape, T(100.0)); - auto d_f = gt::empty_device(f_shape); - auto h_g = gt::full(g_shape, T(2.0)); - auto d_g = gt::empty_device(g_shape); + auto h_f = gt::full(f_shape, T(100.0)); + auto d_f = gt::empty_device(f_shape); + auto h_g = gt::full(g_shape, T(2.0)); + auto d_g = gt::empty_device(g_shape); - gt::copy(h_f, d_f); - gt::copy(h_g, d_g); + gt::copy(h_f, d_f); + gt::copy(h_g, d_g); - EXPECT_THROW(h_g + h_f, std::runtime_error); - EXPECT_THROW(d_g + d_f, std::runtime_error); - } + EXPECT_THROW(h_g + h_f, std::runtime_error); + EXPECT_THROW(d_g + d_f, std::runtime_error); +} - namespace test - { - template - using gtensor_managed = - gt::gtensor_container, N>; - } // end namespace test +namespace test +{ +template +using gtensor_managed = gt::gtensor_container, N>; +} // end namespace test - TEST(assign, device_gene_h_from_f) - { - using T = gt::complex; +TEST(assign, device_gene_h_from_f) +{ + using T = gt::complex; - const int nwb = 0; + const int nwb = 0; - // ijklmn, ghost in w - auto hdist_shape = gt::shape(32, 4, 48, 40, 30 + 2 * nwb, 2); - auto fdist_shape = gt::shape(32, 4, 48, 40, 30 + 2 * nwb, 2); + // ijklmn, ghost in w + auto hdist_shape = gt::shape(32, 4, 48, 40, 30 + 2 * nwb, 2); + auto fdist_shape = gt::shape(32, 4, 48, 40, 30 + 2 * nwb, 2); - // ijklmn, no ghost - auto prefac_shape = gt::shape(32, 4, 48, 40, 30, 2); + // ijklmn, no ghost + auto prefac_shape = gt::shape(32, 4, 48, 40, 30, 2); - // ijk + ? axis - auto phi_shape = gt::shape(32, 4, 48, 2); + // ijk + ? axis + auto phi_shape = gt::shape(32, 4, 48, 2); - test::gtensor_managed hdist(hdist_shape, T(2.0)); - test::gtensor_managed fdist(fdist_shape, T(1.0)); - test::gtensor_managed prefac(prefac_shape, T(-1.0)); - test::gtensor_managed phi(phi_shape, T(-2.0)); - gt::gtensor expected(hdist_shape, T(3.0)); + test::gtensor_managed hdist(hdist_shape, T(2.0)); + test::gtensor_managed fdist(fdist_shape, T(1.0)); + test::gtensor_managed prefac(prefac_shape, T(-1.0)); + test::gtensor_managed phi(phi_shape, T(-2.0)); + gt::gtensor expected(hdist_shape, T(3.0)); - hdist.view(gt::all, gt::all, gt::all, gt::all, gt::slice(nwb, -nwb), - gt::all) = - fdist.view(gt::all, gt::all, gt::all, gt::all, gt::slice(nwb, -nwb), - gt::all) + - prefac * phi.view(gt::all, gt::all, gt::all, 0, gt::newaxis, gt::newaxis, - gt::newaxis); + hdist.view(gt::all, gt::all, gt::all, gt::all, gt::slice(nwb, -nwb), + gt::all) = + fdist.view(gt::all, gt::all, gt::all, gt::all, gt::slice(nwb, -nwb), + gt::all) + + prefac * phi.view(gt::all, gt::all, gt::all, 0, gt::newaxis, gt::newaxis, + gt::newaxis); - gt::synchronize(); + gt::synchronize(); - // spot check - EXPECT_EQ(hdist, expected); - } + // spot check + EXPECT_EQ(hdist, expected); +} - TEST(assign, device_broadcast_6d) - { - gt::gtensor_device a(gt::shape(8, 1, 2, 4, 1, 1), 0); - gt::gtensor_device b(gt::shape(8, 1, 2, 1, 1, 1), -7); +TEST(assign, device_broadcast_6d) +{ + gt::gtensor_device a(gt::shape(8, 1, 2, 4, 1, 1), 0); + gt::gtensor_device b(gt::shape(8, 1, 2, 1, 1, 1), -7); - gt::gtensor h_a(a.shape()); + gt::gtensor h_a(a.shape()); - gt::assign(a, b); + gt::assign(a, b); - gt::copy(a, h_a); + gt::copy(a, h_a); - for (int i = 0; i < h_a.shape(0); i++) { - for (int j = 0; j < h_a.shape(2); j++) { - for (int k = 0; k < h_a.shape(3); k++) { - EXPECT_EQ(h_a(i, 0, j, k, 0, 0), -7); - } + for (int i = 0; i < h_a.shape(0); i++) { + for (int j = 0; j < h_a.shape(2); j++) { + for (int k = 0; k < h_a.shape(3); k++) { + EXPECT_EQ(h_a(i, 0, j, k, 0, 0), -7); } } } +} - TEST(assign, device_broadcast_7d) - { - gt::gtensor_device a(gt::shape(8, 1, 2, 4, 1, 1, 4), 0); - gt::gtensor_device b(gt::shape(8, 1, 2, 1, 1, 1, 4), -7); +TEST(assign, device_broadcast_7d) +{ + gt::gtensor_device a(gt::shape(8, 1, 2, 4, 1, 1, 4), 0); + gt::gtensor_device b(gt::shape(8, 1, 2, 1, 1, 1, 4), -7); - gt::gtensor h_a(a.shape()); + gt::gtensor h_a(a.shape()); - gt::assign(a, b); + gt::assign(a, b); - gt::copy(a, h_a); + gt::copy(a, h_a); - for (int i = 0; i < h_a.shape(0); i++) { - for (int j = 0; j < h_a.shape(2); j++) { - for (int k = 0; k < h_a.shape(3); k++) { - for (int l = 0; l < h_a.shape(6); l++) { - EXPECT_EQ(h_a(i, 0, j, k, 0, 0, l), -7); - } + for (int i = 0; i < h_a.shape(0); i++) { + for (int j = 0; j < h_a.shape(2); j++) { + for (int k = 0; k < h_a.shape(3); k++) { + for (int l = 0; l < h_a.shape(6); l++) { + EXPECT_EQ(h_a(i, 0, j, k, 0, 0, l), -7); } } } } +} #endif // GTENSOR_HAVE_DEVICE From 1480f034c4bb9aea913c0db3d6392ee80c53a85d Mon Sep 17 00:00:00 2001 From: Bryce Allen Date: Tue, 17 Sep 2024 12:48:43 -0400 Subject: [PATCH 7/7] tests: fix 7d launch init indexing --- tests/test_launch.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_launch.cxx b/tests/test_launch.cxx index 0b86cb95..6ad2a378 100644 --- a/tests/test_launch.cxx +++ b/tests/test_launch.cxx @@ -271,7 +271,7 @@ TEST(gtensor, device_launch_7d) for (int l = 0; l < h_a.shape(3); l++) { for (int m = 0; m < h_a.shape(4); m++) { for (int n = 0; n < h_a.shape(5); n++) { - for (int o = 0; n < h_a.shape(6); o++) { + for (int o = 0; o < h_a.shape(6); o++) { h_a(i, j, k, l, m, n, o) = i + j + k + l + m + n + o; } }