Skip to content

Commit

Permalink
update sycl_ext_complex
Browse files Browse the repository at this point in the history
For some reason the gtensor-specified AssignN kernel names were missing
the second sycl_cplx::complex type arg, and failed to compile. Using
only the to_kernel types fixes the issue, and also makes the names more
compact and easier to read. I don't think the removed E1/E2 expression
type arguments are actually necessary to avoid duplication; the kernel
types alone should be unique.
  • Loading branch information
bd4 committed Jan 18, 2023
1 parent 3ecb09a commit 05b0bc2
Show file tree
Hide file tree
Showing 4 changed files with 346 additions and 707 deletions.
14 changes: 7 additions & 7 deletions include/gtensor/assign.h
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ struct assigner<1, space::device>
auto e = q.submit([&](sycl::handler& cgh) {
using ltype = decltype(k_lhs);
using rtype = decltype(k_rhs);
using kname = gt::backend::sycl::Assign1<E1, E2, ltype, rtype>;
using kname = gt::backend::sycl::Assign1<ltype, rtype>;
cgh.parallel_for<kname>(range, [=](sycl::item<1> item) {
auto i = item.get_id();
k_lhs(i) = k_rhs(i);
Expand All @@ -427,7 +427,7 @@ struct assigner<2, space::device>
auto e = q.submit([&](sycl::handler& cgh) {
using ltype = decltype(k_lhs);
using rtype = decltype(k_rhs);
using kname = gt::backend::sycl::Assign2<E1, E2, ltype, rtype>;
using kname = gt::backend::sycl::Assign2<ltype, rtype>;
cgh.parallel_for<kname>(range, [=](sycl::item<2> item) {
auto i = item.get_id(1);
auto j = item.get_id(0);
Expand All @@ -454,7 +454,7 @@ struct assigner<3, space::device>
auto e = q.submit([&](sycl::handler& cgh) {
using ltype = decltype(k_lhs);
using rtype = decltype(k_rhs);
using kname = gt::backend::sycl::Assign3<E1, E2, ltype, rtype>;
using kname = gt::backend::sycl::Assign3<ltype, rtype>;
cgh.parallel_for<kname>(range, [=](sycl::item<3> item) {
auto i = item.get_id(2);
auto j = item.get_id(1);
Expand Down Expand Up @@ -493,16 +493,16 @@ struct assigner<N, space::device>
q.copy(&k_rhs, d_rhs_p, 1).wait();

auto e = q.submit([&](sycl::handler& cgh) {
using kname = gt::backend::sycl::AssignN<E1, E2, ltype, rtype>;
cgh.parallel_for<kname>(sycl::range<1>(size), [=](sycl::id<1> i) {
using kname = gt::backend::sycl::AssignN<ltype, rtype>;
cgh.parallel_for(sycl::range<1>(size), [=](sycl::id<1> i) {
auto idx = unravel(i, strides);
index_expression(k_lhs, idx) = index_expression(*d_rhs_p, idx);
});
});
} else {
auto e = q.submit([&](sycl::handler& cgh) {
using kname = gt::backend::sycl::AssignN<E1, E2, ltype, rtype>;
cgh.parallel_for<kname>(sycl::range<1>(size), [=](sycl::id<1> i) {
using kname = gt::backend::sycl::AssignN<ltype, rtype>;
cgh.parallel_for(sycl::range<1>(size), [=](sycl::id<1> i) {
auto idx = unravel(i, strides);
index_expression(k_lhs, idx) = index_expression(k_rhs, idx);
});
Expand Down
8 changes: 4 additions & 4 deletions include/gtensor/backend_sycl.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@ namespace sycl
{

// kernel name templates
template <typename E1, typename E2, typename K1, typename K2>
template <typename K1, typename K2>
class Assign1;
template <typename E1, typename E2, typename K1, typename K2>
template <typename K1, typename K2>
class Assign2;
template <typename E1, typename E2, typename K1, typename K2>
template <typename K1, typename K2>
class Assign3;
template <typename E1, typename E2, typename K1, typename K2>
template <typename K1, typename K2>
class AssignN;

template <typename F>
Expand Down
2 changes: 1 addition & 1 deletion include/gtensor/complex.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ using complex = thrust::complex<T>;

// TODO: this will hopefully be standardized soon and be sycl::complex
template <typename T>
using complex = gt::sycl_cplx::complex<T>;
using complex = gt::sycl_cplx::complex<T, void>;

#else // fallback to std::complex, e.g. for host backend

Expand Down
Loading

0 comments on commit 05b0bc2

Please sign in to comment.