Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL][Bindless] Image Array Sub-Region Copy #14954

Merged
merged 10 commits into from
Sep 12, 2024
12 changes: 6 additions & 6 deletions sycl/cmake/modules/FetchUnifiedRuntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,13 @@ if(SYCL_UR_USE_FETCH_CONTENT)
endfunction()

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
# commit 24a8299efc59c715a1c2dd180692a5e12a12283a
# Merge: eb63d1a2 2fea679d
# commit 2bbe952669861579ea84fa30f14e1ed27ead0692
# Merge: d357964a 6b353545
# Author: Omar Ahmed <[email protected]>
# Date: Wed Sep 11 10:40:59 2024 +0100
# Merge pull request #2078 from callumfare/callum/fix_device_extensions_fpga
# Add workaround for silently supported OpenCL extensions on Intel FPGA
set(UNIFIED_RUNTIME_TAG 24a8299efc59c715a1c2dd180692a5e12a12283a)
# Date: Thu Sep 12 11:36:11 2024 +0100
# Merge pull request #1928 from isaacault/iault/image_array_copy
# [Bindless][Exp] Image Array Sub-Region Copies
set(UNIFIED_RUNTIME_TAG 2bbe952669861579ea84fa30f14e1ed27ead0692)

set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
# Due to the use of dependentloadflag and no installer for UMF and hwloc we need
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -744,7 +744,7 @@ address mode `clamp_to_edge` will be applied for all dimensions. If the
performed when sampling along the cube face borders.
====

=== Explicit copies
=== Explicit copies [[explicit_copies]]

```cpp
namespace sycl {
Expand Down Expand Up @@ -1398,9 +1398,14 @@ As with allocation, the descriptor must be populated appropriately, i.e.

=== Copying image array data [[copying_image_array_data]]

When copying to or from image arrays, the user should copy to/from the entire
array of images in one call to `ext_oneapi_copy` by passing the image arrays'
`image_mem_handle`.
When copying to or from image arrays, the user should utilize `ext_oneapi_copy`
and pass the image arrays' `image_mem_handle`, and any applicable sub-region
copy parameters, as outlined in <<explicit_copies>>.

In order to copy to specific layers of an image array, the offset and extent
parameters involved in sub-region copies must be populated such that the 3rd
dimension of the ranges represent the arrays' layer(s) being copied, regardless
of whether the copy is performed on a 1D or 2D image array.

=== Reading an image array

Expand Down Expand Up @@ -2888,4 +2893,5 @@ These features still need to be handled:
`map_external_linear_memory`.
|6 |2024-08-05 | - Collated all changes since revision 5.
- Bumped SYCL_EXT_ONEAPI_BINDLESS_IMAGES to number 6.
|6.1|2024-09-09| - Update for image-array sub-region copy support.
|======================
35 changes: 26 additions & 9 deletions sycl/source/handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//

#include "ur_api.h"
#include "sycl/detail/helpers.hpp"
#include "ur_api.h"
#include <algorithm>

#include <detail/config.hpp>
Expand Down Expand Up @@ -1046,10 +1046,15 @@ void handler::ext_oneapi_copy(
Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap
? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
: UrDesc.type;

// Array size is depth extent.
impl->MCopyExtent = {Desc.width, Desc.height, Desc.array_size};
} else {
UrDesc.type = Desc.depth > 0 ? UR_MEM_TYPE_IMAGE3D
: (Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D
: UR_MEM_TYPE_IMAGE1D);

impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
}

ur_image_format_t UrFormat;
Expand All @@ -1061,7 +1066,6 @@ void handler::ext_oneapi_copy(

impl->MSrcOffset = {0, 0, 0};
impl->MDestOffset = {0, 0, 0};
impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
impl->MSrcImageDesc = UrDesc;
impl->MDstImageDesc = UrDesc;
impl->MSrcImageFormat = UrFormat;
Expand Down Expand Up @@ -1136,7 +1140,7 @@ void handler::ext_oneapi_copy(
sycl_ext_oneapi_bindless_images>();
Desc.verify();

MSrcPtr = reinterpret_cast<void*>(Src.raw_handle);
MSrcPtr = reinterpret_cast<void *>(Src.raw_handle);
MDstPtr = Dest;

ur_image_desc_t UrDesc = {};
Expand All @@ -1156,10 +1160,15 @@ void handler::ext_oneapi_copy(
Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap
? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
: UrDesc.type;

// Array size is depth extent.
impl->MCopyExtent = {Desc.width, Desc.height, Desc.array_size};
} else {
UrDesc.type = Desc.depth > 0 ? UR_MEM_TYPE_IMAGE3D
: (Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D
: UR_MEM_TYPE_IMAGE1D);

impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
}

ur_image_format_t UrFormat;
Expand All @@ -1171,7 +1180,6 @@ void handler::ext_oneapi_copy(

impl->MSrcOffset = {0, 0, 0};
impl->MDestOffset = {0, 0, 0};
impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
impl->MSrcImageDesc = UrDesc;
impl->MDstImageDesc = UrDesc;
impl->MSrcImageFormat = UrFormat;
Expand All @@ -1189,8 +1197,8 @@ void handler::ext_oneapi_copy(
sycl_ext_oneapi_bindless_images>();
ImageDesc.verify();

MSrcPtr = reinterpret_cast<void*>(Src.raw_handle);
MDstPtr = reinterpret_cast<void*>(Dest.raw_handle);
MSrcPtr = reinterpret_cast<void *>(Src.raw_handle);
MDstPtr = reinterpret_cast<void *>(Dest.raw_handle);

ur_image_desc_t UrDesc = {};
UrDesc.stype = UR_STRUCTURE_TYPE_IMAGE_DESC;
Expand All @@ -1208,11 +1216,17 @@ void handler::ext_oneapi_copy(
ImageDesc.type == sycl::ext::oneapi::experimental::image_type::cubemap
? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
: UrDesc.type;

// Array size is depth extent.
impl->MCopyExtent = {ImageDesc.width, ImageDesc.height,
ImageDesc.array_size};
} else {
UrDesc.type = ImageDesc.depth > 0
? UR_MEM_TYPE_IMAGE3D
: (ImageDesc.height > 0 ? UR_MEM_TYPE_IMAGE2D
: UR_MEM_TYPE_IMAGE1D);

impl->MCopyExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth};
}

ur_image_format_t UrFormat;
Expand All @@ -1224,7 +1238,6 @@ void handler::ext_oneapi_copy(

impl->MSrcOffset = {0, 0, 0};
impl->MDestOffset = {0, 0, 0};
impl->MCopyExtent = {ImageDesc.width, ImageDesc.height, ImageDesc.depth};
impl->MSrcImageDesc = UrDesc;
impl->MDstImageDesc = UrDesc;
impl->MSrcImageFormat = UrFormat;
Expand All @@ -1244,7 +1257,7 @@ void handler::ext_oneapi_copy(
sycl_ext_oneapi_bindless_images>();
SrcImgDesc.verify();

MSrcPtr = reinterpret_cast<void*>(Src.raw_handle);
MSrcPtr = reinterpret_cast<void *>(Src.raw_handle);
MDstPtr = Dest;

ur_image_desc_t UrDesc = {};
Expand Down Expand Up @@ -1320,10 +1333,15 @@ void handler::ext_oneapi_copy(
Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap
? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP
: UrDesc.type;

// Array size is depth extent.
impl->MCopyExtent = {Desc.width, Desc.height, Desc.array_size};
} else {
UrDesc.type = Desc.depth > 0 ? UR_MEM_TYPE_IMAGE3D
: (Desc.height > 0 ? UR_MEM_TYPE_IMAGE2D
: UR_MEM_TYPE_IMAGE1D);

impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
}

ur_image_format_t UrFormat;
Expand All @@ -1335,7 +1353,6 @@ void handler::ext_oneapi_copy(

impl->MSrcOffset = {0, 0, 0};
impl->MDestOffset = {0, 0, 0};
impl->MCopyExtent = {Desc.width, Desc.height, Desc.depth};
impl->MSrcImageDesc = UrDesc;
impl->MDstImageDesc = UrDesc;
impl->MSrcImageFormat = UrFormat;
Expand Down
145 changes: 145 additions & 0 deletions sycl/test-e2e/bindless_images/array/read_write_1d_subregion.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
// REQUIRES: cuda

// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include <iostream>
#include <sycl/detail/core.hpp>

#include <sycl/ext/oneapi/bindless_images.hpp>

// Uncomment to print additional test information
// #define VERBOSE_PRINT

class image_addition;

int main() {

sycl::device dev;
sycl::queue q(dev);
auto ctxt = q.get_context();

// declare image data
size_t width = 4;
size_t layers = 2;
size_t N = width * layers;
std::vector<float> out(N);
std::vector<float> expected(N);
std::vector<float> dataIn1(N);
std::vector<float> dataIn2(N);
for (int i = 0; i < width; i++) {
for (int j = 0; j < layers; j++) {
expected[j + ((layers)*i)] = (j + (layers)*i) * 3;
dataIn1[j + ((layers)*i)] = (j + (layers)*i);
dataIn2[j + ((layers)*i)] = (j + (layers)*i) * 2;
}
}

// Image descriptor - can use the same for both images
sycl::ext::oneapi::experimental::image_descriptor desc(
{width}, 1, sycl::image_channel_type::fp32,
sycl::ext::oneapi::experimental::image_type::array, 1, layers);

try {
// Extension: allocate memory on device and create the handle
sycl::ext::oneapi::experimental::image_mem imgMem0(desc, q);
sycl::ext::oneapi::experimental::image_mem imgMem1(desc, q);
sycl::ext::oneapi::experimental::image_mem imgMem2(desc, q);

// Extension: create the image and return the handle
sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle1 =
sycl::ext::oneapi::experimental::create_image(imgMem0, desc, q);
sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle2 =
sycl::ext::oneapi::experimental::create_image(imgMem1, desc, q);
sycl::ext::oneapi::experimental::unsampled_image_handle imgHandle3 =
sycl::ext::oneapi::experimental::create_image(imgMem2, desc, q);

// The subregion size for the copies.
sycl::range copyExtent = {width / 2, 1, layers / 2};
// The extent of data provided on the host (vector).
sycl::range srcExtent = {width, 1, layers};

// the 4 subregion offsets used for the copies.
std::vector<sycl::range<3>> offsets{{0, 0, 0},
{width / 2, 0, 0},
{0, 0, layers / 2},
{width / 2, 0, layers / 2}};

for (auto offset : offsets) {
// Extension: Copy to image array subregion.
q.ext_oneapi_copy(dataIn1.data(), offset, srcExtent, imgMem0.get_handle(),
offset, desc, copyExtent);
// Extension: Copy to image array subregion.
q.ext_oneapi_copy(dataIn2.data(), offset, srcExtent, imgMem1.get_handle(),
offset, desc, copyExtent);
}
q.wait_and_throw();

q.submit([&](sycl::handler &cgh) {
cgh.parallel_for<image_addition>(
sycl::nd_range<2>{{width, layers}, {width, layers}},
[=](sycl::nd_item<2> it) {
size_t dim0 = it.get_local_id(0);
size_t dim1 = it.get_local_id(1);
float sum = 0;
// Extension: fetch image data from handle
float px1 =
sycl::ext::oneapi::experimental::fetch_image_array<float>(
imgHandle1, int(dim0), dim1);
float px2 =
sycl::ext::oneapi::experimental::fetch_image_array<float>(
imgHandle2, int(dim0), dim1);

sum = px1 + px2;

// Extension: write to image with handle
sycl::ext::oneapi::experimental::write_image_array<float>(
imgHandle3, int(dim0), dim1, sum);
});
});
q.wait_and_throw();

// Extension: copy data from device to host (four subregions/quadrants)
for (auto offset : offsets) {
q.ext_oneapi_copy(imgMem2.get_handle(), offset, desc, out.data(), offset,
srcExtent, copyExtent);
}
q.wait_and_throw();

// Extension: cleanup
sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle1, q);
sycl::ext::oneapi::experimental::destroy_image_handle(imgHandle2, q);
} catch (sycl::exception e) {
std::cerr << "SYCL exception caught! : " << e.what() << "\n";
return 1;
} catch (...) {
std::cerr << "Unknown exception caught!\n";
return 2;
}

// collect and validate output
bool validated = true;
for (int i = 0; i < N; i++) {
bool mismatch = false;
if (out[i] != expected[i]) {
mismatch = true;
validated = false;
}

if (mismatch) {
#ifdef VERBOSE_PRINT
std::cout << "Result mismatch! Expected: " << expected[i]
<< ", Actual: " << out[i] << std::endl;
#else
break;
#endif
}
}
if (validated) {
std::cout << "Test passed!" << std::endl;
return 0;
}

std::cout << "Test failed!" << std::endl;
return 3;
}
Loading
Loading