diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b9aabd753..a263df9c5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -182,7 +182,7 @@ jobs: run: | source /opt/intel/oneapi/setvars.sh export ONEAPI_DEVICE_SELECTOR=*:cpu - ctest --test-dir build --progress --output-on-failure --parallel 8 --schedule-random -E "opencl-*|examples_cpp_for_loops-dpcpp|examples_cpp_arrays-dpcpp|examples_cpp_generic_inline_kernel-dpcpp|examples_cpp_nonblocking_streams-dpcpp" + ctest --test-dir build --progress --output-on-failure --parallel 8 --schedule-random -E "opencl-*|dpcpp-*" - name: Upload code coverage if: ${{ matrix.OCCA_COVERAGE }} diff --git a/examples/fortran/01_add_vectors/main.f90 b/examples/fortran/01_add_vectors/main.f90 index 189c69e2b..a03872cad 100644 --- a/examples/fortran/01_add_vectors/main.f90 +++ b/examples/fortran/01_add_vectors/main.f90 @@ -87,7 +87,7 @@ program main props) ! Copy memory to the device - call occaCopyPtrToMem(o_a, C_loc(a), entries*C_float, 0_occaUDim_t, occaDefault) + call occaCopyPtrToMem(o_a, C_loc(a), entries, 0_occaUDim_t, occaDefault) call occaCopyPtrToMem(o_b, C_loc(b), occaAllBytes , 0_occaUDim_t, occaDefault) ! Launch device kernel diff --git a/examples/fortran/03_static_compilation/main.f90 b/examples/fortran/03_static_compilation/main.f90 index 0168e77ae..ad341f2f7 100644 --- a/examples/fortran/03_static_compilation/main.f90 +++ b/examples/fortran/03_static_compilation/main.f90 @@ -87,7 +87,7 @@ program main props) ! Copy memory to the device - call occaCopyPtrToMem(o_a, C_loc(a), entries*C_float, 0_occaUDim_t, occaDefault) + call occaCopyPtrToMem(o_a, C_loc(a), entries, 0_occaUDim_t, occaDefault) call occaCopyPtrToMem(o_b, C_loc(b), occaAllBytes , 0_occaUDim_t, occaDefault) ! Launch device kernel diff --git a/examples/fortran/09_streams/main.f90 b/examples/fortran/09_streams/main.f90 index 8323c3867..a1beb0921 100644 --- a/examples/fortran/09_streams/main.f90 +++ b/examples/fortran/09_streams/main.f90 @@ -61,7 +61,7 @@ program main occaDefault) ! Copy memory to the device - call occaCopyPtrToMem(o_a, C_loc(a), entries*C_float, 0_occaUDim_t, occaDefault) + call occaCopyPtrToMem(o_a, C_loc(a), entries, 0_occaUDim_t, occaDefault) call occaCopyPtrToMem(o_b, C_loc(b), occaAllBytes , 0_occaUDim_t, occaDefault) ! Set stream and launch device kernel diff --git a/include/occa/core/memory.hpp b/include/occa/core/memory.hpp index 56cc673cf..7c421bf91 100644 --- a/include/occa/core/memory.hpp +++ b/include/occa/core/memory.hpp @@ -307,17 +307,18 @@ namespace occa { * @startDoc{copyFrom[0]} * * Description: - * Copies data from the input `src` to the caller [[memory]] object + * Copies `count` elements from `src` into caller's data buffer, beginning at `offset`. * * Arguments: * src: * Data source. * - * bytes: - * How many bytes to copy. + * count: + * The number of elements of type [[dtype_t]] to copy. * * offset: - * The [[memory]] offset where data transfer will start. + * The number of elements from beginning of the caller's + * data buffer the destination range is shifted. * * props: * Any backend-specific properties for memory transfer. @@ -326,7 +327,7 @@ namespace occa { * @endDoc */ void copyFrom(const void *src, - const dim_t bytes = -1, + const dim_t count = -1, const dim_t offset = 0, const occa::json &props = occa::json()); @@ -352,7 +353,7 @@ namespace occa { * @endDoc */ void copyFrom(const memory src, - const dim_t bytes = -1, + const dim_t count = -1, const dim_t destOffset = 0, const dim_t srcOffset = 0, const occa::json &props = occa::json()); @@ -367,17 +368,18 @@ namespace occa { * @startDoc{copyTo[0]} * * Description: - * Copies data from the input `src` to the caller [[memory]] object + * Copies `count` elements to `dest` from caller's data buffer, beginning at `offset`. * * Arguments: * dest: * Where to copy the [[memory]] data to. * - * bytes: - * How many bytes to copy + * count: + * The number of elements of type [[dtype_t]] to copy * * offset: - * The [[memory]] offset where data transfer will start. + * The number of elements from beginning of the caller's + * data buffer the source range is shifted. * * props: * Any backend-specific properties for memory transfer. @@ -386,7 +388,7 @@ namespace occa { * @endDoc */ void copyTo(void *dest, - const dim_t bytes = -1, + const dim_t count = -1, const dim_t offset = 0, const occa::json &props = occa::json()) const; @@ -412,7 +414,7 @@ namespace occa { * @endDoc */ void copyTo(const memory dest, - const dim_t bytes = -1, + const dim_t count = -1, const dim_t destOffset = 0, const dim_t srcOffset = 0, const occa::json &props = occa::json()) const; diff --git a/include/occa/functional/array.hpp b/include/occa/functional/array.hpp index 442a022e1..83c9a699c 100644 --- a/include/occa/functional/array.hpp +++ b/include/occa/functional/array.hpp @@ -129,7 +129,7 @@ namespace occa { : entries ); - memory_.copyFrom(src, safeEntries * sizeof(T)); + memory_.copyFrom(src, safeEntries); } void copyFrom(const occa::memory src, @@ -140,7 +140,7 @@ namespace occa { : entries ); - memory_.copyFrom(src, safeEntries * sizeof(T)); + memory_.copyFrom(src, safeEntries); } void copyTo(T *dest, @@ -151,7 +151,7 @@ namespace occa { : entries ); - memory_.copyTo(dest, safeEntries * sizeof(T)); + memory_.copyTo(dest, safeEntries); } void copyTo(occa::memory dest, @@ -162,7 +162,7 @@ namespace occa { : entries ); - memory_.copyTo(dest, safeEntries * sizeof(T)); + memory_.copyTo(dest, safeEntries); } //================================== @@ -297,17 +297,13 @@ namespace occa { //---[ Utility methods ]------------ T& operator [] (const dim_t index) { static T value; - memory_.copyTo(&value, - sizeof(T), - index * sizeof(T)); + memory_.copyTo(&value,1,index); return value; } T& operator [] (const dim_t index) const { static T value; - memory_.copyTo(&value, - sizeof(T), - index * sizeof(T)); + memory_.copyTo(&value,1,index); return value; } @@ -319,12 +315,12 @@ namespace occa { } array concat(const array &other) const { - const udim_t bytes1 = memory_.size(); - const udim_t bytes2 = other.memory_.size(); + const udim_t entries = length(); + const udim_t other_entries = other.length(); - occa::memory ret = getDevice().template malloc(length() + other.length()); - ret.copyFrom(memory_, bytes1, 0); - ret.copyFrom(other.memory_, bytes2, bytes1); + occa::memory ret = getDevice().template malloc(entries + other_entries); + ret.copyFrom(memory_, entries, 0); + ret.copyFrom(other.memory_, other_entries, entries); return array(ret); } diff --git a/include/occa/functional/typelessArray.hpp b/include/occa/functional/typelessArray.hpp index 73a747502..f6463677a 100644 --- a/include/occa/functional/typelessArray.hpp +++ b/include/occa/functional/typelessArray.hpp @@ -25,7 +25,7 @@ namespace occa { template void setupReturnMemory(const ReturnType &value) const { setupReturnMemoryArray(1); - returnMemory.copyFrom(&value, sizeof(ReturnType)); + returnMemory.copyFrom(&value, 1); } template @@ -39,8 +39,7 @@ namespace occa { template void setReturnValue(ReturnType &value) const { - size_t bytes = sizeof(ReturnType); - returnMemory.copyTo(&value, bytes); + returnMemory.copyTo(&value, 1); } virtual occa::scope getMapArrayScopeOverrides() const { diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 44b3b6f21..0148d9a37 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -186,105 +186,115 @@ namespace occa { } void memory::copyFrom(const void *src, - const dim_t bytes, + const dim_t count, const dim_t offset, const occa::json &props) { if (!isInitialized()) return; - udim_t bytes_ = ((bytes == -1) ? modeMemory->size : bytes); + const int dtypeSize = modeMemory->dtype_->bytes(); + const dim_t bytes = dtypeSize * ((count == -1) ? length() : count); + const dim_t offset_ = dtypeSize * offset; OCCA_ERROR("Trying to allocate negative bytes (" << bytes << ")", bytes >= -1); - OCCA_ERROR("Cannot have a negative offset (" << offset << ")", - offset >= 0); + OCCA_ERROR("Cannot have a negative offset (" << offset_ << ")", + offset_ >= 0); OCCA_ERROR("Destination memory has size [" << modeMemory->size << "]," - << " trying to access [" << offset << ", " << (offset + bytes_) << "]", - (bytes_ + offset) <= modeMemory->size); + << " trying to access [" << offset_ << ", " << (offset_ + bytes) << "]", + udim_t(bytes + offset_) <= modeMemory->size); - modeMemory->copyFrom(src, bytes_, offset, props); + modeMemory->copyFrom(src, bytes, offset_, props); } void memory::copyFrom(const memory src, - const dim_t bytes, + const dim_t count, const dim_t destOffset, const dim_t srcOffset, const occa::json &props) { - if (!isInitialized() && !src.isInitialized()) return; + if (!isInitialized() && !src.isInitialized()) return; assertInitialized(); - udim_t bytes_ = ((bytes == -1) ? modeMemory->size : bytes); + const int dtypeSize = modeMemory->dtype_->bytes(); + const dim_t bytes = dtypeSize * ((count == -1) ? length() : count); + const dim_t destOffset_ = dtypeSize * destOffset; + const dim_t srcOffset_ = src.modeMemory->dtype_->bytes() * srcOffset; OCCA_ERROR("Trying to allocate negative bytes (" << bytes << ")", bytes >= -1); - OCCA_ERROR("Cannot have a negative offset (" << destOffset << ")", - destOffset >= 0); + OCCA_ERROR("Cannot have a negative offset (" << destOffset_ << ")", + destOffset_ >= 0); - OCCA_ERROR("Cannot have a negative offset (" << srcOffset << ")", - srcOffset >= 0); + OCCA_ERROR("Cannot have a negative offset (" << srcOffset_ << ")", + srcOffset_ >= 0); OCCA_ERROR("Source memory has size [" << src.modeMemory->size << "]," - << " trying to access [" << srcOffset << ", " << (srcOffset + bytes_) << "]", - (bytes_ + srcOffset) <= src.modeMemory->size); + << " trying to access [" << srcOffset_ << ", " << (srcOffset_ + bytes) << "]", + udim_t(bytes + srcOffset_) <= src.modeMemory->size); OCCA_ERROR("Destination memory has size [" << modeMemory->size << "]," - << " trying to access [" << destOffset << ", " << (destOffset + bytes_) << "]", - (bytes_ + destOffset) <= modeMemory->size); + << " trying to access [" << destOffset_ << ", " << (destOffset_ + bytes) << "]", + udim_t(bytes + destOffset_) <= modeMemory->size); - modeMemory->copyFrom(src.modeMemory, bytes_, destOffset, srcOffset, props); + modeMemory->copyFrom(src.modeMemory, bytes, destOffset_, srcOffset_, props); } void memory::copyTo(void *dest, - const dim_t bytes, + const dim_t count, const dim_t offset, const occa::json &props) const { if (!isInitialized()) return; - udim_t bytes_ = ((bytes == -1) ? modeMemory->size : bytes); + const int dtypeSize = modeMemory->dtype_->bytes(); + const dim_t bytes = dtypeSize * ((count == -1) ? length() : count); + const dim_t offset_ = dtypeSize * offset; OCCA_ERROR("Trying to allocate negative bytes (" << bytes << ")", bytes >= -1); - OCCA_ERROR("Cannot have a negative offset (" << offset << ")", - offset >= 0); + OCCA_ERROR("Cannot have a negative offset (" << offset_ << ")", + offset_ >= 0); OCCA_ERROR("Source memory has size [" << modeMemory->size << "]," - << " trying to access [" << offset << ", " << (offset + bytes_) << "]", - (bytes_ + offset) <= modeMemory->size); + << " trying to access [" << offset_ << ", " << (offset_ + bytes) << "]", + udim_t(bytes + offset_) <= modeMemory->size); - modeMemory->copyTo(dest, bytes_, offset, props); + modeMemory->copyTo(dest, bytes, offset_, props); } void memory::copyTo(memory dest, - const dim_t bytes, + const dim_t count, const dim_t destOffset, const dim_t srcOffset, const occa::json &props) const { if (!isInitialized() && !dest.isInitialized()) return; assertInitialized(); - udim_t bytes_ = ((bytes == -1) ? modeMemory->size : bytes); + const int dtypeSize = modeMemory->dtype_->bytes(); + const dim_t bytes = dtypeSize * ((count == -1) ? length() : count); + const dim_t destOffset_ = dest.modeMemory->dtype_->bytes() * destOffset; + const dim_t srcOffset_ = dtypeSize * srcOffset; OCCA_ERROR("Trying to allocate negative bytes (" << bytes << ")", bytes >= -1); - OCCA_ERROR("Cannot have a negative offset (" << destOffset << ")", - destOffset >= 0); + OCCA_ERROR("Cannot have a negative offset (" << destOffset_ << ")", + destOffset_ >= 0); - OCCA_ERROR("Cannot have a negative offset (" << srcOffset << ")", - srcOffset >= 0); + OCCA_ERROR("Cannot have a negative offset (" << srcOffset_ << ")", + srcOffset_ >= 0); OCCA_ERROR("Source memory has size [" << modeMemory->size << "]," - << " trying to access [" << srcOffset << ", " << (srcOffset + bytes_) << "]", - (bytes_ + srcOffset) <= modeMemory->size); + << " trying to access [" << srcOffset_ << ", " << (srcOffset_ + bytes) << "]", + udim_t(bytes + srcOffset_) <= modeMemory->size); OCCA_ERROR("Destination memory has size [" << dest.modeMemory->size << "]," - << " trying to access [" << destOffset << ", " << (destOffset + bytes_) << "]", - (bytes_ + destOffset) <= dest.modeMemory->size); + << " trying to access [" << destOffset_ << ", " << (destOffset_ + bytes) << "]", + udim_t(bytes + destOffset_) <= dest.modeMemory->size); - dest.modeMemory->copyFrom(modeMemory, bytes_, destOffset, srcOffset, props); + dest.modeMemory->copyFrom(modeMemory, bytes, destOffset_, srcOffset_, props); } void memory::copyFrom(const void *src, diff --git a/tests/src/core/memory.cpp b/tests/src/core/memory.cpp index 63cc509a2..b125cfdbf 100644 --- a/tests/src/core/memory.cpp +++ b/tests/src/core/memory.cpp @@ -2,12 +2,16 @@ #include void testMalloc(); +void testCopy(); +void testPartialCopy(); void testSlice(); void testUnwrap(); void testCast(); int main(const int argc, const char **argv) { testMalloc(); + testCopy(); + testPartialCopy(); testSlice(); testUnwrap(); testCast(); @@ -171,3 +175,86 @@ void testCast() { ASSERT_EQ(occa_memory.size(), casted_memory.size()); } + +void testCopy() { + occa::device occa_device({{"mode", "Serial"}}); + + const std::size_t N = 1024; + + std::vector x_host(N,1); + std::vector y1_host(N,0); + std::vector y2_host(N,0); + + occa::memory x_device = occa_device.malloc(N); + occa::memory y_device = occa_device.malloc(N); + + x_device.copyFrom(x_host.data()); + x_device.copyTo(y1_host.data()); + for (const auto& y : y1_host) {ASSERT_EQ(y,1);} + + y_device.copyFrom(x_device); + y_device.copyTo(y2_host.data()); + for (const auto& y : y2_host) {ASSERT_EQ(y,1);} +} + +void testPartialCopy() { + occa::device occa_device({{"mode", "Serial"}}); + + const std::size_t N = 1024 * 2 * 3 * 4; + + std::vector x_host(N); + std::vector y_host(N); + + occa::memory x_device = occa_device.malloc(N); + occa::memory y_device = occa_device.malloc(N); + + for (std::size_t n = 0; n < N; ++n) { + x_host[n] = static_cast(n); + } + + std::size_t entries; + std::size_t offset; + + //---[Host to device]------- + // Last quarter + entries = N/4; + offset = 3*N/4; + x_device.copyFrom(x_host.data() + offset, entries, offset); + + // First quarter + offset = 0; + x_device.copyFrom(x_host.data() + offset, entries, offset); + + // Middle-half + entries = N/2; + offset = N/4; + x_device.copyFrom(x_host.data() + offset, entries, offset); + + //---[Device to device]------- + // Middle third + entries = N/3; + offset = N/3; + y_device.copyFrom(x_device, entries, offset, offset); + + // First third + offset = 0; + x_device.copyTo(y_device, entries, offset, offset); + + // Last third + offset = 2 * N / 3; + y_device.copyFrom(x_device, entries, offset, offset); + + //---[Device to host]------- + // Last half + entries = N/2; + offset = N/2; + y_device.copyTo(y_host.data() + offset, entries, offset); + + // First half + offset = 0; + y_device.copyTo(y_host.data() + offset, entries, offset); + + for (std::size_t n = 0; n < N; ++n) { + ASSERT_EQ(x_host[n], y_host[n]); + } +}