Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEA] Add method to copy device_buffer back to host memory #219

Merged
merged 37 commits into from
Jan 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
76a18c4
Add `copy_to_host` method to `rmm::device_buffer`
jakirkham Dec 20, 2019
695aabf
Add `copy_to_host` to Cython binding interface
jakirkham Dec 20, 2019
88051e4
Define `tobytes` method in `DeviceBuffer`
jakirkham Dec 20, 2019
3d29c34
Implement `tobytes` using `copy_to_host`
jakirkham Dec 20, 2019
4d0b5fc
Fix incorrect variable name
jakirkham Dec 20, 2019
105b6c0
Use attributes directly in `copy_to_host`
jakirkham Dec 20, 2019
aaf38a3
Check that `host_buffer` is not `nullptr`
jakirkham Dec 20, 2019
edbd381
Add CUDA stream and use `cudaMemcpyAsync`
jakirkham Dec 20, 2019
f30a301
Fix incorrect variable name
jakirkham Dec 20, 2019
8edffa1
Convert `copy_to_host` to a function
jakirkham Dec 20, 2019
6098038
Use stream provided and don't change device_buffer
jakirkham Jan 6, 2020
93eda08
Add Mark's doc suggestion
jakirkham Jan 6, 2020
327bcad
Include changelog entry for this feature
jakirkham Jan 6, 2020
8a6aaad
Merge rapidsai/branch-0.12 into jakirkham/add_devbuf_tobytes
jakirkham Jan 6, 2020
b886251
Add `const` in `copy_to_host` forward declaration
jakirkham Jan 6, 2020
4640772
Add `stream` with default value
jakirkham Jan 6, 2020
1d1af06
Declare `copy_to_host` w/ & w/o `cudaStream_t` arg
jakirkham Jan 7, 2020
8f9662f
Test copying to raw host pointer in C++
jakirkham Jan 7, 2020
1a2989d
Define `p` as a `char*` and cast later
jakirkham Jan 7, 2020
b4f786d
Assign `const device_buffer*` beforehand
jakirkham Jan 7, 2020
7fe5c1e
Release GIL when copying to host
jakirkham Jan 7, 2020
2f550ab
Synchronize on CUDA stream before returning
jakirkham Jan 7, 2020
87fbc07
Merge rapidsai/branch-0.12 into jakirkham/add_devbuf_tobytes
jakirkham Jan 7, 2020
eec351a
Add Python test to convert `DeviceBuffer` to bytes
jakirkham Jan 7, 2020
65bd2d6
Allow passing `stream` into `tobytes`
jakirkham Jan 7, 2020
1634ef2
Drop stream synchronization from `copy_to_host`
jakirkham Jan 7, 2020
28e9116
Extern `cudaError_t` in Cython
jakirkham Jan 7, 2020
b098935
Extern `cudaStreamSynchronize`
jakirkham Jan 7, 2020
5278146
Sync stream in Cython
jakirkham Jan 7, 2020
8559ecb
Take `uintptr_t` in `tobytes` call
jakirkham Jan 7, 2020
10601c9
Import `cudaSuccess` in `device_buffer`
jakirkham Jan 7, 2020
bcf3592
Drop unneeded `ctypedef`
jakirkham Jan 7, 2020
9e43baf
Document C++ parameters of `copy_to_host`
jakirkham Jan 7, 2020
8261367
Document exceptions thrown by `copy_to_host`
jakirkham Jan 7, 2020
37b48a2
Also import `uintptr_t` in `device_buffer.pxd`
jakirkham Jan 7, 2020
48b7158
Update include/rmm/device_buffer.hpp
jakirkham Jan 7, 2020
f8ea21a
Add the CUDA error code in the Python exception
jakirkham Jan 7, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## New Features

- PR #218 Add `_DevicePointer`
- PR #219 Add method to copy `device_buffer` back to host memory
- PR #222 Expose free and total memory in Python interface

## Improvements
Expand Down
25 changes: 25 additions & 0 deletions include/rmm/device_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,4 +392,29 @@ class device_buffer {
mr::get_default_resource()}; ///< The memory resource used to
///< allocate/deallocate device memory
};

/**--------------------------------------------------------------------------*
* @brief Copies rmm::device_buffer to a preallocated host buffer.
*
* Copies device memory asynchronously on the specified stream
*
* @throws std::runtime_error if `hb` is `nullptr` or copy fails
*
* @param db `rmm::device_buffer` to copy to host
* @param hb host allocated buffer to copy data to
* @param stream CUDA stream on which the device to host copy will be performed
*-------------------------------------------------------------------------**/
void copy_to_host(const device_buffer& db, void* hb, cudaStream_t stream = 0) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There were linking errors downstream as this was not declared inline and the function source code was not in a .cpp file. This has since been fixed with PR ( #230 ).

if (hb == nullptr) {
throw std::runtime_error{"Cannot copy to `nullptr`."};
}
cudaError_t err = cudaMemcpyAsync(hb,
db.data(),
db.size(),
cudaMemcpyDeviceToHost,
stream);
if (err != cudaSuccess) {
throw std::runtime_error{"Failed to copy to host."};
}
}
} // namespace rmm
8 changes: 8 additions & 0 deletions python/rmm/_lib/device_buffer.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from libcpp.memory cimport unique_ptr
from libc.stdint cimport uintptr_t

from rmm._lib.lib cimport cudaStream_t

Expand All @@ -14,12 +15,19 @@ cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil:
size_t size()
size_t capacity()

void copy_to_host(const device_buffer& db, void* hb) except *
void copy_to_host(const device_buffer& db,
void* hb,
cudaStream_t stream) except *

cdef class DeviceBuffer:
cdef unique_ptr[device_buffer] c_obj

@staticmethod
cdef DeviceBuffer c_from_unique_ptr(unique_ptr[device_buffer] ptr)

cpdef bytes tobytes(self, uintptr_t stream=*)

cdef size_t c_size(self)
cpdef void resize(self, size_t new_size)
cpdef size_t capacity(self)
Expand Down
19 changes: 19 additions & 0 deletions python/rmm/_lib/device_buffer.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
from libcpp.memory cimport unique_ptr
from libc.stdint cimport uintptr_t

from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AS_STRING

from rmm._lib.lib cimport (cudaError_t, cudaSuccess,
cudaStream_t, cudaStreamSynchronize)


cdef class DeviceBuffer:

Expand Down Expand Up @@ -50,6 +55,20 @@ cdef class DeviceBuffer:
buf.c_obj = move(ptr)
return buf

cpdef bytes tobytes(self, uintptr_t stream=0):
cdef const device_buffer* dbp = self.c_obj.get()
cdef bytes b = PyBytes_FromStringAndSize(NULL, self.c_size())
cdef char* p = PyBytes_AS_STRING(b)
cdef cudaError_t err

with nogil:
copy_to_host(dbp[0], <void*>p, <cudaStream_t>stream)
err = cudaStreamSynchronize(<cudaStream_t>stream)
if err != cudaSuccess:
raise RuntimeError(f"Stream sync failed with error: {err}")

return b

cdef size_t c_size(self):
return self.c_obj.get()[0].size()

Expand Down
5 changes: 5 additions & 0 deletions python/rmm/_lib/lib.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,13 @@ ctypedef pair[size_t, size_t] memory_pair

cdef extern from * nogil:

ctypedef enum cudaError_t "cudaError_t":
cudaSuccess = 0

ctypedef void* cudaStream_t "cudaStream_t"

cudaError_t cudaStreamSynchronize(cudaStream_t stream)


cdef uintptr_t c_alloc(
size_t size,
Expand Down
5 changes: 5 additions & 0 deletions python/rmm/tests/test_rmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ def test_rmm_device_buffer(size):
assert b.__cuda_array_interface__["typestr"] == "|u1"
assert b.__cuda_array_interface__["version"] == 0

# Test conversion to bytes
s = b.tobytes()
assert isinstance(s, bytes)
assert len(s) == len(b)

# Test resizing
b.resize(2)
assert b.size == 2
Expand Down
9 changes: 9 additions & 0 deletions tests/device_buffer_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,15 @@ TYPED_TEST(DeviceBufferTest, CopyFromRawDevicePointer) {
EXPECT_EQ(cudaSuccess, cudaFree(device_memory));
}

TYPED_TEST(DeviceBufferTest, CopyToRawHostPointer) {
rmm::device_buffer buff(this->size);
std::vector<uint8_t> host_data(this->size);
uint8_t* host_data_ptr = host_data.data();
rmm::copy_to_host(buff, host_data_ptr);
EXPECT_EQ(0, buff.stream());
// TODO check for equality between the contents of the two allocations
}

TYPED_TEST(DeviceBufferTest, CopyFromRawHostPointer) {
std::vector<uint8_t> host_data(this->size);
rmm::device_buffer buff(static_cast<void*>(host_data.data()), this->size);
Expand Down