diff --git a/include/gtensor/allocator.h b/include/gtensor/allocator.h index fe989a05..a02b8b8d 100644 --- a/include/gtensor/allocator.h +++ b/include/gtensor/allocator.h @@ -99,6 +99,8 @@ struct caching_allocator : A GT_INLINE void construct(pointer) {} + // Note: thrust allocators have a non-static deallocate and this does not work + // if thrust is enabled static void clear_cache() { for (auto it = free_.begin(); it != free_.end(); it++) { diff --git a/include/gtensor/backend_common.h b/include/gtensor/backend_common.h index 85d39dc0..c44b82ff 100644 --- a/include/gtensor/backend_common.h +++ b/include/gtensor/backend_common.h @@ -267,8 +267,11 @@ struct wrap_allocator using pointer = gt::space_pointer; using size_type = gt::size_type; - pointer allocate(size_type n) { return pointer(A::template allocate(n)); } - void deallocate(pointer p, size_type n) + static pointer allocate(size_type n) + { + return pointer(A::template allocate(n)); + } + static void deallocate(pointer p, size_type n) { A::deallocate(gt::pointer_traits::get(p)); } diff --git a/include/gtensor/gtensor.h b/include/gtensor/gtensor.h index 58da5183..20c35306 100644 --- a/include/gtensor/gtensor.h +++ b/include/gtensor/gtensor.h @@ -942,6 +942,55 @@ decltype(auto) host_mirror(E& e) return detail::host_mirror::run(e); } +// ====================================================================== +// allocator_clear_caches + +template +using caching_device_allocator = + gt::allocator::caching_allocator>; + +template +using caching_host_allocator = + gt::allocator::caching_allocator>; + +template +using caching_managed_allocator = + gt::allocator::caching_allocator>; + +// TODO: this is a hack, we should re-write caching_allocator to better +// support this use case, i.e. clearing cache after initialization +// and auto-parallelization but before main time loop in an app like +// GENE +inline void allocator_clear_caches() +{ + // Note: thrust allocators have a non-static deallocate +#if defined(GTENSOR_HAVE_DEVICE) && !defined(GTENSOR_USE_THRUST) + gt::caching_device_allocator::clear_cache(); + gt::caching_device_allocator::clear_cache(); + gt::caching_device_allocator>::clear_cache(); + gt::caching_device_allocator>::clear_cache(); + gt::caching_device_allocator::clear_cache(); + gt::caching_device_allocator::clear_cache(); + gt::caching_device_allocator::clear_cache(); + + gt::caching_managed_allocator::clear_cache(); + gt::caching_managed_allocator::clear_cache(); + gt::caching_managed_allocator>::clear_cache(); + gt::caching_managed_allocator>::clear_cache(); + gt::caching_managed_allocator::clear_cache(); + gt::caching_managed_allocator::clear_cache(); + gt::caching_managed_allocator::clear_cache(); + + gt::caching_host_allocator::clear_cache(); + gt::caching_host_allocator::clear_cache(); + gt::caching_host_allocator>::clear_cache(); + gt::caching_host_allocator>::clear_cache(); + gt::caching_host_allocator::clear_cache(); + gt::caching_host_allocator::clear_cache(); + gt::caching_host_allocator::clear_cache(); +#endif +} + } // namespace gt #endif diff --git a/src/fortran/gpu_api.cxx b/src/fortran/gpu_api.cxx index 5f9eddd0..3271e323 100644 --- a/src/fortran/gpu_api.cxx +++ b/src/fortran/gpu_api.cxx @@ -196,3 +196,8 @@ extern "C" int gpuMemcpyAsync(void* dst, const void* src, size_t bytes, } #endif + +extern "C" void gpuAllocatorClearCache() +{ + gt::allocator_clear_caches(); +} diff --git a/src/fortran/gpu_api_interface.F90 b/src/fortran/gpu_api_interface.F90 index fa5141e0..a4780226 100644 --- a/src/fortran/gpu_api_interface.F90 +++ b/src/fortran/gpu_api_interface.F90 @@ -284,4 +284,7 @@ subroutine gpuDeviceGet(out_device_id) out_device_id = gt_backend_device_get() end subroutine gpuDeviceGet + subroutine gpuAllocatorClearCache() + end subroutine gpuAllocatorClearCache + end module gpu_api_m