Skip to content

Commit

Permalink
fortran: add gpuAllocatorClearCache
Browse files Browse the repository at this point in the history
- requires a hack to clear cache for most common allocator types
- does not work if thrust backend is enabled
  • Loading branch information
bd4 committed Mar 14, 2023
1 parent d8a8856 commit 6a27ab2
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 2 deletions.
2 changes: 2 additions & 0 deletions include/gtensor/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ struct caching_allocator : A

// No-op construct: accepts a pointer (unnamed, unused) and does nothing.
// NOTE(review): presumably this exists so containers using this allocator skip
// per-element initialization of freshly allocated storage — confirm with callers.
GT_INLINE void construct(pointer) {}

// Note: thrust allocators have a non-static deallocate and this does not work
// if thrust is enabled
static void clear_cache()
{
for (auto it = free_.begin(); it != free_.end(); it++) {
Expand Down
7 changes: 5 additions & 2 deletions include/gtensor/backend_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,11 @@ struct wrap_allocator
using pointer = gt::space_pointer<T, S>;
using size_type = gt::size_type;

pointer allocate(size_type n) { return pointer(A::template allocate<T>(n)); }
void deallocate(pointer p, size_type n)
// Allocates via the wrapped allocator's static `A::allocate<T>(n)` and converts
// the result to this allocator's `pointer` type. Declared static, so it can be
// called without a wrap_allocator instance.
static pointer allocate(size_type n)
{
return pointer(A::template allocate<T>(n));
}
// Releases `p` by passing the result of `gt::pointer_traits<pointer>::get(p)`
// to the wrapped allocator's static `A::deallocate`. The count `n` is accepted
// but not used. Declared static, so no wrap_allocator instance is needed.
static void deallocate(pointer p, size_type n)
{
A::deallocate(gt::pointer_traits<pointer>::get(p));
}
Expand Down
49 changes: 49 additions & 0 deletions include/gtensor/gtensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -942,6 +942,55 @@ decltype(auto) host_mirror(E& e)
return detail::host_mirror<E>::run(e);
}

// ======================================================================
// allocator_clear_caches

// Convenience aliases naming gt::allocator::caching_allocator wrapped around
// each of the three underlying allocator templates. The space parameter S
// defaults to the space matching the alias name.

// caching_allocator over device_allocator<T, S>; S defaults to gt::space::device.
template <typename T, typename S = gt::space::device>
using caching_device_allocator =
gt::allocator::caching_allocator<T, device_allocator<T, S>>;

// caching_allocator over host_allocator<T, S>; S defaults to gt::space::host.
template <typename T, typename S = gt::space::host>
using caching_host_allocator =
gt::allocator::caching_allocator<T, host_allocator<T, S>>;

// caching_allocator over managed_allocator<T, S>; S defaults to gt::space::managed.
template <typename T, typename S = gt::space::managed>
using caching_managed_allocator =
gt::allocator::caching_allocator<T, managed_allocator<T, S>>;

// TODO: this is a hack, we should re-write caching_allocator to better
// support this use case, i.e. clearing cache after initialization
// and auto-parallelization but before main time loop in an app like
// GENE
//
// The helpers below only exist when the caches can actually be cleared.
// Note: thrust allocators have a non-static deallocate, so nothing is done
// when the thrust backend is enabled.
#if defined(GTENSOR_HAVE_DEVICE) && !defined(GTENSOR_USE_THRUST)
namespace detail
{

// Calls Alloc<T>::clear_cache() for every T in Ts, in the order listed.
template <template <typename...> class Alloc, typename... Ts>
inline void clear_caches_of()
{
  // A braced-init-list evaluates its elements strictly left to right, which
  // keeps the per-type call order without needing C++17 fold expressions.
  int expand_in_order[] = {0, (Alloc<Ts>::clear_cache(), 0)...};
  (void)expand_in_order;
}

// Clears the caches of Alloc for the fixed list of commonly used value types.
template <template <typename...> class Alloc>
inline void clear_common_caches()
{
  clear_caches_of<Alloc, double, float, gt::complex<double>,
                  gt::complex<float>, uint8_t, int, std::size_t>();
}

} // namespace detail
#endif

// Clears the caching allocator caches for the most common value types, first
// for the device allocators, then the managed ones, then the host ones.
// Compiles to an empty function unless GTENSOR_HAVE_DEVICE is defined and
// GTENSOR_USE_THRUST is not.
inline void allocator_clear_caches()
{
#if defined(GTENSOR_HAVE_DEVICE) && !defined(GTENSOR_USE_THRUST)
  detail::clear_common_caches<gt::caching_device_allocator>();
  detail::clear_common_caches<gt::caching_managed_allocator>();
  detail::clear_common_caches<gt::caching_host_allocator>();
#endif
}

} // namespace gt

#endif
5 changes: 5 additions & 0 deletions src/fortran/gpu_api.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -196,3 +196,8 @@ extern "C" int gpuMemcpyAsync(void* dst, const void* src, size_t bytes,
}

#endif

// C-linkage entry point that forwards to gt::allocator_clear_caches().
// Takes no arguments and returns nothing.
// NOTE(review): presumably intended to be callable from the Fortran API in
// this directory — confirm that the Fortran side actually binds to this symbol.
extern "C" void gpuAllocatorClearCache()
{
gt::allocator_clear_caches();
}
3 changes: 3 additions & 0 deletions src/fortran/gpu_api_interface.F90
Original file line number Diff line number Diff line change
Expand Up @@ -284,4 +284,7 @@ subroutine gpuDeviceGet(out_device_id)
out_device_id = gt_backend_device_get()
end subroutine gpuDeviceGet

! Argument-less subroutine named gpuAllocatorClearCache.
! NOTE(review): as shown here the body is empty and there is no bind(c)
! clause, so this declaration by itself does not call the C function of the
! same name. Confirm whether it sits inside an interface block or whether a
! bind(c) declaration / explicit call was intended.
subroutine gpuAllocatorClearCache()
end subroutine gpuAllocatorClearCache

end module gpu_api_m

0 comments on commit 6a27ab2

Please sign in to comment.