From ee80b77d423efab6563cc14482fc294d73c1f7ff Mon Sep 17 00:00:00 2001 From: bgawrych Date: Fri, 14 Aug 2020 07:19:52 +0200 Subject: [PATCH] Fix default CPU allocator memory alignment (#18885) * Replace std::malloc to aligned memory allocation in Pooled StorageManager * Add test checking CPU memory alignment * Fix memory allocation success check * Fix sanity --- src/common/utils.h | 22 ++++++++++++++++++++++ src/storage/cpu_device_storage.h | 15 +++------------ src/storage/storage_manager_helpers.h | 18 +++++++++++++++--- tests/cpp/storage/storage_test.cc | 23 +++++++++++++++++++++++ 4 files changed, 63 insertions(+), 15 deletions(-) diff --git a/src/common/utils.h b/src/common/utils.h index 9ea3329f2c24..aa0cb6b1b454 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -950,6 +950,28 @@ inline int GetDefaultDtype(int dtype) { mshadow::kFloat32; } +inline bool AlignedMemAlloc(void** ptr, size_t size, size_t alignment) { +#if _MSC_VER + *ptr = _aligned_malloc(size, alignment); + if (*ptr == nullptr) + return false; +#else + int res = posix_memalign(ptr, alignment, size); + if (res != 0) + return false; +#endif + return true; +} + +inline void AlignedMemFree(void* ptr) { +#if _MSC_VER + _aligned_free(ptr); +#else + free(ptr); +#endif +} + + } // namespace common } // namespace mxnet #endif // MXNET_COMMON_UTILS_H_ diff --git a/src/storage/cpu_device_storage.h b/src/storage/cpu_device_storage.h index b81cdbc17e25..eca7eaa057fc 100644 --- a/src/storage/cpu_device_storage.h +++ b/src/storage/cpu_device_storage.h @@ -60,21 +60,12 @@ class CPUDeviceStorage { }; // class CPUDeviceStorage inline void CPUDeviceStorage::Alloc(Storage::Handle* handle) { -#if _MSC_VER - handle->dptr = _aligned_malloc(handle->size, alignment_); - if (handle->dptr == nullptr) LOG(FATAL) << "Failed to allocate CPU Memory"; -#else - int ret = posix_memalign(&handle->dptr, alignment_, handle->size); - if (ret != 0) LOG(FATAL) << "Failed to allocate CPU Memory"; -#endif + bool success = mxnet::common::AlignedMemAlloc(&(handle->dptr), handle->size, alignment_); + if (!success) LOG(FATAL) << "Failed to allocate CPU Memory"; } inline void CPUDeviceStorage::Free(Storage::Handle handle) { -#if _MSC_VER - _aligned_free(handle.dptr); -#else - free(handle.dptr); -#endif + mxnet::common::AlignedMemFree(handle.dptr); } } // namespace storage diff --git a/src/storage/storage_manager_helpers.h b/src/storage/storage_manager_helpers.h index e144af2ab9a3..1fccb5a08f45 100644 --- a/src/storage/storage_manager_helpers.h +++ b/src/storage/storage_manager_helpers.h @@ -42,6 +42,7 @@ typedef mxnet::common::cuda::DeviceStore CudaDeviceStore; #endif // _WIN32 #include +#include "../common/utils.h" namespace mxnet { namespace storage { @@ -110,10 +111,22 @@ class ContextHelperCPU : public ContextHelper { } int Malloc(void **ppNtr, size_t size) const override { - return (*ppNtr = std::malloc(size))? 0 : -1; + bool success = mxnet::common::AlignedMemAlloc(ppNtr, size, alignment_); + return success ? 0 : -1; } - void Free(void *dptr) const override { std::free(dptr); } + void Free(void *dptr) const override { + mxnet::common::AlignedMemFree(dptr); + } + + private: +#if MXNET_USE_MKLDNN == 1 + // MKLDNN requires special alignment. 64 is used by the MKLDNN library in + // memory allocation. + static constexpr size_t alignment_ = kMKLDNNAlign; +#else + static constexpr size_t alignment_ = 16; +#endif }; #if MXNET_USE_CUDA @@ -155,7 +168,6 @@ class ContextHelperPinned : public ContextHelperGPU { #else typedef ContextHelperCPU ContextHelperPinned; #endif - } // namespace storage } // namespace mxnet diff --git a/tests/cpp/storage/storage_test.cc b/tests/cpp/storage/storage_test.cc index d9e7d8bc0294..497af35c83cd 100644 --- a/tests/cpp/storage/storage_test.cc +++ b/tests/cpp/storage/storage_test.cc @@ -47,6 +47,29 @@ TEST(Storage, Basic_CPU) { storage->Free(handle); } +TEST(Storage, CPU_MemAlign) { + #if MXNET_USE_MKLDNN == 1 + // MKLDNN requires special alignment. 64 is used by the MKLDNN library in + // memory allocation. + static constexpr size_t alignment_ = mxnet::kMKLDNNAlign; + #else + static constexpr size_t alignment_ = 16; + #endif + + auto&& storage = mxnet::Storage::Get(); + mxnet::Context context_cpu = mxnet::Context::CPU(0); + + for (int i = 0; i < 5; ++i) { + const size_t kSize = (std::rand() % 1024) + 1; + auto&& handle = storage->Alloc(kSize, context_cpu); + EXPECT_EQ(handle.ctx, context_cpu); + EXPECT_EQ(handle.size, kSize); + EXPECT_EQ(reinterpret_cast(handle.dptr) % alignment_, 0); + storage->Free(handle); + } +} + + #if MXNET_USE_CUDA TEST(Storage_GPU, Basic_GPU) { if (mxnet::test::unitTestsWithCuda) {