Skip to content

Commit

Permalink
Improve address sanitizer build
Browse files Browse the repository at this point in the history
When building with the address sanitizer option turned on:
Compile for xnack+ architectures when specifying address sanitizer build.
Add address sanitizer flags to runtime-compiled kernels.
Do not create a kernel cache.
  • Loading branch information
malcolmroberts committed Oct 1, 2024
1 parent e401f55 commit fc0cedd
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 13 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ Documentation for rocFFT is available at

## rocFFT 1.0.29 (unreleased)

### Changes

* Building with the address sanitizer option sets xnack+ on relevant GPU
architectures and address-sanitizer support is added to runtime-compiled
kernels.

## rocFFT 1.0.31 for ROCm 6.3.0

### Additions

* Implemented experimental APIs to allow computing FFTs on data
Expand Down
55 changes: 45 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,6 @@ option(ROCFFT_RUNTIME_COMPILE_DEFAULT "Compile kernels at runtime by default" OF
# Set default to OFF since users are not likely to tune
option(ROCFFT_BUILD_OFFLINE_TUNER "Build with offline tuner executable rocfft_offline_tuner" OFF)

if(BUILD_ADDRESS_SANITIZER)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -shared-libasan")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -shared-libasan")
add_link_options(-fuse-ld=lld)
endif()

# FOR HANDLING ENABLE/DISABLE OPTIONAL BACKWARD COMPATIBILITY for FILE/FOLDER REORG
option(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY "Build with file/folder reorg with backward compatibility enabled" OFF)
Expand All @@ -138,12 +133,52 @@ if( WERROR )
set( WARNING_FLAGS ${WARNING_FLAGS} -Werror )
endif( )

# Use target ID syntax if supported for AMDGPU_TARGETS
rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS
TARGETS "gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201")
set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target")
list(LENGTH AMDGPU_TARGETS AMDGPU_TARGETS_LENGTH)
set(DEFAULT_GPUS
gfx803
gfx900
gfx906
gfx908
gfx90a
gfx940
gfx941
gfx942
gfx1030
gfx1100
gfx1101
gfx1102
gfx1151
gfx1200
gfx1201)

if(BUILD_ADDRESS_SANITIZER)
add_compile_options(-fsanitize=address)
add_link_options(-fsanitize=address)
add_link_options(-shared-libasan)
SET(DEFAULT_GPUS
gfx908:xnack+
gfx90a:xnack+
gfx940:xnack+
gfx941:xnack+
gfx942:xnack+)
add_link_options(-fuse-ld=lld)
set(ROCFFT_KERNEL_CACHE_ENABLE off)
add_compile_definitions(ADDRESS_SANITIZER)
endif()

# Build only for local GPU architecture
if (BUILD_LOCAL_GPU_TARGET_ONLY)
message(STATUS "Building only for local GPU target")
if (COMMAND rocm_local_targets)
rocm_local_targets(DEFAULT_GPUS)
else()
message(WARNING "Unable to determine local GPU targets. Falling back to default GPUs.")
endif()
endif()


set(AMDGPU_TARGETS "${DEFAULT_GPUS}" CACHE STRING "Target default GPUs if AMDGPU_TARGETS is not defined.")
rocm_check_target_ids(AMDGPU_TARGETS TARGETS "${AMDGPU_TARGETS}")

# HIP is required - library and clients use HIP to access the device
find_package( HIP REQUIRED )

Expand Down
9 changes: 7 additions & 2 deletions library/src/rtc_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,11 @@ std::vector<char> RTCCache::cached_compile(const std::string& kernel_na
kernel_src_gen_t generate_src,
const std::array<char, 32>& generator_sum)
{
#ifdef ADDRESS_SANITIZER
// The address sanitizer is reported to work better when we include xnack+, so don't strip this
// from the architecture string when building:
const std::string gpu_arch = gpu_arch_with_flags;
#else
// Supplied gpu arch may have extra flags on it
// (e.g. gfx90a:sramecc+:xnack-), Strip those from the arch name
// since omitting them will generate code that handles either
Expand All @@ -570,8 +575,8 @@ std::vector<char> RTCCache::cached_compile(const std::string& kernel_na
// As of this writing, there are no known performance benefits to
// including the flags. If that changes, we may need to be more
// selective about which flags to strip.
std::string gpu_arch = gpu_arch_strip_flags(gpu_arch_with_flags);

const std::string gpu_arch = gpu_arch_strip_flags(gpu_arch_with_flags);
#endif
std::shared_future<std::vector<char>> result;

const pending_key key{kernel_name, gpu_arch};
Expand Down
4 changes: 3 additions & 1 deletion library/src/rtc_compile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,13 @@ std::vector<char> compile_inprocess(const std::string& kernel_src, const std::st
std::string gpu_arch_arg = "--gpu-architecture=" + gpu_arch;

std::vector<const char*> options;
options.reserve(4);
options.push_back("-O3");
options.push_back("-std=c++14");
options.push_back(gpu_arch_arg.c_str());
options.push_back("-mcumode");
#ifdef ADDRESS_SANITIZER
options.push_back("-fsanitize=address");
#endif

auto compileResult = hiprtcCompileProgram(prog, options.size(), options.data());
if(compileResult != HIPRTC_SUCCESS)
Expand Down

0 comments on commit fc0cedd

Please sign in to comment.