Skip to content

Commit

Permalink
Added HIP to the configure
Browse files Browse the repository at this point in the history
and added a test case.  It seems to work.  Now have to add
the actual roctracer support.
  • Loading branch information
khuck committed May 21, 2021
1 parent 6c44472 commit ab0d254
Show file tree
Hide file tree
Showing 5 changed files with 299 additions and 1 deletion.
33 changes: 32 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,16 @@ if(APEX_WITH_CUDA)
find_package(CUDAToolkit REQUIRED QUIET COMPONENTS CUPTI)
endif(APEX_WITH_CUDA)

if(APEX_WITH_HIP)
# This is needed so that the clang compiler test passes, and we can use it to
# compile the test cases.
if(APEX_BUILD_TESTS)
find_package(hip REQUIRED)
# OK to enable HIP tests
set(APEX_HIP_TESTS TRUE)
endif()
endif(APEX_WITH_HIP)

# set(APEX_BUILD_TYPE "${CMAKE_BUILD_TYPE}" CACHE STRING "Configuration type (one of Debug, RelWithDebInfo, Release, MinSizeRel)" FORCE)

# Always force CMAKE_CONFIGURATION_TYPES to be the same as CMAKE_BUILD_TYPE
Expand Down Expand Up @@ -274,7 +284,10 @@ endif (APEX_USE_PEDANTIC)
# PGI and Intel don't like the concurrentqueue code.
if (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "PGI")
if (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
enable_cxx_compiler_flag_if_supported("-Werror")
# ROCM Clang++ has an unused library in the link.
if (NOT ${APEX_WITH_HIP})
enable_cxx_compiler_flag_if_supported("-Werror")
endif (NOT ${APEX_WITH_HIP})
endif (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
endif (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "PGI")

Expand Down Expand Up @@ -633,6 +646,21 @@ if (APEX_WITH_CUDA)
include_directories(${NVML_INCLUDE_DIRS})
endif (APEX_WITH_CUDA)

################################################################################
# HIP configuration
################################################################################

if (APEX_WITH_HIP)
find_package(ROCTRACER REQUIRED)
find_package(ROCTX REQUIRED)
set(LIBS ${LIBS} ${ROCTRACER_LIBRARIES} ${ROCTX_LIBRARIES})
add_definitions(-DAPEX_WITH_HIP)
message(INFO " Using ROCTRACER include: ${ROCTRACER_INCLUDE_DIRS}")
include_directories(${ROCTRACER_INCLUDE_DIRS})
message(INFO " Using ROCTX include: ${ROCTX_INCLUDE_DIRS}")
include_directories(${ROCTX_INCLUDE_DIRS})
endif (APEX_WITH_HIP)

################################################################################
# RAJA configuration
################################################################################
Expand Down Expand Up @@ -936,6 +964,9 @@ if(APEX_BUILD_TESTS)
if (APEX_CUDA_TESTS)
add_subdirectory (src/unit_tests/CUDA)
endif (APEX_CUDA_TESTS)
if (APEX_HIP_TESTS)
add_subdirectory (src/unit_tests/HIP)
endif (APEX_HIP_TESTS)
else()
#add_subdirectory (src/unit_tests/C EXCLUDE_FROM_ALL)
#add_subdirectory (src/unit_tests/C++ EXCLUDE_FROM_ALL)
Expand Down
38 changes: 38 additions & 0 deletions cmake/Modules/FindROCTRACER.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# - Try to find LibROCTRACER
# Once done this will define
# ROCTRACER_FOUND - System has ROCTRACER
# ROCTRACER_INCLUDE_DIRS - The ROCTRACER include directories
# ROCTRACER_LIBRARIES - The libraries needed to use ROCTRACER
# ROCTRACER_DEFINITIONS - Compiler switches required for using ROCTRACER

if(NOT DEFINED $ROCTRACER_ROOT)
if(DEFINED ENV{ROCTRACER_ROOT})
# message(" env ROCTRACER_ROOT is defined as $ENV{ROCTRACER_ROOT}")
set(ROCTRACER_ROOT $ENV{ROCTRACER_ROOT})
endif()
if(DEFINED $ROCTRACER_PATH)
set(ROCTRACER_ROOT ${ROCTRACER_PATH})
endif()
endif()

find_path(ROCTRACER_INCLUDE_DIR NAMES roctracer.h
HINTS ${ROCM_ROOT}/include/roctracer ${ROCTRACER_ROOT}/include)

find_library(ROCTRACER_LIBRARY NAMES roctracer64
HINTS ${ROCM_ROOT}/lib64 ${ROCM_ROOT}/lib ${ROCTRACER_ROOT}/lib64 ${ROCTRACER_ROOT}/lib)

include(FindPackageHandleStandardArgs)
# handle the QUIETLY and REQUIRED arguments and set ROCTRACER_FOUND to TRUE
# if all listed variables are TRUE
find_package_handle_standard_args(ROCTRACER DEFAULT_MSG
ROCTRACER_LIBRARY ROCTRACER_INCLUDE_DIR)

mark_as_advanced(ROCTRACER_INCLUDE_DIR ROCTRACER_LIBRARY)

if(ROCTRACER_FOUND)
set(ROCTRACER_LIBRARIES ${CUDA_LIBRARY} ${ROCTRACER_LIBRARY} )
set(ROCTRACER_INCLUDE_DIRS ${CUDAToolkit_INCLUDE_DIRS} ${ROCTRACER_INCLUDE_DIR})
set(ROCTRACER_DIR ${ROCTRACER_ROOT})
add_definitions(-DAPEX_HAVE_ROCTRACER)
endif()

38 changes: 38 additions & 0 deletions cmake/Modules/FindROCTX.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# - Try to find LibROCTX
# Once done this will define
# ROCTX_FOUND - System has ROCTX
# ROCTX_INCLUDE_DIRS - The ROCTX include directories
# ROCTX_LIBRARIES - The libraries needed to use ROCTX
# ROCTX_DEFINITIONS - Compiler switches required for using ROCTX

if(NOT DEFINED $ROCTX_ROOT)
if(DEFINED ENV{ROCTX_ROOT})
# message(" env ROCTX_ROOT is defined as $ENV{ROCTX_ROOT}")
set(ROCTX_ROOT $ENV{ROCTX_ROOT})
endif()
if(DEFINED $ROCTX_PATH)
set(ROCTX_ROOT ${ROCTX_PATH})
endif()
endif()

find_path(ROCTX_INCLUDE_DIR NAMES roctx.h
HINTS ${ROCM_ROOT}/include/roctracer ${ROCTX_ROOT}/include)

find_library(ROCTX_LIBRARY NAMES roctx64
HINTS ${ROCM_ROOT}/lib64 ${ROCM_ROOT}/lib ${ROCTX_ROOT}/lib64 ${ROCTX_ROOT}/lib)

include(FindPackageHandleStandardArgs)
# handle the QUIETLY and REQUIRED arguments and set ROCTX_FOUND to TRUE
# if all listed variables are TRUE
find_package_handle_standard_args(ROCTX DEFAULT_MSG
ROCTX_LIBRARY ROCTX_INCLUDE_DIR)

mark_as_advanced(ROCTX_INCLUDE_DIR ROCTX_LIBRARY)

if(ROCTX_FOUND)
set(ROCTX_LIBRARIES ${CUDA_LIBRARY} ${ROCTX_LIBRARY} )
set(ROCTX_INCLUDE_DIRS ${CUDAToolkit_INCLUDE_DIRS} ${ROCTX_INCLUDE_DIR})
set(ROCTX_DIR ${ROCTX_ROOT})
add_definitions(-DAPEX_HAVE_ROCTX)
endif()

34 changes: 34 additions & 0 deletions src/unit_tests/HIP/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Make sure the compiler can find include files from our Apex library.
include_directories (${APEX_SOURCE_DIR}/src/apex ${APEX_HIP_EXTRA_INCLUDE})
include_directories(${APEX_SOURCE_DIR}/src/unit_tests/HIP)

# Make sure the linker can find the Apex library once it is built.
link_directories (${APEX_BINARY_DIR}/src/apex)
#link_directories (${APEX_BINARY_DIR}/src/apex_pthread_wrapper)

set(example_programs
MatrixTranspose
)

message(INFO "Using HIP libraries: ${HIP_LIBRARIES}")

set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${APEX_HIP_C_FLAGS}")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${APEX_HIP_CXX_FLAGS}")

foreach(example_program ${example_programs})
set(sources ${example_program}.cpp)
source_group("Source Files" FILES ${sources})
add_executable("${example_program}_hip" ${sources})
target_link_libraries ("${example_program}_hip" apex ${LIBS} hip::host)
if (BUILD_STATIC_EXECUTABLES)
set_target_properties("${example_program}_hip" PROPERTIES LINK_SEARCH_START_STATIC 1 LINK_SEARCH_END_STATIC 1)
endif()
add_dependencies ("${example_program}_hip" apex)
add_dependencies (tests "${example_program}_hip")
add_test ("test_${example_program}_hip" "${example_program}_hip")
endforeach()

# Make sure the linker can find the Apex library once it is built.
link_directories (${APEX_BINARY_DIR}/src/apex)


157 changes: 157 additions & 0 deletions src/unit_tests/HIP/MatrixTranspose.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

#include <iostream>

// hip header file
#include <hip/hip_runtime.h>
#include "roctracer_ext.h"
// roctx header file
#include <roctx.h>

#define RUNTIME_API_CALL(apiFuncCall) \
do { \
hipError_t _status = apiFuncCall; \
if (_status != hipSuccess) { \
fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \
__FILE__, __LINE__, #apiFuncCall, hipGetErrorString(_status));\
exit(-1); \
} \
} while (0)

#define WIDTH 1024


#define NUM (WIDTH * WIDTH)

#define THREADS_PER_BLOCK_X 4
#define THREADS_PER_BLOCK_Y 4
#define THREADS_PER_BLOCK_Z 1

// Mark API
extern "C"
void roctracer_mark(const char* str);

// Device (Kernel) function, it must be void
__global__ void matrixTranspose(float* out, float* in, const int width) {
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;

out[y * width + x] = in[x * width + y];
}

// CPU implementation of matrix transpose
void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) {
for (unsigned int j = 0; j < width; j++) {
for (unsigned int i = 0; i < width; i++) {
output[i * width + j] = input[j * width + i];
}
}
}


int main() {
float* Matrix;
float* TransposeMatrix;
float* cpuTransposeMatrix;

float* gpuMatrix;
float* gpuTransposeMatrix;

hipDeviceProp_t devProp;
RUNTIME_API_CALL(hipGetDeviceProperties(&devProp, 0));

std::cout << "Device name " << devProp.name << std::endl;

int i;
int errors;

Matrix = (float*)malloc(NUM * sizeof(float));
TransposeMatrix = (float*)malloc(NUM * sizeof(float));
cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));

// initialize the input data
for (i = 0; i < NUM; i++) {
Matrix[i] = (float)i * 10.0f;
}

// allocate the memory on the device side
RUNTIME_API_CALL(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
RUNTIME_API_CALL(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));

uint32_t iterations = 100;
while (iterations-- > 0) {
std::cout << "## Iteration (" << iterations << ") #################" << std::endl;

// Memory transfer from host to device
RUNTIME_API_CALL(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));

roctracer_mark("before HIP LaunchKernel");
roctxMark("before hipLaunchKernel");
int rangeId = roctxRangeStart("hipLaunchKernel range");
roctxRangePush("hipLaunchKernel");
// Lauching kernel from host
hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix,
gpuMatrix, WIDTH);
roctracer_mark("after HIP LaunchKernel");
roctxMark("after hipLaunchKernel");

// Memory transfer from device to host
roctxRangePush("hipMemcpy");

RUNTIME_API_CALL(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));

roctxRangePop(); // for "hipMemcpy"
roctxRangePop(); // for "hipLaunchKernel"
roctxRangeStop(rangeId);

// CPU MatrixTranspose computation
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);

// verify the results
errors = 0;
double eps = 1.0E-6;
for (i = 0; i < NUM; i++) {
if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {
errors++;
}
}
if (errors != 0) {
printf("FAILED: %d errors\n", errors);
} else {
printf("PASSED!\n");
}

}

// free the resources on device side
RUNTIME_API_CALL(hipFree(gpuMatrix));
RUNTIME_API_CALL(hipFree(gpuTransposeMatrix));

// free the resources on host side
free(Matrix);
free(TransposeMatrix);
free(cpuTransposeMatrix);

return errors;
}

0 comments on commit ab0d254

Please sign in to comment.