-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
and added a test case. It seems to work. Now have to add the actual roctracer support.
- Loading branch information
Showing
5 changed files
with
299 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# - Try to find LibROCTRACER | ||
# Once done this will define | ||
# ROCTRACER_FOUND - System has ROCTRACER | ||
# ROCTRACER_INCLUDE_DIRS - The ROCTRACER include directories | ||
# ROCTRACER_LIBRARIES - The libraries needed to use ROCTRACER | ||
# ROCTRACER_DEFINITIONS - Compiler switches required for using ROCTRACER | ||
|
||
if(NOT DEFINED $ROCTRACER_ROOT) | ||
if(DEFINED ENV{ROCTRACER_ROOT}) | ||
# message(" env ROCTRACER_ROOT is defined as $ENV{ROCTRACER_ROOT}") | ||
set(ROCTRACER_ROOT $ENV{ROCTRACER_ROOT}) | ||
endif() | ||
if(DEFINED $ROCTRACER_PATH) | ||
set(ROCTRACER_ROOT ${ROCTRACER_PATH}) | ||
endif() | ||
endif() | ||
|
||
find_path(ROCTRACER_INCLUDE_DIR NAMES roctracer.h | ||
HINTS ${ROCM_ROOT}/include/roctracer ${ROCTRACER_ROOT}/include) | ||
|
||
find_library(ROCTRACER_LIBRARY NAMES roctracer64 | ||
HINTS ${ROCM_ROOT}/lib64 ${ROCM_ROOT}/lib ${ROCTRACER_ROOT}/lib64 ${ROCTRACER_ROOT}/lib) | ||
|
||
include(FindPackageHandleStandardArgs) | ||
# handle the QUIETLY and REQUIRED arguments and set ROCTRACER_FOUND to TRUE | ||
# if all listed variables are TRUE | ||
find_package_handle_standard_args(ROCTRACER DEFAULT_MSG | ||
ROCTRACER_LIBRARY ROCTRACER_INCLUDE_DIR) | ||
|
||
mark_as_advanced(ROCTRACER_INCLUDE_DIR ROCTRACER_LIBRARY) | ||
|
||
if(ROCTRACER_FOUND) | ||
set(ROCTRACER_LIBRARIES ${CUDA_LIBRARY} ${ROCTRACER_LIBRARY} ) | ||
set(ROCTRACER_INCLUDE_DIRS ${CUDAToolkit_INCLUDE_DIRS} ${ROCTRACER_INCLUDE_DIR}) | ||
set(ROCTRACER_DIR ${ROCTRACER_ROOT}) | ||
add_definitions(-DAPEX_HAVE_ROCTRACER) | ||
endif() | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# - Try to find LibROCTX | ||
# Once done this will define | ||
# ROCTX_FOUND - System has ROCTX | ||
# ROCTX_INCLUDE_DIRS - The ROCTX include directories | ||
# ROCTX_LIBRARIES - The libraries needed to use ROCTX | ||
# ROCTX_DEFINITIONS - Compiler switches required for using ROCTX | ||
|
||
if(NOT DEFINED $ROCTX_ROOT) | ||
if(DEFINED ENV{ROCTX_ROOT}) | ||
# message(" env ROCTX_ROOT is defined as $ENV{ROCTX_ROOT}") | ||
set(ROCTX_ROOT $ENV{ROCTX_ROOT}) | ||
endif() | ||
if(DEFINED $ROCTX_PATH) | ||
set(ROCTX_ROOT ${ROCTX_PATH}) | ||
endif() | ||
endif() | ||
|
||
find_path(ROCTX_INCLUDE_DIR NAMES roctx.h | ||
HINTS ${ROCM_ROOT}/include/roctracer ${ROCTX_ROOT}/include) | ||
|
||
find_library(ROCTX_LIBRARY NAMES roctx64 | ||
HINTS ${ROCM_ROOT}/lib64 ${ROCM_ROOT}/lib ${ROCTX_ROOT}/lib64 ${ROCTX_ROOT}/lib) | ||
|
||
include(FindPackageHandleStandardArgs) | ||
# handle the QUIETLY and REQUIRED arguments and set ROCTX_FOUND to TRUE | ||
# if all listed variables are TRUE | ||
find_package_handle_standard_args(ROCTX DEFAULT_MSG | ||
ROCTX_LIBRARY ROCTX_INCLUDE_DIR) | ||
|
||
mark_as_advanced(ROCTX_INCLUDE_DIR ROCTX_LIBRARY) | ||
|
||
if(ROCTX_FOUND) | ||
set(ROCTX_LIBRARIES ${CUDA_LIBRARY} ${ROCTX_LIBRARY} ) | ||
set(ROCTX_INCLUDE_DIRS ${CUDAToolkit_INCLUDE_DIRS} ${ROCTX_INCLUDE_DIR}) | ||
set(ROCTX_DIR ${ROCTX_ROOT}) | ||
add_definitions(-DAPEX_HAVE_ROCTX) | ||
endif() | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Make sure the compiler can find include files from our Apex library. | ||
include_directories (${APEX_SOURCE_DIR}/src/apex ${APEX_HIP_EXTRA_INCLUDE}) | ||
include_directories(${APEX_SOURCE_DIR}/src/unit_tests/HIP) | ||
|
||
# Make sure the linker can find the Apex library once it is built. | ||
link_directories (${APEX_BINARY_DIR}/src/apex) | ||
#link_directories (${APEX_BINARY_DIR}/src/apex_pthread_wrapper) | ||
|
||
set(example_programs | ||
MatrixTranspose | ||
) | ||
|
||
message(INFO "Using HIP libraries: ${HIP_LIBRARIES}") | ||
|
||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${APEX_HIP_C_FLAGS}") | ||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${APEX_HIP_CXX_FLAGS}") | ||
|
||
foreach(example_program ${example_programs}) | ||
set(sources ${example_program}.cpp) | ||
source_group("Source Files" FILES ${sources}) | ||
add_executable("${example_program}_hip" ${sources}) | ||
target_link_libraries ("${example_program}_hip" apex ${LIBS} hip::host) | ||
if (BUILD_STATIC_EXECUTABLES) | ||
set_target_properties("${example_program}_hip" PROPERTIES LINK_SEARCH_START_STATIC 1 LINK_SEARCH_END_STATIC 1) | ||
endif() | ||
add_dependencies ("${example_program}_hip" apex) | ||
add_dependencies (tests "${example_program}_hip") | ||
add_test ("test_${example_program}_hip" "${example_program}_hip") | ||
endforeach() | ||
|
||
# Make sure the linker can find the Apex library once it is built. | ||
link_directories (${APEX_BINARY_DIR}/src/apex) | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
/* | ||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. | ||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
The above copyright notice and this permission notice shall be included in | ||
all copies or substantial portions of the Software. | ||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
THE SOFTWARE. | ||
*/ | ||
|
||
#include <iostream> | ||
|
||
// hip header file | ||
#include <hip/hip_runtime.h> | ||
#include "roctracer_ext.h" | ||
// roctx header file | ||
#include <roctx.h> | ||
|
||
#define RUNTIME_API_CALL(apiFuncCall) \ | ||
do { \ | ||
hipError_t _status = apiFuncCall; \ | ||
if (_status != hipSuccess) { \ | ||
fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \ | ||
__FILE__, __LINE__, #apiFuncCall, hipGetErrorString(_status));\ | ||
exit(-1); \ | ||
} \ | ||
} while (0) | ||
|
||
#define WIDTH 1024 | ||
|
||
|
||
#define NUM (WIDTH * WIDTH) | ||
|
||
#define THREADS_PER_BLOCK_X 4 | ||
#define THREADS_PER_BLOCK_Y 4 | ||
#define THREADS_PER_BLOCK_Z 1 | ||
|
||
// Mark API | ||
extern "C" | ||
void roctracer_mark(const char* str); | ||
|
||
// Device (Kernel) function, it must be void | ||
__global__ void matrixTranspose(float* out, float* in, const int width) { | ||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; | ||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; | ||
|
||
out[y * width + x] = in[x * width + y]; | ||
} | ||
|
||
// CPU implementation of matrix transpose | ||
void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) { | ||
for (unsigned int j = 0; j < width; j++) { | ||
for (unsigned int i = 0; i < width; i++) { | ||
output[i * width + j] = input[j * width + i]; | ||
} | ||
} | ||
} | ||
|
||
|
||
int main() { | ||
float* Matrix; | ||
float* TransposeMatrix; | ||
float* cpuTransposeMatrix; | ||
|
||
float* gpuMatrix; | ||
float* gpuTransposeMatrix; | ||
|
||
hipDeviceProp_t devProp; | ||
RUNTIME_API_CALL(hipGetDeviceProperties(&devProp, 0)); | ||
|
||
std::cout << "Device name " << devProp.name << std::endl; | ||
|
||
int i; | ||
int errors; | ||
|
||
Matrix = (float*)malloc(NUM * sizeof(float)); | ||
TransposeMatrix = (float*)malloc(NUM * sizeof(float)); | ||
cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); | ||
|
||
// initialize the input data | ||
for (i = 0; i < NUM; i++) { | ||
Matrix[i] = (float)i * 10.0f; | ||
} | ||
|
||
// allocate the memory on the device side | ||
RUNTIME_API_CALL(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float))); | ||
RUNTIME_API_CALL(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float))); | ||
|
||
uint32_t iterations = 100; | ||
while (iterations-- > 0) { | ||
std::cout << "## Iteration (" << iterations << ") #################" << std::endl; | ||
|
||
// Memory transfer from host to device | ||
RUNTIME_API_CALL(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice)); | ||
|
||
roctracer_mark("before HIP LaunchKernel"); | ||
roctxMark("before hipLaunchKernel"); | ||
int rangeId = roctxRangeStart("hipLaunchKernel range"); | ||
roctxRangePush("hipLaunchKernel"); | ||
// Lauching kernel from host | ||
hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), | ||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, | ||
gpuMatrix, WIDTH); | ||
roctracer_mark("after HIP LaunchKernel"); | ||
roctxMark("after hipLaunchKernel"); | ||
|
||
// Memory transfer from device to host | ||
roctxRangePush("hipMemcpy"); | ||
|
||
RUNTIME_API_CALL(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost)); | ||
|
||
roctxRangePop(); // for "hipMemcpy" | ||
roctxRangePop(); // for "hipLaunchKernel" | ||
roctxRangeStop(rangeId); | ||
|
||
// CPU MatrixTranspose computation | ||
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); | ||
|
||
// verify the results | ||
errors = 0; | ||
double eps = 1.0E-6; | ||
for (i = 0; i < NUM; i++) { | ||
if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { | ||
errors++; | ||
} | ||
} | ||
if (errors != 0) { | ||
printf("FAILED: %d errors\n", errors); | ||
} else { | ||
printf("PASSED!\n"); | ||
} | ||
|
||
} | ||
|
||
// free the resources on device side | ||
RUNTIME_API_CALL(hipFree(gpuMatrix)); | ||
RUNTIME_API_CALL(hipFree(gpuTransposeMatrix)); | ||
|
||
// free the resources on host side | ||
free(Matrix); | ||
free(TransposeMatrix); | ||
free(cpuTransposeMatrix); | ||
|
||
return errors; | ||
} |