diff --git a/Code_Exercises/CMakeLists.txt b/Code_Exercises/CMakeLists.txt index 7778a5b4..4e49060d 100644 --- a/Code_Exercises/CMakeLists.txt +++ b/Code_Exercises/CMakeLists.txt @@ -66,5 +66,5 @@ add_subdirectory(Local_Memory_Tiling) add_subdirectory(Work_Group_Sizes) add_subdirectory(Matrix_Transpose) add_subdirectory(Functors) -add_subdirectory(OneMKL_gemm) +add_subdirectory(oneMath_gemm) add_subdirectory(More_SYCL_Features) diff --git a/Code_Exercises/OneMKL_gemm/CMakeLists.txt b/Code_Exercises/OneMKL_gemm/CMakeLists.txt deleted file mode 100644 index becd8121..00000000 --- a/Code_Exercises/OneMKL_gemm/CMakeLists.txt +++ /dev/null @@ -1,22 +0,0 @@ -#[[ - SYCL Academy (c) - - SYCL Academy is licensed under a Creative Commons Attribution-ShareAlike 4.0 - International License. - - You should have received a copy of the license along with this work. If not, - see . -]] - -add_sycl_executable(OneMKL_gemm source_onemkl_usm_gemm) -add_sycl_executable(OneMKL_gemm source_onemkl_buffer_gemm) - -target_link_libraries(OneMKL_gemm_source_onemkl_usm_gemm PUBLIC -L$ENV{MKLROOT} -lonemkl) -target_link_libraries(OneMKL_gemm_source_onemkl_buffer_gemm PUBLIC -L$ENV{MKLROOT} -lonemkl) -if(SYCL_ACADEMY_ENABLE_SOLUTIONS) - add_sycl_executable(OneMKL_gemm solution_onemkl_usm_gemm) - add_sycl_executable(OneMKL_gemm solution_onemkl_buffer_gemm) - - target_link_libraries(OneMKL_gemm_solution_onemkl_usm_gemm PUBLIC -L$ENV{MKLROOT} -lonemkl) - target_link_libraries(OneMKL_gemm_solution_onemkl_buffer_gemm PUBLIC -L$ENV{MKLROOT} -lonemkl) -endif() diff --git a/Code_Exercises/OneMKL_gemm/README.md b/Code_Exercises/OneMKL_gemm/README.md deleted file mode 100644 index 4d27768b..00000000 --- a/Code_Exercises/OneMKL_gemm/README.md +++ /dev/null @@ -1,27 +0,0 @@ -# SYCL Academy - -## Use of oneMKL library ---- - -In this exercise you will learn how to make use of APIs from oneMKL Interfaces library. -More specifically to perform a matrix multiplication using GEMM. 
- -The source code provides template to perform GEMM using oneMKL's USM/buffer API: -Please refer to the API here: https://spec.oneapi.io/versions/latest/elements/oneMKL/source/domains/blas/gemm.html - ---- -## Exercise `OneMKL_usm_gemm/source.cpp` - -The source code invloves matrix array initialization on host and generate reference results on the host. -`source_*.cpp` has templates with TODOs to complete the exercise - -## Build and execution hints - -To run the example: ./OneMKL_usm_gemm_solution (or) ./OneMKL_usm_gemm_source -To verify with CUBLAS debug info, `export CUBLAS_LOGINFO_DB=1` and `export CUBLAS_LOGDEST_DBG=stdout` - -For DevCloud via JupiterLab follow these [instructions](../devcloudJupyter.md). - -For DPC++: [instructions](../dpcpp.md). - -For AdaptiveCpp: [instructions](../adaptivecpp.md). diff --git a/Code_Exercises/oneMath_gemm/CMakeLists.txt b/Code_Exercises/oneMath_gemm/CMakeLists.txt new file mode 100644 index 00000000..7843b6ed --- /dev/null +++ b/Code_Exercises/oneMath_gemm/CMakeLists.txt @@ -0,0 +1,22 @@ +#[[ + SYCL Academy (c) + + SYCL Academy is licensed under a Creative Commons Attribution-ShareAlike 4.0 + International License. + + You should have received a copy of the license along with this work. If not, + see . 
+]] + +add_sycl_executable(oneMath_gemm source_onemath_usm_gemm) +add_sycl_executable(oneMath_gemm source_onemath_buffer_gemm) + +target_link_libraries(oneMath_gemm_source_onemath_usm_gemm PUBLIC -lonemath) +target_link_libraries(oneMath_gemm_source_onemath_buffer_gemm PUBLIC -lonemath) +if(SYCL_ACADEMY_ENABLE_SOLUTIONS) + add_sycl_executable(oneMath_gemm solution_onemath_usm_gemm) + add_sycl_executable(oneMath_gemm solution_onemath_buffer_gemm) + + target_link_libraries(oneMath_gemm_solution_onemath_usm_gemm PUBLIC -lonemath) + target_link_libraries(oneMath_gemm_solution_onemath_buffer_gemm PUBLIC -lonemath) +endif() diff --git a/Code_Exercises/oneMath_gemm/README.md b/Code_Exercises/oneMath_gemm/README.md new file mode 100644 index 00000000..da86aa37 --- /dev/null +++ b/Code_Exercises/oneMath_gemm/README.md @@ -0,0 +1,29 @@ +# SYCL Academy + +## Exercise 11: Using the oneMath library for matrix multiplication +--- + +In this exercise you will learn how to use the API of the oneMath library and +perform a matrix multiplication using the GEMM routines. + +The source code provides a template to perform GEMM using oneMath's USM/buffer +API. Please refer to the API here: +https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/domains/blas/gemm + +--- +## Exercise `oneMath_gemm/source_onemath_usm_gemm.cpp` + +The `source_*.cpp` files already include all the code to create input matrices and +compute a reference result serially on host. The exercise is to fill in the +sections marked with "TODO" comments to perform GEMM on a device using oneMath. + +## Build and execution hints + +To run the example: ./oneMath_gemm_solution_onemath_usm_gemm (or) ./oneMath_gemm_source_onemath_usm_gemm +To verify with CUBLAS debug info, `export CUBLAS_LOGINFO_DBG=1` and `export CUBLAS_LOGDEST_DBG=stdout` + +For DevCloud via JupyterLab follow these [instructions](../devcloudJupyter.md). + +For DPC++: [instructions](../dpcpp.md). + +For AdaptiveCpp: [instructions](../adaptivecpp.md). 
diff --git a/Code_Exercises/OneMKL_gemm/solution_onemkl_buffer_gemm.cpp b/Code_Exercises/oneMath_gemm/solution_onemath_buffer_gemm.cpp similarity index 72% rename from Code_Exercises/OneMKL_gemm/solution_onemkl_buffer_gemm.cpp rename to Code_Exercises/oneMath_gemm/solution_onemath_buffer_gemm.cpp index acfbb592..d9c36f43 100644 --- a/Code_Exercises/OneMKL_gemm/solution_onemkl_buffer_gemm.cpp +++ b/Code_Exercises/oneMath_gemm/solution_onemath_buffer_gemm.cpp @@ -7,20 +7,20 @@ You should have received a copy of the license along with this work. If not, see . - SYCL Quick Reference + Quick Reference ~~~~~~~~~~~~~~~~~~~~ - // oneMKL APIs: - https://spec.oneapi.io/versions/latest/elements/oneMKL/source/domains/blas/gemm.html#onemkl-blas-gemm + oneMath execution model: + https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/architecture/architecture - // DGEMM: - https://www.intel.com/content/www/us/en/docs/onemkl/tutorial-c/2021-4/multiplying-matrices-using-dgemm.html + oneMath GEMM API: + https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/domains/blas/gemm */ #include #include -#include +#include #include #include @@ -62,12 +62,12 @@ int VerifyResult(sycl::host_accessor& c_A, T* c_B) { ////////////////////////////////////////////////////////////////////////////////////////// void print_device_info(sycl::queue& Q) { - std::string sycl_dev_name, sycl_runtime, sycl_driver; + std::string sycl_dev_name, sycl_dev_version, sycl_driver; sycl_dev_name = Q.get_device().get_info(); sycl_driver = Q.get_device().get_info(); - sycl_runtime = Q.get_device().get_info(); - std::cout << "Running on " << sycl_dev_name.c_str() << ", SYCL runtime: v" - << sycl_runtime.c_str() + sycl_dev_version = Q.get_device().get_info(); + std::cout << "Running on " << sycl_dev_name.c_str() + << ", version: " << sycl_dev_version.c_str() << ", driver version: " << sycl_driver.c_str() << std::endl; } @@ -117,28 +117,30 @@ int 
main() { } } - // Create a SYCL in-order queue targetting GPU device - sycl::queue Q{sycl::gpu_selector_v, sycl::property::queue::in_order{}}; + // Create a SYCL queue + sycl::queue Q{}; // Prints some basic info related to the hardware print_device_info(Q); - // TODO: Allocate memory on device, (using sycl::malloc_device APIs) - // Creating 1D buffers for matrices which are bound to host memory array + // Create 1D buffers for matrices which are bound to host memory arrays sycl::buffer a{A.data(), sycl::range<1>{M * N}}; sycl::buffer b{B.data(), sycl::range<1>{N * P}}; sycl::buffer c{C_host.data(), sycl::range<1>{M * P}}; - // TODO: Use oneMKL GEMM USM API - oneapi::mkl::transpose transA = oneapi::mkl::transpose::nontrans; - oneapi::mkl::transpose transB = oneapi::mkl::transpose::nontrans; - oneapi::mkl::blas::column_major::gemm(Q, transA, transB, n, m, k, alpha, b, - ldB, a, ldA, beta, c, ldC); - Q.wait(); + // Use oneMath GEMM buffer API + oneapi::math::transpose transA = oneapi::math::transpose::nontrans; + oneapi::math::transpose transB = oneapi::math::transpose::nontrans; + oneapi::math::blas::column_major::gemm(Q, transA, transB, n, m, k, alpha, b, + ldB, a, ldA, beta, c, ldC); + + // Host accessor ensures synchronisation: constructing the accessor blocks + // until all kernels writing to buffer "c" have finished executing and the + // data has been copied back to host sycl::host_accessor C_device{c}; - // Verify results from oneMKL APIs + // Verify results from oneMath int result = 0; - std::cout << "Verify results between OneMKL & Serial: "; + std::cout << "Verify results between oneMath & serial: "; result = VerifyResult(C_device, C_host.data()); return result; diff --git a/Code_Exercises/OneMKL_gemm/solution_onemkl_usm_gemm.cpp b/Code_Exercises/oneMath_gemm/solution_onemath_usm_gemm.cpp similarity index 66% rename from Code_Exercises/OneMKL_gemm/solution_onemkl_usm_gemm.cpp rename to Code_Exercises/oneMath_gemm/solution_onemath_usm_gemm.cpp index 
79f61f77..e3ef8fe2 100644 --- a/Code_Exercises/OneMKL_gemm/solution_onemkl_usm_gemm.cpp +++ b/Code_Exercises/oneMath_gemm/solution_onemath_usm_gemm.cpp @@ -7,20 +7,20 @@ You should have received a copy of the license along with this work. If not, see . - SYCL Quick Reference + Quick Reference ~~~~~~~~~~~~~~~~~~~~ - // oneMKL APIs: - https://spec.oneapi.io/versions/latest/elements/oneMKL/source/domains/blas/gemm.html#onemkl-blas-gemm + oneMath execution model: + https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/architecture/architecture - // DGEMM: - https://www.intel.com/content/www/us/en/docs/onemkl/tutorial-c/2021-4/multiplying-matrices-using-dgemm.html + oneMath GEMM API: + https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/domains/blas/gemm */ #include #include -#include +#include #include #include @@ -62,12 +62,12 @@ int VerifyResult(T* c_A, T* c_B) { ////////////////////////////////////////////////////////////////////////////////////////// void print_device_info(sycl::queue& Q) { - std::string sycl_dev_name, sycl_runtime, sycl_driver; + std::string sycl_dev_name, sycl_dev_version, sycl_driver; sycl_dev_name = Q.get_device().get_info(); sycl_driver = Q.get_device().get_info(); - sycl_runtime = Q.get_device().get_info(); - std::cout << "Running on " << sycl_dev_name.c_str() << ", SYCL runtime: v" - << sycl_runtime.c_str() + sycl_dev_version = Q.get_device().get_info(); + std::cout << "Running on " << sycl_dev_name.c_str() + << ", version: " << sycl_dev_version.c_str() << ", driver version: " << sycl_driver.c_str() << std::endl; } @@ -117,35 +117,42 @@ int main() { } } - // Create a SYCL in-order queue targetting GPU device - sycl::queue Q{sycl::gpu_selector_v, sycl::property::queue::in_order{}}; + // Create a SYCL queue + sycl::queue Q{}; // Prints some basic info related to the hardware print_device_info(Q); - // TODO: Allocate memory on device, (using sycl::malloc_device APIs) + 
// Allocate memory on device, (using sycl::malloc_device APIs) T* a = sycl::malloc_device((M * N), Q); T* b = sycl::malloc_device((N * P), Q); T* c = sycl::malloc_device((M * P), Q); - Q.memcpy(a, A.data(), sizeof(T) * M * N); - Q.memcpy(b, B.data(), sizeof(T) * N * P); - - // TODO: Use oneMKL GEMM USM API - oneapi::mkl::transpose transA = oneapi::mkl::transpose::nontrans; - oneapi::mkl::transpose transB = oneapi::mkl::transpose::nontrans; - oneapi::mkl::blas::column_major::gemm(Q, transA, transB, n, m, k, alpha, b, - ldB, a, ldA, beta, c, - ldC); // row-major - + sycl::event eventCopyA = Q.memcpy(a, A.data(), sizeof(T) * M * N); + sycl::event eventCopyB = Q.memcpy(b, B.data(), sizeof(T) * N * P); + + // Use oneMath GEMM USM API + oneapi::math::transpose transA = oneapi::math::transpose::nontrans; + oneapi::math::transpose transB = oneapi::math::transpose::nontrans; + // Pass the synchronisation events to ensure GEMM starts after inputs are + // fully copied to the device + sycl::event eventGEMM = oneapi::math::blas::column_major::gemm( + Q, transA, transB, n, m, k, alpha, b, ldB, a, ldA, beta, c, ldC, + {eventCopyA, eventCopyB}); // row-major + + // Copy the results from device to host for verification std::vector C_device(M * P); - Q.memcpy(C_device.data(), c, sizeof(T) * M * P); - Q.wait(); + // Pass the synchronisation event for the copy to wait until GEMM is finished + sycl::event eventCopyC = + Q.memcpy(C_device.data(), c, sizeof(T) * M * P, eventGEMM); + + // Wait for the copy to finish + eventCopyC.wait(); - // Verify results from oneMKL APIs + // Verify results from oneMath int result = 0; - std::cout << "Verify results between OneMKL & Serial: "; + std::cout << "Verify results between oneMath & serial: "; result = VerifyResult(C_device.data(), C_host.data()); - // TODO: Free memory from device + // Free memory from device sycl::free(a, Q); sycl::free(b, Q); sycl::free(c, Q); diff --git a/Code_Exercises/OneMKL_gemm/source_onemkl_buffer_gemm.cpp 
b/Code_Exercises/oneMath_gemm/source_onemath_buffer_gemm.cpp similarity index 78% rename from Code_Exercises/OneMKL_gemm/source_onemkl_buffer_gemm.cpp rename to Code_Exercises/oneMath_gemm/source_onemath_buffer_gemm.cpp index 14235b27..a1fc53d0 100644 --- a/Code_Exercises/OneMKL_gemm/source_onemkl_buffer_gemm.cpp +++ b/Code_Exercises/oneMath_gemm/source_onemath_buffer_gemm.cpp @@ -7,20 +7,20 @@ You should have received a copy of the license along with this work. If not, see . - SYCL Quick Reference + Quick Reference ~~~~~~~~~~~~~~~~~~~~ - // oneMKL APIs: - https://spec.oneapi.io/versions/latest/elements/oneMKL/source/domains/blas/gemm.html#onemkl-blas-gemm + oneMath execution model: + https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/architecture/architecture - // DGEMM: - https://www.intel.com/content/www/us/en/docs/onemkl/tutorial-c/2021-4/multiplying-matrices-using-dgemm.html + oneMath GEMM API: + https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/domains/blas/gemm */ #include #include -#include +#include #include #include @@ -62,12 +62,12 @@ int VerifyResult(sycl::host_accessor& c_A, T* c_B) { ////////////////////////////////////////////////////////////////////////////////////////// void print_device_info(sycl::queue& Q) { - std::string sycl_dev_name, sycl_runtime, sycl_driver; + std::string sycl_dev_name, sycl_dev_version, sycl_driver; sycl_dev_name = Q.get_device().get_info(); sycl_driver = Q.get_device().get_info(); - sycl_runtime = Q.get_device().get_info(); - std::cout << "Running on " << sycl_dev_name.c_str() << ", SYCL runtime: v" - << sycl_runtime.c_str() + sycl_dev_version = Q.get_device().get_info(); + std::cout << "Running on " << sycl_dev_name.c_str() + << ", version: " << sycl_dev_version.c_str() << ", driver version: " << sycl_driver.c_str() << std::endl; } @@ -117,22 +117,21 @@ int main() { } } - // Create a SYCL in-order queue targetting GPU device - sycl::queue 
Q{sycl::gpu_selector_v, sycl::property::queue::in_order{}}; + // Create a SYCL queue + sycl::queue Q{}; // Prints some basic info related to the hardware print_device_info(Q); - // TODO: Allocate memory on device - // Creating 1D buffers for matrices which are bound to host memory array + // TODO: Create 1D buffers for matrices which are bound to host memory arrays - // TODO: Use oneMKL GEMM USM API + // TODO: Use oneMath GEMM buffer API // TODO: Copy the results from device to host for verification // Hint: Use sycl::host_accessor - // Verify results from oneMKL APIs + // Verify results from oneMath int result = 0; - std::cout << "Verify results between OneMKL & Serial: "; + std::cout << "Verify results between oneMath & serial: "; // TODO: Uncomment the following line verify the results // result = VerifyResult(C_device, C_host); diff --git a/Code_Exercises/OneMKL_gemm/source_onemkl_usm_gemm.cpp b/Code_Exercises/oneMath_gemm/source_onemath_usm_gemm.cpp similarity index 80% rename from Code_Exercises/OneMKL_gemm/source_onemkl_usm_gemm.cpp rename to Code_Exercises/oneMath_gemm/source_onemath_usm_gemm.cpp index 32fb55e4..17b5a0fa 100644 --- a/Code_Exercises/OneMKL_gemm/source_onemkl_usm_gemm.cpp +++ b/Code_Exercises/oneMath_gemm/source_onemath_usm_gemm.cpp @@ -7,20 +7,20 @@ You should have received a copy of the license along with this work. If not, see . 
- SYCL Quick Reference + Quick Reference ~~~~~~~~~~~~~~~~~~~~ - // oneMKL APIs: - https://spec.oneapi.io/versions/latest/elements/oneMKL/source/domains/blas/gemm.html#onemkl-blas-gemm + oneMath execution model: + https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/architecture/architecture - // DGEMM: - https://www.intel.com/content/www/us/en/docs/onemkl/tutorial-c/2021-4/multiplying-matrices-using-dgemm.html + oneMath GEMM API: + https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/domains/blas/gemm */ #include #include -#include +#include #include #include @@ -62,12 +62,12 @@ int VerifyResult(T* c_A, T* c_B) { ////////////////////////////////////////////////////////////////////////////////////////// void print_device_info(sycl::queue& Q) { - std::string sycl_dev_name, sycl_runtime, sycl_driver; + std::string sycl_dev_name, sycl_dev_version, sycl_driver; sycl_dev_name = Q.get_device().get_info(); sycl_driver = Q.get_device().get_info(); - sycl_runtime = Q.get_device().get_info(); - std::cout << "Running on " << sycl_dev_name.c_str() << ", SYCL runtime: v" - << sycl_runtime.c_str() + sycl_dev_version = Q.get_device().get_info(); + std::cout << "Running on " << sycl_dev_name.c_str() + << ", version: " << sycl_dev_version.c_str() << ", driver version: " << sycl_driver.c_str() << std::endl; } @@ -117,20 +117,20 @@ int main() { } } - // Create a SYCL in-order queue targetting GPU device - sycl::queue Q{sycl::gpu_selector_v, sycl::property::queue::in_order{}}; + // Create a SYCL queue + sycl::queue Q{}; // Prints some basic info related to the hardware print_device_info(Q); // TODO: Allocate memory on device, (using sycl::malloc_device APIs) - // TODO: Use oneMKL GEMM USM API + // TODO: Use oneMath GEMM USM API // TODO: Copy the results from device to host for verification - // Verify results from oneMKL APIs + // Verify results from oneMath int result = 0; - std::cout << "Verify results 
between OneMKL & Serial: "; + std::cout << "Verify results between oneMath & serial: "; // TODO: Uncomment the following line verify the results // result = VerifyResult(C_device, C_host);