From 92f43bafb0b83cd73f2f83926cbd5db67af7658c Mon Sep 17 00:00:00 2001 From: Evgenii Averin <86725875+averinevg@users.noreply.github.com> Date: Fri, 27 Oct 2023 14:09:31 +0200 Subject: [PATCH 1/6] Use config from file for clang-tidy, remove cert-dcl37-c, cert-dcl51-cpp, *-braces-around-statements --- .clang-tidy | 18 +++++++- CMakeLists.txt | 5 --- addkernels/addkernels.cpp | 20 +++++++++ addkernels/include_inliner.cpp | 2 + cmake/ClangTidy.cmake | 3 +- speedtests/sequences.cpp | 2 + src/bz2.cpp | 6 +++ src/comgr.cpp | 2 + src/conv/invokers/mlir_impl_gemm.cpp | 4 ++ src/conv/solver_finders.cpp | 2 + src/convolution.cpp | 6 +++ src/convolution_api.cpp | 34 ++++++++++++++ src/ctc.cpp | 2 + src/db_path.cpp.in | 4 ++ src/driver_arguments.cpp | 2 + src/exec_utils.cpp | 2 + src/find_controls.cpp | 22 ++++++++++ src/fusion.cpp | 2 + src/gemm_v2.cpp | 34 ++++++++++++++ src/hip/batched_transpose_sol.cpp | 7 ++- src/hip/handlehip.cpp | 2 + src/hipoc/hipoc_program.cpp | 6 +++ src/include/miopen/conv/asm_implicit_gemm.hpp | 4 ++ src/include/miopen/convolution.hpp | 2 + src/include/miopen/db_record.hpp | 2 + src/include/miopen/env.hpp | 4 ++ src/include/miopen/execution_context.hpp | 4 ++ src/include/miopen/find_solution.hpp | 8 ++++ src/include/miopen/generic_search.hpp | 4 ++ src/include/miopen/hipoc_kernel.hpp | 2 + src/include/miopen/kern_db.hpp | 6 +++ src/include/miopen/magic_div.hpp | 2 + src/include/miopen/sequences.hpp | 4 ++ src/include/miopen/solver.hpp | 4 ++ .../miopen/solver/ck_utility_common.hpp | 4 ++ .../miopen/solver/implicitgemm_util.hpp | 10 ++++- src/include/miopen/sqlite_db.hpp | 18 ++++++++ src/include/miopen/tensor_ops.hpp | 2 + src/include/miopen/tensor_reorder_util.hpp | 10 +++++ src/invoker_cache.cpp | 6 +++ src/kernel_cache.cpp | 2 + src/kernels/hip_float8.hpp | 4 ++ src/logger.cpp | 29 ++++++------ src/ocl/convolutionocl.cpp | 18 ++++++++ src/ocl/ctcocl.cpp | 10 +++++ src/ocl/rnnocl.cpp | 18 ++++++++ src/operator.cpp | 2 + src/problem.cpp | 4 ++ 
src/reducetensor.cpp | 44 +++++++++++++++++++ src/reducetensor_api.cpp | 20 ++++++--- src/rnn.cpp | 8 ++++ src/rnn/Solutions/rnn_transformer.cpp | 22 ++++++++++ src/rnn/rnn_util.cpp | 16 +++++++ src/rnn_api.cpp | 40 ++++++++++++----- src/seq_tensor.cpp | 10 +++++ src/solution.cpp | 8 ++++ src/solver/conv_MP_bidirectional_winograd.cpp | 24 ++++++++-- src/solver/conv_asm_1x1u_bias_activ_fused.cpp | 4 ++ src/solver/conv_asm_dir_BwdWrW1x1.cpp | 4 ++ src/solver/conv_asm_dir_BwdWrW3x3.cpp | 32 ++++++++++++++ .../conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp | 8 ++++ src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp | 2 + .../conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp | 8 ++++ .../conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp | 10 +++++ .../conv_asm_implicit_gemm_v4r1_dynamic.cpp | 14 ++++-- ...m_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp | 6 +++ ...onv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp | 4 +- src/solver/conv_bin_winoRxS_fused.cpp | 6 +++ .../conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp | 2 + src/solver/conv_direct_naive_conv.cpp | 16 +++++++ src/solver/conv_direct_naive_conv_bwd.cpp | 2 + .../conv_hip_implicit_gemm_bwd_v1r1.cpp | 4 ++ ...conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp | 2 + .../conv_hip_implicit_gemm_fwd_v4r1.cpp | 4 ++ ...licit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp | 2 + ...conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp | 2 + ...licit_gemm_wrw_v4r4_xdlops_padded_gemm.cpp | 2 + src/solver/conv_multipass_wino3x3WrW.cpp | 28 ++++++++++++ src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp | 4 +- src/solver/conv_ocl_dir2D_bwdWrW_2.cpp | 2 + src/solver/conv_ocl_dir2Dfwd1x1.cpp | 4 +- .../conv_ocl_dir2Dfwd_exhaustive_search.cpp | 6 +++ src/solver/conv_winoRxS.cpp | 2 + src/solver/conv_wino_fury_RxS.cpp | 2 + src/solver/fft.cpp | 14 ++++++ src/solver/gemm.cpp | 10 +++++ src/solver/gemm_bwd.cpp | 12 +++++ src/sqlite_db.cpp | 2 + src/tensor.cpp | 2 + test/cbna_inference.cpp | 4 ++ test/conv_common.hpp | 17 ++++++- test/cpu_conv.hpp | 4 ++ test/cpu_reduce_util.hpp | 8 ++++ test/ctc.cpp | 20 +++++++++ 
test/driver.hpp | 14 ++++++ test/dropout.cpp | 2 + test/dropout_util.hpp | 10 +++++ test/gpu_nchw_nhwc_transpose.cpp | 23 +++++----- test/gpu_reference_kernel.cpp | 23 +++++----- test/handle_test.cpp | 28 ++++++++++++ test/na_inference.cpp | 2 + test/na_train.cpp | 2 + test/perfdb.cpp | 14 ++++++ test/pooling_common.hpp | 6 ++- test/reduce_test.cpp | 28 +++++++++++- test/soft_max.cpp | 24 ++++++++++ test/sqlite_perfdb.cpp | 12 +++++ test/tensor_reorder.cpp | 25 ++++++----- test/tensor_trans.cpp | 10 +++++ test/tensor_util.hpp | 4 ++ test/tensor_vec.cpp | 8 ++++ 111 files changed, 977 insertions(+), 95 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 5c2b781687..e0e491d787 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,3 +1,17 @@ CheckOptions: - - key: bugprone-reserved-identifier.AllowedIdentifiers - value: '__HIP_PLATFORM_HCC__;__HIP_ROCclr__' + - key: google-readability-braces-around-statements.ShortStatementLines + value: '6' + - key: hicpp-braces-around-statements.ShortStatementLines + value: '6' + - key: readability-braces-around-statements.ShortStatementLines +# TODO: +# Current value is 6. Even 4 is too much, but clang-tidy counts all lines after if(...) and with 2 +# it generates warning even for trivial if-else statement: +# if(...) 
+# do_this(); +# else +# do_that(); +# This also applies to aliases: +# google-readability-braces-around-statements and +# hicpp-braces-around-statements + value: '6' diff --git a/CMakeLists.txt b/CMakeLists.txt index 79e0bf7f7c..23187afef2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -559,8 +559,6 @@ enable_clang_tidy( -bugprone-macro-parentheses # too many narrowing conversions in our code -bugprone-narrowing-conversions - -cert-dcl37-c - -cert-dcl51-cpp -cert-env33-c # Yea we shouldn't be using rand() -cert-msc30-c @@ -589,12 +587,10 @@ enable_clang_tidy( -cppcoreguidelines-special-member-functions -fuchsia-* -google-explicit-constructor - -google-readability-braces-around-statements -google-readability-todo -google-runtime-int -google-runtime-references -hicpp-avoid-c-arrays - -hicpp-braces-around-statements -hicpp-explicit-conversions -hicpp-named-parameter -hicpp-no-array-decay @@ -630,7 +626,6 @@ enable_clang_tidy( -modernize-concat-nested-namespaces -modernize-unary-static-assert -performance-unnecessary-value-param - -readability-braces-around-statements -readability-convert-member-functions-to-static -readability-else-after-return # TODO We are not ready to use it, but very useful. 
diff --git a/addkernels/addkernels.cpp b/addkernels/addkernels.cpp index 03c4dd5a5f..cf3a8cdf5f 100644 --- a/addkernels/addkernels.cpp +++ b/addkernels/addkernels.cpp @@ -173,14 +173,20 @@ void Process(const std::string& sourcePath, try { if(is_asm) + { inliner.Process( sourceFile, inlinerTemp, root, sourcePath, ".include", false, recurse); + } else if(is_cl || is_header) + { inliner.Process( sourceFile, inlinerTemp, root, sourcePath, "#include", true, recurse); + } else if(is_hip) + { inliner.Process( sourceFile, inlinerTemp, root, sourcePath, "<#not_include>", true, false); + } } catch(const InlineException& ex) { @@ -261,19 +267,33 @@ int main(int argsn, char** args) target = &targetFile; } else if(arg == "l" || arg == "line-size") + { lineSize = std::stol(args[++i]); + } else if(arg == "b" || arg == "buffer") + { bufferSize = std::stol(args[++i]); + } else if(arg == "g" || arg == "guard") + { guard = args[++i]; + } else if(arg == "n" || arg == "no-recurse") + { recurse = false; + } else if(arg == "m" || arg == "mark-includes") + { mark_includes = true; + } else if(arg == "e" || arg == "extern") + { as_extern = true; + } else + { UnknownArgument(arg); + } } WrongUsage("source key is required"); diff --git a/addkernels/include_inliner.cpp b/addkernels/include_inliner.cpp index 0c0cc9c024..00aeb061b4 100644 --- a/addkernels/include_inliner.cpp +++ b/addkernels/include_inliner.cpp @@ -164,8 +164,10 @@ void IncludeInliner::ProcessCore(std::istream& input, std::ifstream include_file(abs_include_file_path, std::ios::in); if(!include_file.good()) + { throw IncludeCantBeOpenedException(include_file_path, GetIncludeStackTrace(current_line)); + } ProcessCore(include_file, output, diff --git a/cmake/ClangTidy.cmake b/cmake/ClangTidy.cmake index fc3eb75315..c0d71c2535 100644 --- a/cmake/ClangTidy.cmake +++ b/cmake/ClangTidy.cmake @@ -149,9 +149,8 @@ function(clang_tidy_check TARGET) string(MAKE_C_IDENTIFIER "${SOURCE}" tidy_file) set(tidy_target 
tidy-target-${TARGET}-${tidy_file}) add_custom_target(${tidy_target} - # for some targets clang-tidy not able to get information from .clang-tidy DEPENDS ${SOURCE} - COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_HCC__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml" + COMMAND ${CLANG_TIDY_COMMAND} ${SOURCE} "-config-file=${PROJECT_SOURCE_DIR}/.clang-tidy" "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml" WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMENT "clang-tidy: Running clang-tidy on target ${SOURCE}..." ) diff --git a/speedtests/sequences.cpp b/speedtests/sequences.cpp index d9937b6fac..9ba3b1c21b 100644 --- a/speedtests/sequences.cpp +++ b/speedtests/sequences.cpp @@ -371,8 +371,10 @@ struct SpeedTestDriver : public test_driver const auto start = std::chrono::steady_clock::now(); for(auto i = 0; i < iterations; i++) + { for(auto j = 0; j < 128 * 1024 * 1024; j++) rule_getter().Next(td); + } const auto time = std::chrono::duration_cast( std::chrono::steady_clock::now() - start) diff --git a/src/bz2.cpp b/src/bz2.cpp index aeeae0f64a..a17b3f4e28 100644 --- a/src/bz2.cpp +++ b/src/bz2.cpp @@ -34,16 +34,22 @@ void check_bz2_error(int e, const std::string& name) if(e == BZ_MEM_ERROR) throw std::runtime_error(name + " failed: out of memory!"); if(e == BZ_OUTBUFF_FULL) + { throw std::runtime_error(name + " failed: the size of the compressed data exceeds *destLen"); + } if(e == BZ_PARAM_ERROR) throw std::runtime_error(name + " failed: bad parameters given to function"); if(e == BZ_DATA_ERROR) + { throw std::runtime_error( name + " failed: a data integrity error was detected in the compressed data"); + } if(e == BZ_DATA_ERROR_MAGIC) + { throw std::runtime_error( name + " failed: the compressed data doesn't begin with the right magic bytes"); + } if(e == BZ_UNEXPECTED_EOF) throw std::runtime_error(name + " 
failed: the compressed data ends unexpectedly"); throw std::runtime_error(name + " failed: unknown error!"); diff --git a/src/comgr.cpp b/src/comgr.cpp index f492eef1a1..32477b08ba 100644 --- a/src/comgr.cpp +++ b/src/comgr.cpp @@ -641,8 +641,10 @@ class Dataset : ComgrOwner const char name[] = "hip.pch"; const Data d(AMD_COMGR_DATA_KIND_PRECOMPILED_HEADER); if(miopen::IsEnabled(MIOPEN_DEBUG_COMGR_LOG_SOURCE_NAMES{})) + { MIOPEN_LOG_I(name << ' ' << size << " bytes, ptr = " << static_cast(content)); + } d.SetName(name); d.SetFromBuffer(content, size); AddData(d); diff --git a/src/conv/invokers/mlir_impl_gemm.cpp b/src/conv/invokers/mlir_impl_gemm.cpp index 541975c36f..d60424f179 100644 --- a/src/conv/invokers/mlir_impl_gemm.cpp +++ b/src/conv/invokers/mlir_impl_gemm.cpp @@ -342,6 +342,7 @@ InvokerFactory MakeMlirFwdInvokerFactory(const miopen::ProblemDescription& probl handle.Run(kernels[0])(args); #endif if(needs_output_cast) + { CastTensor(handle, &lowp_quant, outConvDesc, @@ -350,6 +351,7 @@ InvokerFactory MakeMlirFwdInvokerFactory(const miopen::ProblemDescription& probl tensors.out, 0, 0); + } }; }; } @@ -434,9 +436,11 @@ InvokerFactory MakeMlirWrWInvokerFactory(const miopen::ProblemDescription& probl const auto workspaceSize = wrw_invoke_params.workSpaceSize; if((workspace == nullptr) || (workspaceSize < workspace_req)) + { MIOPEN_THROW("Not enough workspace for MLIR WRW (" + std::to_string(workspaceSize) + " provided, " + std::to_string(workspace_req) + " required)"); + } TensorDescriptor workspaceDesc( miopenFloat, tensors.dwDesc.GetLengths(), tensors.dwDesc.GetStrides()); diff --git a/src/conv/solver_finders.cpp b/src/conv/solver_finders.cpp index 809f333bd8..bf96c9f13c 100644 --- a/src/conv/solver_finders.cpp +++ b/src/conv/solver_finders.cpp @@ -305,8 +305,10 @@ void FindCore(const AnyInvokeParams& invoke_ctx, const auto network_config = problem.MakeNetworkConfig(); for(const auto& ss : solutions) + { if(!ss.second.empty()) EvaluateInvokers(handle, 
ss.second, ss.first, network_config, invoke_ctx, record); + } } bool IsAlgorithmDisabled(miopenConvAlgorithm_t algo) diff --git a/src/convolution.cpp b/src/convolution.cpp index ac7c28fdc4..58835db36c 100644 --- a/src/convolution.cpp +++ b/src/convolution.cpp @@ -503,19 +503,23 @@ void ConvolutionAttribute::Set(miopenConvolutionAttrib_t attr, int value) if(attr == MIOPEN_CONVOLUTION_ATTRIB_FP16_ALT_IMPL) { if(value < -1 || value > 1) + { MIOPEN_THROW(miopenStatusBadParm, "[Set conv attribute] Error: Attempt to set invalid value of " "MIOPEN_CONVOLUTION_ATTRIB_FP16_ALT_IMPL: " + std::to_string(value)); + } gfx90aFp16alt.value = value; } else if(attr == MIOPEN_CONVOLUTION_ATTRIB_DETERMINISTIC) { if(value < 0 || value > 1) + { MIOPEN_THROW(miopenStatusBadParm, "[Set conv attribute] Error: Attemp to set invalid value for " "MIOPEN_CONVOLUTION_ATTRIB_DETERMINISTIC: " + std::to_string(value)); + } deterministic.value = value; } else if(attr == MIOPEN_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE) @@ -523,10 +527,12 @@ void ConvolutionAttribute::Set(miopenConvolutionAttrib_t attr, int value) const auto rounding_mode = static_cast(value); if(rounding_mode != miopenF8RoundingModeStochastic && rounding_mode != miopenF8RoundingModeStandard) + { MIOPEN_THROW(miopenStatusBadParm, "[Set conv attribute] Error: Attempt to set invalid value for " "MIOPEN_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE" + std::to_string(value)); + } fp8rounding_mode.rounding_mode = rounding_mode; } else diff --git a/src/convolution_api.cpp b/src/convolution_api.cpp index 555b90f926..505eb4e1f8 100644 --- a/src/convolution_api.cpp +++ b/src/convolution_api.cpp @@ -490,6 +490,7 @@ miopenFindConvolutionForwardAlgorithm(miopenHandle_t handle, xDesc, wDesc, convDesc, yDesc, miopen::debug::ConvDirection::Fwd, false); /// workaround for previous trans conv logic if(miopen::deref(convDesc).mode == miopenTranspose) + { return miopen::try_([&] { miopen::deref(convDesc).FindConvBwdDataAlgorithm(miopen::deref(handle), 
miopen::deref(xDesc), @@ -512,6 +513,7 @@ miopenFindConvolutionForwardAlgorithm(miopenHandle_t handle, static_cast(perfResults[i].bwd_data_algo); } }); + } return miopen::try_([&] { miopen::deref(convDesc).FindConvFwdAlgorithm(miopen::deref(handle), @@ -563,6 +565,7 @@ extern "C" miopenStatus_t miopenConvolutionForward(miopenHandle_t handle, /// workaround for previous trans conv logic if(miopen::deref(convDesc).mode == miopenTranspose) + { return miopen::try_([&] { // It is guaranteed that enum values are equal, see conv_algo_name.cpp const auto algo_trans = static_cast(algo); @@ -579,6 +582,7 @@ extern "C" miopenStatus_t miopenConvolutionForward(miopenHandle_t handle, DataCast(workSpace), workSpaceSize); }); + } return miopen::try_([&] { miopen::deref(convDesc).ConvolutionForward(miopen::deref(handle), @@ -654,8 +658,10 @@ static inline void ReturnSolutions(const std::vector& solu if(solution_count_ret != nullptr) *solution_count_ret = solutions.size(); if(solutions_ret != nullptr) + { for(auto i = 0; i < solutions.size(); ++i) solutions_ret[i] = solutions[i]; + } } extern "C" miopenStatus_t @@ -694,19 +700,23 @@ miopenConvolutionForwardGetSolutionWorkspaceSize(miopenHandle_t handle, MIOPEN_LOG_FUNCTION(handle, wDesc, xDesc, convDesc, yDesc, solution_id, workSpaceSize); return miopen::try_([&] { if(miopen::deref(convDesc).mode == miopenTranspose) + { *workSpaceSize = miopen::deref(convDesc).GetBackwardSolutionWorkspaceSize(miopen::deref(handle), miopen::deref(xDesc), miopen::deref(wDesc), miopen::deref(yDesc), solution_id); + } else + { *workSpaceSize = miopen::deref(convDesc).GetForwardSolutionWorkspaceSize(miopen::deref(handle), miopen::deref(wDesc), miopen::deref(xDesc), miopen::deref(yDesc), solution_id); + } }); } @@ -747,6 +757,7 @@ miopenConvolutionForwardImmediate(miopenHandle_t handle, return miopen::try_([&] { if(miopen::deref(convDesc).mode == miopenTranspose) + { miopen::deref(convDesc).ConvolutionBackwardImmediate(miopen::deref(handle), 
miopen::deref(xDesc), DataCast(x), @@ -757,7 +768,9 @@ miopenConvolutionForwardImmediate(miopenHandle_t handle, DataCast(workSpace), workSpaceSize, solution_id); + } else + { miopen::deref(convDesc).ConvolutionForwardImmediate(miopen::deref(handle), miopen::deref(wDesc), DataCast(w), @@ -768,6 +781,7 @@ miopenConvolutionForwardImmediate(miopenHandle_t handle, DataCast(workSpace), workSpaceSize, solution_id); + } }); } @@ -825,19 +839,23 @@ miopenConvolutionBackwardDataGetSolutionWorkspaceSize(miopenHandle_t handle, MIOPEN_LOG_FUNCTION(handle, dyDesc, wDesc, convDesc, dxDesc, solution_id, workSpaceSize); return miopen::try_([&] { if(miopen::deref(convDesc).mode == miopenTranspose) + { *workSpaceSize = miopen::deref(convDesc).GetForwardSolutionWorkspaceSize(miopen::deref(handle), miopen::deref(wDesc), miopen::deref(dyDesc), miopen::deref(dxDesc), solution_id); + } else + { *workSpaceSize = miopen::deref(convDesc).GetBackwardSolutionWorkspaceSize(miopen::deref(handle), miopen::deref(dyDesc), miopen::deref(wDesc), miopen::deref(dxDesc), solution_id); + } }); } @@ -877,6 +895,7 @@ miopenConvolutionBackwardDataImmediate(miopenHandle_t handle, dxDesc, wDesc, convDesc, dyDesc, miopen::debug::ConvDirection::Bwd, true); return miopen::try_([&] { if(miopen::deref(convDesc).mode == miopenTranspose) + { miopen::deref(convDesc).ConvolutionForwardImmediate(miopen::deref(handle), miopen::deref(wDesc), DataCast(w), @@ -887,7 +906,9 @@ miopenConvolutionBackwardDataImmediate(miopenHandle_t handle, DataCast(workSpace), workSpaceSize, solution_id); + } else + { miopen::deref(convDesc).ConvolutionBackwardImmediate(miopen::deref(handle), miopen::deref(dyDesc), DataCast(dy), @@ -898,6 +919,7 @@ miopenConvolutionBackwardDataImmediate(miopenHandle_t handle, DataCast(workSpace), workSpaceSize, solution_id); + } }); } @@ -955,19 +977,23 @@ extern "C" miopenStatus_t miopenConvolutionBackwardWeightsGetSolutionWorkspaceSi MIOPEN_LOG_FUNCTION(handle, dyDesc, xDesc, convDesc, dwDesc, solution_id, 
workSpaceSize); return miopen::try_([&] { if(miopen::deref(convDesc).mode == miopenTranspose) + { *workSpaceSize = miopen::deref(convDesc).GetWrwSolutionWorkspaceSize(miopen::deref(handle), miopen::deref(xDesc), miopen::deref(dyDesc), miopen::deref(dwDesc), solution_id); + } else + { *workSpaceSize = miopen::deref(convDesc).GetWrwSolutionWorkspaceSize(miopen::deref(handle), miopen::deref(dyDesc), miopen::deref(xDesc), miopen::deref(dwDesc), solution_id); + } }); } @@ -1007,6 +1033,7 @@ miopenConvolutionBackwardWeightsImmediate(miopenHandle_t handle, xDesc, dwDesc, convDesc, dyDesc, miopen::debug::ConvDirection::WrW, true); return miopen::try_([&] { if(miopen::deref(convDesc).mode == miopenTranspose) + { miopen::deref(convDesc).ConvolutionWrwImmediate(miopen::deref(handle), miopen::deref(xDesc), DataCast(x), @@ -1017,7 +1044,9 @@ miopenConvolutionBackwardWeightsImmediate(miopenHandle_t handle, DataCast(workSpace), workSpaceSize, solution_id); + } else + { miopen::deref(convDesc).ConvolutionWrwImmediate(miopen::deref(handle), miopen::deref(dyDesc), DataCast(dy), @@ -1028,6 +1057,7 @@ miopenConvolutionBackwardWeightsImmediate(miopenHandle_t handle, DataCast(workSpace), workSpaceSize, solution_id); + } }); } @@ -1067,6 +1097,7 @@ miopenFindConvolutionBackwardDataAlgorithm(miopenHandle_t handle, dxDesc, wDesc, convDesc, dyDesc, miopen::debug::ConvDirection::Bwd, false); /// workaround for previous trans conv logic if(miopen::deref(convDesc).mode == miopenTranspose) + { return miopen::try_([&] { miopen::deref(convDesc).FindConvFwdAlgorithm(miopen::deref(handle), miopen::deref(dyDesc), @@ -1089,6 +1120,7 @@ miopenFindConvolutionBackwardDataAlgorithm(miopenHandle_t handle, static_cast(perfResults[i].fwd_algo); } }); + } return miopen::try_([&] { miopen::deref(convDesc).FindConvBwdDataAlgorithm(miopen::deref(handle), @@ -1141,6 +1173,7 @@ miopenConvolutionBackwardData(miopenHandle_t handle, /// workaround for previous trans conv logic if(miopen::deref(convDesc).mode == 
miopenTranspose) + { return miopen::try_([&] { // It is guaranteed that enum values are equal, see conv_algo_name.cpp const auto algo_trans = static_cast(algo); @@ -1157,6 +1190,7 @@ miopenConvolutionBackwardData(miopenHandle_t handle, DataCast(workSpace), workSpaceSize); }); + } return miopen::try_([&] { miopen::deref(convDesc).ConvolutionBackwardData(miopen::deref(handle), diff --git a/src/ctc.cpp b/src/ctc.cpp index f1d5be69ad..4cc3ffe1b0 100644 --- a/src/ctc.cpp +++ b/src/ctc.cpp @@ -83,8 +83,10 @@ size_t CTCLossDescriptor::GetCTCLossWorkspaceSize(Handle& handle, MIOPEN_THROW(miopenStatusBadParm, "Wrong label id at batch"); } if(j > 0) + { if(labels[labels_offset[i] + j] == labels[labels_offset[i] + j - 1]) repeat[i]++; + } } if(labelLengths[i] + repeat[i] > inputLengths[i]) diff --git a/src/db_path.cpp.in b/src/db_path.cpp.in index 113b5fdcf4..4deb26ea9c 100644 --- a/src/db_path.cpp.in +++ b/src/db_path.cpp.in @@ -68,7 +68,9 @@ std::string GetSystemDbPath() auto p = GetStringEnv(MIOPEN_SYSTEM_DB_PATH{}); if(p == nullptr) #if MIOPEN_BUILD_DEV + { return "${MIOPEN_SYSTEM_DB_PATH}"; + } #else { // Get the module path and construct the db path @@ -77,7 +79,9 @@ std::string GetSystemDbPath() } #endif else + { return p; + } } namespace { diff --git a/src/driver_arguments.cpp b/src/driver_arguments.cpp index d9e82d270a..ac71b3974c 100644 --- a/src/driver_arguments.cpp +++ b/src/driver_arguments.cpp @@ -228,12 +228,14 @@ std::string BnormArgsForMIOpenDriver(miopenTensorDescriptor_t xDesc, } ss << " -M " << bn_mode; // clang-format on if(print_for_bn_driver) + { BnDriverInfo(ss, dir, resultRunningMean, resultRunningVariance, resultSaveMean, resultSaveInvVariance); + } return ss.str(); } diff --git a/src/exec_utils.cpp b/src/exec_utils.cpp index 27a064b837..da35f11bab 100644 --- a/src/exec_utils.cpp +++ b/src/exec_utils.cpp @@ -63,8 +63,10 @@ int Run(const std::string& p, std::istream* in, std::ostream* out) if(redirect_stdout) { while(feof(pipe.get()) == 0) + { 
if(fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) *out << buffer.data(); + } } else { diff --git a/src/find_controls.cpp b/src/find_controls.cpp index 5c5086d3d7..d7814eeebf 100644 --- a/src/find_controls.cpp +++ b/src/find_controls.cpp @@ -75,15 +75,25 @@ FindEnforceAction GetFindEnforceActionImpl() for(auto& c : str) c = toupper(static_cast(c)); if(str == "NONE") + { return FindEnforceAction::None; + } else if(str == "DB_UPDATE") + { return FindEnforceAction::DbUpdate; + } else if(str == "SEARCH") + { return FindEnforceAction::Search; + } else if(str == "SEARCH_DB_UPDATE") + { return FindEnforceAction::SearchDbUpdate; + } else if(str == "DB_CLEAN") + { return FindEnforceAction::DbClean; + } else { // Nop. Fall down & try numerics. } @@ -121,10 +131,14 @@ boost::optional> GetEnvFindOnlySolverImpl() numeric_id = solver::Id{solver::Id{numeric_id}.ToString()}.Value(); } if(numeric_id != 0) + { MIOPEN_LOG_NQI(numeric_id); + } else + { MIOPEN_THROW(miopenStatusBadParm, "Invalid value of MIOPEN_DEBUG_FIND_ONLY_SOLVER: " + kinder); + } const auto id = solver::Id{numeric_id}; if(id.IsValid()) { @@ -188,13 +202,21 @@ FindMode::Values GetFindModeValueImpl2() for(auto& c : str) c = toupper(static_cast(c)); if(str == "NORMAL") + { return FindMode::Values::Normal; + } else if(str == "FAST") + { return FindMode::Values::Fast; + } else if(str == "HYBRID") + { return FindMode::Values::Hybrid; + } else if(str == "DYNAMIC_HYBRID") + { return FindMode::Values::DynamicHybrid; + } else { // Nop. Fall down & try numerics. 
} diff --git a/src/fusion.cpp b/src/fusion.cpp index 7e1f4a3fa4..9bcaf7d6bc 100644 --- a/src/fusion.cpp +++ b/src/fusion.cpp @@ -677,8 +677,10 @@ miopenStatus_t BiasFusionOpDescriptor::SetArgs(OperatorArgs& args, std::string FusionPlanDescriptor::GetAlgorithmName(const Handle& /*handle*/) { if(conv_fwd_algo) + { return miopen::ConvolutionAlgoToDirectionalString( static_cast(*conv_fwd_algo), miopen::conv::Direction::Forward); + } MIOPEN_THROW(miopenStatusBadParm, "GetAlgorithmName was called, but Algorithm has not been set"); } diff --git a/src/gemm_v2.cpp b/src/gemm_v2.cpp index e080074bfb..08f57649f0 100644 --- a/src/gemm_v2.cpp +++ b/src/gemm_v2.cpp @@ -90,15 +90,25 @@ FlagsForRocblasFp32Fp16Call(const miopen::GemmDescriptor& desc) // bool gfx90aFp static inline rocblas_computetype rocBlasComputeType_ex3(const miopen::GemmDescriptor& desc) { if(desc.a_cast_type == miopenFloat8 && desc.b_cast_type == miopenFloat8) + { return rocblas_compute_type_f8_f8_f32; + } else if(desc.a_cast_type == miopenFloat8 && desc.b_cast_type == miopenBFloat8) + { return rocblas_compute_type_f8_bf8_f32; + } else if(desc.a_cast_type == miopenBFloat8 && desc.b_cast_type == miopenFloat8) + { return rocblas_compute_type_bf8_f8_f32; + } else if(desc.a_cast_type == miopenBFloat8 && desc.b_cast_type == miopenBFloat8) + { return rocblas_compute_type_bf8_bf8_f32; + } else + { return rocblas_compute_type_f32; + } } #endif @@ -481,9 +491,13 @@ miopenStatus_t CallGemm(const Handle& handle, gemm_desc.a_cast_type == miopenBFloat8) || (gemm_desc.b_cast_type == miopenBFloat8 || gemm_desc.b_cast_type == miopenFloat8)) + { return true; + } else + { return false; + } }(); // ex3 API only works on the gfx94x ASIC; if(needs_ex3) @@ -494,8 +508,10 @@ miopenStatus_t CallGemm(const Handle& handle, handle, gemm_desc, A, a_offset, B, b_offset, C, c_offset); } else + { MIOPEN_THROW(miopenStatusBadParm, "8-bit floating types are only supported on gfx94x"); + } } else { @@ -607,8 +623,10 @@ miopenStatus_t 
CallGemm(const Handle& handle, handle, gemm_desc, A, a_offset, B, b_offset, C, c_offset); } else + { MIOPEN_THROW(miopenStatusBadParm, "8-bit floating types are only supported on gfx94x"); + } }; break; @@ -734,9 +752,13 @@ miopenStatus_t CallGemmStridedBatched(const Handle& handle, gemm_desc.a_cast_type == miopenBFloat8) || (gemm_desc.b_cast_type == miopenBFloat8 || gemm_desc.b_cast_type == miopenFloat8)) + { return true; + } else + { return false; + } }(); // ex3 API only works on the gfx94x ASIC; if(needs_ex3) @@ -747,8 +769,10 @@ miopenStatus_t CallGemmStridedBatched(const Handle& handle, handle, gemm_desc, A, a_offset, B, b_offset, C, c_offset); } else + { MIOPEN_THROW(miopenStatusBadParm, "8-bit floating types are only supported on gfx94x"); + } } else { @@ -873,8 +897,10 @@ miopenStatus_t CallGemmStridedBatched(const Handle& handle, handle, gemm_desc, A, a_offset, B, b_offset, C, c_offset); } else + { MIOPEN_THROW(miopenStatusBadParm, "8-bit floating types are only supported on gfx94x"); + } break; } @@ -1002,9 +1028,13 @@ miopenStatus_t CallGemmStridedBatchedSequential(const Handle& handle, gemm_desc.a_cast_type == miopenBFloat8) || (gemm_desc.b_cast_type == miopenBFloat8 || gemm_desc.b_cast_type == miopenFloat8)) + { return true; + } else + { return false; + } }(); // ex3 API only works on the gfx94x ASIC; if(needs_ex3) @@ -1015,8 +1045,10 @@ miopenStatus_t CallGemmStridedBatchedSequential(const Handle& handle, handle, gemm_desc, A, a_offset, B, b_offset, C, c_offset); } else + { MIOPEN_THROW(miopenStatusBadParm, "8-bit floating types are only supported on gfx94x"); + } } else { @@ -1138,8 +1170,10 @@ miopenStatus_t CallGemmStridedBatchedSequential(const Handle& handle, handle, gemm_desc, A, a_offset, B, b_offset, C, c_offset); } else + { MIOPEN_THROW(miopenStatusBadParm, "8-bit floating types are only supported on gfx94x"); + } break; } diff --git a/src/hip/batched_transpose_sol.cpp b/src/hip/batched_transpose_sol.cpp index 01349775ca..6f1353bd24 100644 
--- a/src/hip/batched_transpose_sol.cpp +++ b/src/hip/batched_transpose_sol.cpp @@ -252,9 +252,12 @@ static inline BatchedTransposeParam HeuristicGet(const ExecutionContext& ctx, for(auto it = kernel_list.rbegin(); it != kernel_list.rend(); it++) { - if(it->tile_x == 4 || it->tile_y == 4) // We don't want such kernel to be selected here, - // they should be used in above cases + if(it->tile_x == 4 || it->tile_y == 4) + { + // We don't want such kernel to be selected here, + // they should be used in above cases continue; + } if(!IsApplicable(batch, height, width, &(*it))) continue; std::size_t current_padding_size = GetExtraPaddingSize(batch, height, width, &(*it)); diff --git a/src/hip/handlehip.cpp b/src/hip/handlehip.cpp index ff6d27d26e..567cd9b3c9 100644 --- a/src/hip/handlehip.cpp +++ b/src/hip/handlehip.cpp @@ -355,6 +355,7 @@ void Handle::ReserveExtraStreamsInPool(int cnt) const int last_stream_id = this->impl->ms_resourse_ptr->stream_pool.size(); if(last_stream_id < cnt) + { for(; last_stream_id < cnt; last_stream_id++) { auto new_stream = this->impl->create_stream_non_blocking(); @@ -365,6 +366,7 @@ void Handle::ReserveExtraStreamsInPool(int cnt) const this->impl->ms_resourse_ptr->add_stream(std::move(new_stream)); #endif } + } } miopenAcceleratorQueue_t Handle::GetStream() const diff --git a/src/hipoc/hipoc_program.cpp b/src/hipoc/hipoc_program.cpp index 8a87d0d3f7..e4c2546564 100644 --- a/src/hipoc/hipoc_program.cpp +++ b/src/hipoc/hipoc_program.cpp @@ -292,13 +292,19 @@ void HIPOCProgramImpl::BuildCodeObjectInMemory(const std::string& params, comgr::BuildHip(filename, src, params, target, binary); } else if(miopen::EndsWith(filename, ".s")) + { comgr::BuildAsm(filename, src, params, target, binary); + } #if MIOPEN_USE_MLIR else if(miopen::EndsWith(filename, ".mlir")) + { MiirGenBin(params, binary); + } #endif else + { comgr::BuildOcl(filename, src, params, target, binary); + } } if(binary.empty()) MIOPEN_THROW("Code object build failed. 
Source: " + filename); diff --git a/src/include/miopen/conv/asm_implicit_gemm.hpp b/src/include/miopen/conv/asm_implicit_gemm.hpp index 675df33871..1ca9f49a5c 100644 --- a/src/include/miopen/conv/asm_implicit_gemm.hpp +++ b/src/include/miopen/conv/asm_implicit_gemm.hpp @@ -255,7 +255,9 @@ static inline int igemm_split_batch_size(const int hi, // max_n * image_size <= max_tensor_size size_t max_n = max_tensor_size / image_size; if(max_n > n) + { max_n = n % max_n; + } else if(max_n < n) { // find the smallest multiple m of n such that (n / m) * image_size <= max_tensor_size. @@ -265,7 +267,9 @@ static inline int igemm_split_batch_size(const int hi, while(n % max_n != 0) { if(n % m == 0) + { max_n = n / m; + } else { m += 1; diff --git a/src/include/miopen/convolution.hpp b/src/include/miopen/convolution.hpp index 35c494eab2..fc6ea04228 100644 --- a/src/include/miopen/convolution.hpp +++ b/src/include/miopen/convolution.hpp @@ -123,8 +123,10 @@ struct ConvolutionAttribute inline miopenF8RoundingMode_t Get() const { if(nullptr != miopen::GetStringEnv(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE{})) + { return static_cast( miopen::Value(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE{})); + } return rounding_mode; } diff --git a/src/include/miopen/db_record.hpp b/src/include/miopen/db_record.hpp index f0fdf25b0e..af898f9941 100644 --- a/src/include/miopen/db_record.hpp +++ b/src/include/miopen/db_record.hpp @@ -230,8 +230,10 @@ class DbRecord const bool ok = values.Deserialize(s); if(!ok) + { MIOPEN_LOG_WE( "Perf db record is obsolete or corrupt: " << s << ". 
Performance may degrade."); + } return ok; } diff --git a/src/include/miopen/env.hpp b/src/include/miopen/env.hpp index 7e91a2a2c8..44b402fda4 100644 --- a/src/include/miopen/env.hpp +++ b/src/include/miopen/env.hpp @@ -51,7 +51,9 @@ inline bool IsEnvvarValueDisabled(const char* name) // NOLINTNEXTLINE (concurrency-mt-unsafe) const auto value_env_p = std::getenv(name); if(value_env_p == nullptr) + { return false; + } else { std::string value_env_str = value_env_p; @@ -76,7 +78,9 @@ inline bool IsEnvvarValueEnabled(const char* name) // NOLINTNEXTLINE (concurrency-mt-unsafe) const auto value_env_p = std::getenv(name); if(value_env_p == nullptr) + { return false; + } else { std::string value_env_str = value_env_p; diff --git a/src/include/miopen/execution_context.hpp b/src/include/miopen/execution_context.hpp index d2195d6061..aa76a9788f 100644 --- a/src/include/miopen/execution_context.hpp +++ b/src/include/miopen/execution_context.hpp @@ -219,10 +219,14 @@ struct ExecutionContext try { if(pos != std::string::npos) + { cur_count = std::stoi(fname.substr(pos + 1)); + } else + { cur_count = std::stoi(fname.substr(db_id.length()), nullptr, 16); + } } catch(const std::exception& e) { diff --git a/src/include/miopen/find_solution.hpp b/src/include/miopen/find_solution.hpp index fd65f55d4c..2e5c1ef1f8 100644 --- a/src/include/miopen/find_solution.hpp +++ b/src/include/miopen/find_solution.hpp @@ -253,7 +253,9 @@ struct SolverContainer // else if(problem.use_dynamic_solutions_only && !solver.IsDynamic()) // MIOPEN_LOG_I2(solver.SolverDbId() << ": Skipped (non-dynamic)"); else if(!solver.IsApplicable(ctx, problem)) + { MIOPEN_LOG_I2(solver.SolverDbId() << ": Not applicable"); + } else { auto s = solver.GetSolution(ctx, problem); @@ -294,13 +296,19 @@ struct SolverContainer { // Do nothing (and keep silence for the sake of Tuna), just skip. 
} else if(!solver.MayNeedWorkspace()) + { MIOPEN_LOG_I2(solver.SolverDbId() << ": Skipped (no workspace required)"); + } // For better performance, check IsDynamic() first, because // it is much faster than IsApplicable(). else if(ctx.use_dynamic_solutions_only && !solver.IsDynamic()) + { MIOPEN_LOG_I2(solver.SolverDbId() << ": Skipped (non-dynamic)"); + } else if(!solver.IsApplicable(ctx, problem)) + { MIOPEN_LOG_I2(solver.SolverDbId() << ": Not applicable"); + } else { ++count; diff --git a/src/include/miopen/generic_search.hpp b/src/include/miopen/generic_search.hpp index 71b34b908a..4eed74b037 100644 --- a/src/include/miopen/generic_search.hpp +++ b/src/include/miopen/generic_search.hpp @@ -121,9 +121,11 @@ class ComputedIterator bool operator!=(ComputedIterator const& other) const { if(p == other.p) + { if(p == nullptr // Ends are always equal. || v == other.v) return false; + } return true; } bool operator==(ComputedIterator const& other) const { return !(*this != other); } @@ -442,7 +444,9 @@ auto GenericSearch(const Solver s, { threads_remaining--; if(threads_remaining == 0) + { break; + } else { continue; diff --git a/src/include/miopen/hipoc_kernel.hpp b/src/include/miopen/hipoc_kernel.hpp index 73ac77f160..dc29c8d309 100644 --- a/src/include/miopen/hipoc_kernel.hpp +++ b/src/include/miopen/hipoc_kernel.hpp @@ -188,9 +188,11 @@ struct HIPOCKernel kernel_module = name; auto status = hipModuleGetFunction(&fun, program.GetModule(), kernel_module.c_str()); if(hipSuccess != status) + { MIOPEN_THROW_HIP_STATUS(status, "Failed to get function: " + kernel_module + " from " + program.GetCodeObjectPathname().string()); + } } HIPOCKernelInvoke Invoke(hipStream_t stream, diff --git a/src/include/miopen/kern_db.hpp b/src/include/miopen/kern_db.hpp index 269124348e..0700402357 100644 --- a/src/include/miopen/kern_db.hpp +++ b/src/include/miopen/kern_db.hpp @@ -106,7 +106,9 @@ class KernDb : public SQLiteBase auto stmt = SQLite::Statement{sql, del_query}; auto rc = 
stmt.Step(sql); if(rc == SQLITE_DONE) + { return true; + } else { MIOPEN_THROW(miopenStatusInternalError, sql.ErrorMessage()); @@ -142,9 +144,13 @@ class KernDb : public SQLiteBase return decompressed_blob; } else if(rc == SQLITE_DONE) + { return boost::none; + } else + { MIOPEN_THROW(miopenStatusInternalError, sql.ErrorMessage()); + } return boost::none; } diff --git a/src/include/miopen/magic_div.hpp b/src/include/miopen/magic_div.hpp index d909444909..281df3a38c 100644 --- a/src/include/miopen/magic_div.hpp +++ b/src/include/miopen/magic_div.hpp @@ -71,8 +71,10 @@ static inline magic_div_u32_t magic_div_u32_gen(uint32_t d) assert(d >= 1 && d <= INT32_MAX); uint8_t shift; for(shift = 0; shift < 32; shift++) + { if((1U << shift) >= d) break; + } constexpr uint64_t one = 1; uint64_t magic = ((one << 32) * ((one << shift) - d)) / d + 1; diff --git a/src/include/miopen/sequences.hpp b/src/include/miopen/sequences.hpp index 60cd46c9c9..91177810d1 100644 --- a/src/include/miopen/sequences.hpp +++ b/src/include/miopen/sequences.hpp @@ -209,9 +209,13 @@ struct Sequence static constexpr bool ValidateValues() { for(auto i = 0; i < ValuesCount() - 1; ++i) + { for(auto j = i + 1; j < ValuesCount(); ++j) + { if(data[i] == data[j]) return false; + } + } return true; } diff --git a/src/include/miopen/solver.hpp b/src/include/miopen/solver.hpp index ce40d6f081..b50373a07f 100644 --- a/src/include/miopen/solver.hpp +++ b/src/include/miopen/solver.hpp @@ -2192,11 +2192,15 @@ struct ConvWinograd3x3MultipassWrW final : ConvSolver static int GetSolverWinoXformHWSize(const ProblemDescription& problem, int id) { if(id == 0) + { return WinoDataH + (WinoFilterH - 1) * (WinoDataH == 7 ? 2 : problem.GetKernelStrideH()); + } else + { return WinoDataW + (WinoFilterW - 1) * (WinoDataW == 7 ? 
2 : problem.GetKernelStrideW()); + } } private: diff --git a/src/include/miopen/solver/ck_utility_common.hpp b/src/include/miopen/solver/ck_utility_common.hpp index a8d049c389..ab5ddf3cd1 100644 --- a/src/include/miopen/solver/ck_utility_common.hpp +++ b/src/include/miopen/solver/ck_utility_common.hpp @@ -95,6 +95,7 @@ static inline auto get_ck_common_compiler_flag(const Handle& handle) // GPU target static const std::string device_name = handle.GetDeviceName(); + // NOLINTBEGIN(*-braces-around-statements) if(StartsWith(device_name, "gfx803")) compiler_flag << " -DCK_AMD_GPU_GFX803"; else if(StartsWith(device_name, "gfx900")) @@ -121,6 +122,7 @@ static inline auto get_ck_common_compiler_flag(const Handle& handle) compiler_flag << " -DCK_AMD_GPU_GFX1101"; else if(StartsWith(device_name, "gfx1102")) compiler_flag << " -DCK_AMD_GPU_GFX1102"; + // NOLINTEND(*-braces-around-statements) // buffer atomic-fadd compiler_flag << " -DCK_USE_AMD_BUFFER_ATOMIC_FADD=" @@ -142,6 +144,7 @@ static inline auto get_ck_convolution_problem_descriptor(const ProblemDescriptio { ck::DataTypeEnum_t ck_datatype; + // NOLINTBEGIN(*-braces-around-statements) if(problem.IsFp32()) ck_datatype = ck::DataTypeEnum_t::Float; else if(problem.IsFp16()) @@ -150,6 +153,7 @@ static inline auto get_ck_convolution_problem_descriptor(const ProblemDescriptio ck_datatype = ck::DataTypeEnum_t::BFloat16; else ck_datatype = ck::DataTypeEnum_t::Unknown; + // NOLINTEND(*-braces-around-statements) return ck::driver::ConvolutionProblemDescriptor{ ProblemInterpreter::GetBatchN(problem), diff --git a/src/include/miopen/solver/implicitgemm_util.hpp b/src/include/miopen/solver/implicitgemm_util.hpp index d9aad50b98..327d14dd6b 100644 --- a/src/include/miopen/solver/implicitgemm_util.hpp +++ b/src/include/miopen/solver/implicitgemm_util.hpp @@ -232,11 +232,17 @@ GetEPackLength(const ExecutionContext& ctx, const ProblemDescription& problem, b int EPACK = 1; if(problem.IsFp16()) // for fp16, either 2 or 4 Es could be 
packed { - if(IsXdlopsSupport(ctx) && isXdlopsInvoked) // in xdlops, 4 fp16s are packed + if(IsXdlopsSupport(ctx) && isXdlopsInvoked) + { + // in xdlops, 4 fp16s are packed EPACK = 4; - else // for fp16, either 2 or 4 Es could be packed in non-xdlops scenarios. + } + else + { + // for fp16, either 2 or 4 Es could be packed in non-xdlops scenarios. // EPACK = (C * Y * X % 32) == 0 ? 4 : 2; EPACK = 2; + } } else if(problem.IsBfp16()) // for bfp16, only 2 Es could be packed { diff --git a/src/include/miopen/sqlite_db.hpp b/src/include/miopen/sqlite_db.hpp index a1beef32e1..3a72e26e95 100644 --- a/src/include/miopen/sqlite_db.hpp +++ b/src/include/miopen/sqlite_db.hpp @@ -413,8 +413,10 @@ class SQLitePerfDb : public SQLiteBase auto stmt = SQLite::Statement{sql, clause, vals}; auto rc = stmt.Step(sql); if(rc != SQLITE_DONE) + { MIOPEN_THROW(miopenStatusInternalError, "Failed to insert config: " + sql.ErrorMessage()); + } auto cnt = sql.Changes(); MIOPEN_LOG_I2(cnt << " rows updated"); } @@ -430,11 +432,17 @@ class SQLitePerfDb : public SQLiteBase { auto rc = stmt.Step(sql); if(rc == SQLITE_ROW) + { return stmt.ColumnText(0); + } else if(rc == SQLITE_DONE) + { return ""; + } else if(rc == SQLITE_ERROR || rc == SQLITE_MISUSE) + { MIOPEN_THROW(miopenStatusInternalError, sql.ErrorMessage()); + } } } template @@ -484,11 +492,17 @@ class SQLitePerfDb : public SQLiteBase { auto rc = stmt.Step(sql); if(rc == SQLITE_ROW) + { rec.SetValues(stmt.ColumnText(0), stmt.ColumnText(1)); + } else if(rc == SQLITE_DONE) + { break; + } else if(rc == SQLITE_ERROR || rc == SQLITE_MISUSE) + { MIOPEN_THROW(miopenStatusInternalError, sql.ErrorMessage()); + } } if(rec.GetSize() == 0) return boost::none; @@ -519,7 +533,9 @@ class SQLitePerfDb : public SQLiteBase auto stmt = SQLite::Statement{sql, query, values}; auto rc = stmt.Step(sql); if(rc == SQLITE_DONE) + { return true; + } else { const std::string msg = "Unable to remove database entry: "; @@ -544,8 +560,10 @@ class SQLitePerfDb : public 
SQLiteBase auto stmt = SQLite::Statement{sql, clause, vals}; auto rc = stmt.Step(sql); if(rc != SQLITE_DONE) + { MIOPEN_THROW(miopenStatusInternalError, "Failed to insert config: " + sql.ErrorMessage()); + } auto cnt = sql.Changes(); MIOPEN_LOG_I2(cnt << " rows updated"); } diff --git a/src/include/miopen/tensor_ops.hpp b/src/include/miopen/tensor_ops.hpp index 0e06a41b44..3f1f324761 100644 --- a/src/include/miopen/tensor_ops.hpp +++ b/src/include/miopen/tensor_ops.hpp @@ -78,11 +78,13 @@ GetConsistentFlattenedTensorDescriptors(const TDescriptors&... real_descriptor_p bool is_all_same_strided = true; const auto& real_desc_0_strides = real_descriptors[0]->GetStrides(); for(std::size_t itensor = 1; itensor < NTensor; ++itensor) + { if(real_desc_0_strides != real_descriptors[itensor]->GetStrides()) { is_all_same_strided = false; break; } + } auto non1_length_strides = boost::combine(real_descriptors[0]->GetLengths(), real_descriptor_pack.GetStrides()...) | diff --git a/src/include/miopen/tensor_reorder_util.hpp b/src/include/miopen/tensor_reorder_util.hpp index 24b7b96650..a12486010b 100644 --- a/src/include/miopen/tensor_reorder_util.hpp +++ b/src/include/miopen/tensor_reorder_util.hpp @@ -215,15 +215,25 @@ MakeTensorReorderAttributes(const ExecutionContext& ctx_, if(data_type_ != miopenDouble) { if((order_0_ == 0) && (order_1_ == 1) && (order_2_ == 3) && (order_3_ == 2)) + { which = 1; + } else if((order_0_ == 0) && (order_1_ == 2) && (order_2_ == 3) && (order_3_ == 1)) + { which = 2; + } else if((order_0_ == 0) && (order_1_ == 3) && (order_2_ == 1) && (order_3_ == 2)) + { which = 3; + } else if((order_0_ == 2) && (order_1_ == 3) && (order_2_ == 0) && (order_3_ == 1)) + { which = 4; + } else if((order_0_ == 3) && (order_1_ == 0) && (order_2_ == 1) && (order_3_ == 2)) + { which = 5; + } } // Order [0, 1, 3, 2], [0, 2, 3, 1], [0, 3, 1, 2], [2, 3, 0, 1], [3, 0, 1, 2] are using batched // transpose kernel to achieve higher performance. 
Details as following: diff --git a/src/invoker_cache.cpp b/src/invoker_cache.cpp index 937a1b2e08..6fd7dab2c3 100644 --- a/src/invoker_cache.cpp +++ b/src/invoker_cache.cpp @@ -67,8 +67,10 @@ boost::optional InvokerCache::GetFound1_0(const std::string& net } const auto invoker = item_invokers.find(found_1_0_id->second); if(invoker == item_invokers.end()) + { MIOPEN_THROW("No invoker with solver_id of " + found_1_0_id->second + " was registered for " + network_config); + } return invoker->second; } @@ -103,7 +105,9 @@ void InvokerCache::Register(const Key& key, const Invoker& invoker) { auto it = invokers.find(key.first); if(it != invokers.end()) + { it->second.invokers.insert({key.second, invoker}); + } else { auto& item = invokers.insert({key.first, Item{}}).first->second; @@ -125,8 +129,10 @@ void InvokerCache::SetAsFound1_0(const std::string& network_config, const auto& item_invokers = item->second.invokers; const auto invoker = item_invokers.find(solver_id); if(invoker == item_invokers.end()) + { MIOPEN_THROW("No invoker with solver_id of " + solver_id + " was registered for " + network_config); + } } item->second.found_1_0[algorithm] = solver_id; diff --git a/src/kernel_cache.cpp b/src/kernel_cache.cpp index 0f706d458f..2b8f5bcef2 100644 --- a/src/kernel_cache.cpp +++ b/src/kernel_cache.cpp @@ -117,8 +117,10 @@ Kernel KernelCache::AddKernel(const Handle& h, else { if(!is_kernel_miopengemm_str) // default value + { is_kernel_miopengemm_str = algorithm.find("ImplicitGEMM") == std::string::npos && algorithm.find("GEMM") != std::string::npos; + } program = h.LoadProgram(program_name, params, is_kernel_miopengemm_str, kernel_src); program_map[std::make_pair(program_name, params)] = program; } diff --git a/src/kernels/hip_float8.hpp b/src/kernels/hip_float8.hpp index dd57c9ca5b..a9b2a559a8 100644 --- a/src/kernels/hip_float8.hpp +++ b/src/kernels/hip_float8.hpp @@ -264,9 +264,13 @@ struct hip_f8 { if((rhs.is_zero() && this->is_zero()) || (fabs(rhs - *this) < 
std::numeric_limits>::epsilon())) + { return true; + } else if(rhs.is_nan() || rhs.is_inf() || this->is_nan() || this->is_inf()) + { return false; + } return false; } diff --git a/src/logger.cpp b/src/logger.cpp index f8045f9ac7..e39a43ea2f 100644 --- a/src/logger.cpp +++ b/src/logger.cpp @@ -151,26 +151,27 @@ bool IsLogging(const LoggingLevel level, const bool disableQuieting) const char* LoggingLevelToCString(const LoggingLevel level) { - // Intentionally straightforward. - // The most frequently used come first. - if(level == LoggingLevel::Error) + switch(level) + { + case LoggingLevel::Default: + return "Default"; + case LoggingLevel::Quiet: + return "Quiet"; + case LoggingLevel::Fatal: + return "Fatal"; + case LoggingLevel::Error: return "Error"; - else if(level == LoggingLevel::Warning) + case LoggingLevel::Warning: return "Warning"; - else if(level == LoggingLevel::Info) + case LoggingLevel::Info: return "Info"; - else if(level == LoggingLevel::Info2) + case LoggingLevel::Info2: return "Info2"; - else if(level == LoggingLevel::Trace) + case LoggingLevel::Trace: return "Trace"; - else if(level == LoggingLevel::Default) - return "Default"; - else if(level == LoggingLevel::Quiet) - return "Quiet"; - else if(level == LoggingLevel::Fatal) - return "Fatal"; - else + default: return ""; + } } bool IsLoggingCmd() { diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp index c89d974a62..ef326c3fb8 100644 --- a/src/ocl/convolutionocl.cpp +++ b/src/ocl/convolutionocl.cpp @@ -208,9 +208,11 @@ static inline std::vector FindConvolution(const ExecutionContext& ctx } if(IsEnabled(MIOPEN_DEBUG_COMPILE_ONLY{})) + { MIOPEN_THROW( miopenStatusGpuOperationsSkipped, "MIOPEN_DEBUG_COMPILE_ONLY is enabled, escaping forward convolution. 
Search skipped."); + } ShrinkToFind10Results(results); @@ -264,10 +266,12 @@ void ConvolutionDescriptor::FindConvFwdAlgorithm(Handle& handle, const auto results = FindConvolution(ctx, problem, invoke_ctx); if(results.empty()) + { // Changes to this message lead to failures in test_conv_for_implicit_gemm // To fix them check the test // Two similar messages are in other convolution find methods MIOPEN_THROW("No suitable algorithm was found to execute the required convolution"); + } *returnedAlgoCount = std::min(requestAlgoCount, static_cast(results.size())); @@ -650,9 +654,11 @@ ConvolutionDescriptor::GetSolutionsFallback(const ExecutionContext& ctx, } MIOPEN_LOG_I2("maxSolutionCount = " << maxSolutionCount << ", available = " << interim.size()); for(const auto& s : interim) + { MIOPEN_LOG_I2("id: " << s.solution_id << " algo: " << s.algorithm << ", time: " << s.time << " ms, ws: " << s.workspace_size << ", name: " << miopen::solver::Id(s.solution_id).ToString()); + } std::sort(begin(interim), end(interim), SolutionTimeComparator{}); interim.resize(std::min(maxSolutionCount, interim.size())); @@ -848,10 +854,12 @@ void ConvolutionDescriptor::FindConvBwdDataAlgorithm(Handle& handle, const auto results = FindConvolution(ctx, problem, invoke_ctx); if(results.empty()) + { // Changes to this message lead to failures in test_conv_for_implicit_gemm // To fix them check the test // Two similar messages are in other convolution find methods MIOPEN_THROW("No suitable algorithm was found to execute the required convolution"); + } *returnedAlgoCount = std::min(requestAlgoCount, static_cast(results.size())); @@ -965,11 +973,15 @@ std::size_t ConvolutionDescriptor::GetBackwardSolutionWorkspaceSize(Handle& hand auto ctx = ExecutionContext{}; ctx.SetStream(&handle); if(sol.IsApplicable(ctx, problem)) + { return sol.GetWorkspaceSize(ctx, problem); + } else + { MIOPEN_THROW(miopenStatusBadParm, "The supplied solution id: " + solver_id.ToString() + " is not applicable to the current 
problem"); + } } void ConvolutionDescriptor::ConvolutionBackwardImmediate(Handle& handle, @@ -1055,10 +1067,12 @@ void ConvolutionDescriptor::FindConvBwdWeightsAlgorithm(Handle& handle, const auto results = FindConvolution(ctx, problem, invoke_ctx); if(results.empty()) + { // Changes to this message lead to failures in test_conv_for_implicit_gemm // To fix them check the test // Two similar messages are in other convolution find methods MIOPEN_THROW("No suitable algorithm was found to execute the required convolution"); + } *returnedAlgoCount = std::min(requestAlgoCount, static_cast(results.size())); @@ -1163,11 +1177,15 @@ std::size_t ConvolutionDescriptor::GetWrwSolutionWorkspaceSize(Handle& handle, auto ctx = ExecutionContext{}; ctx.SetStream(&handle); if(sol.IsApplicable(ctx, problem)) + { return sol.GetWorkspaceSize(ctx, problem); + } else + { MIOPEN_THROW(miopenStatusBadParm, "The supplied solution id: " + solver_id.ToString() + " is not applicable to the current problem"); + } } void ConvolutionDescriptor::ConvolutionWrwImmediate(Handle& handle, diff --git a/src/ocl/ctcocl.cpp b/src/ocl/ctcocl.cpp index cb343c6624..d594544403 100644 --- a/src/ocl/ctcocl.cpp +++ b/src/ocl/ctcocl.cpp @@ -97,8 +97,10 @@ void CTCLossDescriptor::CTCLoss(const Handle& handle, MIOPEN_THROW("Wrong label id"); } if(j > 0) + { if(labels[labels_offset[i] + j] == labels[labels_offset[i] + j - 1]) repeat[i]++; + } } if(labelLengths[i] + repeat[i] > inputLengths[i]) @@ -248,12 +250,16 @@ void CTCLossDescriptor::CTCLoss(const Handle& handle, " -DBLANK_LB_ID=" + std::to_string(blank_label_id); if(!probsDesc.IsPacked()) + { params += " -DPROBS_STRIDE0=" + std::to_string(probsDesc.GetStrides()[0]) + " -DPROBS_STRIDE1=" + std::to_string(probsDesc.GetStrides()[1]); + } if(!gradientsDesc.IsPacked()) + { params += " -DGRADS_STRIDE0=" + std::to_string(gradientsDesc.GetStrides()[0]) + " -DGRADS_STRIDE1=" + std::to_string(gradientsDesc.GetStrides()[1]); + } params += " -DSOFTMAX_APPLIED=" + 
std::to_string(static_cast(apply_softmax_layer)) + " -DSOFTMAX_LEN=" + std::to_string(class_sz); @@ -268,14 +274,18 @@ void CTCLossDescriptor::CTCLoss(const Handle& handle, + class_sz #endif <= lcl_mem_per_grp) + { params += " -DOPT_LCL_MEM_BETA"; + } if(static_cast(max_S_len) * 3 #if MIOPEN_BACKEND_OPENCL + class_sz #endif <= lcl_mem_per_grp) + { params += " -DOPT_LCL_MEM_LB"; + } if(probsDesc.GetType() == miopenHalf) params += " -DMIOPEN_USE_FP16=1"; diff --git a/src/ocl/rnnocl.cpp b/src/ocl/rnnocl.cpp index 8f650ef30d..4fc514f203 100644 --- a/src/ocl/rnnocl.cpp +++ b/src/ocl/rnnocl.cpp @@ -124,9 +124,11 @@ void RNNDescriptor::RNNForwardTraining_MS(Handle& handle, int bidirect_mode) const { if(bidirect_mode == 0) + { return matrix_lin_layer_size(input_vector_sz, hidden_vec_sz, gates) + static_cast(hidden_vec_sz + hidden_xinput_size(hidden_vec_sz, 0)) * hidden_vec_sz * static_cast(layers_cnt - 1) * gates; + } MIOPEN_THROW("execution failure: bidirect is not supported by this solver"); } @@ -171,19 +173,27 @@ void RNNDescriptor::RNNForwardTraining_MS(Handle& handle, size_t get_matrix_x_off(int layer_id) const { if(layer_id > 0) + { return matrix_normal_start_off + static_cast(layer_id - 1) * get_matrix_layer_size(layer_id); + } else + { return 0; + } }; size_t get_matrix_h_off(int layer_id) const { if(layer_id > 0) + { return get_matrix_x_off(layer_id) + static_cast(h_vec * x_in_vec * gates_cnt); + } else + { return get_matrix_x_off(layer_id) + static_cast(h_vec * in_vec) * gates_cnt; + } }; int bias_vector_size() const { return h_vec; } @@ -431,8 +441,10 @@ void RNNDescriptor::RNNForwardTraining_MS(Handle& handle, : RBuff.ht_offset(layer, bacc_per_time[cur_time - 1]); if(cur_time == 0) + { if(hx == nullptr) return; + } const miopen::GemmDescriptor gemm_desc_hx = GemmDescriptor{false, false, @@ -697,8 +709,10 @@ void RNNDescriptor::RNNForwardTraining_MS(Handle& handle, const int chunk_time = std::min(time_chunk_sz, seq_len - chunk_id * time_chunk_sz); if(layer_id > 
0 && layer_stream_id[layer_id - 1] != stream_id) + { hipStreamWaitEvent( stream_pull[stream_id], layer_chunk_end_event[layer_id - 1][chunk_id].get(), 0); + } if(!(layer_id == 0 && chunk_id == 1)) { @@ -737,8 +751,10 @@ void RNNDescriptor::RNNForwardTraining_MS(Handle& handle, handle.SetStreamFromPool(extra_stream_id); if(biasMode != 0u) + { for(int layer_id = 1; layer_id < nLayers; layer_id++) call_bias_add(layer_id); + } call_inx_next_chunk_preload(first_layer_id); @@ -760,9 +776,11 @@ void RNNDescriptor::RNNForwardTraining_MS(Handle& handle, { auto chunk_id = layer_upd_cur_time[layer_id] / time_chunk_sz; if(chunk_id > 0) + { hipStreamWaitEvent(stream_pull[main_stream_id], layer_chunk_end_event[layer_id][chunk_id - 1].get(), 0); + } layer_stream_id[layer_id] = main_stream_id; } diff --git a/src/operator.cpp b/src/operator.cpp index 620d943c71..72add47e1b 100644 --- a/src/operator.cpp +++ b/src/operator.cpp @@ -50,6 +50,7 @@ std::ostream& operator<<(std::ostream& stream, const MDGraph_op_t& o) std::ostream& operator<<(std::ostream& stream, const boost::any& a) { + // NOLINTBEGIN(*-braces-around-statements) if(a.type() == typeid(std::string)) stream << boost::any_cast(a); else if(a.type() == typeid(int)) @@ -68,6 +69,7 @@ std::ostream& operator<<(std::ostream& stream, const boost::any& a) stream << boost::any_cast(a); else stream << "Unsupported any type: " << a.type().name(); + // NOLINTEND(*-braces-around-statements) return stream; } } // namespace miopen diff --git a/src/problem.cpp b/src/problem.cpp index b5588a7f4b..c89ad62399 100644 --- a/src/problem.cpp +++ b/src/problem.cpp @@ -162,8 +162,10 @@ Problem::GetTensorDescriptorChecked(miopenTensorArgumentId_t name, { const auto found = tensor_descriptors.find(name); if(found == tensor_descriptors.end()) + { MIOPEN_THROW(miopenStatusInvalidValue, "Problem is missing " + name_str + " tensor descriptor."); + } return found->second; } @@ -230,8 +232,10 @@ std::vector Problem::FindSolutionsImpl(Handle& handle, auto 
ret = std::vector{}; if(tensor_descriptors.size() != 3) + { MIOPEN_THROW(miopenStatusInvalidValue, "Convolution problem should have exactly three tensor descriptors."); + } // These are not swapped for now to preserve argument order in calls const auto& x_desc = diff --git a/src/reducetensor.cpp b/src/reducetensor.cpp index 6ca1067fb8..27959e2c20 100644 --- a/src/reducetensor.cpp +++ b/src/reducetensor.cpp @@ -95,22 +95,32 @@ struct ReductionKernelConfigurator { if(toReduceLength <= GredBlockWiseUpperReductionLen) // let one block to do this only reduction + { return (1); + } else + { return ((toReduceLength + blockSize_ - 1) / blockSize_); // let multiple blocks to do this only reduction + } } else { if(toReduceLength <= GredDirectThreadWiseUpperReductionLen) // let one thread to do each reduction + { return ((invariantLength + blockSize_ - 1) / blockSize_); + } else if(toReduceLength <= GredDirectWarpWiseUpperReductionLen) // let one warp to do each reduction + { return ((invariantLength + numWarpsPerBlock - 1) / numWarpsPerBlock); + } else if(toReduceLength <= GredBlockWiseUpperReductionLen) // let one block to do each reduction + { return (invariantLength); + } else { // let multiple blocks to do each reduction std::size_t expBlocksPerReduction = @@ -134,23 +144,35 @@ struct ReductionKernelConfigurator { if(toReduceLength <= GredBlockWiseUpperReductionLen) // let one block to do this only reduction + { return (Reduce_BlockWise); + } else // let multiple blocks to do this only reduction + { return (Reduce_MultiBlock); + } } else { if(toReduceLength <= GredDirectThreadWiseUpperReductionLen) // let one thread to do each reduction + { return (Reduce_DirectThreadWise); + } else if(toReduceLength <= GredDirectWarpWiseUpperReductionLen) // let one warp to do each reduction + { return (Reduce_DirectWarpWise); + } else if(toReduceLength <= GredBlockWiseUpperReductionLen) // let one block to do each reduction + { return (Reduce_BlockWise); + } else + { return 
(Reduce_MultiBlock); // let multiple blocks to do each reduction + } }; }; @@ -513,8 +535,10 @@ std::size_t ReduceTensorDescriptor::GetWorkspaceSize(const Handle& handle, for(int i = 0; i < inDescLengths.size(); i++) { if(outDescLengths[i] != 1 && outDescLengths[i] != inDescLengths[i]) + { MIOPEN_THROW("The length of the output tensor dimension should either be 1 or be equal " "to the length of the corresponding dimension of the input tensor."); + } }; auto invariantLength = outDesc.GetElementSize(); @@ -567,8 +591,10 @@ std::size_t ReduceTensorDescriptor::GetIndicesSize(const TensorDescriptor& inDes for(int i = 0; i < inDescLengths.size(); i++) { if(outDescLengths[i] != 1 && outDescLengths[i] != inDescLengths[i]) + { MIOPEN_THROW("The length of the output tensor dimension should either be 1 or be equal " "to the length of the corresponding dimension of the input tensor."); + } }; auto reduceIndicesOpt = this->reduceTensorIndices_; @@ -632,8 +658,10 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, for(int i = 0; i < inDescLengths.size(); i++) { if(outDescLengths[i] != 1 && outDescLengths[i] != inDescLengths[i]) + { MIOPEN_THROW("The length of the output tensor dimension should either be 1 or be equal " "to the length of the corresponding dimension of the input tensor."); + } }; std::size_t ws_sizeInBytes = this->GetWorkspaceSize(handle, aDesc, cDesc); @@ -679,8 +707,10 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, }; if(toReduceDims.empty()) + { MIOPEN_THROW("Invalid TensorDescriptor, at least one dimension of the input tensor should " "be reduced."); + } const bool reduceAllDims = invariantDims.empty(); @@ -808,19 +838,25 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, #if WORKAROUND_MIOPEN_ISSUE_557 if(StartsWith(handle.GetDeviceName(), "gfx10") || StartsWith(handle.GetDeviceName(), "gfx11")) + { param += " -DCK_USE_AMD_BUFFER_ADDRESSING=0 "; + } else { if(srcDataType == miopenDouble) + { // TODO: support 
from composable kernel utility for using AMD Buffer Addressing for // double param += " -DCK_USE_AMD_BUFFER_ADDRESSING=0 "; + } }; #else if(srcDataType == miopenDouble) + { // TODO: support from composable kernel utility for using AMD Buffer Addressing for // double param += " -DCK_USE_AMD_BUFFER_ADDRESSING=0 "; + } #endif Data_t ws_buf1_global = workspace; @@ -996,6 +1032,7 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, std::to_string(static_cast(use_padding.second)); if(!reduceAllDims) + { handle.AddKernel( algo_name, network_config_1, program_name1, kernel_name1, vld, vgd1, param1)( gridSize, @@ -1019,7 +1056,9 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, p_outStrides[4], p_outStrides[5], workspace); + } else + { handle.AddKernel( algo_name, network_config_1, program_name1, kernel_name1, vld, vgd1, param1)( gridSize, @@ -1037,6 +1076,7 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, p_inStrides[4], p_inStrides[5], workspace); + } if(handle.IsProfilingEnabled()) time_reduce += handle.GetKernelTime(); @@ -1091,6 +1131,7 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, std::to_string(static_cast(use_padding2.second)); if(!reduceAllDims) + { handle.AddKernel( algo_name, network_config_2, program_name2, kernel_name2, vld, vgd1, param2)( gridSize_2, @@ -1108,10 +1149,13 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, p_outStrides[4], p_outStrides[5], workspace); + } else + { handle.AddKernel( algo_name, network_config_2, program_name2, kernel_name2, vld, vgd1, param2)( gridSize_2, blkGroupSize, workspace); + } if(handle.IsProfilingEnabled()) time_reduce += handle.GetKernelTime(); diff --git a/src/reducetensor_api.cpp b/src/reducetensor_api.cpp index 10b1ff73bf..6551c3a803 100644 --- a/src/reducetensor_api.cpp +++ b/src/reducetensor_api.cpp @@ -39,16 +39,24 @@ static void LogCmdRedux(const miopen::ReduceTensorDescriptor reduceTensorDesc, if(miopen::IsLoggingCmd()) { 
std::stringstream ss; - if(aDesc.GetType() == miopenHalf) + + switch(aDesc.GetType()) + { + case miopenHalf: ss << "reducefp16"; - else if(aDesc.GetType() == miopenBFloat16) - ss << "reducebfp16"; - else if(aDesc.GetType() == miopenInt8) + break; + case miopenInt8: ss << "reduceint8"; - else if(aDesc.GetType() == miopenDouble) + break; + case miopenBFloat16: + ss << "reducebfp16"; + break; + case miopenDouble: ss << "reducefp64"; - else + break; + default: ss << "reduce"; + } ss << " -A " << *reinterpret_cast(alpha); ss << " -B " << *reinterpret_cast(beta); diff --git a/src/rnn.cpp b/src/rnn.cpp index e83bd986c9..465a29f040 100644 --- a/src/rnn.cpp +++ b/src/rnn.cpp @@ -584,8 +584,10 @@ size_t RNNDescriptor::GetParamsSize(size_t inputVector) const if(inputMode == miopenRNNskip) { if(inputVector != hsize) + { MIOPEN_THROW(miopenStatusBadParm, "In miopenRNNskip mode input_vector size and hidden_size shoud be same."); + } inputVector = 0; } @@ -910,9 +912,11 @@ void RNNDescriptor::SetLayerBias(const Handle& handle, void RNNDescriptor::SetPaddingmode(miopenRNNPaddingMode_t padding) { if(padding != miopenRNNIOWithPadding && padding != miopenRNNIONotPadded) + { MIOPEN_THROW(miopenStatusBadParm, "SetPaddingmode: Bad parameter. 
RNN padding mode must be " "miopenRNNIOWithPadding or miopenRNNIONotPadded."); + } paddingMode = padding; } @@ -1122,6 +1126,7 @@ void RNNDescriptor::RNNVanillaForward(Handle& handle, miopen::c_array_view yDescArray{output_descs.data(), seq_len}; if(fwdMode == miopenRNNFWDMode_t::miopenRNNTraining) + { return RNNForwardTrainingPackedTensors(handle, seq_len, xDescArray, @@ -1141,7 +1146,9 @@ void RNNDescriptor::RNNVanillaForward(Handle& handle, cy, reserveSpace, reserveSpaceSize); + } else + { return RNNForwardInferencePacked(handle, seq_len, xDescArray, @@ -1161,6 +1168,7 @@ void RNNDescriptor::RNNVanillaForward(Handle& handle, cy, workSpace, workSpaceSize); + } } void RNNDescriptor::RNNVanillaBackwardData(Handle& handle, diff --git a/src/rnn/Solutions/rnn_transformer.cpp b/src/rnn/Solutions/rnn_transformer.cpp index 623bba5643..72ebb560ee 100644 --- a/src/rnn/Solutions/rnn_transformer.cpp +++ b/src/rnn/Solutions/rnn_transformer.cpp @@ -220,11 +220,15 @@ void RNNDescriptor::RNNTransformerForward(Handle& handle, const std::vector input_reorder_index = RNNTensorBaseLayoutConverter::GetSamplesDescendingOrder(xDesc); if(hx != nullptr) + { RNNTensorBaseLayoutConverter::ReorderHiddenTensorGPUData( handle, hDesc, 1, input_reorder_index, hx, tmp_layout_hx); + } if(cx != nullptr) + { RNNTensorBaseLayoutConverter::ReorderHiddenTensorGPUData( handle, hDesc, 1, input_reorder_index, cx, tmp_layout_cx); + } auto workSpace_shift_size = dataTypeSize * @@ -258,11 +262,15 @@ void RNNDescriptor::RNNTransformerForward(Handle& handle, const std::vector output_reorder_index = RNNTensorBaseLayoutConverter::GetSamplesDescendingOrder(xDesc, true); if(hy != nullptr) + { RNNTensorBaseLayoutConverter::ReorderHiddenTensorGPUData( handle, hDesc, 1, output_reorder_index, tmp_layout_hy, hy); + } if(cy != nullptr) + { RNNTensorBaseLayoutConverter::ReorderHiddenTensorGPUData( handle, cDesc, 1, output_reorder_index, tmp_layout_cy, cy); + } } 
RNNTensorBaseLayoutConverter::ReverseConvertInputTensorGPUData( @@ -368,17 +376,25 @@ void RNNDescriptor::RNNTransformerBackwardData(Handle& handle, const std::vector input_reorder_index = RNNTensorBaseLayoutConverter::GetSamplesDescendingOrder(xDesc); if(hx != nullptr) + { RNNTensorBaseLayoutConverter::ReorderHiddenTensorGPUData( handle, hDesc, 1, input_reorder_index, hx, tmp_layout_hx); + } if(dhy != nullptr) + { RNNTensorBaseLayoutConverter::ReorderHiddenTensorGPUData( handle, hDesc, 1, input_reorder_index, dhy, tmp_layout_dhy); + } if(cx != nullptr) + { RNNTensorBaseLayoutConverter::ReorderHiddenTensorGPUData( handle, hDesc, 1, input_reorder_index, cx, tmp_layout_cx); + } if(dcy != nullptr) + { RNNTensorBaseLayoutConverter::ReorderHiddenTensorGPUData( handle, hDesc, 1, input_reorder_index, dcy, tmp_layout_dcy); + } auto workSpace_shift_size = dataTypeSize * RNNLayoutTransformTotalTmpSpace( @@ -414,11 +430,15 @@ void RNNDescriptor::RNNTransformerBackwardData(Handle& handle, RNNTensorBaseLayoutConverter::GetSamplesDescendingOrder(xDesc, true); if(dhx != nullptr) + { RNNTensorBaseLayoutConverter::ReorderHiddenTensorGPUData( handle, hDesc, 1, output_reorder_index, tmp_layout_dhx, dhx); + } if(dcx != nullptr) + { RNNTensorBaseLayoutConverter::ReorderHiddenTensorGPUData( handle, cDesc, 1, output_reorder_index, tmp_layout_dcx, dcx); + } } RNNTensorBaseLayoutConverter::ReverseConvertInputTensorGPUData( @@ -499,8 +519,10 @@ void RNNDescriptor::RNNTransformerBackwardWeights(Handle& handle, RNNTensorBaseLayoutConverter::GetSamplesDescendingOrder(xDesc); if(hx != nullptr) + { RNNTensorBaseLayoutConverter::ReorderHiddenTensorGPUData( handle, hDesc, 1, input_reorder_index, hx, tmp_layout_hx); + } auto workSpace_shift_size = dataTypeSize * RNNLayoutTransformTotalTmpSpace( diff --git a/src/rnn/rnn_util.cpp b/src/rnn/rnn_util.cpp index 3dc9979f7a..8761155de7 100644 --- a/src/rnn/rnn_util.cpp +++ b/src/rnn/rnn_util.cpp @@ -230,18 +230,24 @@ void 
RNNTensorBaseLayoutConverter::ChangeTensorGPUDataPadding( const Handle& handle, const SeqTensorDescriptor& tensor_desc, ConstData_t src, Data_t dst) { if(!tensor_desc.IsSequenceLengthsSorted()) + { MIOPEN_THROW(miopenStatusInternalError, "Wrong tensor descriptor, only sorted tensors supported."); + } if(!tensor_desc.IsZeroBytePadding()) + { MIOPEN_THROW(miopenStatusInternalError, "Wrong tensor descriptor, tensors with byte padding not supported."); + } miopenRNNBaseLayout_t data_layout_t = RNNDescriptor::getBaseLayoutFromDataTensor(tensor_desc); if(data_layout_t == miopenRNNDataUnknownLayout) + { MIOPEN_THROW(miopenStatusInternalError, "Wrong tensor descriptor, only Base Layouts supported."); + } bool is_seq_major = data_layout_t == miopenRNNDataSeqMajorNotPadded || data_layout_t == miopenRNNDataSeqMajorPadded; @@ -347,19 +353,25 @@ void RNNTensorBaseLayoutConverter::ChangePaddedTensorGPUDataLayout( Data_t dst) { if(!src_padded_desc.IsPaddedSeqLayout() || !dst_padded_desc.IsPaddedSeqLayout()) + { MIOPEN_THROW(miopenStatusInternalError, "Wrong tensor descriptor, only padded tensors supported."); + } const auto data_type = src_padded_desc.GetType(); if(dst_padded_desc.GetType() != data_type) + { MIOPEN_THROW(miopenStatusInternalError, "Wrong tensor descriptor, Dst data type should match src data type."); + } const std::vector copy_size = src_padded_desc.GetLengths(); if(dst_padded_desc.GetLengths() != copy_size) + { MIOPEN_THROW(miopenStatusInternalError, "Wrong tensor descriptor, Dst desc size should match Src desc size."); + } const std::vector src_stride = src_padded_desc.GetPaddedStrides(); const std::vector dst_stride = dst_padded_desc.GetPaddedStrides(); @@ -413,6 +425,7 @@ void RNNTensorBaseLayoutConverter::ConvertInputTensorGPUData( handle, src_tensor_desc, src, SeqMajorPadded_desc, SeqMajorPadded_ptr); if(dst_layout != miopenRNNDataSeqMajorPadded) + { ConvertInputTensorGPUData( handle, SeqMajorPadded_desc, @@ -422,6 +435,7 @@ void 
RNNTensorBaseLayoutConverter::ConvertInputTensorGPUData( static_cast(reinterpret_cast(workspace) + SeqMajorPadded_desc.GetTensorMaxByteSpace()), reverse); + } } else if(src_layout == miopenRNNDataSeqMajorPadded) { @@ -515,6 +529,7 @@ void RNNTensorBaseLayoutConverter::ConvertInputTensorGPUData( } if(dst_layout == miopenRNNDataBatchMajorPadded) + { ConvertInputTensorGPUData( handle, reordered_padded_tensor_desc, @@ -524,6 +539,7 @@ void RNNTensorBaseLayoutConverter::ConvertInputTensorGPUData( static_cast(reinterpret_cast(workspace) + reordered_padded_tensor_desc.GetTensorMaxByteSpace()), reverse); + } } else MIOPEN_THROW(miopenStatusInternalError, "Unsupported layout."); diff --git a/src/rnn_api.cpp b/src/rnn_api.cpp index 8dce8f364e..605d6e7428 100644 --- a/src/rnn_api.cpp +++ b/src/rnn_api.cpp @@ -298,12 +298,16 @@ extern "C" miopenStatus_t miopenGetRNNTempSpaceSizes(miopenHandle_t handle, return miopen::try_([&] { if(workSpaceSize != nullptr) + { miopen::deref(workSpaceSize) = miopen::deref(rnnDesc).GetMaxWorkspaceSize( miopen::deref(handle), miopen::deref(xDesc), fwdMode); + } if((fwdMode == miopenRNNTraining) && reserveSpaceSize != nullptr) + { miopen::deref(reserveSpaceSize) = miopen::deref(rnnDesc).GetMaxReserveSize( miopen::deref(handle), miopen::deref(xDesc)); + } }); } @@ -533,16 +537,24 @@ static void LogCmdRNN(const miopenTensorDescriptor_t* xDesc, { std::string mode; miopenRNNMode_t rnnMode = miopen::deref(rnnDesc).rnnMode; - if(rnnMode == miopenRNNRELU) + + switch(rnnMode) + { + case miopenRNNRELU: mode = "relu"; - else if(rnnMode == miopenRNNTANH) + break; + case miopenRNNTANH: mode = "tanh"; - else if(rnnMode == miopenLSTM) + break; + case miopenLSTM: mode = "lstm"; - else if(rnnMode == miopenGRU) + break; + case miopenGRU: mode = "gru"; - else + break; + default: mode = ""; + } std::string batch_sz; if(miopen::deref(xDesc[0]).GetLengths()[0] == @@ -598,16 +610,24 @@ static void LogCmdRNN(const miopenSeqTensorDescriptor_t xDesc, { std::string mode; 
miopenRNNMode_t rnnMode = miopen::deref(rnnDesc).rnnMode; - if(rnnMode == miopenRNNRELU) + + switch(rnnMode) + { + case miopenRNNRELU: mode = "relu"; - else if(rnnMode == miopenRNNTANH) + break; + case miopenRNNTANH: mode = "tanh"; - else if(rnnMode == miopenLSTM) + break; + case miopenLSTM: mode = "lstm"; - else if(rnnMode == miopenGRU) + break; + case miopenGRU: mode = "gru"; - else + break; + default: mode = ""; + } std::string seq_len_array; { diff --git a/src/seq_tensor.cpp b/src/seq_tensor.cpp index 0ae8f40016..7d7a9c32b5 100644 --- a/src/seq_tensor.cpp +++ b/src/seq_tensor.cpp @@ -204,14 +204,20 @@ SeqTensorDescriptor::SeqTensorDescriptor(miopenDataType_t t, SetDimOrder(layout_in); if(padding_in.empty()) + { padds = std::vector(dims, 0); + } else { if(padding_in.size() != dims) + { MIOPEN_THROW(miopenStatusBadParm, "Lengths and padding number dimensions must be equal"); + } else + { padds = padding_in; + } } SetSequenceLen(seq_len, use_seq_len); @@ -372,8 +378,10 @@ void SeqTensorDescriptor::SetSequenceLen(const std::vector& seq_len { auto seq_cnt = lens[0]; if(seq_lens.empty() || seq_cnt != seq_lens.size()) + { MIOPEN_THROW(miopenStatusBadParm, "Size of sequence_len and first dimension size must be equal"); + } sequence_len = seq_lens; @@ -476,12 +484,14 @@ std::string SeqTensorDescriptor::ToString() const " }"; if(!this->sequence_len.empty()) + { result += ", sequence_len[" + std::accumulate(std::next(this->sequence_len.begin()), this->sequence_len.end(), std::to_string(sequence_len[0]), coma_fold) + " ]"; + } if(this->packed) result += ", packed"; diff --git a/src/solution.cpp b/src/solution.cpp index 1378a8a839..a29ae8fe30 100644 --- a/src/solution.cpp +++ b/src/solution.cpp @@ -53,10 +53,12 @@ void Solution::Run(Handle& handle, std::size_t workspace_size) { if(workspace_size < workspace_required) + { MIOPEN_THROW(miopenStatusBadParm, GetSolver().ToString() + " requires at least " + std::to_string(workspace_required) + " workspace, while " + 
std::to_string(workspace_size) + " was provided"); + } const auto run = boost::hof::match([&](const ConvolutionDescriptor& op_desc) { RunImpl(handle, inputs, workspace, workspace_size, op_desc); @@ -94,8 +96,10 @@ void Solution::RunImpl(Handle& handle, const auto get_input_checked = [&](auto name, const std::string& name_str) { const auto& found = inputs.find(name); if(found == inputs.end()) + { MIOPEN_THROW(miopenStatusInvalidValue, "Problem is missing " + name_str + " tensor descriptor."); + } auto ret = found->second; if(!ret.descriptor.has_value()) ret.descriptor = GetProblem().GetTensorDescriptorChecked(name, name_str); @@ -240,12 +244,16 @@ void from_json(const nlohmann::json& json, Solution& solution) constexpr const auto check_header = Solution::SerializationMetadata::Current(); if(header.validation_number != check_header.validation_number) + { MIOPEN_THROW(miopenStatusInvalidValue, "Invalid buffer has been passed to the solution deserialization."); + } if(header.version != check_header.version) + { MIOPEN_THROW( miopenStatusVersionMismatch, "Data from wrong version has been passed to the solution deserialization."); + } } json.at("time").get_to(solution.time); diff --git a/src/solver/conv_MP_bidirectional_winograd.cpp b/src/solver/conv_MP_bidirectional_winograd.cpp index c364c5ae00..a7c8e5938c 100644 --- a/src/solver/conv_MP_bidirectional_winograd.cpp +++ b/src/solver/conv_MP_bidirectional_winograd.cpp @@ -229,9 +229,7 @@ static bool IsApplicableTransform(const ExecutionContext& ctx, const ProblemDesc } if(!problem.IsLayoutDefault()) - { return false; - } { unsigned int const waves_in_group = 512 / wave_size; @@ -325,9 +323,7 @@ bool ConvMPBidirectWinograd::IsA // ROCBLAS for GEMM step if(!problem.IsLayoutDefault()) - { return false; - } if(problem.IsTensorsCasted()) return false; @@ -339,20 +335,30 @@ bool ConvMPBidirectWinograd::IsA static const int wino_filter_tile = std::max(WinoFilterH, WinoFilterW); if(wino_data_tile == 6 && wino_filter_tile == 3) + 
{ if(IS_DISABLED(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F6X3{})) return false; + } if(wino_data_tile == 5 && wino_filter_tile == 3) + { if(IS_DISABLED(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F5X3{})) return false; + } if(wino_data_tile == 4 && wino_filter_tile == 3) + { if(IS_DISABLED(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F4X3{})) return false; + } if(wino_data_tile == 3 && wino_filter_tile == 3) + { if(IS_DISABLED(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F3X3{})) return false; + } if(wino_data_tile == 2 && wino_filter_tile == 3) + { if(IS_DISABLED(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F2X3{})) return false; + } return IsApplicableTransform(ctx, problem); } @@ -853,20 +859,30 @@ bool ConvMPBidirectWinograd_xdlops ConstData_t { if(has_bias) + { return dynamic_cast( *invoke_ctx.op_args.params[1]) .bdata; + } else + { return nullptr; + } }(); if(activ_idx == -1) // skip the activation args diff --git a/src/solver/conv_asm_dir_BwdWrW1x1.cpp b/src/solver/conv_asm_dir_BwdWrW1x1.cpp index 0abe71326f..3daec5f2b4 100644 --- a/src/solver/conv_asm_dir_BwdWrW1x1.cpp +++ b/src/solver/conv_asm_dir_BwdWrW1x1.cpp @@ -342,10 +342,14 @@ bool PerformanceConfigConvAsmBwdWrW1x1::IsValid(const ExecutionContext& ctx, const std::string name = ctx.GetStream().GetDeviceName(); if(name.find("gfx8") == std::string::npos && name.find("gfx9") == std::string::npos) + { bfp16_convert = 0; + } else + { bfp16_convert = (problem.GetOutDataType() == miopenBFloat16) ? ((c_mult + k_mult) * read_size) : 0; + } if(!(acc_gprs + 12 + (c_mult + k_mult) * read_size * (data_prefetch + 1) + bfp16_convert <= (n_part_cnt > 4 ? 
128 : 256))) diff --git a/src/solver/conv_asm_dir_BwdWrW3x3.cpp b/src/solver/conv_asm_dir_BwdWrW3x3.cpp index ae58cfcd9b..f400949f94 100644 --- a/src/solver/conv_asm_dir_BwdWrW3x3.cpp +++ b/src/solver/conv_asm_dir_BwdWrW3x3.cpp @@ -153,21 +153,29 @@ bool PerformanceConfigAsmDirect3x3WrW::IsValid(const ExecutionContext& ctx, { if((problem.GetOutChannels_() % (GetCPerWave() * problem.GetGroupCount()) != 0) || (problem.GetInChannels_() % (GetKPerWave() * problem.GetGroupCount()) != 0)) + { return false; + } } else { if((problem.GetOutChannels_() % (GetKPerWave() * problem.GetGroupCount()) != 0) || (problem.GetInChannels_() % (GetCPerWave() * problem.GetGroupCount()) != 0)) + { return false; + } } if((problem.GetOutChannels_() % (64 / chunk_size) != 0) && (problem.GetInChannels_() % (64 / chunk_size) != 0)) + { return false; + } if((reverse_inout != 0 ? problem.GetInChannels_() : problem.GetOutChannels_()) % GetCPerWave() != 0) + { return false; + } if(!(chunk_size * k_per_wave <= 64)) return false; if((reverse_inout != 0 ? problem.GetOutChannels_() : problem.GetInChannels_()) % k_per_wave != @@ -217,8 +225,10 @@ bool PerformanceConfigAsmDirect3x3WrW::IsValid(const ExecutionContext& ctx, if(!(vgprs <= 256)) return false; if(n_per_group > 4) + { if(!(vgprs <= 128)) return false; + } if(limit_wave_cnt != 0 && limit_wave_cnt * 4 < n_per_group) return false; const auto lds_size = static_cast(n_per_group - 1) * solver::wave_size * @@ -258,39 +268,61 @@ void PerformanceConfigAsmDirect3x3WrW::HeuristicInit(const ExecutionContext& ctx chunk_size = (problem.GetOutWidth_() < 48) ? 
8 : 16; if((problem.GetOutChannels_() % (64 / chunk_size) != 0) && (problem.GetInChannels_() % (64 / chunk_size) != 0)) + { chunk_size = 16; // Fixup for correctness + } reverse_inout = 0; if(IsReverseInOutAllowed(problem) && ((problem.GetOutChannels_() % 4 != 0) || (problem.GetOutWidth_() < 8))) + { reverse_inout = 1; + } const auto c_k = problem.GetOutChannels_() * problem.GetInChannels_() / problem.GetGroupCount(); // C*K if(c_k < 256) + { k_per_wave = 1; + } else if(c_k < 16384) + { k_per_wave = 2; + } else // C*K >= 16k + { k_per_wave = ((chunk_size == 8) ? 2 : 4); + } while((reverse_inout != 0 ? problem.GetOutChannels_() : problem.GetInChannels_()) % k_per_wave != 0) + { k_per_wave /= 2; // Fixup for correctness + } if(c_k <= 512) + { n_per_group = 8; + } else if(c_k <= 4096) + { n_per_group = 4; + } else if(c_k <= 8192) + { n_per_group = 2; + } else + { n_per_group = 1; + } if(n_per_group > problem.GetBatchSize_()) n_per_group = problem.GetBatchSize_(); // n_per_group should never be > batch size. if(problem.GetOutWidth_() >= 256 && n_per_group > 4) // when width >= 256, n_per_group should not be > 4. + { n_per_group = 4; + } pipe_lines_depth = (problem.GetOutHeight_() <= 1) ? 
1 : 2; if((problem.GetOutHeight_() < 8) && (problem.GetOutWidth_() < 64)) diff --git a/src/solver/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp b/src/solver/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp index 9cfdd8aeea..c6c14e2860 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp @@ -729,8 +729,10 @@ void PerformanceConfigAsmImplicitGemmGTCBwdXdlopsNHWC::HeuristicInit( { if(miopen::IsDisabled( MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16{})) + { if(problem.IsFp16() && gks > 0) vector_store = 1; + } if(gks > 0) gemm_k_global_split = static_cast(gks); } @@ -795,11 +797,15 @@ bool PerformanceConfigAsmImplicitGemmGTCBwdXdlopsNHWC::IsValid( if(!((problem.IsFp16() && precision == "fp16") || (problem.IsFp32() && precision == "fp32") || (problem.IsBfp16() && precision == "bf16"))) + { return false; + } if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16{})) + { if(problem.IsFp16() && gemm_k_global_split != 0 && vector_store != 1) return false; + } const auto group = problem.GetGroupCount(); const int k = problem.GetInChannels_(); @@ -1018,10 +1024,12 @@ size_t ConvAsmImplicitGemmGTCDynamicBwdXdlopsNHWC::GetWorkspaceSize( } if(!problem.IsFp32()) + { size_tensor_cast = miopen::GetTypeSize(miopenFloat) // The intermediate output of the 1st // kernel is FP32, when using FP32 atomic * n * c * hi * wi; + } MultiBufferWorkspaceTraits wt( {size_trans_input, size_trans_weight, size_trans_output, size_tensor_cast}, buf_alignment); diff --git a/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp b/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp index 32b50167cf..f395e7b290 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp @@ -1538,8 +1538,10 @@ bool ConvAsmImplicitGemmGTCDynamicFwdXdlops::IsApplicable(const ExecutionContext #if WORKAROUND_SWDEV_306318 if((problem.GetWeightsHeight_() == 1) && (problem.GetWeightsWidth_() == 1) && 
(problem.GetInChannels_() % 8 != 0)) + { if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS{})) return false; + } #endif const auto target = ctx.GetStream().GetTargetProperties(); diff --git a/src/solver/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp b/src/solver/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp index 4ab9ce1c37..9bc88dd10e 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp @@ -601,8 +601,10 @@ void PerformanceConfigAsmImplicitGemmGTCFwdXdlopsNHWC::HeuristicInit( { if(miopen::IsDisabled( MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16{})) + { if(problem.IsFp16() && gks > 0) vector_store = 1; + } if(gks > 0) gemm_k_global_split = static_cast(gks); } @@ -670,11 +672,15 @@ bool PerformanceConfigAsmImplicitGemmGTCFwdXdlopsNHWC::IsValid( if(!((problem.IsFp16() && precision == "fp16") || (problem.IsFp32() && precision == "fp32") || (problem.IsBfp16() && precision == "bf16"))) + { return false; + } if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16{})) + { if(problem.IsFp16() && gemm_k_global_split != 0 && vector_store != 1) return false; + } const int c = problem.GetInChannels_(); const int k = problem.GetOutChannels_(); @@ -836,10 +842,12 @@ size_t ConvAsmImplicitGemmGTCDynamicFwdXdlopsNHWC::GetWorkspaceSize( } if(!problem.IsFp32()) + { size_tensor_cast = miopen::GetTypeSize(miopenFloat) // The intermediate output of the 1st // kernel is FP32, when using FP32 atomic * n * k * ho * wo; + } MultiBufferWorkspaceTraits wt( {size_trans_input, size_trans_weight, size_trans_output, size_tensor_cast}, buf_alignment); diff --git a/src/solver/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp b/src/solver/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp index 8ac238395a..77256b15e7 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp @@ -755,12 +755,18 @@ bool 
PerformanceConfigAsmImplicitGemmGTCWrwXdlopsNHWC::IsValid( if(!((problem.IsFp16() && precision == "fp16") || (problem.IsFp32() && precision == "fp32") || (problem.IsBfp16() && precision == "bf16"))) + { return false; + } if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16{})) + { if(problem.IsFp16() && tensor_b_thread_lengths[3] != 1 && gemm_k_global_split != 0 && vector_store != 1) + { return false; + } + } const int k = problem.GetInChannels_(); const int c = problem.GetOutChannels_(); @@ -997,10 +1003,12 @@ size_t ConvAsmImplicitGemmGTCDynamicWrwXdlopsNHWC::GetWorkspaceSize( } if(!problem.IsFp32()) + { size_tensor_cast = miopen::GetTypeSize(miopenFloat) // The intermediate output of the 1st // kernel is FP32, when using FP32 atomic * (k / group) * c * y * x; + } MultiBufferWorkspaceTraits wt( {size_trans_input, size_trans_weight, size_trans_output, size_tensor_cast}, buf_alignment); @@ -1232,9 +1240,11 @@ ConvSolution ConvAsmImplicitGemmGTCDynamicWrwXdlopsNHWC::GetSolution( float zero = 0.f; if(workSpace == nullptr || workSpaceSize < required_workspace_size) + { MIOPEN_THROW("Not enough workspace has been provided for " "ConvAsmImplicitGemmGTCDynamicWrwXdlopsNHWC with fp16 and atomic " "add."); + } auto trans_input_buf = trans_input_size == 0 ? 
null_buf diff --git a/src/solver/conv_asm_implicit_gemm_v4r1_dynamic.cpp b/src/solver/conv_asm_implicit_gemm_v4r1_dynamic.cpp index 8e1450c7a3..604fb30bad 100644 --- a/src/solver/conv_asm_implicit_gemm_v4r1_dynamic.cpp +++ b/src/solver/conv_asm_implicit_gemm_v4r1_dynamic.cpp @@ -207,7 +207,9 @@ bool TunableImplicitGemmV4R1Dynamic::IsValid(const ExecutionContext& ctx, BPerBlock % InBlockCopyClusterLengths_B == 0 && KPerBlock % WeiBlockCopyClusterLengths_K == 0 && N1 % InBlockCopyClusterLengths_N1 == 0 && N2 % InBlockCopyClusterLengths_N2 == 0)) + { return false; + } // divide block work by [K, B] if(!(K % KPerBlock == 0 && B % BPerBlock == 0 && E % EPerBlock == 0)) @@ -242,7 +244,9 @@ bool TunableImplicitGemmV4R1Dynamic::IsValid(const ExecutionContext& ctx, if(block_size != InBlockCopyClusterLengths_E * InBlockCopyClusterLengths_N1 * InBlockCopyClusterLengths_B * InBlockCopyClusterLengths_N2) + { return false; + } if(block_size != WeiBlockCopyClusterLengths_K * WeiBlockCopyClusterLengths_E) return false; @@ -305,9 +309,7 @@ bool ConvAsmImplicitGemmV4R1DynamicFwd::IsApplicable(const ExecutionContext& ctx return false; if(!problem.IsLayoutDefault()) - { return false; - } const auto target = ctx.GetStream().GetTargetProperties(); if(target.Xnack() && *target.Xnack()) @@ -350,9 +352,7 @@ bool ConvAsmImplicitGemmV4R1DynamicFwd_1x1::IsApplicable(const ExecutionContext& return false; if(!problem.IsLayoutDefault()) - { return false; - } const auto target = ctx.GetStream().GetTargetProperties(); if(target.Xnack() && *target.Xnack()) @@ -401,7 +401,9 @@ static inline ConvSolution GetSolutionBase(const ExecutionContext& ctx, MIOPEN_LOG_I2(kernel.kernel_file + ":" + kernel.kernel_name); if(kernel_is_1x1) + { result.invoker_factory = conv::MakeImplGemmDynamicForward1x1InvokerFactory(problem); + } else { int packed_value = 0; @@ -421,9 +423,11 @@ ConvSolution ConvAsmImplicitGemmV4R1DynamicFwd::GetSolution(const ExecutionConte }); if(it == tunables.end()) + { MIOPEN_THROW( 
miopenStatusInternalError, "no solution found in igemm v4r1 dynamic fwd, should call IsApplicable() first."); + } return GetSolutionBase(ctx, problem, *it, AsmImplicitGemmV4R1); } @@ -438,9 +442,11 @@ ConvAsmImplicitGemmV4R1DynamicFwd_1x1::GetSolution(const ExecutionContext& ctx, }); if(it == tunables.end()) + { MIOPEN_THROW( miopenStatusInternalError, "no solution found in igemm v4r1 dynamic fwd 1x1, should call IsApplicable() first."); + } return GetSolutionBase(ctx, problem, *it, AsmImplicitGemmV4R1_1x1); } diff --git a/src/solver/conv_asm_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp b/src/solver/conv_asm_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp index a5d056178d..2c9a033683 100644 --- a/src/solver/conv_asm_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp +++ b/src/solver/conv_asm_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp @@ -746,8 +746,10 @@ FindImplicitGemmWrwGTCDynamicXdlopsKernel(const ProblemDescription& problem) else { if(cfg.tensor_a_thread_lengths[2] * cfg.tensor_a_thread_lengths[3] > 1) + { if(gemm_m % gemm_m_per_block != 0) continue; + } } if(wo % cfg.tensor_b_thread_lengths[1] != 0) @@ -797,7 +799,9 @@ ConvAsmImplicitGemmGTCDynamicWrwXdlops::GetWorkspaceSize(const ExecutionContext& const ProblemDescription& problem) const { if(problem.IsFp32()) + { return 0; + } else { const int k = problem.GetInChannels_(); @@ -975,9 +979,11 @@ ConvAsmImplicitGemmGTCDynamicWrwXdlops::GetSolution(const ExecutionContext& ctx, float zero = 0.f; if(workSpace == nullptr || workSpaceSize < required_workspace_size) + { MIOPEN_THROW("Not enough workspace has been provided for " "ConvAsmImplicitGemmGTCDynamicWrwXdlops with fp16 and atomic " "add."); + } SetTensor(handle, workspaceDesc, workSpace, &zero); if(handle.IsProfilingEnabled()) diff --git a/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp b/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp index fb5f0caf7c..2edea4848a 100644 --- a/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp +++ 
b/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp @@ -306,8 +306,10 @@ bool ConvAsmImplicitGemmV4R1DynamicWrw::IsApplicable(const ExecutionContext& ctx return false; if(GetGemmkGroups(problem) > 0) // GetSolution() adds HIP kernels in this case. + { if(!ctx.use_hip_kernels) return false; + } if(!problem.direction.IsBackwardWrW()) return false; @@ -328,9 +330,7 @@ bool ConvAsmImplicitGemmV4R1DynamicWrw::IsApplicable(const ExecutionContext& ctx return false; if(!problem.IsLayoutDefault()) - { return false; - } const auto target = ctx.GetStream().GetTargetProperties(); if(target.Xnack() && *target.Xnack()) diff --git a/src/solver/conv_bin_winoRxS_fused.cpp b/src/solver/conv_bin_winoRxS_fused.cpp index f11edc368e..328a8813af 100644 --- a/src/solver/conv_bin_winoRxS_fused.cpp +++ b/src/solver/conv_bin_winoRxS_fused.cpp @@ -201,11 +201,17 @@ ConvSolution ConvBinWinogradRxSFused::GetSolution(const FusionContext& context, const int zero = 0; int flags = [&]() { if(bias_idx != -1 && activ_idx != -1) + { return (1 << 7) + (1 << 8); + } else if(bias_idx != -1) + { return (1 << 7); + } else + { return zero; + } }(); const miopenActivationMode_t activ_mode = [&]() { if(activ_idx != -1) diff --git a/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp b/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp index 001f3a8cb7..e74a5d49a3 100644 --- a/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp +++ b/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp @@ -90,7 +90,9 @@ bool ConvCkIgemmFwdV6r1DlopsNchw::IsApplicable(const ExecutionContext& ctx, #else if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW{})) #endif + { return false; + } if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) return false; if(!ctx.use_hip_kernels) diff --git a/src/solver/conv_direct_naive_conv.cpp b/src/solver/conv_direct_naive_conv.cpp index f87511f911..9800751c1d 100644 --- a/src/solver/conv_direct_naive_conv.cpp +++ b/src/solver/conv_direct_naive_conv.cpp @@ -124,6 +124,7 @@ std::string 
ConvDirectNaiveConvKernelName(const ProblemDescription& problem) kernel_name << "naive_conv_nonpacked_"; } + // NOLINTBEGIN(*-braces-around-statements) if(problem.direction.IsForward()) kernel_name << "fwd_"; else if(problem.direction.IsBackwardData()) @@ -132,6 +133,7 @@ std::string ConvDirectNaiveConvKernelName(const ProblemDescription& problem) kernel_name << "wrw_"; else MIOPEN_THROW("unsupported convolution direction"); + // NOLINTEND(*-braces-around-statements) if(problem.IsLayoutDefault()) { @@ -148,7 +150,9 @@ std::string ConvDirectNaiveConvKernelName(const ProblemDescription& problem) kernel_name << "ndhwc_"; } else + { MIOPEN_THROW("unsupported tensor layout"); + } if(problem.IsFp8() || problem.IsTensorsCasted() || problem.IsBfp8()) { @@ -158,15 +162,25 @@ std::string ConvDirectNaiveConvKernelName(const ProblemDescription& problem) return kernel_name.str(); } else if(IsInputFp32(problem)) + { kernel_name << "float_"; + } else if(IsInputFp16(problem)) + { kernel_name << "half_"; + } else if(IsInputBfp16(problem)) + { kernel_name << "ushort_"; + } else if(IsInputInt8(problem)) + { kernel_name << "int8_t_"; + } else + { MIOPEN_THROW("unsupported data type:"); + } if(IsAccInt32(problem)) kernel_name << "int32_t_"; @@ -175,6 +189,7 @@ std::string ConvDirectNaiveConvKernelName(const ProblemDescription& problem) else MIOPEN_THROW("unsupported data type:"); + // NOLINTBEGIN(*-braces-around-statements) if(IsOutputFp32(problem)) kernel_name << "float"; else if(IsOutputFp16(problem)) @@ -187,6 +202,7 @@ std::string ConvDirectNaiveConvKernelName(const ProblemDescription& problem) kernel_name << "int32_t"; else MIOPEN_THROW("unsupported data type:"); + // NOLINTEND(*-braces-around-statements) return kernel_name.str(); } diff --git a/src/solver/conv_direct_naive_conv_bwd.cpp b/src/solver/conv_direct_naive_conv_bwd.cpp index 1e8f006ef0..d34406d398 100644 --- a/src/solver/conv_direct_naive_conv_bwd.cpp +++ b/src/solver/conv_direct_naive_conv_bwd.cpp @@ -118,7 +118,9 @@ 
ConvSolution ConvDirectNaiveConvBwd::GetSolution(const ExecutionContext& ctx, grid_size = static_cast(group) * n * di; } else + { MIOPEN_THROW("Unsupported layout"); + } KernelInfo kernel; diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp index b2b591b859..de9e9cd5a5 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp @@ -611,7 +611,9 @@ size_t ConvHipImplicitGemmBwdDataV1R1::GetWorkspaceSize(const ExecutionContext&, const ProblemDescription& problem) const { if(problem.IsFp32()) + { return 0; + } else { // In case of fp16/bfp16, because there is no atomic add ISA, @@ -658,8 +660,10 @@ bool ConvHipImplicitGemmBwdDataV1R1::IsApplicable(const ExecutionContext& ctx, return false; #if WORKAROUND_ISSUE_309 if(problem.IsBfp16()) + { if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1{})) return false; + } #endif const auto k = ProblemInterpreter::GetOutputChannelK(problem); diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp index f657fa74fe..68f61d3b13 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp @@ -721,7 +721,9 @@ ConvHipImplicitGemmBwdDataV1R1Xdlops::GetWorkspaceSize(const ExecutionContext&, const ProblemDescription& problem) const { if(problem.IsFp32()) + { return 0; + } else { const auto y = ProblemInterpreter::GetFilterHeightY(problem); diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp index 39e8c71c16..d679f07bfb 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp @@ -292,10 +292,14 @@ ConvHipImplicitGemmV4R1Fwd::GetSolution(const ExecutionContext& ctx, // Since C/EPack are not in contiguous memory along with Y*X, vector length // can' be more than Y*X 
if(KernelFilterHeightY(problem) * KernelFilterWidthX(problem) >= WeiBlockCopySubLengths_E) + { WeiBlockCopySrcDataPerRead_E = GetReadWriteVectorSize(WeiBlockCopySubLengths_E); + } else + { WeiBlockCopySrcDataPerRead_E = GetReadWriteVectorSize( static_cast(KernelFilterHeightY(problem) * KernelFilterWidthX(problem))); + } } const auto& InBlockCopySubLengths_B = b_per_block / config.InBlockCopyClusterLengths_B; diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp index d25ca1b68b..ce1f6a328e 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp @@ -368,8 +368,10 @@ PerformanceImplicitGemmForwardV4R4Xdlops_Padded_Gemm::CalculateGemmABlockCopyPer // GemmKPack is src vector read dimension, bounded by GemmKPack SrcDataPerRead_GemmKPack = gcd(SrcDataPerRead_GemmKPack, GemmKPack); if(gemm_k_extra != 0) + { SrcDataPerRead_GemmKPack = gcd(SrcDataPerRead_GemmKPack, gemm_k_after_padding - gemm_k_extra); + } // calculate threadwise copy size auto data_per_thread_copy = diff --git a/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp index 5a42ba3255..0716f101d3 100644 --- a/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp @@ -1113,7 +1113,9 @@ ConvHipImplicitGemmWrwV4R4Xdlops::GetWorkspaceSize(const ExecutionContext&, const ProblemDescription& problem) const { if(problem.IsFp32()) + { return 0; + } else { const auto k = ProblemInterpreter::GetOutputChannelK(problem); diff --git a/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops_padded_gemm.cpp b/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops_padded_gemm.cpp index abd178dcca..151e763713 100644 --- a/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops_padded_gemm.cpp +++ 
b/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops_padded_gemm.cpp @@ -1200,7 +1200,9 @@ std::size_t ConvHipImplicitGemmWrwV4R4Xdlops_Padded_Gemm::GetWorkspaceSize( const ExecutionContext&, const ProblemDescription& problem) const { if(problem.IsFp32()) + { return 0; + } else { const auto k = ProblemInterpreter::GetOutputChannelK(problem); diff --git a/src/solver/conv_multipass_wino3x3WrW.cpp b/src/solver/conv_multipass_wino3x3WrW.cpp index 233489c4fc..44f56bd38d 100644 --- a/src/solver/conv_multipass_wino3x3WrW.cpp +++ b/src/solver/conv_multipass_wino3x3WrW.cpp @@ -380,54 +380,82 @@ bool ConvWinograd3x3MultipassWrW static const int wino_filter_tile = std::max(WinoFilterH, WinoFilterW); if(wino_data_tile == 3 && wino_filter_tile == 2) + { if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X2{}) || problem.GetKernelStrideH() == 1) + { return false; + } + } if(wino_data_tile == 3 && wino_filter_tile == 3) + { if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X3{}) || problem.GetKernelStrideH() == 1) + { return false; + } + } const std::string name = ctx.GetStream().GetDeviceName(); #if WORKAROUND_SWDEV_234193 if(problem.IsFp16() && (StartsWith(name, "gfx908") || StartsWith(name, "gfx906"))) { if(wino_data_tile == 3 && wino_filter_tile == 4) + { if(!miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4{})) return false; + } if(wino_data_tile == 3 && wino_filter_tile == 5) + { if(!miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X5{})) return false; + } if(wino_data_tile == 3 && wino_filter_tile == 6) + { if(!miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X6{})) return false; + } } else #endif { if(wino_data_tile == 3 && wino_filter_tile == 4) + { if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4{})) return false; + } if(wino_data_tile == 3 && wino_filter_tile == 5) + { if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X5{})) return false; + } if(wino_data_tile == 3 && wino_filter_tile == 6) + { 
if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X6{})) return false; + } } if(wino_data_tile == 7 && wino_filter_tile == 2) + { if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F7X2{})) return false; + } if(wino_data_tile == 7 && wino_filter_tile == 3) + { if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F7X3{})) return false; + } if(wino_data_tile == 5 && wino_filter_tile == 3) + { if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F5X3{})) return false; + } if(wino_data_tile == 5 && wino_filter_tile == 4) + { if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F5X4{})) return false; + } if(!ctx.use_asm_kernels) return false; if(!ctx.rmv.IsV2orV3()) diff --git a/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp b/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp index 4e0cda8629..8ade9bb666 100644 --- a/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp +++ b/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp @@ -45,8 +45,10 @@ bool ConvOclBwdWrW1x1::IsApplicable(const ExecutionContext& ctx, #if WORKAROUND_SWDEV_266868 if(StartsWith(ctx.GetStream().GetDeviceName(), "gfx10") || StartsWith(ctx.GetStream().GetDeviceName(), "gfx11")) + { if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1{})) return false; + } #endif if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1{})) return false; @@ -63,9 +65,7 @@ bool ConvOclBwdWrW1x1::IsApplicable(const ExecutionContext& ctx, if(!(problem.IsFp32() || problem.IsFp16() || problem.IsBfp16())) return false; if(!problem.IsLayoutDefault()) - { return false; - } if(problem.IsTensorsCasted()) return false; diff --git a/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp b/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp index 2b400909f8..6664e46a5c 100644 --- a/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp +++ b/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp @@ -424,6 +424,7 @@ void PerformanceConfigConvOclBwdWrw2::HeuristicInit( n_waves = 1; read_size = 6; const auto n_output_channels_per_group = problem.GetInChannels_() / problem.GetGroupCount(); + // 
NOLINTBEGIN(*-braces-around-statements) if(n_output_channels_per_group % 4 == 0) n_out_channels_per_tile = 4; else if(n_output_channels_per_group % 3 == 0) @@ -432,6 +433,7 @@ void PerformanceConfigConvOclBwdWrw2::HeuristicInit( n_out_channels_per_tile = 2; else n_out_channels_per_tile = 1; + // NOLINTEND(*-braces-around-statements) n_out_channels_tiles = 1; n_out_rows_in_lcl = problem.GetWeightsHeight_(); } diff --git a/src/solver/conv_ocl_dir2Dfwd1x1.cpp b/src/solver/conv_ocl_dir2Dfwd1x1.cpp index b21effc0b3..0c50a6cf37 100644 --- a/src/solver/conv_ocl_dir2Dfwd1x1.cpp +++ b/src/solver/conv_ocl_dir2Dfwd1x1.cpp @@ -44,8 +44,10 @@ bool ConvOclDirectFwd1x1::IsApplicable(const ExecutionContext& ctx, #if WORKAROUND_SWDEV_271887 if(StartsWith(ctx.GetStream().GetDeviceName(), "gfx10") || StartsWith(ctx.GetStream().GetDeviceName(), "gfx11")) + { if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1{})) return false; + } #endif if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) return false; @@ -65,9 +67,7 @@ bool ConvOclDirectFwd1x1::IsApplicable(const ExecutionContext& ctx, if(problem.IsTensorsCasted()) return false; if(!problem.IsLayoutDefault()) - { return false; - } return problem.GetDilationW() == 1 && problem.GetDilationH() == 1 && problem.GetWeightsWidth_() == 1 && problem.GetWeightsHeight_() == 1 && diff --git a/src/solver/conv_ocl_dir2Dfwd_exhaustive_search.cpp b/src/solver/conv_ocl_dir2Dfwd_exhaustive_search.cpp index 12ebf5bb05..e51c4b33cb 100644 --- a/src/solver/conv_ocl_dir2Dfwd_exhaustive_search.cpp +++ b/src/solver/conv_ocl_dir2Dfwd_exhaustive_search.cpp @@ -216,11 +216,17 @@ ConvOclDirectFwdLegacyExhaustiveSearch::Search(const ExecutionContext& ctx, const AnyInvokeParams& invoke_ctx) const { if(problem.IsFp16()) + { return SearchImpl(ctx, problem, invoke_ctx); + } else if(problem.IsFp32()) + { return SearchImpl(ctx, problem, invoke_ctx); + } else if(problem.IsBfp16()) + { return SearchImpl(ctx, problem, invoke_ctx); + } else { MIOPEN_THROW("Unsupported 
float_size"); diff --git a/src/solver/conv_winoRxS.cpp b/src/solver/conv_winoRxS.cpp index d9cbeb713f..1c222930b4 100644 --- a/src/solver/conv_winoRxS.cpp +++ b/src/solver/conv_winoRxS.cpp @@ -762,10 +762,12 @@ ConvSolution ConvBinWinoRxS::GetSolution( if(!IsWarned) { if(ctx.GetStream().GetMaxHardwareComputeUnits() > MAX_CU_LIMIT) + { MIOPEN_LOG_WE(SolverDbId() << ": GPU has " << ctx.GetStream().GetMaxHardwareComputeUnits() << "CUs, but this solver supports max " << MAX_CU_LIMIT << "and thus may show sub-optimal performance."); + } IsWarned = true; } diff --git a/src/solver/conv_wino_fury_RxS.cpp b/src/solver/conv_wino_fury_RxS.cpp index 89f870e35e..802235c38c 100644 --- a/src/solver/conv_wino_fury_RxS.cpp +++ b/src/solver/conv_wino_fury_RxS.cpp @@ -212,10 +212,12 @@ ConvWinoFuryRxS::GetSolution(const ExecutionContext& ctx, if(!IsWarned) { if(ctx.GetStream().GetMaxHardwareComputeUnits() > max_cu_limit) + { MIOPEN_LOG_WE(SolverDbId() << ": GPU has " << ctx.GetStream().GetMaxHardwareComputeUnits() << "CUs, but this solver supports max " << max_cu_limit << "and thus may show sub-optimal performance."); + } IsWarned = true; } diff --git a/src/solver/fft.cpp b/src/solver/fft.cpp index 9a3c7858cc..e122b7977d 100644 --- a/src/solver/fft.cpp +++ b/src/solver/fft.cpp @@ -317,9 +317,13 @@ ConvSolution fft::GetSolution(const ExecutionContext& ctx, const ProblemDescript if(((in_h == 28) && (in_w == 28)) || ((in_h == 14) && (in_w == 14)) || ((in_h == 7) && (in_w == 7))) + { cgemm_choice = 2; + } else if((in_h == 27) && (in_w == 27)) + { cgemm_choice = 1; + } if((in_n < 16) || (in_c < 16) || (out_c < 16)) cgemm_choice = 0; @@ -343,13 +347,21 @@ ConvSolution fft::GetSolution(const ExecutionContext& ctx, const ProblemDescript } if((in_h == 28) && (in_w == 28)) + { parms += " -DCFF_IMG_SZ_28_28"; + } else if((in_h == 27) && (in_w == 27)) + { parms += " -DCFF_IMG_SZ_27_27"; + } else if((in_h == 14) && (in_w == 14)) + { parms += " -DCFF_IMG_SZ_14_14"; + } else if((in_h == 7) && (in_w 
== 7)) + { parms += " -DCFF_IMG_SZ_7_7"; + } const auto workSpaceSize = GetWorkspaceSize(ctx, problem); @@ -428,9 +440,11 @@ ConvSolution fft::GetSolution(const ExecutionContext& ctx, const ProblemDescript const auto& tensors = params.tensors; if(params.workSpaceSize < workSpaceSize) + { MIOPEN_THROW("Not enough workspace for FFT: expected " + std::to_string(workSpaceSize) + ", got " + std::to_string(params.workSpaceSize)); + } float time_fft = 0; int kernel_id = 0; diff --git a/src/solver/gemm.cpp b/src/solver/gemm.cpp index 2cfbd257dc..60a06097a8 100644 --- a/src/solver/gemm.cpp +++ b/src/solver/gemm.cpp @@ -362,8 +362,10 @@ ConvSolution GemmFwd1x1_0_2::GetSolution(const ExecutionContext& context, const auto y = conv_params.tensors.out; if((workSpace == nullptr && workspace_req > 0) || workSpaceSize < workspace_req) + { MIOPEN_THROW("Not enough workspace for GEMM (" + std::to_string(workSpaceSize) + " provided, " + std::to_string(workspace_req) + " required)"); + } const std::string name = group_count > 1 ? "groupconv" : "convolution"; MIOPEN_LOG_FUNCTION(name + ", 1x1 u2xv2"); @@ -647,8 +649,10 @@ ConvSolution GemmFwd1x1_0_1_int8::GetSolution(const ExecutionContext& context, MIOPEN_LOG_FUNCTION("convolution, 1x1"); if((workSpace == nullptr && workspace_req > 0) || workSpaceSize < workspace_req) + { MIOPEN_THROW("Not enough workspace for GEMM (" + std::to_string(workSpaceSize) + " provided, " + std::to_string(workspace_req) + " required)"); + } // y = w * x miopenStatus_t gemm_status = miopenStatusNotInitialized; @@ -1143,9 +1147,11 @@ ConvSolution GemmFwdRest::GetSolution(const ExecutionContext& context, MIOPEN_LOG_FUNCTION(name + ", non 1x1"); if((workSpace == nullptr && workspace_req > 0) || workSpaceSize < workspace_req) + { MIOPEN_THROW("Not enough workspace for GemmFwdRest (" + std::to_string(workSpaceSize) + " provided, " + std::to_string(workspace_req) + " required)"); + } const auto runs = conv_params.type == InvokeType::Run ? 
in_n : 1; @@ -1213,6 +1219,7 @@ ConvSolution GemmFwdRest::GetSolution(const ExecutionContext& context, else { if(conv.group_count > 1) + { gemm_status = CallGemmStridedBatched(handle, gemm_desc, w, @@ -1222,7 +1229,9 @@ ConvSolution GemmFwdRest::GetSolution(const ExecutionContext& context, y, out_offset, GemmBackend_t::rocblas); + } else + { gemm_status = CallGemm(handle, gemm_desc, w, @@ -1232,6 +1241,7 @@ ConvSolution GemmFwdRest::GetSolution(const ExecutionContext& context, y, out_offset, GemmBackend_t::rocblas); + } } if(gemm_status != miopenStatusSuccess) diff --git a/src/solver/gemm_bwd.cpp b/src/solver/gemm_bwd.cpp index df7d08304b..1a1074ed76 100644 --- a/src/solver/gemm_bwd.cpp +++ b/src/solver/gemm_bwd.cpp @@ -334,9 +334,11 @@ ConvSolution GemmBwd1x1_stride2::GetSolution(const ExecutionContext& context, } if((workspace_req > 0 && workspace == nullptr) || workspace_size < workspace_req) + { MIOPEN_THROW("Not enough workspace for GemmBwd1x1_stride2. (" + std::to_string(workspace_size) + " < " + std::to_string(workspace_req) + ")"); + } // Initialization required for upsampling in bwd direction float zero = 0.f; @@ -375,6 +377,7 @@ ConvSolution GemmBwd1x1_stride2::GetSolution(const ExecutionContext& context, if(conv_params.type == InvokeType::Run) { if(group_count > 1) + { gemm_status = CallGemmStridedBatched(handle, gemm_desc, w, @@ -384,7 +387,9 @@ ConvSolution GemmBwd1x1_stride2::GetSolution(const ExecutionContext& context, workspace, dyDesc_.GetElementSize(), GemmBackend_t::rocblas); + } else + { // tensors.dx = CNHW2NCHW(transpose(tensors.w) * NCHW2CNHW(tensors.dy)) gemm_status = CallGemm(handle, gemm_desc, @@ -395,6 +400,7 @@ ConvSolution GemmBwd1x1_stride2::GetSolution(const ExecutionContext& context, workspace, dyDesc_.GetElementSize(), GemmBackend_t::rocblas); + } } else { @@ -774,9 +780,11 @@ ConvSolution GemmBwdRest::GetSolution(const ExecutionContext& context, } if((workspace_req > 0 && workspace == nullptr) || workspace_size < workspace_req) + { 
MIOPEN_THROW("Not enough workspace for GemmBwdRest. (" + std::to_string(workspace_size) + " < " + std::to_string(workspace_req) + ")"); + } const auto gemm_desc = [&]() { auto tmp = tmp_gemm_desc; @@ -796,6 +804,7 @@ ConvSolution GemmBwdRest::GetSolution(const ExecutionContext& context, // tensors.dx = transpose(tensors.w) * tensors.dy if(group_count > 1) + { gemm_status = CallGemmStridedBatched(handle, gemm_desc, w, @@ -805,7 +814,9 @@ ConvSolution GemmBwdRest::GetSolution(const ExecutionContext& context, workspace, 0, GemmBackend_t::rocblas); + } else + { gemm_status = CallGemm(handle, gemm_desc, w, @@ -815,6 +826,7 @@ ConvSolution GemmBwdRest::GetSolution(const ExecutionContext& context, workspace, 0, GemmBackend_t::rocblas); + } if(gemm_status != miopenStatusSuccess) MIOPEN_THROW("GemmBwdRest execution failure."); diff --git a/src/sqlite_db.cpp b/src/sqlite_db.cpp index 5b42ea13a6..dd4c04b82b 100644 --- a/src/sqlite_db.cpp +++ b/src/sqlite_db.cpp @@ -252,7 +252,9 @@ int SQLite::Retry(std::function f, [[maybe_unused]] std::string filename) { ++tries; if(tries < 10) + { std::this_thread::yield(); + } else { auto slot = *exp_bo; diff --git a/src/tensor.cpp b/src/tensor.cpp index dbb3a10a34..61d10ec852 100644 --- a/src/tensor.cpp +++ b/src/tensor.cpp @@ -462,8 +462,10 @@ std::ostream& operator<<(std::ostream& stream, const TensorDescriptor& t) LogRange(stream << "{", t.lens, ", ") << "}, "; LogRange(stream << "{", t.strides, ", ") << "}, "; if(t.packed) + { stream << "packed" << ", "; + } if(t.cast_type) { diff --git a/test/cbna_inference.cpp b/test/cbna_inference.cpp index f4a732ff96..b941b68106 100644 --- a/test/cbna_inference.cpp +++ b/test/cbna_inference.cpp @@ -217,6 +217,7 @@ struct verify_forward_conv_bias_batchnorm_activ void fail(float = 0) const { if(bias_mode) + { if(doactive) { std::cerr << "Conv+Bias+BatchNorm+Activation Inference:" << std::endl; @@ -225,6 +226,7 @@ struct verify_forward_conv_bias_batchnorm_activ { std::cerr << "Conv+Bias+BatchNorm 
Inference:" << std::endl; } + } else { if(doactive) @@ -448,6 +450,7 @@ struct cbna_fusion_driver : test_driver if(miopenError != miopenStatusSuccess) { if(bias_mode) + { if(tactiv) { std::cerr << "Conv+Bias+BatchNorm+Activation Inference plan not supported." @@ -457,6 +460,7 @@ struct cbna_fusion_driver : test_driver { std::cerr << "Conv+Bias+BatchNorm Inference plan not supported." << std::endl; } + } else { if(tactiv) diff --git a/test/conv_common.hpp b/test/conv_common.hpp index a3c2e97463..f17d18cade 100644 --- a/test/conv_common.hpp +++ b/test/conv_common.hpp @@ -163,13 +163,21 @@ StringToLayoutType(std::string layout_str, int tensor_vect, int vector_length) if(tensor_vect == 0) { if(layout_str == "NCHW") + { return miopenTensorNCHW; + } else if(layout_str == "NHWC") + { return miopenTensorNHWC; + } else if(layout_str == "NDHWC") + { return miopenTensorNDHWC; + } else if(layout_str == "NCDHW") + { return miopenTensorNCDHW; + } else { MIOPEN_THROW("Non-vectorized tensor only support layout NCHW, NHWC, NCDHW and NDHWC"); @@ -301,8 +309,10 @@ struct conv_base if(preallocate) { for(auto i = 0; i < 3; ++i) + { miopenSetFindOptionPreallocatedTensor( options.get(), arguments[i].id, arguments[i].buffer); + } } EXPECT_EQUAL( @@ -571,7 +581,9 @@ struct verify_forward_conv : conv_base bool is_vect_c = weights.desc.GetVectorLength() > 1; rout.par_for_each([&](auto... is) { if(is_int8 && !is_vect_c) + { rout(is...) 
= Tout(double(rout(is...)) + double(this->bias)); + } else if(is_vect_c) { for(std::size_t i = 0; i < weights.desc.GetVectorLength(); i++) @@ -680,8 +692,10 @@ struct verify_forward_conv : conv_base const std::size_t ws_size = filter.GetBackwardSolutionWorkspaceSize( handle, input.desc, weights.desc, rout.desc, selected.solution_id); if(ws_size != selected.workspace_size) + { std::cout << "WARNING: workspace size mismatch: " << selected.workspace_size << " != " << ws_size << std::endl; + } } resize_workspace(handle, selected.workspace_size, ws, ws_dev); @@ -747,8 +761,10 @@ struct verify_forward_conv : conv_base const std::size_t ws_size = filter.GetForwardSolutionWorkspaceSize( handle, weights.desc, input.desc, rout.desc, selected.solution_id); if(ws_size != selected.workspace_size) + { std::cout << "WARNING: workspace size mismatch: " << selected.workspace_size << " != " << ws_size << std::endl; + } } resize_workspace(handle, selected.workspace_size, ws, ws_dev); @@ -2033,7 +2049,6 @@ struct conv_driver : test_driver void run() { - if(!input_dims.empty()) filter.spatialDim = get_spatial_dim(); else diff --git a/test/cpu_conv.hpp b/test/cpu_conv.hpp index f9001ef667..cf2427b0d4 100644 --- a/test/cpu_conv.hpp +++ b/test/cpu_conv.hpp @@ -182,10 +182,14 @@ void cpu_convolution_forward_impl(const tensor& in, }); }); if(vector_len > 1) + { out(out_k_id % vector_len, out_n_id, out_k_id / vector_len, out_spatial_id_pack...) = static_cast(acc); + } else + { out(out_n_id, out_k_id, out_spatial_id_pack...) 
= static_cast(acc); + } }); } diff --git a/test/cpu_reduce_util.hpp b/test/cpu_reduce_util.hpp index 99be88449d..aa258e51d8 100644 --- a/test/cpu_reduce_util.hpp +++ b/test/cpu_reduce_util.hpp @@ -158,7 +158,9 @@ ReduceOpFn2(miopenReduceTensorOp_t op_) changed = true; } else + { changed = false; + } }); case MIOPEN_REDUCE_TENSOR_MAX: @@ -170,7 +172,9 @@ ReduceOpFn2(miopenReduceTensorOp_t op_) changed = true; } else + { changed = false; + } }); case MIOPEN_REDUCE_TENSOR_ADD: @@ -215,7 +219,9 @@ static inline void binop_with_nan_check(miopenNanPropagation_t nanOpt, using std::isnan; if(nanOpt == MIOPEN_NOT_PROPAGATE_NAN) + { opReduce(accuVal, currVal); + } else { if(isnan(currVal)) @@ -288,6 +294,7 @@ get_all_indexes(const std::vector& dimLengths, int dim, std::vector& dimLengths, int dim, std::vector& probsDesc, T alpha_t1s = alpha[aidx_t1s]; T alpha_ts = i == 0 ? alpha_t1s : logaddexp_cpu(&alpha_t1s, &alpha_t1s1); if(i >= 2) + { if(lb_cur != blank_lb && lb_cur != lb_pre) alpha_ts = logaddexp_cpu(&alpha_ts, &alpha_t1s2); + } alpha_ts += probs_logits[pidx]; alpha[aidx_ts] = std::max(alpha_ts, T(NEGATIVE_CUTOFF_VAL)); @@ -237,12 +239,18 @@ void ctc_gradient_cpu(std::vector& probsDesc, T beta_temp = j % 2 == 0 ? beta_buff1[k1] : beta_buff0[k1]; if(k1 <= label_prime_len - 2) + { beta_temp = logaddexp_cpu( &beta_temp, j % 2 == 0 ? &(beta_buff1[k1 + 1]) : &(beta_buff0[k1 + 1])); + } if(k1 <= label_prime_len - 3) + { if(lb_cur != blank_lb && lb_cur != lb_pre) + { beta_temp = logaddexp_cpu( &beta_temp, j % 2 == 0 ? 
&(beta_buff1[k1 + 2]) : &(beta_buff0[k1 + 2])); + } + } beta_temp += probs_logits[pidx]; beta_temp = std::max(beta_temp, T(NEGATIVE_CUTOFF_VAL)); @@ -306,14 +314,20 @@ void launchCTCLoss(const int class_sz, Tref(NEGATIVE_CUTOFF_VAL)); if(is_softmax_applied) + { for(int j = 0; j < max_time_step * batch_size; j++) + { subvec_logsoftmax_cpu(&(probs[0]), &(workspace_cpu[problog_offset]), j * class_sz, j * class_sz, class_sz); + } + } else + { std::copy(probs.begin(), probs.end(), workspace_cpu.begin() + problog_offset); + } for(int j = 0; j < batch_size; j++) { @@ -402,8 +416,10 @@ void VerifyCTCLoss(std::vector& probsDesc, return; } if(j > 0) + { if(labels[labels_offset[i] + j] == labels[labels_offset[i] + j - 1]) repeat[i]++; + } } if(labelLengths[i] + repeat[i] > inputLengths[i]) @@ -501,8 +517,10 @@ void GetCTCLossWorkspaceSizeCPU(std::vector probsDesc, return; } if(j > 0) + { if(labels[labels_offset[i] + j] == labels[labels_offset[i] + j - 1]) repeat[i]++; + } } if(labelLengths[i] + repeat[i] > inputLengths[i]) @@ -727,8 +745,10 @@ struct ctc_driver : test_driver labelLengths[i] = prng::gen_A_to_B(1, labelLen - 1); for(int i = 0; i < batchSize; i++) + { if(inputLengths[i] < labelLengths[i] * 2 + 1) inputLengths[i] = labelLengths[i] * 2 + 1; + } int batch_sz = batchSize; int class_sz = numClass + 1; diff --git a/test/driver.hpp b/test/driver.hpp index cd1bb4cbe2..01f9acd85d 100644 --- a/test/driver.hpp +++ b/test/driver.hpp @@ -500,7 +500,9 @@ struct test_driver return dims; } else + { return {single}; + } }}; } @@ -548,10 +550,14 @@ struct test_driver return subvec; } else + { return dims; + } } else + { return {dims.front()}; + } }}; } @@ -670,13 +676,17 @@ struct test_driver auto cpu_nan_idx = find_idx(out_cpu, miopen::not_finite); if(cpu_nan_idx >= 0) + { std::cout << "Non finite number found in cpu at " << cpu_nan_idx << ": " << out_cpu[cpu_nan_idx] << std::endl; + } auto gpu_nan_idx = find_idx(out_gpu, miopen::not_finite); if(gpu_nan_idx >= 0) + { std::cout 
<< "Non finite number found in gpu at " << gpu_nan_idx << ": " << out_gpu[gpu_nan_idx] << std::endl; + } } else if(miopen::range_zero(out_cpu) and miopen::range_zero(out_gpu)) { @@ -711,10 +721,12 @@ struct test_driver [&](auto i) { // cppcheck-suppress knownConditionTrueFalse if(continue_) + { continue_ = this->compare_and_report( std::get(out_cpu), std::get(out_gpu), compare, report, [&](int) { return fail(i); }); + } }, is...); return continue_; @@ -957,10 +969,12 @@ void run_data(Iterator start, Iterator last, Action a) run_data(std::next(start), last, a); } else + { for(auto&& src : sources) { src([=] { run_data(std::next(start), last, a); }); } + } } struct keyword_set diff --git a/test/dropout.cpp b/test/dropout.cpp index 49522e1bd1..b22d8d9207 100644 --- a/test/dropout.cpp +++ b/test/dropout.cpp @@ -311,8 +311,10 @@ struct dropout_driver : test_driver if(mask) { for(size_t i = 0; i < in.desc.GetElementSize(); i++) + { reserveSpace[i] = static_cast(prng::gen_canonical() > dropout_rate); + } } DropoutDesc.dropout = dropout_rate; diff --git a/test/dropout_util.hpp b/test/dropout_util.hpp index 1505c77288..33a73f590f 100644 --- a/test/dropout_util.hpp +++ b/test/dropout_util.hpp @@ -309,9 +309,13 @@ void DropoutForwardVerify(miopen::Handle& handle, 256; for(size_t i0 = 0; i0 < in_len[0]; i0++) + { for(size_t i1 = 0; i1 < in_len[1]; i1++) + { for(size_t i2 = 0; i2 < in_len[2]; i2++) + { for(size_t i3 = 0; i3 < in_len[3]; i3++) + { for(size_t i4 = 0; i4 < in_len[4]; i4++) { size_t oi = out_offset + i0 * out_str[0] + i1 * out_str[1] + @@ -324,15 +328,21 @@ void DropoutForwardVerify(miopen::Handle& handle, size_t ri = rsvsp_offset + si; if(!use_mask) + { reservespace[ri] = uniform_distribution_emu(xorwow_next(&states[si % glb_sz])) > dropout_rate; + } output[oi] = bool(reservespace[ri]) && !miopen::float_equal(dropout_rate, 1.0) ? 
static_cast(input[ii] / (1 - dropout_rate)) : T(0); } + } + } + } + } } template diff --git a/test/gpu_nchw_nhwc_transpose.cpp b/test/gpu_nchw_nhwc_transpose.cpp index d12f0162f1..5f81bc64a6 100644 --- a/test/gpu_nchw_nhwc_transpose.cpp +++ b/test/gpu_nchw_nhwc_transpose.cpp @@ -140,18 +140,19 @@ enum tensor_layout_t std::string tensor_layout_to_string(tensor_layout_t layout) { - std::string layout_string("N/A"); - if(layout == miopen_tensor_layout_nchw) - layout_string = "NCHW"; - else if(layout == miopen_tensor_layout_ncdhw) - layout_string = "NCDHW"; - else if(layout == miopen_tensor_layout_nhwc) - layout_string = "NHWC"; - else if(layout == miopen_tensor_layout_ndhwc) - layout_string = "NDHWC"; - else + switch(layout) + { + case miopen_tensor_layout_nchw: + return "NCHW"; + case miopen_tensor_layout_ncdhw: + return "NCDHW"; + case miopen_tensor_layout_nhwc: + return "NHWC"; + case miopen_tensor_layout_ndhwc: + return "NDHWC"; + default: MIOPEN_THROW("Unsupported tensor layout"); - return layout_string; + } } template diff --git a/test/gpu_reference_kernel.cpp b/test/gpu_reference_kernel.cpp index a6a0b1e2bc..272c6c07a8 100644 --- a/test/gpu_reference_kernel.cpp +++ b/test/gpu_reference_kernel.cpp @@ -58,18 +58,19 @@ enum tensor_layout_t std::string tensor_layout_to_string(tensor_layout_t layout) { - std::string layout_string("N/A"); - if(layout == miopen_tensor_layout_nchw) - layout_string = "NCHW"; - else if(layout == miopen_tensor_layout_ncdhw) - layout_string = "NCDHW"; - else if(layout == miopen_tensor_layout_nhwc) - layout_string = "NHWC"; - else if(layout == miopen_tensor_layout_ndhwc) - layout_string = "NDHWC"; - else + switch(layout) + { + case miopen_tensor_layout_nchw: + return "NCHW"; + case miopen_tensor_layout_ncdhw: + return "NCDHW"; + case miopen_tensor_layout_nhwc: + return "NHWC"; + case miopen_tensor_layout_ndhwc: + return "NDHWC"; + default: MIOPEN_THROW("Unsupported tensor layout"); - return layout_string; + } } struct 
gpu_reference_kernel_base diff --git a/test/handle_test.cpp b/test/handle_test.cpp index 1ab319d6fb..2548a7ad4b 100644 --- a/test/handle_test.cpp +++ b/test/handle_test.cpp @@ -50,6 +50,7 @@ enum kernel_type_t std::string Write2s(kernel_type_t kern_type) { if(kern_type == miopenHIPKernelType) + { return "#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS\n" "#include \n" #if WORKAROUND_SWDEV_257056_PCH_MISSING_MACROS @@ -76,10 +77,15 @@ std::string Write2s(kernel_type_t kern_type) " data[num] *= 2;\n" "}\n" "}\n"; + } else if(kern_type == miopenOpenCLKernelType) + { return "__kernel void write(__global int* data) { data[get_global_id(0)] *= 2; }\n"; + } else + { MIOPEN_THROW("Unsupported kernel type"); + } } void run2s(miopen::Handle& h, std::size_t n, kernel_type_t kern_type) @@ -87,9 +93,12 @@ void run2s(miopen::Handle& h, std::size_t n, kernel_type_t kern_type) std::vector data_in(n, 1); auto data_dev = h.Write(data_in); if(kern_type == miopenOpenCLKernelType) + { h.AddKernel("GEMM", "", Write2s(miopenOpenCLKernelType), "write", {n, 1, 1}, {n, 1, 1}, "")( data_dev.get()); + } else if(kern_type == miopenHIPKernelType) + { h.AddKernel("NoAlgo", "", "test_hip.cpp", @@ -100,8 +109,11 @@ void run2s(miopen::Handle& h, std::size_t n, kernel_type_t kern_type) 0, false, Write2s(miopenHIPKernelType))(data_dev.get()); + } else + { MIOPEN_THROW("Unsupported kernel type"); + } std::fill(data_in.begin(), data_in.end(), 2); auto data_out = h.Read(data_dev, n); @@ -123,8 +135,11 @@ void test_multithreads(kernel_type_t kern_type, const bool with_stream = false) std::string WriteError(kernel_type_t kern_type) { if(kern_type == miopenOpenCLKernelType) + { return "__kernel void write(__global int* data) { data[i] = 0; }\n"; + } else if(kern_type == miopenHIPKernelType) + { return "#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS\n" "#include \n" "#endif\n" @@ -133,8 +148,11 @@ std::string WriteError(kernel_type_t kern_type) " data[num] *= 2;\n" "}\n" "}\n"; + } else + { 
MIOPEN_THROW("Unsupported kernel type"); + } } void test_errors(kernel_type_t kern_type) @@ -191,8 +209,11 @@ void test_errors(kernel_type_t kern_type) std::string WriteNop(kernel_type_t kern_type) { if(kern_type == miopenOpenCLKernelType) + { return "__kernel void write(__global int* data) {}\n"; + } else if(kern_type == miopenHIPKernelType) + { return "#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS\n" "#include \n" "#endif\n" @@ -200,8 +221,11 @@ std::string WriteNop(kernel_type_t kern_type) "__global__ void write(int* data) {\n" "}\n" "}\n"; + } else + { MIOPEN_THROW("Unsupported kernel type"); + } } void test_warnings(kernel_type_t kern_type) @@ -209,11 +233,14 @@ void test_warnings(kernel_type_t kern_type) auto&& h = get_handle(); #if MIOPEN_BUILD_DEV if(kern_type == miopenOpenCLKernelType) + { EXPECT(throws([&] { h.AddKernel("GEMM", "", WriteNop(kern_type), "write", {1, 1, 1}, {1, 1, 1}, ""); MIOPEN_LOG_E("FAILED: Build of the OpenCL kernel should produce warnings"); })); + } else if(kern_type == miopenHIPKernelType) + { EXPECT(throws([&] { h.AddKernel("NoAlgo", "", @@ -227,6 +254,7 @@ void test_warnings(kernel_type_t kern_type) WriteNop(kern_type)); MIOPEN_LOG_E("FAILED: Build of the HIP kernel 'nop_hip.cpp' should produce warnings"); })); + } #else (void)kern_type; (void)h; // To silence warnings. 
diff --git a/test/na_inference.cpp b/test/na_inference.cpp index 129c970a3f..ea76b26079 100644 --- a/test/na_inference.cpp +++ b/test/na_inference.cpp @@ -214,6 +214,7 @@ struct na_fusion_driver : test_driver { amode = transform_mode(amode); + // NOLINTBEGIN(*-braces-around-statements) if(amode == "PASSTHRU") activ_mode = miopenActivationPASTHRU; else if(amode == "LOGISTIC") @@ -234,6 +235,7 @@ struct na_fusion_driver : test_driver activ_mode = miopenActivationLEAKYRELU; else if(amode == "ELU") activ_mode = miopenActivationELU; + // NOLINTEND(*-braces-around-statements) int input_c, input_h, input_w; std::tie(std::ignore, input_c, input_h, input_w) = miopen::tien<4>(input.desc.GetLengths()); diff --git a/test/na_train.cpp b/test/na_train.cpp index 24529058ed..c6d585964c 100644 --- a/test/na_train.cpp +++ b/test/na_train.cpp @@ -779,6 +779,7 @@ struct na_fusion_driver : test_driver { amode = transform_mode(amode); + // NOLINTBEGIN(*-braces-around-statements) if(amode == "PASSTHRU") activ_mode = miopenActivationPASTHRU; else if(amode == "LOGISTIC") @@ -799,6 +800,7 @@ struct na_fusion_driver : test_driver activ_mode = miopenActivationLEAKYRELU; else if(amode == "ELU") activ_mode = miopenActivationELU; + // NOLINTEND(*-braces-around-statements) std::size_t input_n, input_c, input_h, input_w; std::tie(input_n, input_c, input_h, input_w) = miopen::tien<4>(input.desc.GetLengths()); diff --git a/test/perfdb.cpp b/test/perfdb.cpp index 4cc0a7c790..198e81f0df 100644 --- a/test/perfdb.cpp +++ b/test/perfdb.cpp @@ -937,10 +937,12 @@ class DbMultiThreadedReadTest : public DbTest std::unique_lock lock(mutex); for(auto i = 0u; i < DBMultiThreadedTestWork::threads_count; i++) + { threads.emplace_back([c, &mutex, i]() { (void)std::unique_lock(mutex); DBMultiThreadedTestWork::ReadWorkItem(i, c, "mt"); }); + } } MIOPEN_LOG_CUSTOM(LoggingLevel::Default, "Test", "Waiting for test threads..."); @@ -983,8 +985,10 @@ class DbMultiProcessTest : public DbTest ArgsHelper::db_class_arg + " " 
+ ArgsHelper::db_class::Get(); if(thread_logs_root()) + { command += std::string(" --") + ArgsHelper::logs_path_arg + " " + *thread_logs_root(); + } if(full_set()) command += " --all"; @@ -1067,8 +1071,10 @@ class DbMultiProcessReadTest : public DbTest ArgsHelper::db_class::Get(); if(thread_logs_root()) + { command += std::string(" --") + ArgsHelper::logs_path_arg + " " + *thread_logs_root(); + } if(full_set()) command += " --all"; @@ -1348,10 +1354,12 @@ class DbMultiFileMultiThreadedReadTest : public DbMultiFileTest std::unique_lock lock(mutex); for(auto i = 0u; i < DBMultiThreadedTestWork::threads_count; i++) + { threads.emplace_back([c, &mutex, i]() { (void)std::unique_lock(mutex); DBMultiThreadedTestWork::ReadWorkItem(i, c, "mt"); }); + } } MIOPEN_LOG_CUSTOM(LoggingLevel::Default, "Test", "Waiting for test threads..."); @@ -1389,10 +1397,12 @@ class DbMultiFileMultiThreadedTest : public DbMultiFileTest std::unique_lock lock(mutex); for(auto i = 0u; i < DBMultiThreadedTestWork::threads_count; i++) + { threads.emplace_back([c, &mutex, i]() { (void)std::unique_lock(mutex); DBMultiThreadedTestWork::WorkItem(i, c, "mt"); }); + } } MIOPEN_LOG_CUSTOM(LoggingLevel::Default, "Test", "Waiting for test threads..."); @@ -1438,10 +1448,14 @@ struct PerfDbDriver : test_driver if(mt_child_id >= 0) { if(mt_child_db_class == ArgsHelper::db_class::db) + { DbMultiProcessTest::WorkItem( mt_child_id, mt_child_db_path, test_write); + } else if(mt_child_db_class == ArgsHelper::db_class::ramdb) + { DbMultiProcessTest::WorkItem(mt_child_id, mt_child_db_path, test_write); + } return; } diff --git a/test/pooling_common.hpp b/test/pooling_common.hpp index 9058d34321..ae55b2e133 100644 --- a/test/pooling_common.hpp +++ b/test/pooling_common.hpp @@ -105,15 +105,15 @@ struct pooling_operators return (m); } else + { return x + y; + } } double final(double x, double y) { if(filter.GetMode() == miopenPoolingMax) - { return (x); - } else return x / y; } @@ -703,6 +703,7 @@ struct pooling_driver 
: test_driver } for(int i = 0; i < spt_dim; i++) + { if(lens[i] > (input_desc.GetLengths()[i + 2] + static_cast(2) * pads[i])) { show_command(); @@ -711,6 +712,7 @@ struct pooling_driver : test_driver << std::endl; return; } + } if(full_set) { diff --git a/test/reduce_test.cpp b/test/reduce_test.cpp index bb2aa17e07..3ca771fc64 100644 --- a/test/reduce_test.cpp +++ b/test/reduce_test.cpp @@ -202,10 +202,12 @@ struct verify_reduce_with_indices std::vector toReduceDims; for(int i = 0; i < inLengths.size(); i++) + { if(inLengths[i] == outLengths[i]) invariantDims.push_back(i); else toReduceDims.push_back(i); + } invariantLengths.resize(invariantDims.size()); for(int i = 0; i < invariantDims.size(); i++) @@ -246,7 +248,7 @@ struct verify_reduce_with_indices int currIndex = get_flatten_offset(inLengths, src_index); binop_with_nan_check2(nanOpt, opReduce, accuVal, currVal, accuIndex, currIndex); - }; + } // scale the accumulated value if(!float_equal_one(alpha)) @@ -256,7 +258,7 @@ struct verify_reduce_with_indices if(!float_equal_zero(beta)) { accuVal += convert_type(output.data[0]) * convert_type(beta); - }; + } // store the reduced value to dst location res.data[0] = convert_type(accuVal); @@ -317,8 +319,10 @@ struct verify_reduce_with_indices // scale the prior dst value and add it to the accumulated value if(!float_equal_zero(beta)) + { accuVal += convert_type(output.data[dst_offset]) * convert_type(beta); + } // store the reduced value to dst location res.data[dst_offset] = convert_type(accuVal); @@ -495,10 +499,12 @@ struct verify_reduce_no_indices std::vector toReduceDims; for(int i = 0; i < inLengths.size(); i++) + { if(inLengths[i] == outLengths[i]) invariantDims.push_back(i); else toReduceDims.push_back(i); + } invariantLengths.resize(invariantDims.size()); for(int i = 0; i < invariantDims.size(); i++) @@ -604,8 +610,10 @@ struct verify_reduce_no_indices // scale the prior dst value and add it to the accumulated value if(!float_equal_zero(beta)) + { accuVal += 
convert_type(output.data[dst_offset]) * convert_type(beta); + } // store the reduced value to dst location res.data[dst_offset] = convert_type(accuVal); @@ -715,13 +723,17 @@ struct reduce_driver : test_driver std::vector> get_tensor_lengths() { if(std::is_same::value) + { return { {4, 3, 60, 50}, }; + } else + { return { {64, 3, 280, 81}, }; + } } std::vector> get_toreduce_dims() @@ -758,9 +770,13 @@ struct reduce_driver : test_driver { if(reduceOp == MIOPEN_REDUCE_TENSOR_MIN || reduceOp == MIOPEN_REDUCE_TENSOR_MAX || reduceOp == MIOPEN_REDUCE_TENSOR_AMAX) + { compTypeVal = static_cast(miopenHalf); // let compType be same as the data type + } else + { compTypeVal = static_cast(miopenFloat); + } } miopen::ReduceTensorDescriptor reduceDesc( @@ -797,15 +813,21 @@ struct reduce_driver : test_driver uint64_t max_value; if(reduceOp == MIOPEN_REDUCE_TENSOR_MUL) + { max_value = miopen_type{} == miopenHalf ? 41 : miopen_type{} == miopenInt8 ? 127 : 111; + } else if(reduceOp == MIOPEN_REDUCE_TENSOR_NORM1 || reduceOp == MIOPEN_REDUCE_TENSOR_NORM2) + { max_value = 3; + } else + { max_value = miopen_type{} == miopenHalf ? 13 : miopen_type{} == miopenInt8 ? 127 : 999; + } // default data gneration (used by MIN/MAX) auto gen_value = [&](auto... 
is) { @@ -856,7 +878,9 @@ struct reduce_driver : test_driver if(reduceOp == MIOPEN_REDUCE_TENSOR_ADD || reduceOp == MIOPEN_REDUCE_TENSOR_AVG) this->tolerance = 80 * 10; if(reduceOp == MIOPEN_REDUCE_TENSOR_MUL) + { this->tolerance = 80 * 300; + } else if(reduceOp == MIOPEN_REDUCE_TENSOR_NORM1 || reduceOp == MIOPEN_REDUCE_TENSOR_NORM2) { if(toReduceDims.size() == 4) diff --git a/test/soft_max.cpp b/test/soft_max.cpp index d9f4573c02..d15c1f920a 100644 --- a/test/soft_max.cpp +++ b/test/soft_max.cpp @@ -96,6 +96,7 @@ struct verify_forward_sofmax miopen::tien<4>(out.desc.GetStrides()); if(mode == MIOPEN_SOFTMAX_MODE_INSTANCE) + { par_ford(in_n)([&](int o) { if(algo == MIOPEN_SOFTMAX_FAST) { @@ -156,7 +157,9 @@ struct verify_forward_sofmax } } }); + } else + { par_ford(in_n, in_h, in_w)([&](int o, int i, int j) { if(algo == MIOPEN_SOFTMAX_FAST) { @@ -217,6 +220,7 @@ struct verify_forward_sofmax } } }); + } return out; } @@ -286,57 +290,77 @@ struct verify_backward_sofmax miopen::tien<4>(dout.desc.GetStrides()); if(mode == MIOPEN_SOFTMAX_MODE_INSTANCE) + { par_ford(in_n)([&](int o) { double sum = 0; ford(in_c, in_h, in_w)([&](int c, int i, int j) { if(algo == MIOPEN_SOFTMAX_LOG) + { sum += dout[o * out_nstr + c * out_cstr + i * out_hstr + j]; + } else + { sum += out[o * out_nstr + c * out_cstr + i * out_hstr + j] * dout[o * out_nstr + c * out_cstr + i * out_hstr + j]; + } }); ford(in_c, in_h, in_w)([&](int c, int i, int j) { if(algo == MIOPEN_SOFTMAX_LOG) + { din[o * in_nstr + c * in_cstr + i * in_hstr + j] = T(alpha * (dout[o * out_nstr + c * out_cstr + i * out_hstr + j] - sum * std::exp( out[o * out_nstr + c * out_cstr + i * out_hstr + j])) + beta * din[o * in_nstr + c * in_cstr + i * in_hstr + j]); + } else + { din[o * in_nstr + c * in_cstr + i * in_hstr + j] = alpha * (out[o * out_nstr + c * out_cstr + i * out_hstr + j] * (dout[o * out_nstr + c * out_cstr + i * out_hstr + j] - sum)) + beta * din[o * in_nstr + c * in_cstr + i * in_hstr + j]; + } }); }); + } else + { 
par_ford(in_n, in_h, in_w)([&](int o, int i, int j) { double sum = 0; ford(in_c)([&](int c) { if(algo == MIOPEN_SOFTMAX_LOG) + { sum += dout[o * out_nstr + c * out_cstr + i * out_hstr + j]; + } else + { sum += out[o * out_nstr + c * out_cstr + i * out_hstr + j] * dout[o * out_nstr + c * out_cstr + i * out_hstr + j]; + } }); ford(in_c)([&](int c) { if(algo == MIOPEN_SOFTMAX_LOG) + { din[o * in_nstr + c * in_cstr + i * in_hstr + j] = alpha * (dout[o * out_nstr + c * out_cstr + i * out_hstr + j] - sum * std::exp( out[o * out_nstr + c * out_cstr + i * out_hstr + j])) + beta * din[o * in_nstr + c * in_cstr + i * in_hstr + j]; + } else + { din[o * in_nstr + c * in_cstr + i * in_hstr + j] = alpha * (out[o * out_nstr + c * out_cstr + i * out_hstr + j] * (dout[o * out_nstr + c * out_cstr + i * out_hstr + j] - sum)) + beta * din[o * in_nstr + c * in_cstr + i * in_hstr + j]; + } }); }); + } return din; } diff --git a/test/sqlite_perfdb.cpp b/test/sqlite_perfdb.cpp index f435c10b35..4607c57d11 100644 --- a/test/sqlite_perfdb.cpp +++ b/test/sqlite_perfdb.cpp @@ -763,10 +763,12 @@ class DbMultiThreadedTest : public DbTest std::unique_lock lock(mutex); for(auto i = 0u; i < DBMultiThreadedTestWork::threads_count; i++) + { threads.emplace_back([c, &mutex, i]() { (void)std::unique_lock(mutex); DBMultiThreadedTestWork::WorkItem(i, c, "mt"); }); + } } std::cout << "Waiting for test threads..." << std::endl; @@ -800,10 +802,12 @@ class DbMultiThreadedReadTest : public DbTest std::unique_lock lock(mutex); for(auto i = 0u; i < DBMultiThreadedTestWork::threads_count; i++) + { threads.emplace_back([c, &mutex, i]() { (void)std::unique_lock(mutex); DBMultiThreadedTestWork::ReadWorkItem(i, c, "mt"); }); + } } std::cout << "Waiting for test threads..." 
<< std::endl; @@ -843,8 +847,10 @@ class DbMultiProcessTest : public DbTest std::to_string(id++) + " --" + path_arg + " " + temp_file.Path(); if(thread_logs_root()) + { command += std::string(" --") + DbMultiThreadedTest::logs_path_arg + " " + *thread_logs_root(); + } if(full_set()) command += " --all"; @@ -920,8 +926,10 @@ class DbMultiProcessReadTest : public DbTest p; if(thread_logs_root()) + { command += std::string(" --") + DbMultiThreadedTest::logs_path_arg + " " + *thread_logs_root(); + } if(full_set()) command += " --all"; @@ -1183,10 +1191,12 @@ class DbMultiFileMultiThreadedReadTest : public DbMultiFileTest std::unique_lock lock(mutex); for(auto i = 0u; i < DBMultiThreadedTestWork::threads_count; i++) + { threads.emplace_back([c, &mutex, i]() { (void)std::unique_lock(mutex); DBMultiThreadedTestWork::ReadWorkItem(i, c, "mt"); }); + } } std::cout << "Waiting for test threads..." << std::endl; @@ -1221,10 +1231,12 @@ class DbMultiFileMultiThreadedTest : public DbMultiFileTest std::unique_lock lock(mutex); for(auto i = 0u; i < DBMultiThreadedTestWork::threads_count; i++) + { threads.emplace_back([c, &mutex, i]() { (void)std::unique_lock(mutex); DBMultiThreadedTestWork::WorkItem(i, c, "mt"); }); + } } std::cout << "Waiting for test threads..." 
<< std::endl; diff --git a/test/tensor_reorder.cpp b/test/tensor_reorder.cpp index fcf3420320..2955f5e662 100644 --- a/test/tensor_reorder.cpp +++ b/test/tensor_reorder.cpp @@ -138,24 +138,26 @@ enum tensor_layout_t std::string tensor_layout_to_string(tensor_layout_t layout) { - std::string layout_string("N/A"); - if(layout == miopen_tensor_layout_nchw) - layout_string = "NCHW"; - else if(layout == miopen_tensor_layout_ncdhw) - layout_string = "NCDHW"; - else if(layout == miopen_tensor_layout_nhwc) - layout_string = "NHWC"; - else if(layout == miopen_tensor_layout_ndhwc) - layout_string = "NDHWC"; - else + switch(layout) + { + case miopen_tensor_layout_nchw: + return "NCHW"; + case miopen_tensor_layout_ncdhw: + return "NCDHW"; + case miopen_tensor_layout_nhwc: + return "NHWC"; + case miopen_tensor_layout_ndhwc: + return "NDHWC"; + default: MIOPEN_THROW("Unsupported tensor layout"); - return layout_string; + } } std::string supported_reorder_to_string(uint32_t order_0, uint32_t order_1, uint32_t order_2, uint32_t order_3) { std::string layout_string("N/A"); + // NOLINTBEGIN(*-braces-around-statements) if((order_0 == 0) && (order_1 == 1) && (order_2 == 3) && (order_3 == 2)) layout_string = "r0132"; else if((order_0 == 0) && (order_1 == 2) && (order_2 == 1) && (order_3 == 3)) @@ -204,6 +206,7 @@ supported_reorder_to_string(uint32_t order_0, uint32_t order_1, uint32_t order_2 layout_string = "r3210"; else MIOPEN_THROW("Unsupported reorder layout"); + // NOLINTEND(*-braces-around-statements) return layout_string; } diff --git a/test/tensor_trans.cpp b/test/tensor_trans.cpp index 6e9e533df0..df94e427fd 100644 --- a/test/tensor_trans.cpp +++ b/test/tensor_trans.cpp @@ -60,8 +60,11 @@ void tensor_trans(const tensor& src, int nhw_out = n * hw_out; for(int n_i = 0; n_i < n; n_i++) + { for(int c_i = 0; c_i < c; c_i++) + { for(int h_i = 0; h_i < (forward ? h_out : h_in); h_i++) + { for(int w_i = 0; w_i < (forward ? 
w_out : w_in); w_i++) { int in_offset = @@ -74,6 +77,9 @@ void tensor_trans(const tensor& src, dst.data[out_offset] = src.data[in_offset]; } + } + } + } } template @@ -124,6 +130,7 @@ struct verify_tensor_trans type = miopenHalf; if(forward) + { miopen::transpose_NCHW2CNHW(handle, n, c, @@ -138,7 +145,9 @@ struct verify_tensor_trans stride_h, stride_w, type); + } else + { miopen::transpose_CNHW2NCHW(handle, n, c, @@ -153,6 +162,7 @@ struct verify_tensor_trans stride_h, stride_w, type); + } r.data = handle.Read(dst_dev, dst.data.size()); return r; diff --git a/test/tensor_util.hpp b/test/tensor_util.hpp index d8a24872b4..c5650ca4f0 100644 --- a/test/tensor_util.hpp +++ b/test/tensor_util.hpp @@ -61,10 +61,14 @@ void operate_over_subtensor_impl(const data_operator_t& r_data_operator, for(int i = 0; i < rSubDesc.GetLengths()[current_dim]; ++i) { if(current_dim == max_dim) + { r_data_operator(rSuperTensor[index]); + } else + { operate_over_subtensor_impl( r_data_operator, rSuperTensor, rSubDesc, current_dim + 1, index); + } index += current_stride; } diff --git a/test/tensor_vec.cpp b/test/tensor_vec.cpp index 98e059c8eb..46d62e72ed 100644 --- a/test/tensor_vec.cpp +++ b/test/tensor_vec.cpp @@ -132,8 +132,10 @@ void tensor_vec_backward( int in_offset = c_i * in_nhw + n_hi_i * in_hw + h_i * in_w + w_i * vec_size + n_lo_i; if(n_i < n_dst) + { dst.data[out_offset] = T(alpha * float(src.data[in_offset]) + beta * float(dst.data[out_offset])); + } } else { @@ -143,8 +145,10 @@ void tensor_vec_backward( int in_offset = n_i * in_chw + c_hi_i * in_hw + h_i * in_w + w_i * vec_size + c_lo_i; if(c_i < c_dst) + { dst.data[out_offset] = T(alpha * float(src.data[in_offset]) + beta * float(dst.data[out_offset])); + } } } } @@ -340,13 +344,17 @@ struct tensor_vec_driver : test_driver } if(trans) + { dst_lens[0] = (dst_lens[0] % vec_size != 0) ? dst_lens[0] + (vec_size - dst_lens[0] % vec_size) : dst_lens[0]; + } else + { dst_lens[1] = (dst_lens[1] % vec_size != 0) ? 
dst_lens[1] + (vec_size - dst_lens[1] % vec_size) : dst_lens[1]; + } uint64_t max_value = miopen_type{} == miopenHalf ? 5 : miopen_type{} == miopenInt8 ? 127 From 8dd286e1a5ca61feeb455bf9c7ce030b98f4c01a Mon Sep 17 00:00:00 2001 From: Evgenii Averin <86725875+averinevg@users.noreply.github.com> Date: Fri, 27 Oct 2023 14:50:50 +0200 Subject: [PATCH 2/6] Fix reducetensor_api --- src/reducetensor_api.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/reducetensor_api.cpp b/src/reducetensor_api.cpp index 6551c3a803..d180b45711 100644 --- a/src/reducetensor_api.cpp +++ b/src/reducetensor_api.cpp @@ -45,6 +45,12 @@ static void LogCmdRedux(const miopen::ReduceTensorDescriptor reduceTensorDesc, case miopenHalf: ss << "reducefp16"; break; + case miopenFloat: + ss << "reducefp"; + break; + case miopenInt32: + ss << "reduceint"; + break; case miopenInt8: ss << "reduceint8"; break; @@ -54,6 +60,14 @@ static void LogCmdRedux(const miopen::ReduceTensorDescriptor reduceTensorDesc, case miopenDouble: ss << "reducefp64"; break; +#ifdef MIOPEN_BETA_API + case miopenFloat8: + ss << "reducefp8"; + break; + case miopenBFloat8: + ss << "reducebfp8"; + break; +#endif default: ss << "reduce"; } From 3b63d77b0901a2a664e5a5da11c274f26a61f308 Mon Sep 17 00:00:00 2001 From: Evgenii Averin <86725875+averinevg@users.noreply.github.com> Date: Fri, 27 Oct 2023 14:53:42 +0200 Subject: [PATCH 3/6] Fix formatting --- src/logger.cpp | 27 ++++++++--------------- src/reducetensor_api.cpp | 35 ++++++++--------------------- src/rnn_api.cpp | 38 +++++++++----------------------- test/gpu_nchw_nhwc_transpose.cpp | 15 +++++-------- test/gpu_reference_kernel.cpp | 15 +++++-------- test/tensor_reorder.cpp | 15 +++++-------- 6 files changed, 43 insertions(+), 102 deletions(-) diff --git a/src/logger.cpp b/src/logger.cpp index e39a43ea2f..9a82f1c1ac 100644 --- a/src/logger.cpp +++ b/src/logger.cpp @@ -153,24 +153,15 @@ const char* LoggingLevelToCString(const LoggingLevel level) { 
switch(level) { - case LoggingLevel::Default: - return "Default"; - case LoggingLevel::Quiet: - return "Quiet"; - case LoggingLevel::Fatal: - return "Fatal"; - case LoggingLevel::Error: - return "Error"; - case LoggingLevel::Warning: - return "Warning"; - case LoggingLevel::Info: - return "Info"; - case LoggingLevel::Info2: - return "Info2"; - case LoggingLevel::Trace: - return "Trace"; - default: - return ""; + case LoggingLevel::Default: return "Default"; + case LoggingLevel::Quiet: return "Quiet"; + case LoggingLevel::Fatal: return "Fatal"; + case LoggingLevel::Error: return "Error"; + case LoggingLevel::Warning: return "Warning"; + case LoggingLevel::Info: return "Info"; + case LoggingLevel::Info2: return "Info2"; + case LoggingLevel::Trace: return "Trace"; + default: return ""; } } bool IsLoggingCmd() diff --git a/src/reducetensor_api.cpp b/src/reducetensor_api.cpp index d180b45711..33a4a49d38 100644 --- a/src/reducetensor_api.cpp +++ b/src/reducetensor_api.cpp @@ -42,34 +42,17 @@ static void LogCmdRedux(const miopen::ReduceTensorDescriptor reduceTensorDesc, switch(aDesc.GetType()) { - case miopenHalf: - ss << "reducefp16"; - break; - case miopenFloat: - ss << "reducefp"; - break; - case miopenInt32: - ss << "reduceint"; - break; - case miopenInt8: - ss << "reduceint8"; - break; - case miopenBFloat16: - ss << "reducebfp16"; - break; - case miopenDouble: - ss << "reducefp64"; - break; + case miopenHalf: ss << "reducefp16"; break; + case miopenFloat: ss << "reducefp"; break; + case miopenInt32: ss << "reduceint"; break; + case miopenInt8: ss << "reduceint8"; break; + case miopenBFloat16: ss << "reducebfp16"; break; + case miopenDouble: ss << "reducefp64"; break; #ifdef MIOPEN_BETA_API - case miopenFloat8: - ss << "reducefp8"; - break; - case miopenBFloat8: - ss << "reducebfp8"; - break; + case miopenFloat8: ss << "reducefp8"; break; + case miopenBFloat8: ss << "reducebfp8"; break; #endif - default: - ss << "reduce"; + default: ss << "reduce"; } ss << " -A " << 
*reinterpret_cast(alpha); diff --git a/src/rnn_api.cpp b/src/rnn_api.cpp index 605d6e7428..3f83c16826 100644 --- a/src/rnn_api.cpp +++ b/src/rnn_api.cpp @@ -540,20 +540,11 @@ static void LogCmdRNN(const miopenTensorDescriptor_t* xDesc, switch(rnnMode) { - case miopenRNNRELU: - mode = "relu"; - break; - case miopenRNNTANH: - mode = "tanh"; - break; - case miopenLSTM: - mode = "lstm"; - break; - case miopenGRU: - mode = "gru"; - break; - default: - mode = ""; + case miopenRNNRELU: mode = "relu"; break; + case miopenRNNTANH: mode = "tanh"; break; + case miopenLSTM: mode = "lstm"; break; + case miopenGRU: mode = "gru"; break; + default: mode = ""; } std::string batch_sz; @@ -613,20 +604,11 @@ static void LogCmdRNN(const miopenSeqTensorDescriptor_t xDesc, switch(rnnMode) { - case miopenRNNRELU: - mode = "relu"; - break; - case miopenRNNTANH: - mode = "tanh"; - break; - case miopenLSTM: - mode = "lstm"; - break; - case miopenGRU: - mode = "gru"; - break; - default: - mode = ""; + case miopenRNNRELU: mode = "relu"; break; + case miopenRNNTANH: mode = "tanh"; break; + case miopenLSTM: mode = "lstm"; break; + case miopenGRU: mode = "gru"; break; + default: mode = ""; } std::string seq_len_array; diff --git a/test/gpu_nchw_nhwc_transpose.cpp b/test/gpu_nchw_nhwc_transpose.cpp index 5f81bc64a6..a8bdbdaaa1 100644 --- a/test/gpu_nchw_nhwc_transpose.cpp +++ b/test/gpu_nchw_nhwc_transpose.cpp @@ -142,16 +142,11 @@ std::string tensor_layout_to_string(tensor_layout_t layout) { switch(layout) { - case miopen_tensor_layout_nchw: - return "NCHW"; - case miopen_tensor_layout_ncdhw: - return "NCDHW"; - case miopen_tensor_layout_nhwc: - return "NHWC"; - case miopen_tensor_layout_ndhwc: - return "NDHWC"; - default: - MIOPEN_THROW("Unsupported tensor layout"); + case miopen_tensor_layout_nchw: return "NCHW"; + case miopen_tensor_layout_ncdhw: return "NCDHW"; + case miopen_tensor_layout_nhwc: return "NHWC"; + case miopen_tensor_layout_ndhwc: return "NDHWC"; + default: 
MIOPEN_THROW("Unsupported tensor layout"); } } diff --git a/test/gpu_reference_kernel.cpp b/test/gpu_reference_kernel.cpp index b20a3f5b1c..628529daa0 100644 --- a/test/gpu_reference_kernel.cpp +++ b/test/gpu_reference_kernel.cpp @@ -60,16 +60,11 @@ std::string tensor_layout_to_string(tensor_layout_t layout) { switch(layout) { - case miopen_tensor_layout_nchw: - return "NCHW"; - case miopen_tensor_layout_ncdhw: - return "NCDHW"; - case miopen_tensor_layout_nhwc: - return "NHWC"; - case miopen_tensor_layout_ndhwc: - return "NDHWC"; - default: - MIOPEN_THROW("Unsupported tensor layout"); + case miopen_tensor_layout_nchw: return "NCHW"; + case miopen_tensor_layout_ncdhw: return "NCDHW"; + case miopen_tensor_layout_nhwc: return "NHWC"; + case miopen_tensor_layout_ndhwc: return "NDHWC"; + default: MIOPEN_THROW("Unsupported tensor layout"); } } diff --git a/test/tensor_reorder.cpp b/test/tensor_reorder.cpp index 2955f5e662..dc1a38f508 100644 --- a/test/tensor_reorder.cpp +++ b/test/tensor_reorder.cpp @@ -140,16 +140,11 @@ std::string tensor_layout_to_string(tensor_layout_t layout) { switch(layout) { - case miopen_tensor_layout_nchw: - return "NCHW"; - case miopen_tensor_layout_ncdhw: - return "NCDHW"; - case miopen_tensor_layout_nhwc: - return "NHWC"; - case miopen_tensor_layout_ndhwc: - return "NDHWC"; - default: - MIOPEN_THROW("Unsupported tensor layout"); + case miopen_tensor_layout_nchw: return "NCHW"; + case miopen_tensor_layout_ncdhw: return "NCDHW"; + case miopen_tensor_layout_nhwc: return "NHWC"; + case miopen_tensor_layout_ndhwc: return "NDHWC"; + default: MIOPEN_THROW("Unsupported tensor layout"); } } From 2bd2fd07e1e0087643f339c014d6ac4b7a099139 Mon Sep 17 00:00:00 2001 From: Evgenii Averin <86725875+averinevg@users.noreply.github.com> Date: Fri, 27 Oct 2023 16:11:03 +0200 Subject: [PATCH 4/6] Fix command --- cmake/ClangTidy.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/ClangTidy.cmake b/cmake/ClangTidy.cmake index 
c0d71c2535..6a9763a169 100644 --- a/cmake/ClangTidy.cmake +++ b/cmake/ClangTidy.cmake @@ -150,7 +150,7 @@ function(clang_tidy_check TARGET) set(tidy_target tidy-target-${TARGET}-${tidy_file}) add_custom_target(${tidy_target} DEPENDS ${SOURCE} - COMMAND ${CLANG_TIDY_COMMAND} ${SOURCE} "-config-file=${PROJECT_SOURCE_DIR}/.clang-tidy" "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml" + COMMAND ${CLANG_TIDY_COMMAND} "-config-file=${PROJECT_SOURCE_DIR}/.clang-tidy" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml" WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMENT "clang-tidy: Running clang-tidy on target ${SOURCE}..." ) From d293e222695b8ace06e14521c6b6024de593be2b Mon Sep 17 00:00:00 2001 From: Evgenii Averin <86725875+averinevg@users.noreply.github.com> Date: Mon, 30 Oct 2023 17:34:19 +0100 Subject: [PATCH 5/6] Resolve review comments --- src/reducetensor_api.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/reducetensor_api.cpp b/src/reducetensor_api.cpp index 33a4a49d38..39efcf9eca 100644 --- a/src/reducetensor_api.cpp +++ b/src/reducetensor_api.cpp @@ -48,10 +48,8 @@ static void LogCmdRedux(const miopen::ReduceTensorDescriptor reduceTensorDesc, case miopenInt8: ss << "reduceint8"; break; case miopenBFloat16: ss << "reducebfp16"; break; case miopenDouble: ss << "reducefp64"; break; -#ifdef MIOPEN_BETA_API case miopenFloat8: ss << "reducefp8"; break; case miopenBFloat8: ss << "reducebfp8"; break; -#endif default: ss << "reduce"; } From a0dd296739c00ad9233c96213da8f485875d0cbb Mon Sep 17 00:00:00 2001 From: Evgenii Averin <86725875+averinevg@users.noreply.github.com> Date: Wed, 1 Nov 2023 14:23:29 +0100 Subject: [PATCH 6/6] Resolve review comments --- .clang-tidy | 175 ++++++++++++++++++++++++++++++++++++++++++ CMakeLists.txt | 139 +-------------------------------- cmake/ClangTidy.cmake | 2 - 3 files changed, 176 insertions(+), 140 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 
e0e491d787..37a8aa66a1 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,3 +1,178 @@ +# Some checks are suppressed: +# +# This check is useless for us. Many objects (like tensors or problem descriptions) +# have multiple parameters of the same type +# -bugprone-easily-swappable-parameters +# +# Too many narrowing conversions in our code +# -bugprone-narrowing-conversions +# +# We shouldn't be using rand() +# -cert-msc30-c +# +# We really shouldn't use bitwise operators with signed integers, but opencl leaves us no choice +# -hicpp-signed-bitwise +# +# This one is extremely slow, and probably has lots of FPs +# -misc-confusable-identifiers +# +# TODO We are not ready to use it, but very useful +# -readability-function-cognitive-complexity +# +# We don't think this is a useful check. Disabled on migraphx +# -readability-identifier-length +# +# There are many FPs with this, let's disable it (ditto in MIGraphX) +# -readability-suspicious-call-argument +# +# TODO Code Quality WORKAROUND ROCm 5.1 update +# -cert-err33-c +# -google-readability-casting +# -hicpp-use-emplace +# -modernize-use-emplace +# -performance-unnecessary-copy-initialization +# -readability-container-data-pointer +# +# TODO Code Quality WORKAROUND ROCm 5.3 && Ubuntu 22.04 && C++17 && cppcheck 2.9 update +# -bugprone-use-after-move +# -clang-analyzer-cplusplus.NewDeleteLeaks +# -hicpp-deprecated-headers +# -hicpp-invalid-access-moved +# -hicpp-member-init +# -modernize-concat-nested-namespaces +# -modernize-deprecated-headers +# -modernize-macro-to-enum +# -modernize-unary-static-assert +# -modernize-use-nodiscard +# -performance-no-automatic-move +# -readability-redundant-declaration +# -readability-simplify-boolean-expr +# +# TODO Code Quality WORKAROUND ROCm 5.4.2 +# -misc-const-correctness +# +# TODO Code Quality WORKAROUND ROCm 5.6 +# -cppcoreguidelines-avoid-const-or-ref-data-members +# -cppcoreguidelines-avoid-do-while +# -misc-use-anonymous-namespace +# +# TODO Code Quality WORKAROUND ROCm 5.7 +# 
-bugprone-lambda-function-name +# -cppcoreguidelines-avoid-capture-default-when-capturing-this +# -cppcoreguidelines-rvalue-reference-param-not-moved +# -llvmlibc-inline-function-decl +# -readability-avoid-unconditional-preprocessor-if +# +Checks: >- + *, + -abseil-*, + -altera-*, + -android-cloexec-fopen, + -bugprone-easily-swappable-parameters, + -bugprone-exception-escape, + -bugprone-lambda-function-name, + -bugprone-macro-parentheses, + -bugprone-narrowing-conversions, + -bugprone-use-after-move, + -cert-env33-c, + -cert-err33-c, + -cert-msc30-c, + -cert-msc32-c, + -cert-msc50-cpp, + -cert-msc51-cpp, + -clang-analyzer-alpha.core.CastToStruct, + -clang-analyzer-cplusplus.NewDeleteLeaks, + -clang-analyzer-optin.performance.Padding, + -clang-diagnostic-extern-c-compat, + -clang-diagnostic-unused-command-line-argument, + -cppcoreguidelines-avoid-c-arrays, + -cppcoreguidelines-avoid-capture-default-when-capturing-this, + -cppcoreguidelines-avoid-const-or-ref-data-members, + -cppcoreguidelines-avoid-do-while, + -cppcoreguidelines-avoid-magic-numbers, + -cppcoreguidelines-explicit-virtual-functions, + -cppcoreguidelines-init-variables, + -cppcoreguidelines-macro-usage, + -cppcoreguidelines-narrowing-conversions, + -cppcoreguidelines-non-private-member-variables-in-classes, + -cppcoreguidelines-prefer-member-initializer, + -cppcoreguidelines-pro-bounds-array-to-pointer-decay, + -cppcoreguidelines-pro-bounds-constant-array-index, + -cppcoreguidelines-pro-bounds-pointer-arithmetic, + -cppcoreguidelines-pro-type-member-init, + -cppcoreguidelines-pro-type-reinterpret-cast, + -cppcoreguidelines-pro-type-union-access, + -cppcoreguidelines-pro-type-vararg, + -cppcoreguidelines-rvalue-reference-param-not-moved, + -cppcoreguidelines-special-member-functions, + -fuchsia-*, + -google-explicit-constructor, + -google-readability-casting, + -google-readability-todo, + -google-runtime-int, + -google-runtime-references, + -hicpp-avoid-c-arrays, + -hicpp-deprecated-headers, + 
-hicpp-explicit-conversions, + -hicpp-invalid-access-moved, + -hicpp-member-init, + -hicpp-named-parameter, + -hicpp-no-array-decay, + -hicpp-signed-bitwise, + -hicpp-special-member-functions, + -hicpp-uppercase-literal-suffix, + -hicpp-use-auto, + -hicpp-use-emplace, + -hicpp-use-equals-default, + -hicpp-use-override, + -hicpp-vararg, + -llvm-else-after-return, + -llvm-header-guard, + -llvm-include-order, + -llvmlibc-callee-namespace, + -llvmlibc-implementation-in-namespace, + -llvmlibc-inline-function-decl, + -llvmlibc-restrict-system-libc-headers, + -llvm-qualified-auto, + -misc-confusable-identifiers, + -misc-const-correctness, + -misc-misplaced-const, + -misc-non-private-member-variables-in-classes, + -misc-no-recursion, + -misc-use-anonymous-namespace, + -modernize-avoid-bind, + -modernize-avoid-c-arrays, + -modernize-deprecated-headers, + -modernize-macro-to-enum, + -modernize-pass-by-value, + -modernize-use-auto, + -modernize-use-default-member-init, + -modernize-use-emplace, + -modernize-use-equals-default, + -modernize-use-trailing-return-type, + -modernize-use-transparent-functors, + -modernize-use-nodiscard, + -modernize-concat-nested-namespaces, + -modernize-unary-static-assert, + -performance-no-automatic-move, + -performance-unnecessary-copy-initialization, + -performance-unnecessary-value-param, + -readability-avoid-unconditional-preprocessor-if, + -readability-container-data-pointer, + -readability-convert-member-functions-to-static, + -readability-else-after-return, + -readability-function-cognitive-complexity, + -readability-identifier-length, + -readability-isolate-declaration, + -readability-magic-numbers, + -readability-named-parameter, + -readability-qualified-auto, + -readability-redundant-declaration, + -readability-redundant-string-init, + -readability-simplify-boolean-expr, + -readability-suspicious-call-argument, + -readability-uppercase-literal-suffix, + CheckOptions: - key: 
google-readability-braces-around-statements.ShortStatementLines value: '6' diff --git a/CMakeLists.txt b/CMakeLists.txt index f972c948d7..6dc21730a0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -543,156 +543,19 @@ if(CMAKE_CXX_COMPILER MATCHES ".*clang\\+\\+") # Enable tidy on hip elseif(MIOPEN_BACKEND STREQUAL "HIP" OR MIOPEN_BACKEND STREQUAL "HIPNOGPU") set(MIOPEN_TIDY_ERRORS ALL) - endif() include(ClangTidy) enable_clang_tidy( CHECKS - * - -abseil-* - -altera-* - -android-cloexec-fopen - # This check is useless for us. Many objects (like tensors or problem descriptions) - # naturally have mutiple parameters of the same type. - -bugprone-easily-swappable-parameters - -bugprone-exception-escape - -bugprone-macro-parentheses - # too many narrowing conversions in our code - -bugprone-narrowing-conversions - -cert-env33-c - # Yea we shouldn't be using rand() - -cert-msc30-c - -cert-msc32-c - -cert-msc50-cpp - -cert-msc51-cpp - -clang-analyzer-alpha.core.CastToStruct - -clang-analyzer-optin.performance.Padding - -clang-diagnostic-extern-c-compat - -clang-diagnostic-unused-command-line-argument - -cppcoreguidelines-avoid-c-arrays - -cppcoreguidelines-avoid-magic-numbers - -cppcoreguidelines-explicit-virtual-functions - -cppcoreguidelines-init-variables - -cppcoreguidelines-macro-usage - -cppcoreguidelines-narrowing-conversions - -cppcoreguidelines-non-private-member-variables-in-classes - -cppcoreguidelines-prefer-member-initializer - -cppcoreguidelines-pro-bounds-array-to-pointer-decay - -cppcoreguidelines-pro-bounds-constant-array-index - -cppcoreguidelines-pro-bounds-pointer-arithmetic - -cppcoreguidelines-pro-type-member-init - -cppcoreguidelines-pro-type-reinterpret-cast - -cppcoreguidelines-pro-type-union-access - -cppcoreguidelines-pro-type-vararg - -cppcoreguidelines-special-member-functions - -fuchsia-* - -google-explicit-constructor - -google-readability-todo - -google-runtime-int - -google-runtime-references - -hicpp-avoid-c-arrays - 
-hicpp-explicit-conversions - -hicpp-named-parameter - -hicpp-no-array-decay - # We really shouldn't use bitwise operators with signed integers, but opencl leaves us no choice - -hicpp-signed-bitwise - -hicpp-special-member-functions - -hicpp-uppercase-literal-suffix - -hicpp-use-auto - -hicpp-use-equals-default - -hicpp-use-override - -hicpp-vararg - -llvm-else-after-return - -llvm-header-guard - -llvm-include-order - -llvmlibc-callee-namespace - -llvmlibc-implementation-in-namespace - -llvmlibc-restrict-system-libc-headers - -llvm-qualified-auto - # This one is extremely slow, and probably has lots of FPs. - -misc-confusable-identifiers - -misc-misplaced-const - -misc-non-private-member-variables-in-classes - -misc-no-recursion - -modernize-avoid-bind - -modernize-avoid-c-arrays - -modernize-pass-by-value - -modernize-use-auto - -modernize-use-default-member-init - -modernize-use-equals-default - -modernize-use-trailing-return-type - -modernize-use-transparent-functors - -modernize-use-nodiscard - -modernize-concat-nested-namespaces - -modernize-unary-static-assert - -performance-unnecessary-value-param - -readability-convert-member-functions-to-static - -readability-else-after-return - # TODO We are not ready to use it, but very useful. - -readability-function-cognitive-complexity - # We dont think this is a useful check. Disabled on migraphx. 
- -readability-identifier-length - -readability-isolate-declaration - -readability-magic-numbers - -readability-named-parameter - -readability-qualified-auto - -readability-redundant-string-init - # There are many FPs with this, let's disable it (ditto in MIGraphX) - -readability-suspicious-call-argument - -readability-uppercase-literal-suffix - ################################################################### - # TODO Code Quality WORKAROUND ROCm 5.1 update - ################################################################### - -cert-err33-c - -google-readability-casting - -hicpp-use-emplace - -modernize-use-emplace - -performance-unnecessary-copy-initialization - -readability-container-data-pointer - ################################################################### - # TODO Code Quality WORKAROUND ROCm 5.3 && - # Ubuntu 22.04 && C++17 && cppcheck 2.9 update - ################################################################### - -bugprone-use-after-move - -hicpp-invalid-access-moved - -modernize-use-nodiscard - -modernize-unary-static-assert - -modernize-macro-to-enum - -modernize-concat-nested-namespaces - -readability-redundant-declaration - -readability-simplify-boolean-expr - -hicpp-deprecated-headers - -hicpp-member-init - -performance-no-automatic-move - -clang-analyzer-cplusplus.NewDeleteLeaks - -modernize-deprecated-headers - ################################################################### - # TODO Code Quality WORKAROUND ROCm 5.4.2 - ################################################################### - -misc-const-correctness - ################################################################### - # TODO Code Quality WORKAROUND ROCm 5.6 - ################################################################### - -cppcoreguidelines-avoid-const-or-ref-data-members - -cppcoreguidelines-avoid-do-while - -misc-use-anonymous-namespace - ################################################################### - # TODO Code Quality WORKAROUND ROCm 5.7 - 
################################################################### - -llvmlibc-inline-function-decl - -cppcoreguidelines-avoid-capture-default-when-capturing-this - -cppcoreguidelines-rvalue-reference-param-not-moved - -readability-avoid-unconditional-preprocessor-if - -bugprone-lambda-function-name ${MIOPEN_TIDY_CHECKS} ${MIOPEN_TIDY_ERRORS} HEADER_FILTER "\.hpp$" EXTRA_ARGS -DMIOPEN_USE_CLANG_TIDY - ) + include(CppCheck) enable_cppcheck( CHECKS diff --git a/cmake/ClangTidy.cmake b/cmake/ClangTidy.cmake index 6a9763a169..6e8ecf4409 100644 --- a/cmake/ClangTidy.cmake +++ b/cmake/ClangTidy.cmake @@ -93,8 +93,6 @@ macro(enable_clang_tidy) set(CLANG_TIDY_ALL ALL) endif() - message(STATUS "Clang tidy checks: ${CLANG_TIDY_CHECKS}") - if (${PARSE_ANALYZE_TEMPORARY_DTORS}) set(CLANG_TIDY_ANALYZE_TEMPORARY_DTORS "-analyze-temporary-dtors") endif()