diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index f04b204bd5..1928a31e52 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -346,6 +346,11 @@ MIOPEN_DECLARE_OBJECT(miopenDropoutDescriptor); */ MIOPEN_DECLARE_OBJECT(miopenReduceTensorDescriptor); +/*! @ingroup softmax + * @brief Creates the miopenSoftmaxDescriptor_t type + */ +MIOPEN_DECLARE_OBJECT(miopenSoftmaxDescriptor); + /*! @ingroup tensor * @enum miopenDataType_t * MIOpen floating point datatypes. Both 32-bit and 16-bit floats are supported in MIOpen. @@ -5313,7 +5318,12 @@ typedef enum miopenTensorBiasX = 8, miopenTensorBiasY = 9, miopenTensorBias = 10, + miopenTensorSoftmaxX = 11, + miopenTensorSoftmaxY = 12, + miopenTensorSoftmaxDX = 13, + miopenTensorSoftmaxDY = 14, #endif + } miopenTensorArgumentId_t; /*! @enum miopenTensorArgumentId_t @@ -5336,6 +5346,48 @@ MIOPEN_EXPORT miopenStatus_t miopenCreateConvProblem(miopenProblem_t* problem, miopenConvolutionDescriptor_t operatorDesc, miopenProblemDirection_t direction); +/*! @brief Creates the Softmax descriptor object + * + * @param softmaxDesc Pointer to an softmax descriptor type + * @return miopenStatus_t + */ + +MIOPEN_EXPORT miopenStatus_t miopenCreateSoftmaxDescriptor(miopenSoftmaxDescriptor_t* softmaxDesc); + +/*! @brief Sets the softmax descriptor details + * + * Sets all of the descriptor details for the softmax layer + * + * @param softmaxDesc Pointer to a softmax layer descriptor + * @param alpha Softmax alpha parameter + * @param beta Softmax beta parameter + * @param algorithm Softmax algorithm + * @param mode Softmax mode + * @return miopenStatus_t + */ +MIOPEN_EXPORT miopenStatus_t miopenSetSoftmaxDescriptor(miopenSoftmaxDescriptor_t softmaxDesc, + float alpha, + float beta, + miopenSoftmaxAlgorithm_t algorithm, + miopenSoftmaxMode_t mode); + +/*! @brief Gets the softmax layer descriptor details + * + * Retrieves all of the descriptor details for the softmax layer. + * + * @param softmaxDesc Pointer to a softmax layer descriptor (input) + * @param alpha Softmax alpha parameter (output) + * @param beta Softmax beta parameter (output) + * @param algorithm Softmax algorithm (output) + * @param mode Softmax mode (output) + * @return miopenStatus_t + */ +MIOPEN_EXPORT miopenStatus_t miopenGetSoftmaxDescriptor(const miopenSoftmaxDescriptor_t softmaxDesc, + float* alpha, + float* beta, + miopenSoftmaxAlgorithm_t* algorithm, + miopenSoftmaxMode_t* mode); + /*! @brief Destroys a problem object. * * @param problem Problem to destroy @@ -5596,6 +5648,18 @@ MIOPEN_EXPORT miopenStatus_t miopenFuseProblems(miopenProblem_t problem1, miopen MIOPEN_EXPORT miopenStatus_t miopenCreateBiasProblem(miopenProblem_t* problem, miopenProblemDirection_t direction); +/*! @brief Initializes a problem object describing a softmax operation. 
+ * + * @param problem Pointer to the problem to initialize + * @param operatorDesc Descriptor of the operator to be used + * @param direction Direction of the operation + * @return miopenStatus_t + */ + +MIOPEN_EXPORT miopenStatus_t miopenCreateSoftmaxProblem(miopenProblem_t* problem, + miopenSoftmaxDescriptor_t operatorDesc, + miopenProblemDirection_t direction); + #endif /** @} */ diff --git a/src/api/find2_0_commons.cpp b/src/api/find2_0_commons.cpp index ce203197a5..e9bd5e51f2 100644 --- a/src/api/find2_0_commons.cpp +++ b/src/api/find2_0_commons.cpp @@ -89,6 +89,14 @@ miopenStatus_t miopenCreateBiasProblem(miopenProblem_t* problem, miopenProblemDi }); } +miopenStatus_t miopenCreateSoftmaxProblem(miopenProblem_t* problem, + miopenSoftmaxDescriptor_t operatorDesc, + miopenProblemDirection_t direction) +{ + MIOPEN_LOG_FUNCTION(problem, direction); + return MakeProblem(problem, operatorDesc, direction); +} + miopenStatus_t miopenFuseProblems(miopenProblem_t problem1, miopenProblem_t problem2) { MIOPEN_LOG_FUNCTION(problem1, problem2); @@ -263,6 +271,11 @@ inline std::ostream& operator<<(std::ostream& stream, const miopenTensorArgument case miopenTensorBias: stream << "Bias"; break; case miopenTensorBiasX: stream << "BiasX"; break; case miopenTensorBiasY: stream << "BiasY"; break; + case miopenTensorSoftmaxX: stream << "SoftmaxX"; break; + case miopenTensorSoftmaxY: stream << "SoftmaxY"; break; + case miopenTensorSoftmaxDX: stream << "SoftmaxDX"; break; + case miopenTensorSoftmaxDY: stream << "SoftmaxDY"; break; + case miopenTensorArgumentIdInvalid: stream << "Invalid"; break; } diff --git a/src/include/miopen/problem.hpp b/src/include/miopen/problem.hpp index 2900008785..d95ea96cc9 100644 --- a/src/include/miopen/problem.hpp +++ b/src/include/miopen/problem.hpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -59,13 +60,17 @@ namespace conv { struct ProblemDescription; } // namespace conv +namespace softmax { +struct ProblemDescription; +} // namespace softmax + struct BiasDescriptor { }; // The order of types is important for deserialization and should be preserved between releases. 
using OperatorDescriptor = - boost::variant; + boost::variant; struct Problem { @@ -99,6 +104,7 @@ struct Problem conv::ProblemDescription AsConvolution() const; activ::ProblemDescription AsActivation() const; + softmax::ProblemDescription AsSoftmax() const; [[nodiscard]] miopenTensorArgumentId_t GetInputId() const; [[nodiscard]] miopenTensorArgumentId_t GetOutputId() const; @@ -155,6 +161,12 @@ struct Problem const Buffers& buffers, const ConvolutionDescriptor& conv_desc) const; + std::vector FindSolutionsImpl(Handle& handle, + const FindOptions& options, + std::size_t max_solutions, + const Buffers& buffers, + const SoftmaxDescriptor& softmax_desc) const; + void LogDriverCommand(const ConvolutionDescriptor& conv_desc) const; void LogDriverCommand(const ActivationDescriptor& descriptor) const; }; diff --git a/src/include/miopen/softmax.hpp b/src/include/miopen/softmax.hpp index b4f8909908..0ed0f300be 100644 --- a/src/include/miopen/softmax.hpp +++ b/src/include/miopen/softmax.hpp @@ -28,12 +28,47 @@ #include #include +#include + +#include namespace miopen { struct Handle; struct TensorDescriptor; +struct SoftmaxDescriptor : miopenSoftmaxDescriptor +{ + SoftmaxDescriptor() {} + + float GetAlpha() const { return alpha; } + float GetBeta() const { return beta; } + miopenSoftmaxAlgorithm_t GetAlgorithm() const { return algorithm; } + miopenSoftmaxMode_t GetMode() const { return mode; } + + void SetParams(float alpha_, + float beta_, + miopenSoftmaxAlgorithm_t algorithm_, + miopenSoftmaxMode_t mode_) + { + alpha = alpha_; + beta = beta_; + algorithm = algorithm_; + mode = mode_; + } + + friend std::ostream& operator<<(std::ostream& stream, const SoftmaxDescriptor& x); + + friend void to_json(nlohmann::json& json, const SoftmaxDescriptor& descriptor); + friend void from_json(const nlohmann::json& json, SoftmaxDescriptor& descriptor); + +private: + float alpha; + float beta; + miopenSoftmaxAlgorithm_t algorithm; + miopenSoftmaxMode_t mode; +}; + miopenStatus_t SoftmaxForward(Handle& handle, const void* alpha, const void* beta, @@ -62,4 +97,7 @@ miopenStatus_t SoftmaxBackward(Handle& handle, int dx_offset = 0); } // namespace miopen + +MIOPEN_DEFINE_OBJECT(miopenSoftmaxDescriptor, miopen::SoftmaxDescriptor); + #endif // _MIOPEN_SOFTMAX_HPP_ diff --git a/src/include/miopen/softmax/invoke_params.hpp b/src/include/miopen/softmax/invoke_params.hpp index c2792929ab..f2d6240e98 100644 --- a/src/include/miopen/softmax/invoke_params.hpp +++ b/src/include/miopen/softmax/invoke_params.hpp @@ -74,9 +74,9 @@ struct InvokeParams : public miopen::InvokeParams Data_t dx_, miopenSoftmaxAlgorithm_t algorithm_, miopenSoftmaxMode_t mode_, - int y_offset_, - int dy_offset_, - int dx_offset_) + int y_offset_ = 0, + int dy_offset_ = 0, + int dx_offset_ = 0) : algorithm(algorithm_), mode(mode_), diff --git a/src/include/miopen/solution.hpp b/src/include/miopen/solution.hpp index 4fab925bf2..4f3643b51b 100644 --- a/src/include/miopen/solution.hpp +++ b/src/include/miopen/solution.hpp @@ -111,6 +111,12 @@ struct Solution : miopenSolution std::size_t workspace_size, const ConvolutionDescriptor& conv_desc); + void RunImpl(Handle& handle, + const std::unordered_map& inputs, + Data_t /*workspace*/, + std::size_t /*workspace_size*/, + const SoftmaxDescriptor& softmax_desc); + void RunImpl(Handle& handle, const std::unordered_map& inputs, Data_t workspace, diff --git a/src/include/miopen/solver_id.hpp b/src/include/miopen/solver_id.hpp index dbf086030e..09e16be6a1 100644 --- a/src/include/miopen/solver_id.hpp +++ 
b/src/include/miopen/solver_id.hpp @@ -54,7 +54,8 @@ enum class Primitive Pooling, Normalization, Reduce, - Cat + Cat, + Softmax }; struct MIOPEN_EXPORT Id diff --git a/src/problem.cpp b/src/problem.cpp index 2ea839391c..0b8dbe478f 100644 --- a/src/problem.cpp +++ b/src/problem.cpp @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include #include #include @@ -175,6 +177,9 @@ Problem::FindSolutions(Handle& handle, const FindOptions& options, std::size_t m [&](const ConvolutionDescriptor& op_desc) { return FindSolutionsImpl(handle, options, max_solutions, buffers, op_desc); }, + [&](const SoftmaxDescriptor& op_desc) { + return FindSolutionsImpl(handle, options, max_solutions, buffers, op_desc); + }, [&](const ActivationDescriptor& /*op_desc*/) -> std::vector { MIOPEN_THROW(miopenStatusNotImplemented); }, @@ -277,6 +282,33 @@ activ::ProblemDescription Problem::AsActivation() const } } +softmax::ProblemDescription Problem::AsSoftmax() const +{ + const auto& softmax_desc = boost::get(operator_descriptor); + + float alpha = softmax_desc.GetAlpha(); + float beta = softmax_desc.GetBeta(); + + softmax::ProblemDescription problem_description = + (GetDirection() == miopenProblemDirectionForward) + ? softmax::ProblemDescription( + &alpha, + &beta, + GetTensorDescriptorChecked(miopenTensorSoftmaxX, "miopenTensorSoftmaxX"), + GetTensorDescriptorChecked(miopenTensorSoftmaxY, "miopenTensorSoftmaxY"), + softmax_desc.GetAlgorithm(), + softmax_desc.GetMode()) + : softmax::ProblemDescription( + &alpha, + &beta, + GetTensorDescriptorChecked(miopenTensorSoftmaxY, "miopenTensorSoftmaxY"), + GetTensorDescriptorChecked(miopenTensorSoftmaxDY, "miopenTensorSoftmaxDY"), + GetTensorDescriptorChecked(miopenTensorSoftmaxDX, "miopenTensorSoftmaxDX"), + softmax_desc.GetAlgorithm(), + softmax_desc.GetMode()); + return problem_description; +} + std::vector Problem::FindSolutionsImpl(Handle& handle, const FindOptions& options, std::size_t max_solutions, @@ -431,6 +463,61 @@ std::vector Problem::FindSolutionsImpl(Handle& handle, return ret; } +std::vector +Problem::FindSolutionsImpl(Handle& handle, + [[maybe_unused]] const FindOptions& options, + std::size_t max_solutions, + [[maybe_unused]] const Buffers& buffers, + [[maybe_unused]] const SoftmaxDescriptor& softmax_desc) const +{ + auto ret = std::vector(); + + auto ctx = ExecutionContext{&handle}; + + const softmax::ProblemDescription problem_description = AsSoftmax(); + + const auto algo = AlgorithmName{"Softmax"}; + + static solver::softmax::AttnSoftmax attnSoftmaxSolver; + static solver::softmax::Softmax regularSoftmaxSolver; + + std::vector solvers; + + solvers.push_back(&attnSoftmaxSolver); + solvers.push_back(®ularSoftmaxSolver); + + for(auto solver : solvers) + { + if(!solver->IsApplicable(ctx, problem_description)) + { + MIOPEN_LOG_I2(solver->SolverDbId() << ": Not applicable"); + continue; + } + + auto solution = Solution(); + + /// \todo time measurement will be done later. For now we set less time for attention + /// softmax and slightly bigger for regular + solution.SetTime(solver == &attnSoftmaxSolver ? 
1.0f : 2.0f); + solution.SetWorkspaceSize(solver->GetWorkspaceSize(ctx, problem_description)); + solution.SetSolver(solver->SolverDbId()); + solution.SetProblem({*this}); + + MIOPEN_LOG_I("Found solution: " << solution.GetSolver().ToString() << " , " + << solution.GetWorkspaceSize() << ", " + << solution.GetTime()); + + ret.emplace_back(std::move(solution)); + + if(ret.size() >= max_solutions) + { + break; + } + } + + return ret; +} + void Problem::ValidateGroupCount(const TensorDescriptor& xDesc, const TensorDescriptor& wDesc, const ConvolutionDescriptor& conv) @@ -456,7 +543,8 @@ void Problem::LogDriverCommand() const const auto log_function = boost::hof::match([&](const ConvolutionDescriptor& op_desc) { LogDriverCommand(op_desc); }, [&](const ActivationDescriptor& op_desc) { LogDriverCommand(op_desc); }, - [&](const BiasDescriptor&) {}); + [&](const BiasDescriptor&) {}, + [&](const SoftmaxDescriptor&) {}); boost::apply_visitor(log_function, operator_descriptor); } @@ -576,6 +664,7 @@ void Problem::CalculateOutput() [&](const ActivationDescriptor&) { RegisterTensorDescriptor(GetOutputId(), GetInput()); }, + [&](const SoftmaxDescriptor&) { RegisterTensorDescriptor(GetOutputId(), GetInput()); }, [&](const BiasDescriptor&) { RegisterTensorDescriptor(GetOutputId(), GetInput()); }), operator_descriptor); } @@ -585,7 +674,8 @@ miopenTensorArgumentId_t Problem::GetInputId() const return boost::apply_visitor( boost::hof::match([](const ConvolutionDescriptor&) { return miopenTensorConvolutionX; }, [](const ActivationDescriptor&) { return miopenTensorActivationX; }, - [](const BiasDescriptor&) { return miopenTensorBiasX; }), + [](const BiasDescriptor&) { return miopenTensorBiasX; }, + [](const SoftmaxDescriptor&) { return miopenTensorSoftmaxX; }), operator_descriptor); } @@ -594,7 +684,8 @@ miopenTensorArgumentId_t Problem::GetOutputId() const return boost::apply_visitor( boost::hof::match([](const ConvolutionDescriptor&) { return miopenTensorConvolutionY; }, [](const ActivationDescriptor&) { return miopenTensorActivationY; }, - [](const BiasDescriptor&) { return miopenTensorBiasY; }), + [](const BiasDescriptor&) { return miopenTensorBiasY; }, + [](const SoftmaxDescriptor&) { return miopenTensorSoftmaxY; }), operator_descriptor); } @@ -679,7 +770,14 @@ void FusedProblem::AddProblemToPlan(FusionPlanDescriptor& plan, const Problem& p [&](const BiasDescriptor&) { plan.AddOp(std::make_shared( problem.GetTensorDescriptorChecked(miopenTensorBias, "miopenTensorBias"))); + }, + [&](const SoftmaxDescriptor&) { + // Not implemented + assert(false); + MIOPEN_THROW(miopenStatusNotImplemented, + "Softmax is not implemented for FusedProblem"); }), + problem.operator_descriptor); } @@ -741,7 +839,14 @@ fusion::FusionInvokeParams FusedProblem::MakeInvokeParams( const auto bias_ptr = buffers.at(miopenTensorBias); operator_args.params.emplace_back( std::make_unique(bias_ptr)); + }, + [&](const SoftmaxDescriptor&) { + // Not implemented + assert(false); + MIOPEN_THROW(miopenStatusNotImplemented, + "Softmax is not implemented for FusedProblem"); }), + problem.operator_descriptor); } diff --git a/src/softmax.cpp b/src/softmax.cpp index 04e81fc7de..38b233e2e6 100644 --- a/src/softmax.cpp +++ b/src/softmax.cpp @@ -33,8 +33,73 @@ #include #include +#include + namespace miopen { +extern "C" miopenStatus_t miopenCreateSoftmaxDescriptor(miopenSoftmaxDescriptor_t* softmaxDesc) +{ + MIOPEN_LOG_FUNCTION(softmaxDesc); + return miopen::try_([&] { + auto& desc = miopen::deref(softmaxDesc); + desc = new 
miopen::SoftmaxDescriptor(); + }); +} + +extern "C" miopenStatus_t miopenSetSoftmaxDescriptor(miopenSoftmaxDescriptor_t softmaxDesc, + float alpha, + float beta, + miopenSoftmaxAlgorithm_t algorithm, + miopenSoftmaxMode_t mode) +{ + + MIOPEN_LOG_FUNCTION(softmaxDesc, alpha, beta, algorithm, mode); + return miopen::try_( + [&] { miopen::deref(softmaxDesc).SetParams(alpha, beta, algorithm, mode); }); +} + +extern "C" miopenStatus_t miopenGetSoftmaxDescriptor(const miopenSoftmaxDescriptor_t softmaxDesc, + float* alpha, + float* beta, + miopenSoftmaxAlgorithm_t* algorithm, + miopenSoftmaxMode_t* mode) +{ + MIOPEN_LOG_FUNCTION(softmaxDesc); + return miopen::try_([&] { + *alpha = miopen::deref(softmaxDesc).GetAlpha(); + *beta = miopen::deref(softmaxDesc).GetBeta(); + *algorithm = miopen::deref(softmaxDesc).GetAlgorithm(); + *mode = miopen::deref(softmaxDesc).GetMode(); + }); +} + +std::ostream& operator<<(std::ostream& stream, const SoftmaxDescriptor& x) +{ + stream << "softmax," + << "alpha" << x.GetAlpha() << ",beta" << x.GetBeta() << ",algorithm" << x.GetAlgorithm() + << ",mode" << x.GetMode() << ","; + + return stream; +} + +void to_json(nlohmann::json& json, const SoftmaxDescriptor& descriptor) +{ + json = nlohmann::json{ + {"alpha", descriptor.GetAlpha()}, + {"beta", descriptor.GetBeta()}, + {"algorithm", descriptor.GetAlgorithm()}, + {"mode", descriptor.GetMode()}, + }; +} + +void from_json(const nlohmann::json& json, SoftmaxDescriptor& descriptor) +{ + json.at("alpha").get_to(descriptor.alpha); + json.at("beta").get_to(descriptor.beta); + json.at("algorithm").get_to(descriptor.algorithm); + json.at("mode").get_to(descriptor.mode); +} + miopenStatus_t SoftmaxForward(Handle& handle, const void* alpha, const void* beta, diff --git a/src/solution.cpp b/src/solution.cpp index e146191639..646d34d484 100644 --- a/src/solution.cpp +++ b/src/solution.cpp @@ -31,6 +31,10 @@ #include #include +#include +#include +#include + #include #include @@ -70,6 +74,9 @@ void Solution::Run(Handle& handle, [&](const ConvolutionDescriptor& op_desc) { RunImpl(handle, inputs, workspace, workspace_size, op_desc); }, + [&](const SoftmaxDescriptor& op_desc) { + RunImpl(handle, inputs, workspace, workspace_size, op_desc); + }, [&](const ActivationDescriptor& /*op_desc*/) { MIOPEN_THROW(miopenStatusNotImplemented); }, @@ -113,8 +120,10 @@ void Solution::LogDriverCommand(const ActivationDescriptor& desc) const void Solution::LogDriverCommand(const Problem& problem_) const { boost::apply_visitor( - boost::hof::match([&](const BiasDescriptor&) { /* \todo: think on how to log bias */ }, - [&](const auto& op_desc) { LogDriverCommand(op_desc); }), + boost::hof::match( + [&](const BiasDescriptor&) { /* \todo: think on how to log bias */ }, + [&](const SoftmaxDescriptor&) { /* \todo: think on how to log softmax */ }, + [&](const auto& op_desc) { LogDriverCommand(op_desc); }), problem_.GetOperatorDescriptor()); } @@ -232,6 +241,91 @@ void Solution::RunImpl(Handle& handle, checkNumericsOutput_(); } +void Solution::RunImpl(Handle& handle, + const std::unordered_map& inputs, + Data_t /*workspace*/, + std::size_t /*workspace_size*/, + const SoftmaxDescriptor& softmax_desc) +{ + + const auto& problem_casted = boost::get(problem.item); + + const auto get_input_checked = [&](auto name, const std::string& name_str) { + const auto& found = inputs.find(name); + if(found == inputs.end()) + { + MIOPEN_THROW(miopenStatusInvalidValue, + "Problem is missing " + name_str + " tensor descriptor."); + } + auto ret = found->second; + 
if(!ret.descriptor.has_value()) + ret.descriptor = problem_casted.GetTensorDescriptorChecked(name, name_str); + return ret; + }; + + const softmax::ProblemDescription problem_description = problem_casted.AsSoftmax(); + + float alpha = softmax_desc.GetAlpha(); + float beta = softmax_desc.GetBeta(); + miopenSoftmaxAlgorithm_t algorithm = softmax_desc.GetAlgorithm(); + miopenSoftmaxMode_t mode = softmax_desc.GetMode(); + + const auto invoke_ctx = [&]() -> AnyInvokeParams { + switch(problem_casted.GetDirection()) + { + case miopenProblemDirectionForward: { + auto x = get_input_checked(miopenTensorSoftmaxX, "miopenTensorSoftmaxX"); + auto y = get_input_checked(miopenTensorSoftmaxY, "miopenTensorSoftmaxY"); + + return softmax::InvokeParams( + &alpha, &beta, *x.descriptor, x.buffer, *y.descriptor, y.buffer, algorithm, mode); + } + case miopenProblemDirectionBackward: { + auto y = get_input_checked(miopenTensorSoftmaxY, "miopenTensorSoftmaxY"); + auto dy = get_input_checked(miopenTensorSoftmaxDY, "miopenTensorSoftmaxDY"); + auto dx = get_input_checked(miopenTensorSoftmaxDX, "miopenTensorSoftmaxDX"); + + return softmax::InvokeParams(&alpha, + &beta, + *y.descriptor, + y.buffer, + *dy.descriptor, + dy.buffer, + *dx.descriptor, + dx.buffer, + algorithm, + mode); + } + + default: MIOPEN_THROW(miopenStatusNotImplemented); + } + }(); + + const auto net_cfg = problem_description.MakeNetworkConfig(); + const auto found_invoker = handle.GetInvoker(net_cfg, GetSolver()); + + if(found_invoker) + { + (*found_invoker)(handle, invoke_ctx); + } + else + { + auto ctx = ExecutionContext{&handle}; + + solver::softmax::Softmax regularSoftmax; + solver::softmax::AttnSoftmax attnSoftmax; + + const auto softmax_solution = GetSolver().ToString() == regularSoftmax.SolverDbId() + ? regularSoftmax.GetSolution(ctx, problem_description) + : attnSoftmax.GetSolution(ctx, problem_description); + + decltype(auto) invoker = handle.PrepareInvoker(*softmax_solution.invoker_factory, + softmax_solution.construction_params); + handle.RegisterInvoker(invoker, net_cfg, GetSolver().ToString()); + invoker(handle, invoke_ctx); + } +} + void Solution::RunImpl(Handle& handle, const std::unordered_map& inputs, Data_t /*workspace*/, diff --git a/src/solver.cpp b/src/solver.cpp index e4800fbd2d..a3a17bf1d3 100644 --- a/src/solver.cpp +++ b/src/solver.cpp @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -642,6 +643,10 @@ inline SolverRegistrar::SolverRegistrar(IdRegistryData& registry) ++id, conv::ConvHipImplicitGemmGroupWrwXdlops{}, miopenConvolutionAlgoImplicitGEMM); + + Register(registry, ++id, Primitive::Softmax, softmax::Softmax{}.SolverDbId()); + Register(registry, ++id, Primitive::Softmax, softmax::AttnSoftmax{}.SolverDbId()); + // IMPORTANT: New solvers should be added to the end of the function! } diff --git a/test/gtest/softmax_find20.cpp b/test/gtest/softmax_find20.cpp new file mode 100644 index 0000000000..c88afd73cf --- /dev/null +++ b/test/gtest/softmax_find20.cpp @@ -0,0 +1,327 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#include "test.hpp" +#include "get_handle.hpp" +#include "tensor_holder.hpp" +#include "verify.hpp" + +#include + +#include + +#include + +#include + +#include + +using namespace miopen; + +class SoftmaxFind20Test +{ +public: + SoftmaxFind20Test(bool forward) : problem(nullptr), isForward(forward) { Initialize(); } + + void AddTensorDescriptors() + { + std::cerr << "Creating softmax tensor descriptors..." << std::endl; + + auto test_set_tensor_descriptor = [this](miopenTensorArgumentId_t name, + TensorDescriptor& desc) { + EXPECT_EQUAL(miopenSetProblemTensorDescriptor(problem, name, &desc), + miopenStatusSuccess); + }; + + if(isForward) + { + test_set_tensor_descriptor(miopenTensorSoftmaxX, xTensor.desc); + test_set_tensor_descriptor(miopenTensorSoftmaxY, yTensor.desc); + } + else + { + test_set_tensor_descriptor(miopenTensorSoftmaxY, yTensor.desc); + test_set_tensor_descriptor(miopenTensorSoftmaxDY, dyTensor.desc); + test_set_tensor_descriptor(miopenTensorSoftmaxDX, dxTensor.desc); + } + + std::cerr << "Created softmax tensor descriptors." << std::endl; + } + + std::vector TestFindSolutions(Handle& handle) + { + std::cerr << "Testing miopenFindSolutions..." << std::endl; + + auto solutions = std::vector{}; + std::size_t found; + + // We expect to get only 1 or 2 solutions for softmax for now. Hardcode value 16 as just big + // enough value + solutions.resize(16); + + EXPECT_EQUAL(miopenFindSolutions( + &handle, problem, nullptr, solutions.data(), &found, solutions.size()), + miopenStatusSuccess); + EXPECT_TRUE(found > 0); + + solutions.resize(found); + + std::cerr << "Finished testing miopenFindSolutions." << std::endl; + return solutions; + } + + void TestSolutionAttributes(const std::vector& solutions) + { + std::cerr << "Testing miopenGetSolution..." << std::endl; + + for(const auto& solution : solutions) + { + float time; + std::size_t workspace_size; + uint64_t solver_id; + + EXPECT_EQUAL(miopenGetSolutionTime(solution, &time), miopenStatusSuccess); + EXPECT_EQUAL(miopenGetSolutionWorkspaceSize(solution, &workspace_size), + miopenStatusSuccess); + EXPECT_EQUAL(miopenGetSolutionSolverId(solution, &solver_id), miopenStatusSuccess); + } + + std::cerr << "Finished testing miopenGetSolution." 
<< std::endl; + } + + void TestRunSolutionsForward(Handle& handle, const std::vector& solutions) + { + std::cerr << "Testing solution functions..." << std::endl; + + miopenTensorDescriptor_t x_desc = &xTensor.desc, y_desc = &yTensor.desc; + + const unsigned int numTensors = 2; + + for(const auto& solution : solutions) + { + auto arguments = std::make_unique(numTensors); + + auto in_gpu = handle.Write(xTensor.data); + auto out_gpu = handle.Write(yTensor.data); + + miopenTensorArgumentId_t names[numTensors] = {miopenTensorSoftmaxX, + miopenTensorSoftmaxY}; + void* buffers[numTensors] = {in_gpu.get(), out_gpu.get()}; + miopenTensorDescriptor_t descriptors[numTensors] = {x_desc, y_desc}; + + for(auto i = 0; i < numTensors; ++i) + { + arguments[i].id = names[i]; + arguments[i].descriptor = &descriptors[i]; + arguments[i].buffer = buffers[i]; + } + + std::cerr << "Run a solution." << std::endl; + EXPECT_EQUAL( + miopenRunSolution(&handle, solution, numTensors, arguments.get(), nullptr, 0), + miopenStatusSuccess); + + float alpha = softmax_descriptor.GetAlpha(); + float beta = softmax_descriptor.GetBeta(); + + // tensor yTensorDup = yTensor; + tensor yTensorRef = tensor{test_n, test_c, test_h, test_w}; + + auto out_gpu_ref = handle.Write(yTensorRef.data); + + // Run softmax in a usual way (which is tested) and compare results + EXPECT_EQUAL(miopenSoftmaxForward_V2(&handle, + &alpha, + x_desc, + in_gpu.get(), + &beta, + &yTensorRef.desc, + out_gpu_ref.get(), + softmax_descriptor.GetAlgorithm(), + softmax_descriptor.GetMode()), + miopenStatusSuccess); + + yTensor.data = handle.Read(out_gpu, yTensor.data.size()); + yTensorRef.data = handle.Read(out_gpu_ref, yTensorRef.data.size()); + + double error = miopen::rms_range(yTensorRef.data, yTensor.data); + const double tolerance = 1e-3; + + EXPECT_TRUE(std::isfinite(error) && error <= tolerance) + << "Outputs do not match each other. Error:" << error; + } + + std::cerr << "Finished testing solution functions." << std::endl; + } + + void TestRunSolutionsBackward(Handle& handle, const std::vector& solutions) + { + std::cerr << "Testing solution functions..." << std::endl; + + miopenTensorDescriptor_t y_desc = &yTensor.desc; + miopenTensorDescriptor_t dy_desc = &dyTensor.desc; + miopenTensorDescriptor_t dx_desc = &dxTensor.desc; + + const unsigned int numTensors = 3; + + for(const auto& solution : solutions) + { + auto arguments = std::make_unique(numTensors); + + auto in1_gpu = handle.Write(yTensor.data); + auto in2_gpu = handle.Write(dyTensor.data); + auto out_gpu = handle.Write(dxTensor.data); + + miopenTensorArgumentId_t names[numTensors] = { + miopenTensorSoftmaxY, miopenTensorSoftmaxDY, miopenTensorSoftmaxDX}; + void* buffers[numTensors] = {in1_gpu.get(), in2_gpu.get(), out_gpu.get()}; + miopenTensorDescriptor_t descriptors[numTensors] = {y_desc, dy_desc, dx_desc}; + + for(auto i = 0; i < numTensors; ++i) + { + arguments[i].id = names[i]; + arguments[i].descriptor = &descriptors[i]; + arguments[i].buffer = buffers[i]; + } + + std::cerr << "Run a solution." 
<< std::endl; + EXPECT_EQUAL( + miopenRunSolution(&handle, solution, numTensors, arguments.get(), nullptr, 0), + miopenStatusSuccess); + + float alpha = softmax_descriptor.GetAlpha(); + float beta = softmax_descriptor.GetBeta(); + + // tensor yTensorDup = yTensor; + tensor dxTensorRef = tensor{test_n, test_c, test_h, test_w}; + + // this is dx + auto out_gpu_ref = handle.Write(dxTensorRef.data); + + // Run softmax in a usual way (which is tested) and compare results + EXPECT_EQUAL(miopenSoftmaxBackward_V2(&handle, + &alpha, + y_desc, + in1_gpu.get(), + dy_desc, + in2_gpu.get(), + &beta, + &dxTensorRef.desc, + out_gpu_ref.get(), + softmax_descriptor.GetAlgorithm(), + softmax_descriptor.GetMode()), + miopenStatusSuccess); + + yTensor.data = handle.Read(out_gpu, yTensor.data.size()); + dxTensorRef.data = handle.Read(out_gpu_ref, dxTensorRef.data.size()); + + double error = miopen::rms_range(dxTensorRef.data, yTensor.data); + const double tolerance = 1e-3; + + EXPECT_TRUE(std::isfinite(error) && error <= tolerance) + << "Outputs do not match each other. Error:" << error; + } + + std::cerr << "Finished testing solution functions." << std::endl; + } + + void Finalize() { EXPECT_EQUAL(miopenDestroyProblem(problem), miopenStatusSuccess); } + +private: + void Initialize() + { + softmax_descriptor.SetParams( + 1.0f, 0.0f, MIOPEN_SOFTMAX_ACCURATE, MIOPEN_SOFTMAX_MODE_CHANNEL); + + if(isForward) + { + xTensor = + tensor{test_n, test_c, test_h, test_w}.generate(tensor_elem_gen_integer{17}); + yTensor = tensor{test_n, test_c, test_h, test_w}; + + EXPECT_EQUAL(miopenCreateSoftmaxProblem( + &problem, &softmax_descriptor, miopenProblemDirectionForward), + miopenStatusSuccess); + } + else + { + yTensor = + tensor{test_n, test_c, test_h, test_w}.generate(tensor_elem_gen_integer{17}); + dyTensor = + tensor{test_n, test_c, test_h, test_w}.generate(tensor_elem_gen_integer{17}); + dxTensor = tensor{test_n, test_c, test_h, test_w}; + + EXPECT_EQUAL(miopenCreateSoftmaxProblem( + &problem, &softmax_descriptor, miopenProblemDirectionBackward), + miopenStatusSuccess); + } + + AddTensorDescriptors(); + } + +private: + tensor xTensor; + tensor yTensor; + + tensor dxTensor; + tensor dyTensor; + + SoftmaxDescriptor softmax_descriptor; + miopenProblem_t problem; + + bool isForward; + + const unsigned int test_n = 100; + const unsigned int test_c = 3; + const unsigned int test_h = 32; + const unsigned int test_w = 32; +}; + +TEST(TestSoftmaxFind20, softmaxForward) +{ + Handle& handle = get_handle(); + + SoftmaxFind20Test test(true); + + std::vector solutions = test.TestFindSolutions(handle); + test.TestSolutionAttributes(solutions); + + test.TestRunSolutionsForward(handle, solutions); + test.Finalize(); +} + +TEST(TestSoftmaxFind20, softmaxBackward) +{ + Handle& handle = get_handle(); + + SoftmaxFind20Test test(false); + + std::vector solutions = test.TestFindSolutions(handle); + test.TestSolutionAttributes(solutions); + + test.TestRunSolutionsBackward(handle, solutions); + test.Finalize(); +}
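Taken together, the new entry points follow the usual MIOpen object pattern: the client creates the opaque descriptor, fills it with miopenSetSoftmaxDescriptor, and can read the values back with miopenGetSoftmaxDescriptor. A minimal round-trip sketch using only the declarations added above (error checking omitted; note that this patch adds no matching destroy call for the descriptor):

#include <miopen/miopen.h>
#include <cstdio>

int main()
{
    miopenSoftmaxDescriptor_t softmax_desc;

    // Create the opaque descriptor object added by this patch.
    miopenCreateSoftmaxDescriptor(&softmax_desc);

    // alpha/beta scaling plus the pre-existing softmax algorithm and mode enums.
    miopenSetSoftmaxDescriptor(
        softmax_desc, 1.0f, 0.0f, MIOPEN_SOFTMAX_ACCURATE, MIOPEN_SOFTMAX_MODE_CHANNEL);

    // Read the values back; every out-parameter must be non-null.
    float alpha = 0.0f;
    float beta  = 0.0f;
    miopenSoftmaxAlgorithm_t algorithm;
    miopenSoftmaxMode_t mode;
    miopenGetSoftmaxDescriptor(softmax_desc, &alpha, &beta, &algorithm, &mode);

    std::printf("alpha=%f beta=%f algorithm=%d mode=%d\n",
                alpha,
                beta,
                static_cast<int>(algorithm),
                static_cast<int>(mode));

    // No destroy function for the softmax descriptor is added here, so nothing to free.
    return 0;
}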
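The forward Find 2.0 flow exercised by softmax_find20.cpp, condensed to the public C API. The handle, tensor descriptor, and buffer setup (miopenCreate, miopenCreateTensorDescriptor, miopenSet4dTensorDescriptor, miopenDestroySolution, hipMalloc/hipFree) and the 100x3x32x32 shape come from the existing MIOpen/HIP API and the test fixture rather than from this patch; the problem, find, and run calls are the ones added or exercised here. A sketch, with input upload and error checking omitted:

#include <miopen/miopen.h>
#include <hip/hip_runtime.h>
#include <vector>

int main()
{
    miopenHandle_t handle;
    miopenCreate(&handle);

    // NCHW problem size used by the gtest; purely illustrative.
    const int n = 100, c = 3, h = 32, w = 32;
    const size_t bytes = static_cast<size_t>(n) * c * h * w * sizeof(float);

    miopenTensorDescriptor_t x_desc, y_desc;
    miopenCreateTensorDescriptor(&x_desc);
    miopenCreateTensorDescriptor(&y_desc);
    miopenSet4dTensorDescriptor(x_desc, miopenFloat, n, c, h, w);
    miopenSet4dTensorDescriptor(y_desc, miopenFloat, n, c, h, w);

    void* x_dev = nullptr;
    void* y_dev = nullptr;
    hipMalloc(&x_dev, bytes); // a real client would upload input data here
    hipMalloc(&y_dev, bytes);

    // Describe the operation and wrap it in a Find 2.0 problem.
    miopenSoftmaxDescriptor_t softmax_desc;
    miopenCreateSoftmaxDescriptor(&softmax_desc);
    miopenSetSoftmaxDescriptor(
        softmax_desc, 1.0f, 0.0f, MIOPEN_SOFTMAX_ACCURATE, MIOPEN_SOFTMAX_MODE_CHANNEL);

    miopenProblem_t problem;
    miopenCreateSoftmaxProblem(&problem, softmax_desc, miopenProblemDirectionForward);
    miopenSetProblemTensorDescriptor(problem, miopenTensorSoftmaxX, x_desc);
    miopenSetProblemTensorDescriptor(problem, miopenTensorSoftmaxY, y_desc);

    // Ask for solutions; the find path above registers at most two softmax solvers.
    std::vector<miopenSolution_t> solutions(16);
    size_t found = 0;
    miopenFindSolutions(handle, problem, nullptr, solutions.data(), &found, solutions.size());
    solutions.resize(found);

    // Run the first solution with the new softmax tensor argument ids.
    // The gtest runs without a workspace; a robust client would query
    // miopenGetSolutionWorkspaceSize first.
    miopenTensorArgument_t args[2];
    args[0].id         = miopenTensorSoftmaxX;
    args[0].descriptor = &x_desc;
    args[0].buffer     = x_dev;
    args[1].id         = miopenTensorSoftmaxY;
    args[1].descriptor = &y_desc;
    args[1].buffer     = y_dev;

    if(found > 0)
        miopenRunSolution(handle, solutions[0], 2, args, nullptr, 0);

    for(size_t i = 0; i < found; ++i)
        miopenDestroySolution(solutions[i]);
    miopenDestroyProblem(problem);
    hipFree(x_dev);
    hipFree(y_dev);
    miopenDestroyTensorDescriptor(x_desc);
    miopenDestroyTensorDescriptor(y_desc);
    miopenDestroy(handle);
    return 0;
}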
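The to_json/from_json overloads added for SoftmaxDescriptor are presumably what lets a found softmax solution survive the generic solution save/load path (the comment about preserving the order of the OperatorDescriptor variant points the same way). A sketch of that flow; the miopenGetSolutionSize, miopenSaveSolution, and miopenLoadSolution signatures are assumed from the existing Find 2.0 API and should be checked against miopen.h:

#include <miopen/miopen.h>
#include <vector>

// 'solution' is assumed to come from miopenFindSolutions on a softmax problem;
// 'handle', 'args', and 'num_args' are set up as in the previous sketch.
void save_and_restore(miopenHandle_t handle,
                      miopenSolution_t solution,
                      miopenTensorArgument_t* args,
                      size_t num_args)
{
    // Serialize the solution; the softmax descriptor travels inside the blob
    // through the to_json() overload added above.
    size_t size = 0;
    miopenGetSolutionSize(solution, &size);
    std::vector<char> blob(size);
    miopenSaveSolution(solution, blob.data());

    // Later, possibly in another process: restore and run without a new find step.
    miopenSolution_t restored;
    miopenLoadSolution(&restored, blob.data(), blob.size());
    miopenRunSolution(handle, restored, num_args, args, nullptr, 0);
    miopenDestroySolution(restored);
}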