Softmax for find20 #2776

Merged: 38 commits, Mar 21, 2024
Commits
b985479  temporary commit (Feb 5, 2024)
2611905  Merge branch 'develop' into vgolovko/softmax_for_find20 (Feb 5, 2024)
07295fd  initial commit (Feb 8, 2024)
4ffa3ca  temporary commit. Some functionality added (Feb 14, 2024)
343db12  Merge branch 'develop' into vgolovko/softmax_for_find20 (Feb 14, 2024)
08fe02e  compilation fixes (Feb 19, 2024)
a500ffa  A basic test has been added (Feb 26, 2024)
60eb817  softmaxForward test fixes (Feb 28, 2024)
22d0557  SoftmaxBackward test + format clang run (Feb 29, 2024)
b5c5f39  tidy clang fixes (Feb 29, 2024)
4b348c2  some fixes (Feb 29, 2024)
3d15064  comment removed (Feb 29, 2024)
e278107  Merge branch 'develop' into vgolovko/softmax_for_find20 (Feb 29, 2024)
800d0bd  fix clang tidy (Mar 1, 2024)
f96c834  Merge branch 'vgolovko/softmax_for_find20' of https://github.com/ROCm… (Mar 1, 2024)
9b05596  check changed (Mar 1, 2024)
9fe01b0  Merge branch 'develop' into vgolovko/softmax_for_find20 (Mar 4, 2024)
e0d5e57  Test's ctor introduced instead of direct Initialize call (Mar 4, 2024)
8a566a5  tidy-checks fix (Mar 5, 2024)
96bb3ee  Merge branch 'develop' into vgolovko/softmax_for_find20 (Mar 5, 2024)
d5e5fa1  some comments removed (Mar 6, 2024)
2169f9e  Merge branch 'develop' into vgolovko/softmax_for_find20 (Mar 7, 2024)
e8b2edc  minor code formatting (Mar 7, 2024)
d407f53  Merge branch 'develop' into vgolovko/softmax_for_find20 (Mar 11, 2024)
2694e4b  Merge branch 'develop' into vgolovko/softmax_for_find20 (Mar 12, 2024)
b3db167  Merge branch 'develop' into vgolovko/softmax_for_find20 (Mar 13, 2024)
a9984e2  minor change (Mar 14, 2024)
33b3f0a  Read the Docs fail check (Mar 14, 2024)
3c7fb96  format run on sources (Mar 14, 2024)
88dd97c  Merge branch 'develop' into vgolovko/softmax_for_find20 (Mar 15, 2024)
3754f93  changes after code review (Mar 15, 2024)
9384e0f  clang format run (Mar 15, 2024)
ad2d52e  alpha and beta in network config restored for now (Mar 15, 2024)
18cb331  Merge branch 'develop' into vgolovko/softmax_for_find20 (Mar 20, 2024)
5149aa0  definitions added (Mar 20, 2024)
7990828  Put softmax Find 2.0 createproblem denition under beta api (Mar 20, 2024)
dba6f95  Merge branch 'develop' into vgolovko/softmax_for_find20 (Mar 21, 2024)
43abd49  Merge branch 'develop' into vgolovko/softmax_for_find20 (Mar 21, 2024)
22 changes: 22 additions & 0 deletions include/miopen/miopen.h
@@ -346,6 +346,11 @@ MIOPEN_DECLARE_OBJECT(miopenDropoutDescriptor);
*/
MIOPEN_DECLARE_OBJECT(miopenReduceTensorDescriptor);

/*! @ingroup softmax
* @brief Creates the miopenSoftmaxDescriptor_t type
*/
MIOPEN_DECLARE_OBJECT(miopenSoftmaxDescriptor);

/*! @ingroup tensor
* @enum miopenDataType_t
* MIOpen floating point datatypes. Both 32-bit and 16-bit floats are supported in MIOpen.
@@ -5314,6 +5319,11 @@ typedef enum
miopenTensorBiasY = 9,
miopenTensorBias = 10,
#endif
miopenTensorSoftmaxX = 11,
miopenTensorSoftmaxY = 12,
miopenTensorSoftmaxDX = 13,
miopenTensorSoftmaxDY = 14,

} miopenTensorArgumentId_t;

/*! @enum miopenTensorArgumentId_t
@@ -5336,6 +5346,18 @@ MIOPEN_EXPORT miopenStatus_t miopenCreateConvProblem(miopenProblem_t* problem,
miopenConvolutionDescriptor_t operatorDesc,
miopenProblemDirection_t direction);

/*! @brief Initializes a problem object describing a softmax operation.
*
* @param problem Pointer to the problem to initialize
* @param operatorDesc Descriptor of the operator to be used
* @param direction Direction of the operation
* @return miopenStatus_t
*/

MIOPEN_EXPORT miopenStatus_t miopenCreateSoftmaxProblem(miopenProblem_t* problem,
miopenSoftmaxDescriptor_t operatorDesc,
miopenProblemDirection_t direction);

/*! @brief Destroys a problem object.
*
* @param problem Problem to destroy
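
To put the new entry point in context, here is a minimal sketch of the Find 2.0 flow for softmax as this PR wires it up. The descriptor setup calls (miopenCreateSoftmaxDescriptor / miopenSetSoftmaxDescriptor) are assumptions following MIOpen's usual descriptor conventions and are not shown in this hunk; handle, the tensor descriptors, and the device buffers are assumed to exist, and error checking is elided.

    // Sketch only: the descriptor setup calls below are assumed, not shown in this diff.
    miopenSoftmaxDescriptor_t softmax_desc;
    miopenCreateSoftmaxDescriptor(&softmax_desc);                       // assumed API
    miopenSetSoftmaxDescriptor(softmax_desc, 1.0f, 0.0f,                // assumed API
                               MIOPEN_SOFTMAX_ACCURATE, MIOPEN_SOFTMAX_MODE_CHANNEL);

    miopenProblem_t problem;
    miopenCreateSoftmaxProblem(&problem, softmax_desc, miopenProblemDirectionForward);

    // Describe the tensors using the argument IDs added by this PR.
    miopenSetProblemTensorDescriptor(problem, miopenTensorSoftmaxX, x_desc);
    miopenSetProblemTensorDescriptor(problem, miopenTensorSoftmaxY, y_desc);

    // Find the best applicable solution and run it.
    miopenSolution_t solution;
    size_t found = 0;
    miopenFindSolutions(handle, problem, nullptr, &solution, &found, 1);

    miopenTensorArgument_t args[2] = {{miopenTensorSoftmaxX, nullptr, x_dev},
                                      {miopenTensorSoftmaxY, nullptr, y_dev}};
    if(found > 0)
        miopenRunSolution(handle, solution, 2, args, nullptr, 0);
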
13 changes: 13 additions & 0 deletions src/api/find2_0_commons.cpp
@@ -89,6 +89,14 @@ miopenStatus_t miopenCreateBiasProblem(miopenProblem_t* problem, miopenProblemDi
});
}

miopenStatus_t miopenCreateSoftmaxProblem(miopenProblem_t* problem,
miopenSoftmaxDescriptor_t operatorDesc,
miopenProblemDirection_t direction)
{
MIOPEN_LOG_FUNCTION(problem, direction);
return MakeProblem(problem, operatorDesc, direction);
}

miopenStatus_t miopenFuseProblems(miopenProblem_t problem1, miopenProblem_t problem2)
{
MIOPEN_LOG_FUNCTION(problem1, problem2);
@@ -263,6 +271,11 @@ inline std::ostream& operator<<(std::ostream& stream, const miopenTensorArgument
case miopenTensorBias: stream << "Bias"; break;
case miopenTensorBiasX: stream << "BiasX"; break;
case miopenTensorBiasY: stream << "BiasY"; break;
case miopenTensorSoftmaxX: stream << "SoftmaxX"; break;
case miopenTensorSoftmaxY: stream << "SoftmaxY"; break;
case miopenTensorSoftmaxDX: stream << "SoftmaxDX"; break;
case miopenTensorSoftmaxDY: stream << "SoftmaxDY"; break;

case miopenTensorArgumentIdInvalid: stream << "Invalid"; break;
}

14 changes: 13 additions & 1 deletion src/include/miopen/problem.hpp
@@ -31,6 +31,7 @@
#include <miopen/activ.hpp>
#include <miopen/allocator.hpp>
#include <miopen/convolution.hpp>
#include <miopen/softmax.hpp>
#include <miopen/object.hpp>
#include <miopen/solver_id.hpp>
#include <miopen/tensor.hpp>
@@ -59,13 +60,17 @@ namespace conv {
struct ProblemDescription;
} // namespace conv

namespace softmax {
struct ProblemDescription;
} // namespace softmax

struct BiasDescriptor
{
};

// The order of types is important for deserialization and should be preserved between releases.
using OperatorDescriptor =
-    boost::variant<ConvolutionDescriptor, ActivationDescriptor, BiasDescriptor>;
+    boost::variant<ConvolutionDescriptor, ActivationDescriptor, BiasDescriptor, SoftmaxDescriptor>;

struct Problem
{
@@ -99,6 +104,7 @@ struct Problem

conv::ProblemDescription AsConvolution() const;
activ::ProblemDescription AsActivation() const;
softmax::ProblemDescription AsSoftmax() const;

[[nodiscard]] miopenTensorArgumentId_t GetInputId() const;
[[nodiscard]] miopenTensorArgumentId_t GetOutputId() const;
@@ -155,6 +161,12 @@ struct Problem
const Buffers& buffers,
const ConvolutionDescriptor& conv_desc) const;

std::vector<Solution> FindSolutionsImpl(Handle& handle,
const FindOptions& options,
std::size_t max_solutions,
const Buffers& buffers,
const SoftmaxDescriptor& softmax_desc) const;

void LogDriverCommand(const ConvolutionDescriptor& conv_desc) const;
void LogDriverCommand(const ActivationDescriptor& descriptor) const;
};
38 changes: 38 additions & 0 deletions src/include/miopen/softmax.hpp
@@ -28,12 +28,47 @@

#include <miopen/common.hpp>
#include <miopen/miopen.h>
#include <miopen/object.hpp>

#include <nlohmann/json_fwd.hpp>

namespace miopen {

struct Handle;
struct TensorDescriptor;

struct SoftmaxDescriptor : miopenSoftmaxDescriptor
{
SoftmaxDescriptor() {}

float GetAlpha() const { return alpha; }
float GetBeta() const { return beta; }
miopenSoftmaxAlgorithm_t GetAlgorithm() const { return algorithm; }
miopenSoftmaxMode_t GetMode() const { return mode; }

void SetParams(float alpha_,
float beta_,
miopenSoftmaxAlgorithm_t algorithm_,
miopenSoftmaxMode_t mode_)
{
alpha = alpha_;
beta = beta_;
algorithm = algorithm_;
mode = mode_;
}

friend std::ostream& operator<<(std::ostream& stream, const SoftmaxDescriptor& x);

friend void to_json(nlohmann::json& json, const SoftmaxDescriptor& descriptor);
friend void from_json(const nlohmann::json& json, SoftmaxDescriptor& descriptor);

private:
float alpha;
float beta;
[Review comment from a Contributor on lines +66 to +67]
I would rather store them as bool. This way it would later be harder to accidentally use them instead of the actual values. On subsequent runs the float values here would be incorrect anyway.

Suggested change:
-    float alpha;
-    float beta;
+    bool has_alpha;
+    bool has_beta;
miopenSoftmaxAlgorithm_t algorithm;
miopenSoftmaxMode_t mode;
};

miopenStatus_t SoftmaxForward(Handle& handle,
const void* alpha,
const void* beta,
@@ -62,4 +97,7 @@ miopenStatus_t SoftmaxBackward(Handle& handle,
int dx_offset = 0);

} // namespace miopen

MIOPEN_DEFINE_OBJECT(miopenSoftmaxDescriptor, miopen::SoftmaxDescriptor);

#endif // _MIOPEN_SOFTMAX_HPP_
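
As background for the alpha/beta parameters above: MIOpen's softmax entry points follow the usual y = alpha * softmax(x) + beta * y blending convention. A reference-only sketch of the MIOPEN_SOFTMAX_ACCURATE algorithm over a single channel vector (this is just the math, not MIOpen's kernel) also shows why beta matters for performance, a point raised in a review thread further down:

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Reference only: accurate softmax with alpha/beta blending over one vector.
    std::vector<float> SoftmaxAccurateRef(const std::vector<float>& x,
                                          float alpha,
                                          float beta,
                                          const std::vector<float>& y_prev)
    {
        const float max_x = *std::max_element(x.begin(), x.end()); // stability shift
        std::vector<float> y(x.size());
        float sum = 0.0f;
        for(std::size_t i = 0; i < x.size(); ++i)
        {
            y[i] = std::exp(x[i] - max_x);
            sum += y[i];
        }
        for(std::size_t i = 0; i < x.size(); ++i)
            y[i] = alpha * (y[i] / sum) + beta * y_prev[i]; // beta != 0 reads prior y
        return y;
    }
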
6 changes: 3 additions & 3 deletions src/include/miopen/softmax/invoke_params.hpp
@@ -74,9 +74,9 @@ struct InvokeParams : public miopen::InvokeParams
Data_t dx_,
miopenSoftmaxAlgorithm_t algorithm_,
miopenSoftmaxMode_t mode_,
-                 int y_offset_,
-                 int dy_offset_,
-                 int dx_offset_)
+                 int y_offset_ = 0,
+                 int dy_offset_ = 0,
+                 int dx_offset_ = 0)
: algorithm(algorithm_),
mode(mode_),

6 changes: 6 additions & 0 deletions src/include/miopen/solution.hpp
@@ -111,6 +111,12 @@ struct Solution : miopenSolution
std::size_t workspace_size,
const ConvolutionDescriptor& conv_desc);

void RunImpl(Handle& handle,
const std::unordered_map<miopenTensorArgumentId_t, RunInput>& inputs,
Data_t /*workspace*/,
std::size_t /*workspace_size*/,
const SoftmaxDescriptor& softmax_desc);

void RunImpl(Handle& handle,
const std::unordered_map<miopenTensorArgumentId_t, RunInput>& inputs,
Data_t workspace,
3 changes: 2 additions & 1 deletion src/include/miopen/solver_id.hpp
@@ -54,7 +54,8 @@ enum class Primitive
Pooling,
Normalization,
Reduce,
-    Cat
+    Cat,
+    Softmax
};

struct MIOPEN_EXPORT Id
110 changes: 107 additions & 3 deletions src/problem.cpp
@@ -31,6 +31,8 @@
#include <miopen/conv/problem_description.hpp>
#include <miopen/convolution.hpp>
#include <miopen/conv_algo_name.hpp>
#include <miopen/softmax/problem_description.hpp>
#include <miopen/softmax/solvers.hpp>
#include <miopen/datatype.hpp>
#include <miopen/execution_context.hpp>
#include <miopen/fusion_plan.hpp>
@@ -175,6 +177,9 @@ Problem::FindSolutions(Handle& handle, const FindOptions& options, std::size_t m
[&](const ConvolutionDescriptor& op_desc) {
return FindSolutionsImpl(handle, options, max_solutions, buffers, op_desc);
},
[&](const SoftmaxDescriptor& op_desc) {
return FindSolutionsImpl(handle, options, max_solutions, buffers, op_desc);
},
[&](const ActivationDescriptor& /*op_desc*/) -> std::vector<Solution> {
MIOPEN_THROW(miopenStatusNotImplemented);
},
@@ -277,6 +282,33 @@ activ::ProblemDescription Problem::AsActivation() const
}
}

softmax::ProblemDescription Problem::AsSoftmax() const
{
const auto& softmax_desc = boost::get<SoftmaxDescriptor>(operator_descriptor);

float alpha = softmax_desc.GetAlpha();
float beta = softmax_desc.GetBeta();

softmax::ProblemDescription problem_description =
(GetDirection() == miopenProblemDirectionForward)
? softmax::ProblemDescription(
&alpha,
&beta,
[Review thread on lines +295 to +296]

Contributor: [Performance] @Vsevolod1983 See #2671 (review)

Vsevolod1983 (author, Mar 15, 2024): I removed alpha and beta from the network config in the softmax primitive. Is that enough for this PR?

Contributor: @Vsevolod1983 I would begin with removing alpha/beta from the PD (and then fix the build errors by forwarding alpha/beta via InvokeParams). However, I think we need to discuss #2671 (review) first. /cc @CAHEK7 @DrizztDoUrden

Vsevolod1983 (author): Should we do this kind of refactoring as a separate ticket / PR?

Contributor: We need to. a/b actually affects kernel compilation, like it does for convolutions. There are some optimizations with default a/b values, so we need at least a "default a/b" flag in the problem description and in the network config.

Vsevolod1983 (author, Mar 15, 2024): I restored alpha and beta in the network config in this PR until we decide what exactly we want to do next.

Contributor: [Informative] @Vsevolod1983 @CAHEK7 It's interesting that the "default alpha" optimization won't give us much, but the "default beta" optimization removes one global memory read, which may be substantial (./src/kernels/MIOpenSoftmax.cl).

Contributor: @atamazov Right now "default alpha" gives us a lot, since it enables the attention softmax solver, which is faster and does not support a/b in terms of the normal softmax operation. Both values are used in the IsApplicable method and cannot simply be removed.
GetTensorDescriptorChecked(miopenTensorSoftmaxX, "miopenTensorSoftmaxX"),
GetTensorDescriptorChecked(miopenTensorSoftmaxY, "miopenTensorSoftmaxY"),
softmax_desc.GetAlgorithm(),
softmax_desc.GetMode())
: softmax::ProblemDescription(
&alpha,
&beta,
GetTensorDescriptorChecked(miopenTensorSoftmaxY, "miopenTensorSoftmaxY"),
GetTensorDescriptorChecked(miopenTensorSoftmaxDY, "miopenTensorSoftmaxDY"),
GetTensorDescriptorChecked(miopenTensorSoftmaxDX, "miopenTensorSoftmaxDX"),
softmax_desc.GetAlgorithm(),
softmax_desc.GetMode());
return problem_description;
}
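
Following the thread above, one hypothetical shape for the proposed "default a/b" flag (an illustration of the suggestion only, not code from this PR): the network config would record whether alpha and beta take their default values, since that is what gates kernel specialization, rather than the raw floats.

    // Hypothetical sketch of a "default a/b"-aware config key; not part of this PR.
    #include <string>

    struct SoftmaxConfigKey
    {
        bool default_alpha; // alpha == 1.0f; enables the attention softmax solver
        bool default_beta;  // beta == 0.0f; lets kernels skip the global read of y
        int algorithm;      // miopenSoftmaxAlgorithm_t value
        int mode;           // miopenSoftmaxMode_t value

        std::string Serialize() const
        {
            return "sfmx-a" + std::to_string(default_alpha) + "-b" +
                   std::to_string(default_beta) + "-alg" + std::to_string(algorithm) +
                   "-m" + std::to_string(mode);
        }
    };
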

std::vector<Solution> Problem::FindSolutionsImpl(Handle& handle,
const FindOptions& options,
std::size_t max_solutions,
@@ -431,6 +463,60 @@ std::vector<Solution> Problem::FindSolutionsImpl(Handle& handle,
return ret;
}

std::vector<Solution>
Problem::FindSolutionsImpl(Handle& handle,
[[maybe_unused]] const FindOptions& options,
std::size_t max_solutions,
[[maybe_unused]] const Buffers& buffers,
[[maybe_unused]] const SoftmaxDescriptor& softmax_desc) const
{
auto ret = std::vector<Solution>();

auto ctx = ExecutionContext{&handle};

const softmax::ProblemDescription problem_description = AsSoftmax();

const auto algo = AlgorithmName{"Softmax"};

solver::softmax::AttnSoftmax attnSoftmaxSolver;
solver::softmax::Softmax regularSoftmaxSolver;

std::vector<solver::softmax::SoftmaxSolver*> solvers;

solvers.push_back(&attnSoftmaxSolver);
solvers.push_back(&regularSoftmaxSolver);
[Review thread on solver initialization]

atamazov (Mar 14, 2024): [Can be postponed] I think we can statically initialize the solvers.

Contributor: @DrizztDoUrden Can you please suggest to @Vsevolod1983 the technique for doing this? Thanks.

Contributor: The most basic primitive for this is struct SolverContainer.

Contributor: This is postponable, in contrast to the a/b problem.

for(auto solver : solvers)
{
if(!solver->IsApplicable(ctx, problem_description))
{
continue;
}

auto solution = Solution();

/// \todo time measurement will be done later. For now we set less time for attention
/// softmax and slightly bigger for regular
solution.SetTime(solver == &attnSoftmaxSolver ? 0.0f : 1.0f);
solution.SetWorkspaceSize(solver->GetWorkspaceSize(ctx, problem_description));
solution.SetSolver(solver->SolverDbId());
solution.SetProblem({*this});

MIOPEN_LOG_I("Found solution: " << solution.GetSolver().ToString() << " , "
<< solution.GetWorkspaceSize() << ", "
<< solution.GetTime());

ret.emplace_back(std::move(solution));

if(ret.size() >= max_solutions)
{
break;
}
}

return ret;
}

void Problem::ValidateGroupCount(const TensorDescriptor& xDesc,
const TensorDescriptor& wDesc,
const ConvolutionDescriptor& conv)
@@ -456,7 +542,8 @@ void Problem::LogDriverCommand() const
const auto log_function =
boost::hof::match([&](const ConvolutionDescriptor& op_desc) { LogDriverCommand(op_desc); },
[&](const ActivationDescriptor& op_desc) { LogDriverCommand(op_desc); },
-                      [&](const BiasDescriptor&) {});
+                      [&](const BiasDescriptor&) {},
+                      [&](const SoftmaxDescriptor&) {});

boost::apply_visitor(log_function, operator_descriptor);
}
@@ -576,6 +663,7 @@ void Problem::CalculateOutput()
[&](const ActivationDescriptor&) {
RegisterTensorDescriptor(GetOutputId(), GetInput());
},
[&](const SoftmaxDescriptor&) { RegisterTensorDescriptor(GetOutputId(), GetInput()); },
[&](const BiasDescriptor&) { RegisterTensorDescriptor(GetOutputId(), GetInput()); }),
operator_descriptor);
}
@@ -585,7 +673,8 @@ miopenTensorArgumentId_t Problem::GetInputId() const
return boost::apply_visitor(
boost::hof::match([](const ConvolutionDescriptor&) { return miopenTensorConvolutionX; },
[](const ActivationDescriptor&) { return miopenTensorActivationX; },
-        [](const BiasDescriptor&) { return miopenTensorBiasX; }),
+        [](const BiasDescriptor&) { return miopenTensorBiasX; },
+        [](const SoftmaxDescriptor&) { return miopenTensorSoftmaxX; }),
operator_descriptor);
}

@@ -594,7 +683,8 @@ miopenTensorArgumentId_t Problem::GetOutputId() const
return boost::apply_visitor(
boost::hof::match([](const ConvolutionDescriptor&) { return miopenTensorConvolutionY; },
[](const ActivationDescriptor&) { return miopenTensorActivationY; },
-        [](const BiasDescriptor&) { return miopenTensorBiasY; }),
+        [](const BiasDescriptor&) { return miopenTensorBiasY; },
+        [](const SoftmaxDescriptor&) { return miopenTensorSoftmaxY; }),
operator_descriptor);
}

@@ -679,7 +769,14 @@ void FusedProblem::AddProblemToPlan(FusionPlanDescriptor& plan, const Problem& p
[&](const BiasDescriptor&) {
plan.AddOp(std::make_shared<BiasFusionOpDescriptor>(
problem.GetTensorDescriptorChecked(miopenTensorBias, "miopenTensorBias")));
},
[&](const SoftmaxDescriptor&) {
// Not implemented
assert(false);
MIOPEN_THROW(miopenStatusNotImplemented,
"Softmax is not implemented for FusedProblem");
}),

problem.operator_descriptor);
}

@@ -741,7 +838,14 @@ fusion::FusionInvokeParams FusedProblem::MakeInvokeParams(
const auto bias_ptr = buffers.at(miopenTensorBias);
operator_args.params.emplace_back(
std::make_unique<miopen::fusion::BiasOpInvokeParam>(bias_ptr));
},
[&](const SoftmaxDescriptor&) {
// Not implemented
assert(false);
MIOPEN_THROW(miopenStatusNotImplemented,
"Softmax is not implemented for FusedProblem");
}),

problem.operator_descriptor);
}
