diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index f04b204bd5..1928a31e52 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -346,6 +346,11 @@ MIOPEN_DECLARE_OBJECT(miopenDropoutDescriptor); */ MIOPEN_DECLARE_OBJECT(miopenReduceTensorDescriptor); +/*! @ingroup softmax + * @brief Creates the miopenSoftmaxDescriptor_t type + */ +MIOPEN_DECLARE_OBJECT(miopenSoftmaxDescriptor); + /*! @ingroup tensor * @enum miopenDataType_t * MIOpen floating point datatypes. Both 32-bit and 16-bit floats are supported in MIOpen. @@ -5313,7 +5318,12 @@ typedef enum miopenTensorBiasX = 8, miopenTensorBiasY = 9, miopenTensorBias = 10, + miopenTensorSoftmaxX = 11, + miopenTensorSoftmaxY = 12, + miopenTensorSoftmaxDX = 13, + miopenTensorSoftmaxDY = 14, #endif + } miopenTensorArgumentId_t; /*! @enum miopenTensorArgumentId_t @@ -5336,6 +5346,48 @@ MIOPEN_EXPORT miopenStatus_t miopenCreateConvProblem(miopenProblem_t* problem, miopenConvolutionDescriptor_t operatorDesc, miopenProblemDirection_t direction); +/*! @brief Creates the Softmax descriptor object + * + * @param softmaxDesc Pointer to an softmax descriptor type + * @return miopenStatus_t + */ + +MIOPEN_EXPORT miopenStatus_t miopenCreateSoftmaxDescriptor(miopenSoftmaxDescriptor_t* softmaxDesc); + +/*! @brief Sets the softmax descriptor details + * + * Sets all of the descriptor details for the softmax layer + * + * @param softmaxDesc Pointer to a softmax layer descriptor + * @param alpha Softmax alpha parameter + * @param beta Softmax beta parameter + * @param algorithm Softmax algorithm + * @param mode Softmax mode + * @return miopenStatus_t + */ +MIOPEN_EXPORT miopenStatus_t miopenSetSoftmaxDescriptor(miopenSoftmaxDescriptor_t softmaxDesc, + float alpha, + float beta, + miopenSoftmaxAlgorithm_t algorithm, + miopenSoftmaxMode_t mode); + +/*! @brief Gets the softmax layer descriptor details + * + * Retrieves all of the descriptor details for the softmax layer. + * + * @param softmaxDesc Pointer to a softmax layer descriptor (input) + * @param alpha Softmax alpha parameter (output) + * @param beta Softmax beta parameter (output) + * @param algorithm Softmax algorithm (output) + * @param mode Softmax mode (output) + * @return miopenStatus_t + */ +MIOPEN_EXPORT miopenStatus_t miopenGetSoftmaxDescriptor(const miopenSoftmaxDescriptor_t softmaxDesc, + float* alpha, + float* beta, + miopenSoftmaxAlgorithm_t* algorithm, + miopenSoftmaxMode_t* mode); + /*! @brief Destroys a problem object. * * @param problem Problem to destroy @@ -5596,6 +5648,18 @@ MIOPEN_EXPORT miopenStatus_t miopenFuseProblems(miopenProblem_t problem1, miopen MIOPEN_EXPORT miopenStatus_t miopenCreateBiasProblem(miopenProblem_t* problem, miopenProblemDirection_t direction); +/*! @brief Initializes a problem object describing a softmax operation. 
+ * + * @param problem Pointer to the problem to initialize + * @param operatorDesc Descriptor of the operator to be used + * @param direction Direction of the operation + * @return miopenStatus_t + */ + +MIOPEN_EXPORT miopenStatus_t miopenCreateSoftmaxProblem(miopenProblem_t* problem, + miopenSoftmaxDescriptor_t operatorDesc, + miopenProblemDirection_t direction); + #endif /** @} */ diff --git a/src/api/find2_0_commons.cpp b/src/api/find2_0_commons.cpp index ce203197a5..e9bd5e51f2 100644 --- a/src/api/find2_0_commons.cpp +++ b/src/api/find2_0_commons.cpp @@ -89,6 +89,14 @@ miopenStatus_t miopenCreateBiasProblem(miopenProblem_t* problem, miopenProblemDi }); } +miopenStatus_t miopenCreateSoftmaxProblem(miopenProblem_t* problem, + miopenSoftmaxDescriptor_t operatorDesc, + miopenProblemDirection_t direction) +{ + MIOPEN_LOG_FUNCTION(problem, direction); + return MakeProblem(problem, operatorDesc, direction); +} + miopenStatus_t miopenFuseProblems(miopenProblem_t problem1, miopenProblem_t problem2) { MIOPEN_LOG_FUNCTION(problem1, problem2); @@ -263,6 +271,11 @@ inline std::ostream& operator<<(std::ostream& stream, const miopenTensorArgument case miopenTensorBias: stream << "Bias"; break; case miopenTensorBiasX: stream << "BiasX"; break; case miopenTensorBiasY: stream << "BiasY"; break; + case miopenTensorSoftmaxX: stream << "SoftmaxX"; break; + case miopenTensorSoftmaxY: stream << "SoftmaxY"; break; + case miopenTensorSoftmaxDX: stream << "SoftmaxDX"; break; + case miopenTensorSoftmaxDY: stream << "SoftmaxDY"; break; + case miopenTensorArgumentIdInvalid: stream << "Invalid"; break; } diff --git a/src/include/miopen/problem.hpp b/src/include/miopen/problem.hpp index 2900008785..d95ea96cc9 100644 --- a/src/include/miopen/problem.hpp +++ b/src/include/miopen/problem.hpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -59,13 +60,17 @@ namespace conv { struct ProblemDescription; } // namespace conv +namespace softmax { +struct ProblemDescription; +} // namespace softmax + struct BiasDescriptor { }; // The order of types is important for deserialization and should be preserved between releases. 
using OperatorDescriptor = - boost::variant; + boost::variant; struct Problem { @@ -99,6 +104,7 @@ struct Problem conv::ProblemDescription AsConvolution() const; activ::ProblemDescription AsActivation() const; + softmax::ProblemDescription AsSoftmax() const; [[nodiscard]] miopenTensorArgumentId_t GetInputId() const; [[nodiscard]] miopenTensorArgumentId_t GetOutputId() const; @@ -155,6 +161,12 @@ struct Problem const Buffers& buffers, const ConvolutionDescriptor& conv_desc) const; + std::vector FindSolutionsImpl(Handle& handle, + const FindOptions& options, + std::size_t max_solutions, + const Buffers& buffers, + const SoftmaxDescriptor& softmax_desc) const; + void LogDriverCommand(const ConvolutionDescriptor& conv_desc) const; void LogDriverCommand(const ActivationDescriptor& descriptor) const; }; diff --git a/src/include/miopen/softmax.hpp b/src/include/miopen/softmax.hpp index b4f8909908..0ed0f300be 100644 --- a/src/include/miopen/softmax.hpp +++ b/src/include/miopen/softmax.hpp @@ -28,12 +28,47 @@ #include #include +#include + +#include namespace miopen { struct Handle; struct TensorDescriptor; +struct SoftmaxDescriptor : miopenSoftmaxDescriptor +{ + SoftmaxDescriptor() {} + + float GetAlpha() const { return alpha; } + float GetBeta() const { return beta; } + miopenSoftmaxAlgorithm_t GetAlgorithm() const { return algorithm; } + miopenSoftmaxMode_t GetMode() const { return mode; } + + void SetParams(float alpha_, + float beta_, + miopenSoftmaxAlgorithm_t algorithm_, + miopenSoftmaxMode_t mode_) + { + alpha = alpha_; + beta = beta_; + algorithm = algorithm_; + mode = mode_; + } + + friend std::ostream& operator<<(std::ostream& stream, const SoftmaxDescriptor& x); + + friend void to_json(nlohmann::json& json, const SoftmaxDescriptor& descriptor); + friend void from_json(const nlohmann::json& json, SoftmaxDescriptor& descriptor); + +private: + float alpha; + float beta; + miopenSoftmaxAlgorithm_t algorithm; + miopenSoftmaxMode_t mode; +}; + miopenStatus_t SoftmaxForward(Handle& handle, const void* alpha, const void* beta, @@ -62,4 +97,7 @@ miopenStatus_t SoftmaxBackward(Handle& handle, int dx_offset = 0); } // namespace miopen + +MIOPEN_DEFINE_OBJECT(miopenSoftmaxDescriptor, miopen::SoftmaxDescriptor); + #endif // _MIOPEN_SOFTMAX_HPP_ diff --git a/src/include/miopen/softmax/invoke_params.hpp b/src/include/miopen/softmax/invoke_params.hpp index c2792929ab..f2d6240e98 100644 --- a/src/include/miopen/softmax/invoke_params.hpp +++ b/src/include/miopen/softmax/invoke_params.hpp @@ -74,9 +74,9 @@ struct InvokeParams : public miopen::InvokeParams Data_t dx_, miopenSoftmaxAlgorithm_t algorithm_, miopenSoftmaxMode_t mode_, - int y_offset_, - int dy_offset_, - int dx_offset_) + int y_offset_ = 0, + int dy_offset_ = 0, + int dx_offset_ = 0) : algorithm(algorithm_), mode(mode_), diff --git a/src/include/miopen/solution.hpp b/src/include/miopen/solution.hpp index 4fab925bf2..4f3643b51b 100644 --- a/src/include/miopen/solution.hpp +++ b/src/include/miopen/solution.hpp @@ -111,6 +111,12 @@ struct Solution : miopenSolution std::size_t workspace_size, const ConvolutionDescriptor& conv_desc); + void RunImpl(Handle& handle, + const std::unordered_map& inputs, + Data_t /*workspace*/, + std::size_t /*workspace_size*/, + const SoftmaxDescriptor& softmax_desc); + void RunImpl(Handle& handle, const std::unordered_map& inputs, Data_t workspace, diff --git a/src/include/miopen/solver_id.hpp b/src/include/miopen/solver_id.hpp index dbf086030e..09e16be6a1 100644 --- a/src/include/miopen/solver_id.hpp +++ 
b/src/include/miopen/solver_id.hpp @@ -54,7 +54,8 @@ enum class Primitive Pooling, Normalization, Reduce, - Cat + Cat, + Softmax }; struct MIOPEN_EXPORT Id diff --git a/src/problem.cpp b/src/problem.cpp index 2ea839391c..0b8dbe478f 100644 --- a/src/problem.cpp +++ b/src/problem.cpp @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include #include #include @@ -175,6 +177,9 @@ Problem::FindSolutions(Handle& handle, const FindOptions& options, std::size_t m [&](const ConvolutionDescriptor& op_desc) { return FindSolutionsImpl(handle, options, max_solutions, buffers, op_desc); }, + [&](const SoftmaxDescriptor& op_desc) { + return FindSolutionsImpl(handle, options, max_solutions, buffers, op_desc); + }, [&](const ActivationDescriptor& /*op_desc*/) -> std::vector { MIOPEN_THROW(miopenStatusNotImplemented); }, @@ -277,6 +282,33 @@ activ::ProblemDescription Problem::AsActivation() const } } +softmax::ProblemDescription Problem::AsSoftmax() const +{ + const auto& softmax_desc = boost::get(operator_descriptor); + + float alpha = softmax_desc.GetAlpha(); + float beta = softmax_desc.GetBeta(); + + softmax::ProblemDescription problem_description = + (GetDirection() == miopenProblemDirectionForward) + ? softmax::ProblemDescription( + &alpha, + &beta, + GetTensorDescriptorChecked(miopenTensorSoftmaxX, "miopenTensorSoftmaxX"), + GetTensorDescriptorChecked(miopenTensorSoftmaxY, "miopenTensorSoftmaxY"), + softmax_desc.GetAlgorithm(), + softmax_desc.GetMode()) + : softmax::ProblemDescription( + &alpha, + &beta, + GetTensorDescriptorChecked(miopenTensorSoftmaxY, "miopenTensorSoftmaxY"), + GetTensorDescriptorChecked(miopenTensorSoftmaxDY, "miopenTensorSoftmaxDY"), + GetTensorDescriptorChecked(miopenTensorSoftmaxDX, "miopenTensorSoftmaxDX"), + softmax_desc.GetAlgorithm(), + softmax_desc.GetMode()); + return problem_description; +} + std::vector Problem::FindSolutionsImpl(Handle& handle, const FindOptions& options, std::size_t max_solutions, @@ -431,6 +463,61 @@ std::vector Problem::FindSolutionsImpl(Handle& handle, return ret; } +std::vector +Problem::FindSolutionsImpl(Handle& handle, + [[maybe_unused]] const FindOptions& options, + std::size_t max_solutions, + [[maybe_unused]] const Buffers& buffers, + [[maybe_unused]] const SoftmaxDescriptor& softmax_desc) const +{ + auto ret = std::vector(); + + auto ctx = ExecutionContext{&handle}; + + const softmax::ProblemDescription problem_description = AsSoftmax(); + + const auto algo = AlgorithmName{"Softmax"}; + + static solver::softmax::AttnSoftmax attnSoftmaxSolver; + static solver::softmax::Softmax regularSoftmaxSolver; + + std::vector solvers; + + solvers.push_back(&attnSoftmaxSolver); + solvers.push_back(®ularSoftmaxSolver); + + for(auto solver : solvers) + { + if(!solver->IsApplicable(ctx, problem_description)) + { + MIOPEN_LOG_I2(solver->SolverDbId() << ": Not applicable"); + continue; + } + + auto solution = Solution(); + + /// \todo time measurement will be done later. For now we set less time for attention + /// softmax and slightly bigger for regular + solution.SetTime(solver == &attnSoftmaxSolver ? 
1.0f : 2.0f); + solution.SetWorkspaceSize(solver->GetWorkspaceSize(ctx, problem_description)); + solution.SetSolver(solver->SolverDbId()); + solution.SetProblem({*this}); + + MIOPEN_LOG_I("Found solution: " << solution.GetSolver().ToString() << " , " + << solution.GetWorkspaceSize() << ", " + << solution.GetTime()); + + ret.emplace_back(std::move(solution)); + + if(ret.size() >= max_solutions) + { + break; + } + } + + return ret; +} + void Problem::ValidateGroupCount(const TensorDescriptor& xDesc, const TensorDescriptor& wDesc, const ConvolutionDescriptor& conv) @@ -456,7 +543,8 @@ void Problem::LogDriverCommand() const const auto log_function = boost::hof::match([&](const ConvolutionDescriptor& op_desc) { LogDriverCommand(op_desc); }, [&](const ActivationDescriptor& op_desc) { LogDriverCommand(op_desc); }, - [&](const BiasDescriptor&) {}); + [&](const BiasDescriptor&) {}, + [&](const SoftmaxDescriptor&) {}); boost::apply_visitor(log_function, operator_descriptor); } @@ -576,6 +664,7 @@ void Problem::CalculateOutput() [&](const ActivationDescriptor&) { RegisterTensorDescriptor(GetOutputId(), GetInput()); }, + [&](const SoftmaxDescriptor&) { RegisterTensorDescriptor(GetOutputId(), GetInput()); }, [&](const BiasDescriptor&) { RegisterTensorDescriptor(GetOutputId(), GetInput()); }), operator_descriptor); } @@ -585,7 +674,8 @@ miopenTensorArgumentId_t Problem::GetInputId() const return boost::apply_visitor( boost::hof::match([](const ConvolutionDescriptor&) { return miopenTensorConvolutionX; }, [](const ActivationDescriptor&) { return miopenTensorActivationX; }, - [](const BiasDescriptor&) { return miopenTensorBiasX; }), + [](const BiasDescriptor&) { return miopenTensorBiasX; }, + [](const SoftmaxDescriptor&) { return miopenTensorSoftmaxX; }), operator_descriptor); } @@ -594,7 +684,8 @@ miopenTensorArgumentId_t Problem::GetOutputId() const return boost::apply_visitor( boost::hof::match([](const ConvolutionDescriptor&) { return miopenTensorConvolutionY; }, [](const ActivationDescriptor&) { return miopenTensorActivationY; }, - [](const BiasDescriptor&) { return miopenTensorBiasY; }), + [](const BiasDescriptor&) { return miopenTensorBiasY; }, + [](const SoftmaxDescriptor&) { return miopenTensorSoftmaxY; }), operator_descriptor); } @@ -679,7 +770,14 @@ void FusedProblem::AddProblemToPlan(FusionPlanDescriptor& plan, const Problem& p [&](const BiasDescriptor&) { plan.AddOp(std::make_shared( problem.GetTensorDescriptorChecked(miopenTensorBias, "miopenTensorBias"))); + }, + [&](const SoftmaxDescriptor&) { + // Not implemented + assert(false); + MIOPEN_THROW(miopenStatusNotImplemented, + "Softmax is not implemented for FusedProblem"); }), + problem.operator_descriptor); } @@ -741,7 +839,14 @@ fusion::FusionInvokeParams FusedProblem::MakeInvokeParams( const auto bias_ptr = buffers.at(miopenTensorBias); operator_args.params.emplace_back( std::make_unique(bias_ptr)); + }, + [&](const SoftmaxDescriptor&) { + // Not implemented + assert(false); + MIOPEN_THROW(miopenStatusNotImplemented, + "Softmax is not implemented for FusedProblem"); }), + problem.operator_descriptor); } diff --git a/src/softmax.cpp b/src/softmax.cpp index 04e81fc7de..38b233e2e6 100644 --- a/src/softmax.cpp +++ b/src/softmax.cpp @@ -33,8 +33,73 @@ #include #include +#include + namespace miopen { +extern "C" miopenStatus_t miopenCreateSoftmaxDescriptor(miopenSoftmaxDescriptor_t* softmaxDesc) +{ + MIOPEN_LOG_FUNCTION(softmaxDesc); + return miopen::try_([&] { + auto& desc = miopen::deref(softmaxDesc); + desc = new 
miopen::SoftmaxDescriptor(); + }); +} + +extern "C" miopenStatus_t miopenSetSoftmaxDescriptor(miopenSoftmaxDescriptor_t softmaxDesc, + float alpha, + float beta, + miopenSoftmaxAlgorithm_t algorithm, + miopenSoftmaxMode_t mode) +{ + + MIOPEN_LOG_FUNCTION(softmaxDesc, alpha, beta, algorithm, mode); + return miopen::try_( + [&] { miopen::deref(softmaxDesc).SetParams(alpha, beta, algorithm, mode); }); +} + +extern "C" miopenStatus_t miopenGetSoftmaxDescriptor(const miopenSoftmaxDescriptor_t softmaxDesc, + float* alpha, + float* beta, + miopenSoftmaxAlgorithm_t* algorithm, + miopenSoftmaxMode_t* mode) +{ + MIOPEN_LOG_FUNCTION(softmaxDesc); + return miopen::try_([&] { + *alpha = miopen::deref(softmaxDesc).GetAlpha(); + *beta = miopen::deref(softmaxDesc).GetBeta(); + *algorithm = miopen::deref(softmaxDesc).GetAlgorithm(); + *mode = miopen::deref(softmaxDesc).GetMode(); + }); +} + +std::ostream& operator<<(std::ostream& stream, const SoftmaxDescriptor& x) +{ + stream << "softmax," + << "alpha" << x.GetAlpha() << ",beta" << x.GetBeta() << ",algorithm" << x.GetAlgorithm() + << ",mode" << x.GetMode() << ","; + + return stream; +} + +void to_json(nlohmann::json& json, const SoftmaxDescriptor& descriptor) +{ + json = nlohmann::json{ + {"alpha", descriptor.GetAlpha()}, + {"beta", descriptor.GetBeta()}, + {"algorithm", descriptor.GetAlgorithm()}, + {"mode", descriptor.GetMode()}, + }; +} + +void from_json(const nlohmann::json& json, SoftmaxDescriptor& descriptor) +{ + json.at("alpha").get_to(descriptor.alpha); + json.at("beta").get_to(descriptor.beta); + json.at("algorithm").get_to(descriptor.algorithm); + json.at("mode").get_to(descriptor.mode); +} + miopenStatus_t SoftmaxForward(Handle& handle, const void* alpha, const void* beta, diff --git a/src/solution.cpp b/src/solution.cpp index e146191639..646d34d484 100644 --- a/src/solution.cpp +++ b/src/solution.cpp @@ -31,6 +31,10 @@ #include #include +#include +#include +#include + #include #include @@ -70,6 +74,9 @@ void Solution::Run(Handle& handle, [&](const ConvolutionDescriptor& op_desc) { RunImpl(handle, inputs, workspace, workspace_size, op_desc); }, + [&](const SoftmaxDescriptor& op_desc) { + RunImpl(handle, inputs, workspace, workspace_size, op_desc); + }, [&](const ActivationDescriptor& /*op_desc*/) { MIOPEN_THROW(miopenStatusNotImplemented); }, @@ -113,8 +120,10 @@ void Solution::LogDriverCommand(const ActivationDescriptor& desc) const void Solution::LogDriverCommand(const Problem& problem_) const { boost::apply_visitor( - boost::hof::match([&](const BiasDescriptor&) { /* \todo: think on how to log bias */ }, - [&](const auto& op_desc) { LogDriverCommand(op_desc); }), + boost::hof::match( + [&](const BiasDescriptor&) { /* \todo: think on how to log bias */ }, + [&](const SoftmaxDescriptor&) { /* \todo: think on how to log softmax */ }, + [&](const auto& op_desc) { LogDriverCommand(op_desc); }), problem_.GetOperatorDescriptor()); } @@ -232,6 +241,91 @@ void Solution::RunImpl(Handle& handle, checkNumericsOutput_(); } +void Solution::RunImpl(Handle& handle, + const std::unordered_map& inputs, + Data_t /*workspace*/, + std::size_t /*workspace_size*/, + const SoftmaxDescriptor& softmax_desc) +{ + + const auto& problem_casted = boost::get(problem.item); + + const auto get_input_checked = [&](auto name, const std::string& name_str) { + const auto& found = inputs.find(name); + if(found == inputs.end()) + { + MIOPEN_THROW(miopenStatusInvalidValue, + "Problem is missing " + name_str + " tensor descriptor."); + } + auto ret = found->second; + 
if(!ret.descriptor.has_value()) + ret.descriptor = problem_casted.GetTensorDescriptorChecked(name, name_str); + return ret; + }; + + const softmax::ProblemDescription problem_description = problem_casted.AsSoftmax(); + + float alpha = softmax_desc.GetAlpha(); + float beta = softmax_desc.GetBeta(); + miopenSoftmaxAlgorithm_t algorithm = softmax_desc.GetAlgorithm(); + miopenSoftmaxMode_t mode = softmax_desc.GetMode(); + + const auto invoke_ctx = [&]() -> AnyInvokeParams { + switch(problem_casted.GetDirection()) + { + case miopenProblemDirectionForward: { + auto x = get_input_checked(miopenTensorSoftmaxX, "miopenTensorSoftmaxX"); + auto y = get_input_checked(miopenTensorSoftmaxY, "miopenTensorSoftmaxY"); + + return softmax::InvokeParams( + &alpha, &beta, *x.descriptor, x.buffer, *y.descriptor, y.buffer, algorithm, mode); + } + case miopenProblemDirectionBackward: { + auto y = get_input_checked(miopenTensorSoftmaxY, "miopenTensorSoftmaxY"); + auto dy = get_input_checked(miopenTensorSoftmaxDY, "miopenTensorSoftmaxDY"); + auto dx = get_input_checked(miopenTensorSoftmaxDX, "miopenTensorSoftmaxDX"); + + return softmax::InvokeParams(&alpha, + &beta, + *y.descriptor, + y.buffer, + *dy.descriptor, + dy.buffer, + *dx.descriptor, + dx.buffer, + algorithm, + mode); + } + + default: MIOPEN_THROW(miopenStatusNotImplemented); + } + }(); + + const auto net_cfg = problem_description.MakeNetworkConfig(); + const auto found_invoker = handle.GetInvoker(net_cfg, GetSolver()); + + if(found_invoker) + { + (*found_invoker)(handle, invoke_ctx); + } + else + { + auto ctx = ExecutionContext{&handle}; + + solver::softmax::Softmax regularSoftmax; + solver::softmax::AttnSoftmax attnSoftmax; + + const auto softmax_solution = GetSolver().ToString() == regularSoftmax.SolverDbId() + ? regularSoftmax.GetSolution(ctx, problem_description) + : attnSoftmax.GetSolution(ctx, problem_description); + + decltype(auto) invoker = handle.PrepareInvoker(*softmax_solution.invoker_factory, + softmax_solution.construction_params); + handle.RegisterInvoker(invoker, net_cfg, GetSolver().ToString()); + invoker(handle, invoke_ctx); + } +} + void Solution::RunImpl(Handle& handle, const std::unordered_map& inputs, Data_t /*workspace*/, diff --git a/src/solver.cpp b/src/solver.cpp index e4800fbd2d..a3a17bf1d3 100644 --- a/src/solver.cpp +++ b/src/solver.cpp @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -642,6 +643,10 @@ inline SolverRegistrar::SolverRegistrar(IdRegistryData& registry) ++id, conv::ConvHipImplicitGemmGroupWrwXdlops{}, miopenConvolutionAlgoImplicitGEMM); + + Register(registry, ++id, Primitive::Softmax, softmax::Softmax{}.SolverDbId()); + Register(registry, ++id, Primitive::Softmax, softmax::AttnSoftmax{}.SolverDbId()); + // IMPORTANT: New solvers should be added to the end of the function! } diff --git a/test/gtest/softmax_find20.cpp b/test/gtest/softmax_find20.cpp new file mode 100644 index 0000000000..c88afd73cf --- /dev/null +++ b/test/gtest/softmax_find20.cpp @@ -0,0 +1,327 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#include "test.hpp" +#include "get_handle.hpp" +#include "tensor_holder.hpp" +#include "verify.hpp" + +#include + +#include + +#include + +#include + +#include + +using namespace miopen; + +class SoftmaxFind20Test +{ +public: + SoftmaxFind20Test(bool forward) : problem(nullptr), isForward(forward) { Initialize(); } + + void AddTensorDescriptors() + { + std::cerr << "Creating softmax tensor descriptors..." << std::endl; + + auto test_set_tensor_descriptor = [this](miopenTensorArgumentId_t name, + TensorDescriptor& desc) { + EXPECT_EQUAL(miopenSetProblemTensorDescriptor(problem, name, &desc), + miopenStatusSuccess); + }; + + if(isForward) + { + test_set_tensor_descriptor(miopenTensorSoftmaxX, xTensor.desc); + test_set_tensor_descriptor(miopenTensorSoftmaxY, yTensor.desc); + } + else + { + test_set_tensor_descriptor(miopenTensorSoftmaxY, yTensor.desc); + test_set_tensor_descriptor(miopenTensorSoftmaxDY, dyTensor.desc); + test_set_tensor_descriptor(miopenTensorSoftmaxDX, dxTensor.desc); + } + + std::cerr << "Created softmax tensor descriptors." << std::endl; + } + + std::vector TestFindSolutions(Handle& handle) + { + std::cerr << "Testing miopenFindSolutions..." << std::endl; + + auto solutions = std::vector{}; + std::size_t found; + + // We expect to get only 1 or 2 solutions for softmax for now. Hardcode value 16 as just big + // enough value + solutions.resize(16); + + EXPECT_EQUAL(miopenFindSolutions( + &handle, problem, nullptr, solutions.data(), &found, solutions.size()), + miopenStatusSuccess); + EXPECT_TRUE(found > 0); + + solutions.resize(found); + + std::cerr << "Finished testing miopenFindSolutions." << std::endl; + return solutions; + } + + void TestSolutionAttributes(const std::vector& solutions) + { + std::cerr << "Testing miopenGetSolution..." << std::endl; + + for(const auto& solution : solutions) + { + float time; + std::size_t workspace_size; + uint64_t solver_id; + + EXPECT_EQUAL(miopenGetSolutionTime(solution, &time), miopenStatusSuccess); + EXPECT_EQUAL(miopenGetSolutionWorkspaceSize(solution, &workspace_size), + miopenStatusSuccess); + EXPECT_EQUAL(miopenGetSolutionSolverId(solution, &solver_id), miopenStatusSuccess); + } + + std::cerr << "Finished testing miopenGetSolution." 
<< std::endl; + } + + void TestRunSolutionsForward(Handle& handle, const std::vector& solutions) + { + std::cerr << "Testing solution functions..." << std::endl; + + miopenTensorDescriptor_t x_desc = &xTensor.desc, y_desc = &yTensor.desc; + + const unsigned int numTensors = 2; + + for(const auto& solution : solutions) + { + auto arguments = std::make_unique(numTensors); + + auto in_gpu = handle.Write(xTensor.data); + auto out_gpu = handle.Write(yTensor.data); + + miopenTensorArgumentId_t names[numTensors] = {miopenTensorSoftmaxX, + miopenTensorSoftmaxY}; + void* buffers[numTensors] = {in_gpu.get(), out_gpu.get()}; + miopenTensorDescriptor_t descriptors[numTensors] = {x_desc, y_desc}; + + for(auto i = 0; i < numTensors; ++i) + { + arguments[i].id = names[i]; + arguments[i].descriptor = &descriptors[i]; + arguments[i].buffer = buffers[i]; + } + + std::cerr << "Run a solution." << std::endl; + EXPECT_EQUAL( + miopenRunSolution(&handle, solution, numTensors, arguments.get(), nullptr, 0), + miopenStatusSuccess); + + float alpha = softmax_descriptor.GetAlpha(); + float beta = softmax_descriptor.GetBeta(); + + // tensor yTensorDup = yTensor; + tensor yTensorRef = tensor{test_n, test_c, test_h, test_w}; + + auto out_gpu_ref = handle.Write(yTensorRef.data); + + // Run softmax in a usual way (which is tested) and compare results + EXPECT_EQUAL(miopenSoftmaxForward_V2(&handle, + &alpha, + x_desc, + in_gpu.get(), + &beta, + &yTensorRef.desc, + out_gpu_ref.get(), + softmax_descriptor.GetAlgorithm(), + softmax_descriptor.GetMode()), + miopenStatusSuccess); + + yTensor.data = handle.Read(out_gpu, yTensor.data.size()); + yTensorRef.data = handle.Read(out_gpu_ref, yTensorRef.data.size()); + + double error = miopen::rms_range(yTensorRef.data, yTensor.data); + const double tolerance = 1e-3; + + EXPECT_TRUE(std::isfinite(error) && error <= tolerance) + << "Outputs do not match each other. Error:" << error; + } + + std::cerr << "Finished testing solution functions." << std::endl; + } + + void TestRunSolutionsBackward(Handle& handle, const std::vector& solutions) + { + std::cerr << "Testing solution functions..." << std::endl; + + miopenTensorDescriptor_t y_desc = &yTensor.desc; + miopenTensorDescriptor_t dy_desc = &dyTensor.desc; + miopenTensorDescriptor_t dx_desc = &dxTensor.desc; + + const unsigned int numTensors = 3; + + for(const auto& solution : solutions) + { + auto arguments = std::make_unique(numTensors); + + auto in1_gpu = handle.Write(yTensor.data); + auto in2_gpu = handle.Write(dyTensor.data); + auto out_gpu = handle.Write(dxTensor.data); + + miopenTensorArgumentId_t names[numTensors] = { + miopenTensorSoftmaxY, miopenTensorSoftmaxDY, miopenTensorSoftmaxDX}; + void* buffers[numTensors] = {in1_gpu.get(), in2_gpu.get(), out_gpu.get()}; + miopenTensorDescriptor_t descriptors[numTensors] = {y_desc, dy_desc, dx_desc}; + + for(auto i = 0; i < numTensors; ++i) + { + arguments[i].id = names[i]; + arguments[i].descriptor = &descriptors[i]; + arguments[i].buffer = buffers[i]; + } + + std::cerr << "Run a solution." 
<< std::endl; + EXPECT_EQUAL( + miopenRunSolution(&handle, solution, numTensors, arguments.get(), nullptr, 0), + miopenStatusSuccess); + + float alpha = softmax_descriptor.GetAlpha(); + float beta = softmax_descriptor.GetBeta(); + + // tensor yTensorDup = yTensor; + tensor dxTensorRef = tensor{test_n, test_c, test_h, test_w}; + + // this is dx + auto out_gpu_ref = handle.Write(dxTensorRef.data); + + // Run softmax in a usual way (which is tested) and compare results + EXPECT_EQUAL(miopenSoftmaxBackward_V2(&handle, + &alpha, + y_desc, + in1_gpu.get(), + dy_desc, + in2_gpu.get(), + &beta, + &dxTensorRef.desc, + out_gpu_ref.get(), + softmax_descriptor.GetAlgorithm(), + softmax_descriptor.GetMode()), + miopenStatusSuccess); + + yTensor.data = handle.Read(out_gpu, yTensor.data.size()); + dxTensorRef.data = handle.Read(out_gpu_ref, dxTensorRef.data.size()); + + double error = miopen::rms_range(dxTensorRef.data, yTensor.data); + const double tolerance = 1e-3; + + EXPECT_TRUE(std::isfinite(error) && error <= tolerance) + << "Outputs do not match each other. Error:" << error; + } + + std::cerr << "Finished testing solution functions." << std::endl; + } + + void Finalize() { EXPECT_EQUAL(miopenDestroyProblem(problem), miopenStatusSuccess); } + +private: + void Initialize() + { + softmax_descriptor.SetParams( + 1.0f, 0.0f, MIOPEN_SOFTMAX_ACCURATE, MIOPEN_SOFTMAX_MODE_CHANNEL); + + if(isForward) + { + xTensor = + tensor{test_n, test_c, test_h, test_w}.generate(tensor_elem_gen_integer{17}); + yTensor = tensor{test_n, test_c, test_h, test_w}; + + EXPECT_EQUAL(miopenCreateSoftmaxProblem( + &problem, &softmax_descriptor, miopenProblemDirectionForward), + miopenStatusSuccess); + } + else + { + yTensor = + tensor{test_n, test_c, test_h, test_w}.generate(tensor_elem_gen_integer{17}); + dyTensor = + tensor{test_n, test_c, test_h, test_w}.generate(tensor_elem_gen_integer{17}); + dxTensor = tensor{test_n, test_c, test_h, test_w}; + + EXPECT_EQUAL(miopenCreateSoftmaxProblem( + &problem, &softmax_descriptor, miopenProblemDirectionBackward), + miopenStatusSuccess); + } + + AddTensorDescriptors(); + } + +private: + tensor xTensor; + tensor yTensor; + + tensor dxTensor; + tensor dyTensor; + + SoftmaxDescriptor softmax_descriptor; + miopenProblem_t problem; + + bool isForward; + + const unsigned int test_n = 100; + const unsigned int test_c = 3; + const unsigned int test_h = 32; + const unsigned int test_w = 32; +}; + +TEST(TestSoftmaxFind20, softmaxForward) +{ + Handle& handle = get_handle(); + + SoftmaxFind20Test test(true); + + std::vector solutions = test.TestFindSolutions(handle); + test.TestSolutionAttributes(solutions); + + test.TestRunSolutionsForward(handle, solutions); + test.Finalize(); +} + +TEST(TestSoftmaxFind20, softmaxBackward) +{ + Handle& handle = get_handle(); + + SoftmaxFind20Test test(false); + + std::vector solutions = test.TestFindSolutions(handle); + test.TestSolutionAttributes(solutions); + + test.TestRunSolutionsBackward(handle, solutions); + test.Finalize(); +}
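Taken together, the new entry points follow the usual MIOpen object pattern: the client creates the opaque descriptor, fills it with miopenSetSoftmaxDescriptor, and can read the values back with miopenGetSoftmaxDescriptor. A minimal round-trip sketch using only the declarations added above (error checking omitted; note that this patch adds no matching destroy call for the descriptor):

#include <miopen/miopen.h>
#include <cstdio>

int main()
{
    miopenSoftmaxDescriptor_t softmax_desc;

    // Create the opaque descriptor object added by this patch.
    miopenCreateSoftmaxDescriptor(&softmax_desc);

    // alpha/beta scaling plus the pre-existing softmax algorithm and mode enums.
    miopenSetSoftmaxDescriptor(
        softmax_desc, 1.0f, 0.0f, MIOPEN_SOFTMAX_ACCURATE, MIOPEN_SOFTMAX_MODE_CHANNEL);

    // Read the values back; every out-parameter must be non-null.
    float alpha = 0.0f;
    float beta  = 0.0f;
    miopenSoftmaxAlgorithm_t algorithm;
    miopenSoftmaxMode_t mode;
    miopenGetSoftmaxDescriptor(softmax_desc, &alpha, &beta, &algorithm, &mode);

    std::printf("alpha=%f beta=%f algorithm=%d mode=%d\n",
                alpha,
                beta,
                static_cast<int>(algorithm),
                static_cast<int>(mode));

    // No destroy function for the softmax descriptor is added here, so nothing to free.
    return 0;
}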
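The forward Find 2.0 flow exercised by softmax_find20.cpp, condensed to the public C API. The handle, tensor descriptor, and buffer setup (miopenCreate, miopenCreateTensorDescriptor, miopenSet4dTensorDescriptor, miopenDestroySolution, hipMalloc/hipFree) and the 100x3x32x32 shape come from the existing MIOpen/HIP API and the test fixture rather than from this patch; the problem, find, and run calls are the ones added or exercised here. A sketch, with input upload and error checking omitted:

#include <miopen/miopen.h>
#include <hip/hip_runtime.h>
#include <vector>

int main()
{
    miopenHandle_t handle;
    miopenCreate(&handle);

    // NCHW problem size used by the gtest; purely illustrative.
    const int n = 100, c = 3, h = 32, w = 32;
    const size_t bytes = static_cast<size_t>(n) * c * h * w * sizeof(float);

    miopenTensorDescriptor_t x_desc, y_desc;
    miopenCreateTensorDescriptor(&x_desc);
    miopenCreateTensorDescriptor(&y_desc);
    miopenSet4dTensorDescriptor(x_desc, miopenFloat, n, c, h, w);
    miopenSet4dTensorDescriptor(y_desc, miopenFloat, n, c, h, w);

    void* x_dev = nullptr;
    void* y_dev = nullptr;
    hipMalloc(&x_dev, bytes); // a real client would upload input data here
    hipMalloc(&y_dev, bytes);

    // Describe the operation and wrap it in a Find 2.0 problem.
    miopenSoftmaxDescriptor_t softmax_desc;
    miopenCreateSoftmaxDescriptor(&softmax_desc);
    miopenSetSoftmaxDescriptor(
        softmax_desc, 1.0f, 0.0f, MIOPEN_SOFTMAX_ACCURATE, MIOPEN_SOFTMAX_MODE_CHANNEL);

    miopenProblem_t problem;
    miopenCreateSoftmaxProblem(&problem, softmax_desc, miopenProblemDirectionForward);
    miopenSetProblemTensorDescriptor(problem, miopenTensorSoftmaxX, x_desc);
    miopenSetProblemTensorDescriptor(problem, miopenTensorSoftmaxY, y_desc);

    // Ask for solutions; the find path above registers at most two softmax solvers.
    std::vector<miopenSolution_t> solutions(16);
    size_t found = 0;
    miopenFindSolutions(handle, problem, nullptr, solutions.data(), &found, solutions.size());
    solutions.resize(found);

    // Run the first solution with the new softmax tensor argument ids.
    // The gtest runs without a workspace; a robust client would query
    // miopenGetSolutionWorkspaceSize first.
    miopenTensorArgument_t args[2];
    args[0].id         = miopenTensorSoftmaxX;
    args[0].descriptor = &x_desc;
    args[0].buffer     = x_dev;
    args[1].id         = miopenTensorSoftmaxY;
    args[1].descriptor = &y_desc;
    args[1].buffer     = y_dev;

    if(found > 0)
        miopenRunSolution(handle, solutions[0], 2, args, nullptr, 0);

    for(size_t i = 0; i < found; ++i)
        miopenDestroySolution(solutions[i]);
    miopenDestroyProblem(problem);
    hipFree(x_dev);
    hipFree(y_dev);
    miopenDestroyTensorDescriptor(x_desc);
    miopenDestroyTensorDescriptor(y_desc);
    miopenDestroy(handle);
    return 0;
}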
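The to_json/from_json overloads added for SoftmaxDescriptor are presumably what lets a found softmax solution survive the generic solution save/load path (the comment about preserving the order of the OperatorDescriptor variant points the same way). A sketch of that flow; the miopenGetSolutionSize, miopenSaveSolution, and miopenLoadSolution signatures are assumed from the existing Find 2.0 API and should be checked against miopen.h:

#include <miopen/miopen.h>
#include <vector>

// 'solution' is assumed to come from miopenFindSolutions on a softmax problem;
// 'handle', 'args', and 'num_args' are set up as in the previous sketch.
void save_and_restore(miopenHandle_t handle,
                      miopenSolution_t solution,
                      miopenTensorArgument_t* args,
                      size_t num_args)
{
    // Serialize the solution; the softmax descriptor travels inside the blob
    // through the to_json() overload added above.
    size_t size = 0;
    miopenGetSolutionSize(solution, &size);
    std::vector<char> blob(size);
    miopenSaveSolution(solution, blob.data());

    // Later, possibly in another process: restore and run without a new find step.
    miopenSolution_t restored;
    miopenLoadSolution(&restored, blob.data(), blob.size());
    miopenRunSolution(handle, restored, num_args, args, nullptr, 0);
    miopenDestroySolution(restored);
}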