Softmax for find20 #2776

Merged: 38 commits, Mar 21, 2024
Commits
b985479  temporary commit (Feb 5, 2024)
2611905  Merge branch 'develop' into vgolovko/softmax_for_find20 (Feb 5, 2024)
07295fd  initial commit (Feb 8, 2024)
4ffa3ca  temporary commit. Some functionality added (Feb 14, 2024)
343db12  Merge branch 'develop' into vgolovko/softmax_for_find20 (Feb 14, 2024)
08fe02e  compilation fixes (Feb 19, 2024)
a500ffa  A basic test has been added (Feb 26, 2024)
60eb817  softmaxForward test fixes (Feb 28, 2024)
22d0557  SoftmaxBackward test + format clang run (Feb 29, 2024)
b5c5f39  tidy clang fixes (Feb 29, 2024)
4b348c2  some fixes (Feb 29, 2024)
3d15064  comment removed (Feb 29, 2024)
e278107  Merge branch 'develop' into vgolovko/softmax_for_find20 (Feb 29, 2024)
800d0bd  fix clang tidy (Mar 1, 2024)
f96c834  Merge branch 'vgolovko/softmax_for_find20' of https://github.com/ROCm… (Mar 1, 2024)
9b05596  check changed (Mar 1, 2024)
9fe01b0  Merge branch 'develop' into vgolovko/softmax_for_find20 (Mar 4, 2024)
e0d5e57  Test's ctor introduced instead of direct Initialize call (Mar 4, 2024)
8a566a5  tidy-checks fix (Mar 5, 2024)
96bb3ee  Merge branch 'develop' into vgolovko/softmax_for_find20 (Mar 5, 2024)
d5e5fa1  some comments removed (Mar 6, 2024)
2169f9e  Merge branch 'develop' into vgolovko/softmax_for_find20 (Mar 7, 2024)
e8b2edc  minor code formatting (Mar 7, 2024)
d407f53  Merge branch 'develop' into vgolovko/softmax_for_find20 (Mar 11, 2024)
2694e4b  Merge branch 'develop' into vgolovko/softmax_for_find20 (Mar 12, 2024)
b3db167  Merge branch 'develop' into vgolovko/softmax_for_find20 (Mar 13, 2024)
a9984e2  minor change (Mar 14, 2024)
33b3f0a  Read the Docs fail check (Mar 14, 2024)
3c7fb96  format run on sources (Mar 14, 2024)
88dd97c  Merge branch 'develop' into vgolovko/softmax_for_find20 (Mar 15, 2024)
3754f93  changes after code review (Mar 15, 2024)
9384e0f  clang format run (Mar 15, 2024)
ad2d52e  alpha and beta in network config restored for now (Mar 15, 2024)
18cb331  Merge branch 'develop' into vgolovko/softmax_for_find20 (Mar 20, 2024)
5149aa0  definitions added (Mar 20, 2024)
7990828  Put softmax Find 2.0 createproblem denition under beta api (Mar 20, 2024)
dba6f95  Merge branch 'develop' into vgolovko/softmax_for_find20 (Mar 21, 2024)
43abd49  Merge branch 'develop' into vgolovko/softmax_for_find20 (Mar 21, 2024)
22 changes: 22 additions & 0 deletions include/miopen/miopen.h
@@ -346,6 +346,11 @@ MIOPEN_DECLARE_OBJECT(miopenDropoutDescriptor);
*/
MIOPEN_DECLARE_OBJECT(miopenReduceTensorDescriptor);

/*! @ingroup softmax
* @brief Creates the miopenSoftmaxDescriptor_t type
*/
MIOPEN_DECLARE_OBJECT(miopenSoftmaxDescriptor);

/*! @ingroup tensor
* @enum miopenDataType_t
* MIOpen floating point datatypes. Both 32-bit and 16-bit floats are supported in MIOpen.
@@ -5314,6 +5319,11 @@ typedef enum
miopenTensorBiasY = 9,
miopenTensorBias = 10,
#endif
miopenTensorSoftmaxX = 11,
miopenTensorSoftmaxY = 12,
miopenTensorSoftmaxDX = 13,
miopenTensorSoftmaxDY = 14,

} miopenTensorArgumentId_t;

/*! @enum miopenTensorArgumentId_t
@@ -5336,6 +5346,18 @@ MIOPEN_EXPORT miopenStatus_t miopenCreateConvProblem(miopenProblem_t* problem,
miopenConvolutionDescriptor_t operatorDesc,
miopenProblemDirection_t direction);

/*! @brief Initializes a problem object describing a softmax operation.
*
* @param problem Pointer to the problem to initialize
* @param operatorDesc Descriptor of the operator to be used
* @param direction Direction of the operation
* @return miopenStatus_t
*/

MIOPEN_EXPORT miopenStatus_t miopenCreateSoftmaxProblem(miopenProblem_t* problem,
miopenSoftmaxDescriptor_t operatorDesc,
miopenProblemDirection_t direction);

/*! @brief Destroys a problem object.
*
* @param problem Problem to destroy
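
To put the new entry point in context, here is a minimal sketch of the Find 2.0 flow for softmax as this PR wires it up. The descriptor setup calls (miopenCreateSoftmaxDescriptor / miopenSetSoftmaxDescriptor) are assumptions following MIOpen's usual descriptor conventions and are not shown in this hunk; handle, the tensor descriptors, and the device buffers are assumed to exist, and error checking is elided.

    // Sketch only: the descriptor setup calls below are assumed, not shown in this diff.
    miopenSoftmaxDescriptor_t softmax_desc;
    miopenCreateSoftmaxDescriptor(&softmax_desc);                       // assumed API
    miopenSetSoftmaxDescriptor(softmax_desc, 1.0f, 0.0f,                // assumed API
                               MIOPEN_SOFTMAX_ACCURATE, MIOPEN_SOFTMAX_MODE_CHANNEL);

    miopenProblem_t problem;
    miopenCreateSoftmaxProblem(&problem, softmax_desc, miopenProblemDirectionForward);

    // Describe the tensors using the argument IDs added by this PR.
    miopenSetProblemTensorDescriptor(problem, miopenTensorSoftmaxX, x_desc);
    miopenSetProblemTensorDescriptor(problem, miopenTensorSoftmaxY, y_desc);

    // Find the best applicable solution and run it.
    miopenSolution_t solution;
    size_t found = 0;
    miopenFindSolutions(handle, problem, nullptr, &solution, &found, 1);

    miopenTensorArgument_t args[2] = {{miopenTensorSoftmaxX, nullptr, x_dev},
                                      {miopenTensorSoftmaxY, nullptr, y_dev}};
    if(found > 0)
        miopenRunSolution(handle, solution, 2, args, nullptr, 0);
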
13 changes: 13 additions & 0 deletions src/api/find2_0_commons.cpp
@@ -89,6 +89,14 @@ miopenStatus_t miopenCreateBiasProblem(miopenProblem_t* problem, miopenProblemDi
});
}

miopenStatus_t miopenCreateSoftmaxProblem(miopenProblem_t* problem,
miopenSoftmaxDescriptor_t operatorDesc,
miopenProblemDirection_t direction)
{
MIOPEN_LOG_FUNCTION(problem, direction);
return MakeProblem(problem, operatorDesc, direction);
}

miopenStatus_t miopenFuseProblems(miopenProblem_t problem1, miopenProblem_t problem2)
{
MIOPEN_LOG_FUNCTION(problem1, problem2);
@@ -263,6 +271,11 @@ inline std::ostream& operator<<(std::ostream& stream, const miopenTensorArgument
case miopenTensorBias: stream << "Bias"; break;
case miopenTensorBiasX: stream << "BiasX"; break;
case miopenTensorBiasY: stream << "BiasY"; break;
case miopenTensorSoftmaxX: stream << "SoftmaxX"; break;
case miopenTensorSoftmaxY: stream << "SoftmaxY"; break;
case miopenTensorSoftmaxDX: stream << "SoftmaxDX"; break;
case miopenTensorSoftmaxDY: stream << "SoftmaxDY"; break;

case miopenTensorArgumentIdInvalid: stream << "Invalid"; break;
}

14 changes: 13 additions & 1 deletion src/include/miopen/problem.hpp
@@ -31,6 +31,7 @@
#include <miopen/activ.hpp>
#include <miopen/allocator.hpp>
#include <miopen/convolution.hpp>
#include <miopen/softmax.hpp>
#include <miopen/object.hpp>
#include <miopen/solver_id.hpp>
#include <miopen/tensor.hpp>
@@ -59,13 +60,17 @@ namespace conv {
struct ProblemDescription;
} // namespace conv

namespace softmax {
struct ProblemDescription;
} // namespace softmax

struct BiasDescriptor
{
};

// The order of types is important for deserialization and should be preserved between releases.
using OperatorDescriptor =
-    boost::variant<ConvolutionDescriptor, ActivationDescriptor, BiasDescriptor>;
+    boost::variant<ConvolutionDescriptor, ActivationDescriptor, BiasDescriptor, SoftmaxDescriptor>;

struct Problem
{
@@ -99,6 +104,7 @@ struct Problem

conv::ProblemDescription AsConvolution() const;
activ::ProblemDescription AsActivation() const;
softmax::ProblemDescription AsSoftmax() const;

[[nodiscard]] miopenTensorArgumentId_t GetInputId() const;
[[nodiscard]] miopenTensorArgumentId_t GetOutputId() const;
@@ -155,6 +161,12 @@ struct Problem
const Buffers& buffers,
const ConvolutionDescriptor& conv_desc) const;

std::vector<Solution> FindSolutionsImpl(Handle& handle,
const FindOptions& options,
std::size_t max_solutions,
const Buffers& buffers,
const SoftmaxDescriptor& softmax_desc) const;

void LogDriverCommand(const ConvolutionDescriptor& conv_desc) const;
void LogDriverCommand(const ActivationDescriptor& descriptor) const;
};
38 changes: 38 additions & 0 deletions src/include/miopen/softmax.hpp
@@ -28,12 +28,47 @@

#include <miopen/common.hpp>
#include <miopen/miopen.h>
#include <miopen/object.hpp>

#include <nlohmann/json_fwd.hpp>

namespace miopen {

struct Handle;
struct TensorDescriptor;

struct SoftmaxDescriptor : miopenSoftmaxDescriptor
{
SoftmaxDescriptor() {}

float GetAlpha() const { return alpha; }
float GetBeta() const { return beta; }
miopenSoftmaxAlgorithm_t GetAlgorithm() const { return algorithm; }
miopenSoftmaxMode_t GetMode() const { return mode; }

void SetParams(float alpha_,
float beta_,
miopenSoftmaxAlgorithm_t algorithm_,
miopenSoftmaxMode_t mode_)
{
alpha = alpha_;
beta = beta_;
algorithm = algorithm_;
mode = mode_;
}

friend std::ostream& operator<<(std::ostream& stream, const SoftmaxDescriptor& x);

friend void to_json(nlohmann::json& json, const SoftmaxDescriptor& descriptor);
friend void from_json(const nlohmann::json& json, SoftmaxDescriptor& descriptor);

private:
float alpha;
float beta;
[Review comment from a Contributor on lines +66 to +67]
I would rather store them as bool. This way it would later be harder to accidentally use them instead of the actual values. On subsequent runs the float values here would be incorrect anyway.

Suggested change:
-    float alpha;
-    float beta;
+    bool has_alpha;
+    bool has_beta;
miopenSoftmaxAlgorithm_t algorithm;
miopenSoftmaxMode_t mode;
};

miopenStatus_t SoftmaxForward(Handle& handle,
const void* alpha,
const void* beta,
@@ -62,4 +97,7 @@ miopenStatus_t SoftmaxBackward(Handle& handle,
int dx_offset = 0);

} // namespace miopen

MIOPEN_DEFINE_OBJECT(miopenSoftmaxDescriptor, miopen::SoftmaxDescriptor);

#endif // _MIOPEN_SOFTMAX_HPP_
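
As background for the alpha/beta parameters above: MIOpen's softmax entry points follow the usual y = alpha * softmax(x) + beta * y blending convention. A reference-only sketch of the MIOPEN_SOFTMAX_ACCURATE algorithm over a single channel vector (this is just the math, not MIOpen's kernel) also shows why beta matters for performance, a point raised in a review thread further down:

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Reference only: accurate softmax with alpha/beta blending over one vector.
    std::vector<float> SoftmaxAccurateRef(const std::vector<float>& x,
                                          float alpha,
                                          float beta,
                                          const std::vector<float>& y_prev)
    {
        const float max_x = *std::max_element(x.begin(), x.end()); // stability shift
        std::vector<float> y(x.size());
        float sum = 0.0f;
        for(std::size_t i = 0; i < x.size(); ++i)
        {
            y[i] = std::exp(x[i] - max_x);
            sum += y[i];
        }
        for(std::size_t i = 0; i < x.size(); ++i)
            y[i] = alpha * (y[i] / sum) + beta * y_prev[i]; // beta != 0 reads prior y
        return y;
    }
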
6 changes: 3 additions & 3 deletions src/include/miopen/softmax/invoke_params.hpp
@@ -74,9 +74,9 @@ struct InvokeParams : public miopen::InvokeParams
Data_t dx_,
miopenSoftmaxAlgorithm_t algorithm_,
miopenSoftmaxMode_t mode_,
-                 int y_offset_,
-                 int dy_offset_,
-                 int dx_offset_)
+                 int y_offset_ = 0,
+                 int dy_offset_ = 0,
+                 int dx_offset_ = 0)
: algorithm(algorithm_),
mode(mode_),

6 changes: 6 additions & 0 deletions src/include/miopen/solution.hpp
@@ -111,6 +111,12 @@ struct Solution : miopenSolution
std::size_t workspace_size,
const ConvolutionDescriptor& conv_desc);

void RunImpl(Handle& handle,
const std::unordered_map<miopenTensorArgumentId_t, RunInput>& inputs,
Data_t /*workspace*/,
std::size_t /*workspace_size*/,
const SoftmaxDescriptor& softmax_desc);

void RunImpl(Handle& handle,
const std::unordered_map<miopenTensorArgumentId_t, RunInput>& inputs,
Data_t workspace,
3 changes: 2 additions & 1 deletion src/include/miopen/solver_id.hpp
@@ -54,7 +54,8 @@ enum class Primitive
Pooling,
Normalization,
Reduce,
-    Cat
+    Cat,
+    Softmax
};

struct MIOPEN_EXPORT Id
110 changes: 107 additions & 3 deletions src/problem.cpp
@@ -31,6 +31,8 @@
#include <miopen/conv/problem_description.hpp>
#include <miopen/convolution.hpp>
#include <miopen/conv_algo_name.hpp>
#include <miopen/softmax/problem_description.hpp>
#include <miopen/softmax/solvers.hpp>
#include <miopen/datatype.hpp>
#include <miopen/execution_context.hpp>
#include <miopen/fusion_plan.hpp>
@@ -175,6 +177,9 @@ Problem::FindSolutions(Handle& handle, const FindOptions& options, std::size_t m
[&](const ConvolutionDescriptor& op_desc) {
return FindSolutionsImpl(handle, options, max_solutions, buffers, op_desc);
},
[&](const SoftmaxDescriptor& op_desc) {
return FindSolutionsImpl(handle, options, max_solutions, buffers, op_desc);
},
[&](const ActivationDescriptor& /*op_desc*/) -> std::vector<Solution> {
MIOPEN_THROW(miopenStatusNotImplemented);
},
@@ -277,6 +282,33 @@ activ::ProblemDescription Problem::AsActivation() const
}
}

softmax::ProblemDescription Problem::AsSoftmax() const
{
const auto& softmax_desc = boost::get<SoftmaxDescriptor>(operator_descriptor);

float alpha = softmax_desc.GetAlpha();
float beta = softmax_desc.GetBeta();

softmax::ProblemDescription problem_description =
(GetDirection() == miopenProblemDirectionForward)
? softmax::ProblemDescription(
&alpha,
&beta,
[Review thread on lines +295 to +296]

Contributor: [Performance] @Vsevolod1983 See #2671 (review)

Vsevolod1983 (author, Mar 15, 2024): I removed alpha and beta from the network config in the softmax primitive. Is that enough for this PR?

Contributor: @Vsevolod1983 I would begin with removing alpha/beta from the PD (and then fix the build errors by forwarding alpha/beta via InvokeParams). However, I think we need to discuss #2671 (review) first. /cc @CAHEK7 @DrizztDoUrden

Vsevolod1983 (author): Should we do this kind of refactoring as a separate ticket / PR?

Contributor: We need to. a/b actually affects kernel compilation, like it does for convolutions. There are some optimizations with default a/b values, so we need at least a "default a/b" flag in the problem description and in the network config.

Vsevolod1983 (author, Mar 15, 2024): I restored alpha and beta in the network config in this PR until we decide what exactly we want to do next.

Contributor: [Informative] @Vsevolod1983 @CAHEK7 It's interesting that the "default alpha" optimization won't give us much, but the "default beta" optimization removes one global memory read, which may be substantial (./src/kernels/MIOpenSoftmax.cl).

Contributor: @atamazov Right now "default alpha" gives us a lot, since it enables the attention softmax solver, which is faster and does not support a/b in terms of the normal softmax operation. Both values are used in the IsApplicable method and cannot simply be removed.
GetTensorDescriptorChecked(miopenTensorSoftmaxX, "miopenTensorSoftmaxX"),
GetTensorDescriptorChecked(miopenTensorSoftmaxY, "miopenTensorSoftmaxY"),
softmax_desc.GetAlgorithm(),
softmax_desc.GetMode())
: softmax::ProblemDescription(
&alpha,
&beta,
GetTensorDescriptorChecked(miopenTensorSoftmaxY, "miopenTensorSoftmaxY"),
GetTensorDescriptorChecked(miopenTensorSoftmaxDY, "miopenTensorSoftmaxDY"),
GetTensorDescriptorChecked(miopenTensorSoftmaxDX, "miopenTensorSoftmaxDX"),
softmax_desc.GetAlgorithm(),
softmax_desc.GetMode());
return problem_description;
}
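
Following the thread above, one hypothetical shape for the proposed "default a/b" flag (an illustration of the suggestion only, not code from this PR): the network config would record whether alpha and beta take their default values, since that is what gates kernel specialization, rather than the raw floats.

    // Hypothetical sketch of a "default a/b"-aware config key; not part of this PR.
    #include <string>

    struct SoftmaxConfigKey
    {
        bool default_alpha; // alpha == 1.0f; enables the attention softmax solver
        bool default_beta;  // beta == 0.0f; lets kernels skip the global read of y
        int algorithm;      // miopenSoftmaxAlgorithm_t value
        int mode;           // miopenSoftmaxMode_t value

        std::string Serialize() const
        {
            return "sfmx-a" + std::to_string(default_alpha) + "-b" +
                   std::to_string(default_beta) + "-alg" + std::to_string(algorithm) +
                   "-m" + std::to_string(mode);
        }
    };
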

std::vector<Solution> Problem::FindSolutionsImpl(Handle& handle,
const FindOptions& options,
std::size_t max_solutions,
@@ -431,6 +463,60 @@ std::vector<Solution> Problem::FindSolutionsImpl(Handle& handle,
return ret;
}

std::vector<Solution>
Problem::FindSolutionsImpl(Handle& handle,
[[maybe_unused]] const FindOptions& options,
std::size_t max_solutions,
[[maybe_unused]] const Buffers& buffers,
[[maybe_unused]] const SoftmaxDescriptor& softmax_desc) const
{
auto ret = std::vector<Solution>();

auto ctx = ExecutionContext{&handle};

const softmax::ProblemDescription problem_description = AsSoftmax();

const auto algo = AlgorithmName{"Softmax"};

solver::softmax::AttnSoftmax attnSoftmaxSolver;
solver::softmax::Softmax regularSoftmaxSolver;

std::vector<solver::softmax::SoftmaxSolver*> solvers;

solvers.push_back(&attnSoftmaxSolver);
solvers.push_back(&regularSoftmaxSolver);
[Review thread on solver initialization]

atamazov (Mar 14, 2024): [Can be postponed] I think we can statically initialize the solvers.

Contributor: @DrizztDoUrden Can you please suggest to @Vsevolod1983 the technique for doing this? Thanks.

Contributor: The most basic primitive for this is struct SolverContainer.

Contributor: This is postponable, in contrast to the a/b problem.

for(auto solver : solvers)
{
if(!solver->IsApplicable(ctx, problem_description))
{
continue;
}

auto solution = Solution();

/// \todo time measurement will be done later. For now we set less time for attention
/// softmax and slightly bigger for regular
solution.SetTime(solver == &attnSoftmaxSolver ? 0.0f : 1.0f);
solution.SetWorkspaceSize(solver->GetWorkspaceSize(ctx, problem_description));
solution.SetSolver(solver->SolverDbId());
solution.SetProblem({*this});

MIOPEN_LOG_I("Found solution: " << solution.GetSolver().ToString() << " , "
<< solution.GetWorkspaceSize() << ", "
<< solution.GetTime());

ret.emplace_back(std::move(solution));

if(ret.size() >= max_solutions)
{
break;
}
}

return ret;
}

void Problem::ValidateGroupCount(const TensorDescriptor& xDesc,
const TensorDescriptor& wDesc,
const ConvolutionDescriptor& conv)
@@ -456,7 +542,8 @@ void Problem::LogDriverCommand() const
const auto log_function =
boost::hof::match([&](const ConvolutionDescriptor& op_desc) { LogDriverCommand(op_desc); },
[&](const ActivationDescriptor& op_desc) { LogDriverCommand(op_desc); },
-                      [&](const BiasDescriptor&) {});
+                      [&](const BiasDescriptor&) {},
+                      [&](const SoftmaxDescriptor&) {});

boost::apply_visitor(log_function, operator_descriptor);
}
@@ -576,6 +663,7 @@ void Problem::CalculateOutput()
[&](const ActivationDescriptor&) {
RegisterTensorDescriptor(GetOutputId(), GetInput());
},
[&](const SoftmaxDescriptor&) { RegisterTensorDescriptor(GetOutputId(), GetInput()); },
[&](const BiasDescriptor&) { RegisterTensorDescriptor(GetOutputId(), GetInput()); }),
operator_descriptor);
}
@@ -585,7 +673,8 @@ miopenTensorArgumentId_t Problem::GetInputId() const
return boost::apply_visitor(
boost::hof::match([](const ConvolutionDescriptor&) { return miopenTensorConvolutionX; },
[](const ActivationDescriptor&) { return miopenTensorActivationX; },
-        [](const BiasDescriptor&) { return miopenTensorBiasX; }),
+        [](const BiasDescriptor&) { return miopenTensorBiasX; },
+        [](const SoftmaxDescriptor&) { return miopenTensorSoftmaxX; }),
operator_descriptor);
}

@@ -594,7 +683,8 @@ miopenTensorArgumentId_t Problem::GetOutputId() const
return boost::apply_visitor(
boost::hof::match([](const ConvolutionDescriptor&) { return miopenTensorConvolutionY; },
[](const ActivationDescriptor&) { return miopenTensorActivationY; },
-        [](const BiasDescriptor&) { return miopenTensorBiasY; }),
+        [](const BiasDescriptor&) { return miopenTensorBiasY; },
+        [](const SoftmaxDescriptor&) { return miopenTensorSoftmaxY; }),
operator_descriptor);
}

@@ -679,7 +769,14 @@ void FusedProblem::AddProblemToPlan(FusionPlanDescriptor& plan, const Problem& p
[&](const BiasDescriptor&) {
plan.AddOp(std::make_shared<BiasFusionOpDescriptor>(
problem.GetTensorDescriptorChecked(miopenTensorBias, "miopenTensorBias")));
},
[&](const SoftmaxDescriptor&) {
// Not implemented
assert(false);
MIOPEN_THROW(miopenStatusNotImplemented,
"Softmax is not implemented for FusedProblem");
}),

problem.operator_descriptor);
}

@@ -741,7 +838,14 @@ fusion::FusionInvokeParams FusedProblem::MakeInvokeParams(
const auto bias_ptr = buffers.at(miopenTensorBias);
operator_args.params.emplace_back(
std::make_unique<miopen::fusion::BiasOpInvokeParam>(bias_ptr));
},
[&](const SoftmaxDescriptor&) {
// Not implemented
assert(false);
MIOPEN_THROW(miopenStatusNotImplemented,
"Softmax is not implemented for FusedProblem");
}),

problem.operator_descriptor);
}
