Commit

add support for tensor scale add op
amberhassaan committed Dec 13, 2023
1 parent bf88d8a commit 3539871
Showing 7 changed files with 106 additions and 44 deletions.
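In effect, this commit lets the fused conv-bias-activation path scale the residual tensor z before adding it: ConvBiasActivFusion now accepts alpha1/alpha2 instead of rejecting any value other than 1.0, and, as wired here, computes roughly y = ReLU(alpha1 * conv(x, w) + alpha2 * z + bias). The following is a minimal sketch condensing the updated fusion.cpp flow; it assumes the handle, descriptors, algorithm choice, device buffers (x, w, z, bias, y), and the activation parameters (activ_alpha, activ_beta, activ_gamma) already exist, and it drops the MIOPEN_CHECK error handling for brevity.

// Sketch only -- condensed from the updated ConvBiasActivFusion in this commit.
FusionPlanDescriptor fusePlanDesc{miopenVerticalFusion, xDesc};
OperatorArgs fusionArgs;

auto convOp  = std::make_shared<ConvForwardOpDescriptor>(conv_desc, wDesc);
auto zOp     = std::make_shared<TensorScaleAddOpDescriptor>(zDesc); // new op added by this commit
auto biasOp  = std::make_shared<BiasFusionOpDescriptor>(biasDesc);
auto activOp = std::make_shared<ActivFwdFusionOpDescriptor>(miopenActivationRELU);

fusePlanDesc.AddOp(convOp);
fusePlanDesc.SetConvAlgo(algo);
fusePlanDesc.AddOp(zOp);
fusePlanDesc.AddOp(biasOp);
fusePlanDesc.AddOp(activOp);
fusePlanDesc.Compile(handle);

float falpha1 = 1.0f, falpha2 = 1.0f; // unpacked from the API's alpha1/alpha2 pointers
float alpha = 1.0f, beta = 0.0f;      // pass-through scalars for the bias/activation ops

convOp->SetArgs(fusionArgs, falpha1, beta, w); // falpha1 scales the convolution result
zOp->SetArgs(fusionArgs, falpha2, z);          // falpha2 scales the added z tensor
biasOp->SetArgs(fusionArgs, &alpha, &beta, bias);
activOp->SetArgs(fusionArgs, &alpha, &beta, activ_alpha, activ_beta, activ_gamma);
fusePlanDesc.Execute(handle, xDesc, x, yDesc, y, fusionArgs);

Note that the conv and tensor-scale-add ops now take their scales as plain floats in SetArgs, while the bias and activation ops keep the older const void* alpha/beta convention.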
46 changes: 34 additions & 12 deletions src/fusion.cpp
@@ -80,6 +80,9 @@ miopenStatus_t ConvBiasActivFusion(Handle& handle,
assert(workspaceSizeInBytes == 0);
std::ignore = workspace;
std::ignore = workspaceSizeInBytes;
/// \todo: add workspace support in fusion

/*
if(alpha1 != nullptr)
{
const auto falpha1 = *(static_cast<const float*>(alpha1));
@@ -92,12 +95,17 @@ miopenStatus_t ConvBiasActivFusion(Handle& handle,
if(falpha2 != 1.0f)
MIOPEN_THROW(miopenStatusNotImplemented, "alpha2 can only be 1.0");
}
*/

float falpha1 = alpha1 ? *(static_cast<const float*>(alpha1)) : 1.0f;
float falpha2 = alpha2 ? *(static_cast<const float*>(alpha2)) : 1.0f;

// if(z != nullptr || zDesc.GetSize() != 0)
// MIOPEN_THROW(miopenStatusNotImplemented, "The addition of z vector is not yet supported");
FusionPlanDescriptor fusePlanDesc{miopenVerticalFusion, xDesc};
OperatorArgs fusionArgs;
auto convoOp = std::make_shared<ConvForwardOpDescriptor>(conv_desc, wDesc);
auto zOp = std::make_shared<BiasFusionOpDescriptor>(zDesc);
auto convOp = std::make_shared<ConvForwardOpDescriptor>(conv_desc, wDesc);
auto zOp = std::make_shared<TensorScaleAddOpDescriptor>(zDesc);
auto biasOp = std::make_shared<BiasFusionOpDescriptor>(biasDesc);
auto activOp = std::make_shared<ActivFwdFusionOpDescriptor>(activationDesc.GetMode());

@@ -107,25 +115,24 @@ miopenStatus_t ConvBiasActivFusion(Handle& handle,
"only Activation Mode == miopenActivationRELU is supported");
}

MIOPEN_CHECK(fusePlanDesc.AddOp(convoOp));
MIOPEN_CHECK(fusePlanDesc.AddOp(convOp));
MIOPEN_CHECK(fusePlanDesc.SetConvAlgo(algo));
MIOPEN_CHECK(fusePlanDesc.AddOp(zOp));
MIOPEN_CHECK(fusePlanDesc.AddOp(biasOp));
MIOPEN_CHECK(fusePlanDesc.AddOp(activOp));

MIOPEN_CHECK(fusePlanDesc.Compile(handle));
float alpha = static_cast<float>(1.0);
float beta = static_cast<float>(0);
float alpha = 1.0f;
float beta = 0.0f;
float activ_alpha = activationDesc.GetAlpha();
float activ_beta = activationDesc.GetBeta();
float activ_gamma = activationDesc.GetGamma();

// Set the Args
MIOPEN_CHECK(convoOp->SetArgs(fusionArgs, &alpha, &beta, w));
MIOPEN_CHECK(activOp->SetArgs(fusionArgs, &alpha, &beta, activ_alpha, activ_beta, activ_gamma));
// TODO(Amber): add alpha2 to zOp as a scale factor
MIOPEN_CHECK(zOp->SetArgs(fusionArgs, &alpha, &beta, z));
MIOPEN_CHECK(convOp->SetArgs(fusionArgs, falpha1, beta, w));
MIOPEN_CHECK(zOp->SetArgs(fusionArgs, falpha2, z));
MIOPEN_CHECK(biasOp->SetArgs(fusionArgs, &alpha, &beta, bias));
MIOPEN_CHECK(activOp->SetArgs(fusionArgs, &alpha, &beta, activ_alpha, activ_beta, activ_gamma));
MIOPEN_CHECK(fusePlanDesc.Execute(handle, xDesc, x, yDesc, y, fusionArgs));
return miopenStatusSuccess;
}
@@ -524,11 +531,11 @@ miopenStatus_t ConvForwardOpDescriptor::GetOutputDesc(TensorDescriptor& output_d
}

miopenStatus_t ConvForwardOpDescriptor::SetArgs(OperatorArgs& args,
const void* /*alpha*/,
const void* /*beta*/,
float alpha,
float beta,
ConstData_t w)
{
auto op_args = std::make_unique<fusion::ConvolutionOpInvokeParam>(w);
auto op_args = std::make_unique<fusion::ConvolutionOpInvokeParam>(alpha, beta, w);
args.SetArg(GetIdx(), std::move(op_args));
return miopenStatusSuccess;
}
@@ -683,6 +690,21 @@ miopenStatus_t BiasFusionOpDescriptor::SetArgs(OperatorArgs& args,
return miopenStatusSuccess;
}

miopenStatus_t TensorScaleAddOpDescriptor::GetOutputDesc(TensorDescriptor& output_desc) const
{
output_desc = this->tensor_desc;
return miopenStatusSuccess;
}

miopenStatus_t TensorScaleAddOpDescriptor::SetArgs(OperatorArgs& args,
float alpha,
ConstData_t tensor_ptr)
{
auto op_args = std::make_unique<fusion::TensorScaleAddOpInvokeParam>(alpha, tensor_ptr);
args.SetArg(GetIdx(), std::move(op_args));
return miopenStatusSuccess;
}

std::string FusionPlanDescriptor::GetAlgorithmName(const Handle& /*handle*/)
{
if(conv_fwd_algo)
13 changes: 12 additions & 1 deletion src/include/miopen/fusion.hpp
@@ -81,6 +81,17 @@ struct BiasFusionOpDescriptor : FusionOpDescriptor
TensorDescriptor base_desc;
};

struct TensorScaleAddOpDescriptor : FusionOpDescriptor
{
TensorScaleAddOpDescriptor(const TensorDescriptor& desc) : tensor_desc(desc) {}
miopenStatus_t GetOutputDesc(TensorDescriptor& output_desc) const override;
miopenStatus_t GetNetworkConfig(std::ostringstream& network_config) override;
miopenStatus_t SetArgs(OperatorArgs& args, float alpha, ConstData_t tensor_ptr);
miopenFusionOp_t kind() const override { return miopenFusionOpTensorScaleAdd; }
TensorDescriptor tensor_desc;
};

struct ActivFwdFusionOpDescriptor : FusionOpDescriptor
{
ActivFwdFusionOpDescriptor(miopenActivationMode_t mode) : activMode(mode) {}
@@ -214,7 +225,7 @@ struct ConvForwardOpDescriptor : FusionOpDescriptor
kernel_info_valid(false),
conv_compiler_options(""){};
miopenStatus_t GetOutputDesc(TensorDescriptor& output_desc) const override;
miopenStatus_t SetArgs(OperatorArgs& args, const void* alpha, const void* beta, ConstData_t w);
miopenStatus_t SetArgs(OperatorArgs& args, float alpha, float beta, ConstData_t w);
miopenStatus_t GetNetworkConfig(std::ostringstream& network_config) override;
bool isASMApplicable(Handle& handle);
miopenFusionOp_t kind() const override { return miopenFusionOpConvForward; };
14 changes: 13 additions & 1 deletion src/include/miopen/fusion/fusion_invoke_params.hpp
@@ -40,7 +40,12 @@ struct FusionOpInvokeParamBase

struct ConvolutionOpInvokeParam : FusionOpInvokeParamBase
{
ConvolutionOpInvokeParam(ConstData_t w) : weights(w) {}
ConvolutionOpInvokeParam(float _alpha, float _beta, ConstData_t w) :
alpha(_alpha),
beta(_beta),
weights(w) {}
float alpha = 1.0f; // scales the newly computed convolution result
float beta = 0.0f;  // scales the existing contents of the convolution output tensor
ConstData_t weights = nullptr;
};

@@ -50,6 +55,13 @@ struct BiasOpInvokeParam : FusionOpInvokeParamBase
ConstData_t bdata = nullptr;
};

struct TensorScaleAddOpInvokeParam : public FusionOpInvokeParamBase
{
TensorScaleAddOpInvokeParam(float a, ConstData_t tp) : alpha(a), tensor_ptr(tp) {}
float alpha = 1.0f;
ConstData_t tensor_ptr = nullptr;
};

struct ActivationOpInvokeParam : FusionOpInvokeParamBase
{
ActivationOpInvokeParam(double alpha, double beta, double gamma)
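These invoke params are the hand-off between the descriptors' SetArgs calls and a solver's invoker: SetArgs packs a param object into OperatorArgs at the op's index, and the invoker pulls it back out with a dynamic_cast at execution time (the CK fused solver further down in this diff does exactly that). A minimal sketch of the read side for the new op, assuming the plan order conv -> tensor-scale-add -> bias -> activation established in fusion.cpp above:

// Sketch: recovering what TensorScaleAddOpDescriptor::SetArgs recorded for op index 1.
auto& base_param = *data_ctx.op_args.params[1];
auto* z_param    = dynamic_cast<miopen::fusion::TensorScaleAddOpInvokeParam*>(&base_param);
assert(z_param != nullptr);               // fails if the plan's op order doesn't match
const float alpha2 = z_param->alpha;      // scale applied to the added tensor
ConstData_t z_buf  = z_param->tensor_ptr; // device pointer to the z tensor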
6 changes: 3 additions & 3 deletions src/include/miopen/fusion/solvers.hpp
@@ -284,9 +284,9 @@ struct PerfConfigConvCKIgemmFwdBiasResAddActivFused
bool operator==(const PerfConfigConvCKIgemmFwdBiasResAddActivFused& other) const;

private:
template <typename DataType, typename BiasDataType = DataType>
template <typename DataType, typename AccumDataType = DataType>
void Init(const miopen::conv::ProblemDescription&);
template <typename DataType, typename BiasDataType = DataType>
template <typename DataType, typename AccumDataType = DataType>
bool CheckIsSupportCKArgs(const miopen::conv::ProblemDescription&) const;
};

@@ -317,7 +317,7 @@ struct ConvCKIgemmFwdBiasResAddActivFused final
const PerfConfigConvCKIgemmFwdBiasResAddActivFused& config) const override;

private:
template <typename DataType, typename BiasDataType = DataType>
template <typename DataType, typename AccumDataType = DataType>
bool CheckCKApplicability(const miopen::conv::ProblemDescription&) const;
};
struct ConvBinWinogradRxSFused final : FusionSolverBase
1 change: 1 addition & 0 deletions src/include/miopen/fusion_ops.hpp
@@ -44,6 +44,7 @@ enum miopenFusionOp_t
miopenFusionOpBatchNormFwdTrain = 4,
miopenFusionOpBatchNormBwdTrain = 5,
miopenFusionOpActivBackward = 6,
miopenFusionOpTensorScaleAdd = 7,
};

enum MDGraph_op_t
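Solvers pattern-match a fusion plan by checking each op's kind() against this enum, so the new enumerator is what lets the CK solver recognize the extended sequence. With this commit the fused conv + residual-add + bias + ReLU path expects the plan laid out as follows (a sketch mirroring the IsApplicable change at the end of this diff):

// Expected op order for the fused conv / scale-add / bias / ReLU plan.
if(desc.op_map[0]->kind() != miopenFusionOpConvForward)    return false;
if(desc.op_map[1]->kind() != miopenFusionOpTensorScaleAdd) return false;
if(desc.op_map[2]->kind() != miopenFusionOpBiasForward)    return false;
if(desc.op_map[3]->kind() != miopenFusionOpActivForward)   return false;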
6 changes: 6 additions & 0 deletions src/ocl/fusionopbiasbnactivocl.cpp
@@ -52,6 +52,12 @@ miopenStatus_t BiasFusionOpDescriptor::GetNetworkConfig(std::ostringstream& netw
return miopenStatusSuccess;
}

miopenStatus_t TensorScaleAddOpDescriptor::GetNetworkConfig(std::ostringstream& network_config)
{
network_config << "tensorScaleAdd"; // for bias
return miopenStatusSuccess;
}

miopenStatus_t ActivFwdFusionOpDescriptor::GetNetworkConfig(std::ostringstream& network_config)
{
network_config << "ActivFwd" << std::to_string(activMode);
64 changes: 37 additions & 27 deletions src/solver/conv_ck_igemm_fwd_bias_res_add_activ_fused.cpp
@@ -49,18 +49,18 @@ namespace fusion {
using CK_OutLayout = ck::tensor_layout::convolution::NDHWGK;

// DataType also applies to weights
// BiasDataType also applies to added z & bias tensors
template <typename DataType, typename BiasDataType = DataType>
// AccumDataType also applies to added z & bias tensors
template <typename DataType, typename AccumDataType = DataType>
using DeviceOp = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
ck::tensor_operation::device::DeviceGroupedConvFwdMultipleD<
3,
ck::tensor_layout::convolution::NDHWGC,
ck::tensor_layout::convolution::GKZYXC,
ck::Tuple<CK_OutLayout, CK_OutLayout>,
ck::Tuple<CK_OutLayout, ck::tensor_layout::convolution::G_K>,
CK_OutLayout,
DataType, // in data type
DataType, // wei data type
ck::Tuple<BiasDataType, BiasDataType>, // z & bias tensors data type
ck::Tuple<AccumDataType, AccumDataType>, // z & bias tensors data type
DataType, // out data type
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
@@ -93,6 +93,8 @@ struct CKArgs
in_lens = {G, N, C, Di, Hi, Wi};
out_lens = {G, N, K, Do, Ho, Wo};
wei_lens = {G, K, C, Z, Y, X};
bias_lens = {G, 1, K, 1, 1, 1};
bias_strides = {K, 0, 1, 0, 0, 0};

// miopen filter_stride to CK filter_stride
auto miopen_in_strides = problem.GetIn().GetStrides();
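A note on the bias_lens/bias_strides introduced just above: the bias is no longer described with the full output shape. It becomes the second D tensor (the first, z, keeps the full output layout, per the G_K layout swap earlier in this file), and the zero strides broadcast it. A zero stride means the same element is reused along that dimension, so for an output element indexed (g, n, k, do, ho, wo) the implied bias address is

bias_offset = g*K + n*0 + k*1 + do*0 + ho*0 + wo*0 = g*K + k

i.e. one bias value per (group, output channel), shared across the batch and spatial dimensions.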
@@ -129,7 +131,9 @@ struct CKArgs
ConstData_t wei_buf,
Data_t out_buf,
ConstData_t z_buf,
ConstData_t bias_buf) const
ConstData_t bias_buf,
float alpha1,
float alpha2) const
{
using ScaleAddScaleAddRelu = ck::tensor_operation::element_wise::ScaleAddScaleAddRelu;
return op_ptr->MakeArgumentPointer(in_buf,
@@ -140,8 +144,8 @@ struct CKArgs
in_strides,
wei_lens,
wei_strides,
{out_lens, out_lens},
{out_strides, out_strides},
{out_lens, bias_lens},
{out_strides, bias_strides},
out_lens,
out_strides,
filter_stride,
@@ -158,20 +162,24 @@ struct CKArgs
const miopen::fusion::FusionInvokeParams& data_ctx) const
{

const auto& wei_buf =
dynamic_cast<miopen::fusion::ConvolutionOpInvokeParam&>(*data_ctx.op_args.params[0])
.weights;
auto* conv_param =
dynamic_cast<miopen::fusion::ConvolutionOpInvokeParam*>(data_ctx.op_args.params[0]);
assert(conv_param);

const auto& z_buf =
dynamic_cast<miopen::fusion::BiasOpInvokeParam&>(*data_ctx.op_args.params[1]).bdata;
auto* z_param = dynamic_cast<miopen::fusion::TensorScaleAddOpInvokeParam*>(data_ctx.op_args.params[1]);
assert(z_param);

const auto& bias_buf =
dynamic_cast<miopen::fusion::BiasOpInvokeParam&>(*data_ctx.op_args.params[2]).bdata;
auto* bias_param = dynamic_cast<miopen::fusion::BiasOpInvokeParam*>(data_ctx.op_args.params[2]);
assert(bias_param);

// const auto& activ_op =
// dynamic_cast<miopen::fusion::ActivationOpInvokeParam&>(*data_ctx.op_args.params[3]);
/// \todo: Support general activation functions.
/// Only ReLU activation is supported (hardcoded) for now.
[[maybe_unused]] auto* activ_param =
dynamic_cast<miopen::fusion::ActivationOpInvokeParam*>(data_ctx.op_args.params[3]);
assert(activ_param);

return MakeArgPtr(op_ptr, data_ctx.in, wei_buf, data_ctx.out, z_buf, bias_buf);
return MakeArgPtr(op_ptr, data_ctx.in, conv_param->weights, data_ctx.out,
z_param->tensor_ptr, bias_param->bdata,
conv_param->alpha, z_param->alpha);
}

#if 0
@@ -204,14 +212,14 @@ struct CKArgs
int Y;
int X;
int Z;
float alpha1 = 1.0f;
float alpha2 = 1.0f;
std::array<ck::index_t, 6> in_lens;
std::array<ck::index_t, 6> in_strides;
std::array<ck::index_t, 6> out_lens;
std::array<ck::index_t, 6> out_strides;
std::array<ck::index_t, 6> wei_lens;
std::array<ck::index_t, 6> wei_strides;
std::array<ck::index_t, 6> bias_lens;
std::array<ck::index_t, 6> bias_strides;
std::array<ck::index_t, 3> filter_stride;
std::array<ck::index_t, 3> filter_dilation;
std::array<ck::index_t, 3> lPadding;
@@ -221,29 +229,29 @@ struct CKArgs
} // namespace

// TODO: deal with separate input/output data types
template <typename DataType, typename BiasDataType>
template <typename DataType, typename AccumDataType>
void PerfConfigConvCKIgemmFwdBiasResAddActivFused::Init(
const miopen::conv::ProblemDescription& problem)
{

valid_kernels = FillValidKernelsIDs<DeviceOp<DataType, BiasDataType>, CKArgs>(problem);
valid_kernels = FillValidKernelsIDs<DeviceOp<DataType, AccumDataType>, CKArgs>(problem);
index = 0;
assert(!valid_kernels.empty());
kernel_id = valid_kernels[0];
}

template <typename DataType, typename BiasDataType>
template <typename DataType, typename AccumDataType>
bool PerfConfigConvCKIgemmFwdBiasResAddActivFused::CheckIsSupportCKArgs(
const miopen::conv::ProblemDescription& problem) const
{
return IsCKArgsSupported<DeviceOp<DataType, BiasDataType>, CKArgs>(problem, kernel_id);
return IsCKArgsSupported<DeviceOp<DataType, AccumDataType>, CKArgs>(problem, kernel_id);
}

template <typename DataType, typename BiasDataType>
template <typename DataType, typename AccumDataType>
bool ConvCKIgemmFwdBiasResAddActivFused::CheckCKApplicability(
const miopen::conv::ProblemDescription& problem) const
{
return IsCKApplicable<DeviceOp<DataType, BiasDataType>, CKArgs>(problem);
return IsCKApplicable<DeviceOp<DataType, AccumDataType>, CKArgs>(problem);
}

#endif
@@ -379,9 +387,11 @@ bool ConvCKIgemmFwdBiasResAddActivFused::IsApplicable(const FusionContext& ctx,
return false;
if(desc.op_map[0]->kind() != miopenFusionOpConvForward)
return false;
if(desc.op_map[1]->kind() != miopenFusionOpBiasForward)
if(desc.op_map[1]->kind() != miopenFusionOpTensorScaleAdd)
return false;
if(desc.op_map[2]->kind() != miopenFusionOpActivForward)
if(desc.op_map[2]->kind() != miopenFusionOpBiasForward)
return false;
if(desc.op_map[3]->kind() != miopenFusionOpActivForward)
return false;
const auto& activ_op = dynamic_cast<ActivFwdFusionOpDescriptor&>(*desc.op_map[3]);
if(activ_op.activMode != miopenActivationRELU)
