AI Based Parameter Prediction Model for conv_hip_igemm_group_fwd_xdlops Solver (#2523)
Dmantri98 authored Dec 8, 2023
1 parent 3c0dfcb commit 4d5a184
Showing 10 changed files with 342 additions and 171 deletions.
30 changes: 17 additions & 13 deletions src/conv/heuristics/ai_heuristics.cpp
@@ -436,7 +436,8 @@ Metadata::Metadata(const std::string& arch, const std::string& solver)
const nlohmann::json metadata =
common::LoadJSON(GetSystemDbPath() + "/" + arch + "_" + solver + "_metadata.ktn.model");
num_tuning_params = metadata["num_tuning_params"].get<std::size_t>();
tuning_decodings = metadata["decodings"]["tunings"].get<std::unordered_map<std::string, int>>();
tuning_decodings =
metadata["decodings"]["tunings"].get<std::unordered_map<std::string, std::string>>();
}

class Model
@@ -450,9 +451,11 @@ class Model
{
}
virtual ~Model() = default;
fdeep::tensors Encode(const std::vector<float>& features, std::size_t dim) const
fdeep::tensors Encode(const std::vector<float>& features, std::size_t dim, bool transform) const
{
fdeep::tensor input_tensor = fdeep::tensor(fdeep::tensor_shape(dim, dim), features);
const auto tensor_shape_depth = transform ? dim : 1;
fdeep::tensor input_tensor =
fdeep::tensor(fdeep::tensor_shape(dim, tensor_shape_depth), features);
return encoder.predict({input_tensor});
}
fdeep::tensors Decode(const float prev_token, const fdeep::tensors& context) const
@@ -488,10 +491,6 @@ class Model

std::shared_ptr<Model> GetModel(const std::string& arch, const std::string& solver)
{
static const std::string prevArch{arch};

if(prevArch != arch)
MIOPEN_THROW("Cannot use AI tuning models for multiple gpu architectures");
static std::map<std::string, std::shared_ptr<Model>> models;
auto it = models.find(solver);
if(it == models.end())
@@ -509,11 +508,16 @@ std::shared_ptr<Model> GetModel(const std::string& arch, const std::string& solver)
bool ModelSetParams(const std::string& arch,
const std::string& solver,
const std::vector<float>& features,
std::function<bool(int, int)> validator)
bool transform_features,
std::function<bool(std::size_t, std::string)> validator)
{
auto model = GetModel(arch, solver);
int dim = std::sqrt(features.size());
fdeep::tensors context = model->Encode(features, dim);
auto model = GetModel(arch, solver);
int dim = 0;
if(transform_features)
dim = std::sqrt(features.size());
else
dim = features.size();
fdeep::tensors context = model->Encode(features, dim, transform_features);
float decoder_input = 0.0;
for(std::size_t i = 0; i < model->metadata.num_tuning_params; ++i)
{
@@ -529,9 +533,9 @@ bool ModelSetParams(const std::string& arch,
{
int token = pq.top().second;
// convert index to token value
int value = model->metadata.tuning_decodings[std::to_string(token)];
std::string value = model->metadata.tuning_decodings[std::to_string(token)];
pq.pop();
if(value < 0)
if(value == "-1")
return false;
if(validator(i, value))
{
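
The hunks above restructure ModelSetParams around string-valued decodings. As a reading aid, here is a minimal, self-contained sketch (not MIOpen code; all names are illustrative) of the decode-and-validate contract the function now follows. Falling through to the next-best token after a rejected candidate is an assumption based on the priority-queue usage visible above.

#include <cstddef>
#include <functional>
#include <iostream>
#include <queue>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

using Decodings = std::unordered_map<std::string, std::string>;

// For each tuning parameter i, try candidate token indices in descending
// score order; decode each index through the metadata table; "-1" aborts;
// the first value the validator accepts is kept.
bool SetParamsFromScores(const std::vector<std::vector<float>>& step_scores,
                         const Decodings& decodings,
                         const std::function<bool(std::size_t, std::string)>& validator)
{
    for(std::size_t i = 0; i < step_scores.size(); ++i)
    {
        std::priority_queue<std::pair<float, int>> pq; // max-heap over (score, token)
        for(int tok = 0; tok < static_cast<int>(step_scores[i].size()); ++tok)
            pq.push(std::make_pair(step_scores[i][tok], tok));

        bool accepted = false;
        while(!pq.empty())
        {
            const int token = pq.top().second;
            pq.pop();
            const std::string value = decodings.at(std::to_string(token));
            if(value == "-1") // sentinel: no usable value for this parameter
                return false;
            if(validator(i, value)) // caller applies the value to its perf-config
            {
                accepted = true;
                break;
            }
        }
        if(!accepted)
            return false;
    }
    return true;
}

int main()
{
    const Decodings decodings{{"0", "-1"}, {"1", "4"}, {"2", "2"}};
    const std::vector<std::vector<float>> scores{{0.1f, 0.2f, 0.7f}}; // one parameter
    const bool ok = SetParamsFromScores(
        scores, decodings, [](std::size_t, std::string v) { return v == "2"; });
    std::cout << (ok ? "params set by model" : "fall back to static heuristic") << '\n';
}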
5 changes: 3 additions & 2 deletions src/include/miopen/conv/heuristics/ai_heuristics.hpp
@@ -81,14 +81,15 @@ namespace tuning {
struct Metadata
{
std::size_t num_tuning_params;
std::unordered_map<std::string, int> tuning_decodings;
std::unordered_map<std::string, std::string> tuning_decodings;
Metadata(const std::string& arch, const std::string& solver);
};

bool ModelSetParams(const std::string& arch,
const std::string& solver,
const std::vector<float>& features,
std::function<bool(int, int)> validator);
bool transform_features,
std::function<bool(std::size_t, std::string)> validator);
} // namespace tuning
#endif // MIOPEN_ENABLE_AI_KERNEL_TUNING
} // namespace ai
24 changes: 17 additions & 7 deletions src/include/miopen/solver.hpp
@@ -370,12 +370,8 @@ struct PerformanceConfigConvAsm1x1U : PerfConfigBase<PerformanceConfigConvAsm1x1U>

void StaticHeuristic(const miopen::conv::ProblemDescription& problem);
void HeuristicInit(const ExecutionContext&, const miopen::conv::ProblemDescription&);
#if MIOPEN_ENABLE_AI_KERNEL_TUNING
void RunParmeterPredictionModel(const ExecutionContext&,
const miopen::conv::ProblemDescription&,
bool& valid);
bool ModelApplyToken(int index, int value, const miopen::conv::ProblemDescription&);
#endif
bool IsModelApplicable(const ExecutionContext& ctx,
const miopen::conv::ProblemDescription& problem) const;
bool IsValidValue() const { return IsValidValueImpl(8); }
bool SetNextValue(const miopen::conv::ProblemDescription&);
bool IsValid(const ExecutionContext&, const miopen::conv::ProblemDescription& problem) const
@@ -399,6 +395,9 @@ struct PerformanceConfigConvAsm1x1U : PerfConfigBase<PerformanceConfigConvAsm1x1U>
{
return IsValidValueImpl(sequence_length);
}
bool RunParameterPredictionModel(const ExecutionContext&,
const miopen::conv::ProblemDescription&);
bool ModelApplyToken(int index, std::string value, const miopen::conv::ProblemDescription&);
#endif
bool IsValidImpl(const miopen::conv::ProblemDescription& problem, int sequence_length) const;
bool IsValidValueImpl(int sequence_length) const;
@@ -4492,7 +4491,7 @@ struct PerformanceConfigHipImplicitGemmGroupFwdXdlops
: PerformanceConfigHipImplicitGemmGroupFwdXdlops(0, "")
{
}
void HeuristicInit(const miopen::conv::ProblemDescription&);
void HeuristicInit(const ExecutionContext&, const miopen::conv::ProblemDescription&);
bool SetNextValue(const miopen::conv::ProblemDescription&);
bool IsValidValue() const;
bool IsValid(const ExecutionContext&, const miopen::conv::ProblemDescription& problem) const
@@ -4501,8 +4500,19 @@ struct PerformanceConfigHipImplicitGemmGroupFwdXdlops
}
bool IsValid(const miopen::conv::ProblemDescription&) const;
bool operator==(const PerformanceConfigHipImplicitGemmGroupFwdXdlops& other) const;
bool IsModelApplicable(const ExecutionContext& ctx,
const miopen::conv::ProblemDescription& problem) const;

private:
#if MIOPEN_ENABLE_AI_KERNEL_TUNING
std::vector<int> heuristic_indexes;
std::vector<std::vector<std::string>> heuristic_kernels;
template <typename DataType>
bool RunParameterPredictionModel(const ExecutionContext& ctx,
const miopen::conv::ProblemDescription& problem);
void InitHeuristicKernelIDs();
bool ModelApplyToken(int idx, std::string value);
#endif
template <typename DataType>
void Init(const miopen::conv::ProblemDescription&);
template <typename DataType>
76 changes: 38 additions & 38 deletions src/kernels/gfx908_ConvAsm1x1U_metadata.ktn.model
@@ -2,44 +2,44 @@
"num_tuning_params": 8,
"decodings": {
"tunings": {
"0": -1,
"1": 4,
"2": 2,
"3": 1,
"4": 3,
"5": 16,
"6": 8,
"7": 1,
"8": 4,
"9": 32,
"10": 4,
"11": 1,
"12": 2,
"13": 5,
"14": 7,
"15": 3,
"16": 6,
"17": 8,
"18": 64,
"19": 16,
"20": 32,
"21": 4,
"22": 1,
"23": 1,
"24": 3,
"25": 2,
"26": 4,
"27": 2,
"28": 4,
"29": 1,
"30": 2,
"31": 1,
"32": 4,
"33": 2,
"34": 4,
"35": 8,
"36": 1,
"37": -1
"0": "-1",
"1": "4",
"2": "2",
"3": "1",
"4": "3",
"5": "16",
"6": "8",
"7": "1",
"8": "4",
"9": "32",
"10": "4",
"11": "1",
"12": "2",
"13": "5",
"14": "7",
"15": "3",
"16": "6",
"17": "8",
"18": "64",
"19": "16",
"20": "32",
"21": "4",
"22": "1",
"23": "1",
"24": "3",
"25": "2",
"26": "4",
"27": "2",
"28": "4",
"29": "1",
"30": "2",
"31": "1",
"32": "4",
"33": "2",
"34": "4",
"35": "8",
"36": "1",
"37": "-1"
}
}
}
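
For illustration, the model's decoded token indices are used as string keys into this table; the mapped string is the tuning value, and "-1" is the invalid/terminal sentinel. A hypothetical stand-alone lookup (values copied from the table above, not MIOpen code):

#include <cassert>
#include <string>
#include <unordered_map>

int main()
{
    const std::unordered_map<std::string, std::string> tunings{
        {"0", "-1"}, {"5", "16"}, {"18", "64"}, {"37", "-1"}};
    assert(tunings.at("5") == "16"); // token 5 decodes to tuning value "16"
    assert(tunings.at("0") == "-1"); // sentinel: abort prediction, use fallback
}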

Large diffs are not rendered by default.

Large diffs are not rendered by default.

36 changes: 36 additions & 0 deletions src/kernels/gfx90a_ConvHipIgemmGroupFwdXdlops_metadata.ktn.model
@@ -0,0 +1,36 @@
{
"num_tuning_params": 9,
"decodings": {
"tunings": {
"0": "-1",
"1": "64",
"2": "256",
"3": "128",
"4": "64",
"5": "128",
"6": "32",
"7": "256",
"8": "32",
"9": "128",
"10": "64",
"11": "256",
"12": "32",
"13": "16",
"14": "Default",
"15": "OddC",
"16": "2",
"17": "1",
"18": "4",
"19": "1",
"20": "2",
"21": "4",
"22": "8",
"23": "1",
"24": "4",
"25": "8",
"26": "1",
"27": "4",
"28": "-1"
}
}
}
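
Unlike the gfx908 table above, this new table mixes numeric entries with nominal ones such as "Default" and "OddC", which is presumably why tuning_decodings was widened from std::unordered_map<std::string, int> to string values throughout this commit.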
65 changes: 31 additions & 34 deletions src/solver/conv_asm_1x1u.cpp
@@ -39,7 +39,6 @@
#include <miopen/logger.hpp>
#include <miopen/solver.hpp>
#include <miopen/conv/heuristics/ai_heuristics.hpp>
#include <nlohmann/json_fwd.hpp>

MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_PERF_VALS)
MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_SEARCH_OPTIMIZED)
@@ -367,39 +366,29 @@ bool PerformanceConfigConvAsm1x1U::IsValidImpl(const ProblemDescription& problem
}
return true;
}
#if MIOPEN_ENABLE_AI_KERNEL_TUNING

#if MIOPEN_ENABLE_AI_KERNEL_TUNING
bool PerformanceConfigConvAsm1x1U::ModelApplyToken(int index,
int value,
std::string value,
const ProblemDescription& problem)
{
int val = stoi(value);
switch(index)
{
case 0: read_size = value; break;
case 1: k_mult = value; break;
case 2: chunks_per_wave = value; break;
case 3: chunk_size = value; break;
case 4: n_mult = value; break;
case 5: c_mult = value; break;
case 6: waves_c_in_group = value; break;
case 7: waves_k_in_group = value; break;
case 0: read_size = val; break;
case 1: k_mult = val; break;
case 2: chunks_per_wave = val; break;
case 3: chunk_size = val; break;
case 4: n_mult = val; break;
case 5: c_mult = val; break;
case 6: waves_c_in_group = val; break;
case 7: waves_k_in_group = val; break;
default: return false;
}
// this function may leave PerformanceConfigConvAsm1x1U in a partially valid or invalid state
return this->IsPartiallyValid(problem, index + 1);
}

static bool IsModelApplicable(const ExecutionContext& ctx, const ProblemDescription& problem)
{
if(!miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_AI_HEUR)))
return false;
if(ctx.GetStream().GetDeviceName() != "gfx908")
return false;
if(problem.GetKernelStrideH() != 1)
return false;
return true;
}

static std::vector<float> TransformFeatures(const ProblemDescription& problem, std::size_t n)
{
assert(n == 8); // n = 6 (numerical conv params) * 1 + 1 (nominal conv params) * 2(amount of
Expand All @@ -418,21 +407,21 @@ static std::vector<float> TransformFeatures(const ProblemDescription& problem, s
return features;
}

void PerformanceConfigConvAsm1x1U::RunParmeterPredictionModel(const ExecutionContext& ctx,
const ProblemDescription& problem,
bool& valid)
bool PerformanceConfigConvAsm1x1U::RunParameterPredictionModel(const ExecutionContext& ctx,
const ProblemDescription& problem)
{
static const std::size_t n = 8;
static const std::string& arch = ctx.GetStream().GetDeviceName();
static const std::string solver = "ConvAsm1x1U";
std::vector<float> features = TransformFeatures(problem, n);
if(ai::tuning::ModelSetParams(arch, solver, features, [&](int idx, int value) {
if(ai::tuning::ModelSetParams(arch, solver, features, true, [&](int idx, std::string value) {
return this->ModelApplyToken(idx, value, problem);
}))
{
MIOPEN_LOG_I("Params set by AI: " << ToString());
valid = true;
return true;
}
return false;
}
#endif

@@ -482,22 +471,30 @@ void PerformanceConfigConvAsm1x1U::StaticHeuristic(const ProblemDescription& problem)
}
}

void PerformanceConfigConvAsm1x1U::HeuristicInit(const ExecutionContext& ctx,
bool PerformanceConfigConvAsm1x1U::IsModelApplicable(const ExecutionContext& ctx,
const ProblemDescription& problem) const
{
if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_AI_HEUR)))
return false;
if(ctx.GetStream().GetDeviceName() != "gfx908")
return false;
if(problem.GetKernelStrideH() != 1)
return false;
return true;
}

void PerformanceConfigConvAsm1x1U::HeuristicInit([[maybe_unused]] const ExecutionContext& ctx,
const ProblemDescription& problem)
{
if(problem.GetInDataType() == miopenDouble)
MIOPEN_THROW("Double data type is not supported by ConvAsm1x1U");

#if MIOPEN_ENABLE_AI_KERNEL_TUNING
if(IsModelApplicable(ctx, problem))
{
bool valid = false;
RunParmeterPredictionModel(ctx, problem, valid);
if(valid)

if(RunParameterPredictionModel(ctx, problem))
return;
}
#else
std::ignore = ctx;
#endif
StaticHeuristic(problem);
MIOPEN_LOG_I(ToString());