[IE CLDNN] Add config key to enforce fp32 precision for quantized models
vladimir-paramuzov committed Aug 26, 2020
1 parent 936b38d commit aa96de5
Showing 4 changed files with 22 additions and 4 deletions.
7 changes: 5 additions & 2 deletions inference-engine/include/cldnn/cldnn_config.hpp
@@ -59,9 +59,12 @@ DECLARE_CLDNN_CONFIG_KEY(GRAPH_DUMPS_DIR);
DECLARE_CLDNN_CONFIG_KEY(SOURCES_DUMPS_DIR);

/**
* @brief This key turns usage of int8 optimizations and qunatized models on.
* @brief This key enables FP16 precision for quantized models.
* By default the model is converted to FP32 precision before running LPT. If this key is enabled (default), then non-quantized layers
* will be converted back to FP16 after LPT, which might improve the performance if a model has a lot of compute operations in
* the non-quantized path. This key has no effect if the current device doesn't have INT8 optimization capabilities.
*/
DECLARE_CLDNN_CONFIG_KEY(INT8_ENABLED);
DECLARE_CLDNN_CONFIG_KEY(ENABLE_FP16_FOR_QUANTIZED_MODELS);

/**
* @brief This key should be set to correctly handle NV12 input without pre-processing.
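
The new key can be passed like any other clDNN plugin config option when loading a network. Below is a minimal usage sketch (not part of this commit), assuming the Inference Engine 2020.x Core API and a placeholder model path; setting the key to NO keeps the whole quantized model in FP32, which is the behavior this commit makes configurable.

// Hedged usage sketch: force full FP32 execution of a quantized model on the GPU (clDNN) plugin.
// "model_int8.xml" is a placeholder; the key and value constants are the ones declared in this commit.
#include <inference_engine.hpp>
#include <cldnn/cldnn_config.hpp>
#include <map>
#include <string>

int main() {
    using namespace InferenceEngine;
    Core ie;
    CNNNetwork network = ie.ReadNetwork("model_int8.xml");  // quantized (FakeQuantize) model

    // NO -> non-quantized layers are NOT converted back to FP16 after LPT,
    // so the whole model runs in FP32 on the device.
    std::map<std::string, std::string> config = {
        {CLDNNConfigParams::KEY_CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS, PluginConfigParams::NO}};

    ExecutableNetwork exeNet = ie.LoadNetwork(network, "GPU", config);
    (void)exeNet;
    return 0;
}
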
13 changes: 13 additions & 0 deletions inference-engine/src/cldnn_engine/cldnn_config.cpp
@@ -189,6 +189,14 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap)
} else {
THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported NV12 flag value: " << val;
}
} else if (key.compare(CLDNNConfigParams::KEY_CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS) == 0) {
if (val.compare(PluginConfigParams::YES) == 0) {
enable_fp16_for_quantized_models = true;
} else if (val.compare(PluginConfigParams::NO) == 0) {
enable_fp16_for_quantized_models = false;
} else {
THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported KEY_CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS flag value: " << val;
}
} else {
THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property key by plugin: " << key;
}
@@ -228,6 +236,11 @@ void Config::adjustKeyMapValues() {
else
key_config_map[CLDNNConfigParams::KEY_CLDNN_NV12_TWO_INPUTS] = PluginConfigParams::NO;

if (enable_fp16_for_quantized_models)
key_config_map[CLDNNConfigParams::KEY_CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS] = PluginConfigParams::YES;
else
key_config_map[CLDNNConfigParams::KEY_CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS] = PluginConfigParams::NO;

{
std::string qp = "0";
switch (queuePriority) {
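
For illustration, here is a small standalone sketch (hypothetical ToyConfig type, not plugin code) of the parse/reflect pattern used above by Config::UpdateFromMap and Config::adjustKeyMapValues: a YES/NO string is validated into a bool member, and the bool is later mirrored back into key_config_map so config queries report the effective value.

// Standalone sketch of the parse/reflect pattern; names are simplified stand-ins.
#include <map>
#include <stdexcept>
#include <string>

struct ToyConfig {
    bool enable_fp16_for_quantized_models = true;       // default matches the commit
    std::map<std::string, std::string> key_config_map;

    void updateFromMap(const std::map<std::string, std::string>& configMap) {
        auto it = configMap.find("CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS");
        if (it == configMap.end()) return;
        if (it->second == "YES")       enable_fp16_for_quantized_models = true;
        else if (it->second == "NO")   enable_fp16_for_quantized_models = false;
        else throw std::invalid_argument("Unsupported flag value: " + it->second);
    }

    void adjustKeyMapValues() {
        // Mirror the effective bool back into the string map used for config queries.
        key_config_map["CLDNN_ENABLE_FP16_FOR_QUANTIZED_MODELS"] =
            enable_fp16_for_quantized_models ? "YES" : "NO";
    }
};
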
2 changes: 2 additions & 0 deletions inference-engine/src/cldnn_engine/cldnn_config.h
@@ -27,6 +27,7 @@ struct Config {
enableDynamicBatch(false),
enableInt8(true),
nv12_two_inputs(false),
enable_fp16_for_quantized_models(true),
queuePriority(cldnn::priority_mode_types::disabled),
queueThrottle(cldnn::throttle_mode_types::disabled),
max_dynamic_batch(1),
@@ -49,6 +50,7 @@ struct Config {
bool enableDynamicBatch;
bool enableInt8;
bool nv12_two_inputs;
bool enable_fp16_for_quantized_models;
cldnn::priority_mode_types queuePriority;
cldnn::throttle_mode_types queueThrottle;
int max_dynamic_batch;
4 changes: 2 additions & 2 deletions inference-engine/src/cldnn_engine/cldnn_program.cpp
@@ -230,6 +230,7 @@ void convertLayerPrecision(const CNNLayerPtr& layer, bool isOutput = false) {
out_data->setPrecision(PREC_TO);
}
}

for (auto &in_data : layer->insData) {
auto data = in_data.lock();
if (PREC_FROM == data->getPrecision())
@@ -246,7 +247,6 @@ void convertLayerPrecision(const CNNLayerPtr& layer, bool isOutput = false) {
if (layer->precision == PREC_FROM)
layer->precision = PREC_TO;


auto wLayer = dynamic_cast<InferenceEngine::WeightableLayer *>(layer.get());
if (wLayer) {
if (wLayer->_weights && wLayer->_weights->getTensorDesc().getPrecision() == PREC_FROM) {
@@ -445,7 +445,7 @@ Program::Program(InferenceEngine::ICNNNetwork& network, std::shared_ptr<const cl
transformer.transform(network);

// [WA part2] Try to find non-quantized layers and convert them back to FP16
if (fqFound && baselineIsFP16) {
if (fqFound && baselineIsFP16 && config.enable_fp16_for_quantized_models) {
auto layersSorted = BFSSort(network);

for (auto& layer : layersSorted) {
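
The gating added to [WA part2] can be illustrated with a simplified, self-contained sketch (toy Layer type, not the real CNNLayer/BFSSort API): non-quantized layers are converted back to FP16 only when a FakeQuantize was found, the original network was FP16, and the new config flag is enabled; with the flag set to NO the graph keeps FP32 precision.

// Minimal standalone sketch of the [WA part2] gating idea; types are hypothetical stand-ins.
#include <iostream>
#include <string>
#include <vector>

enum class Precision { FP16, FP32 };

struct Layer {                       // toy stand-in for CNNLayer
    std::string type;
    Precision precision = Precision::FP32;
    bool quantized = false;          // true if the layer sits on a quantized path
};

void restoreFP16ForNonQuantized(std::vector<Layer>& layers,
                                bool fqFound,
                                bool baselineIsFP16,
                                bool enable_fp16_for_quantized_models) {
    // Mirrors: if (fqFound && baselineIsFP16 && config.enable_fp16_for_quantized_models)
    if (!(fqFound && baselineIsFP16 && enable_fp16_for_quantized_models))
        return;                      // with the key set to NO the whole graph stays FP32

    for (auto& layer : layers) {
        if (!layer.quantized && layer.precision == Precision::FP32)
            layer.precision = Precision::FP16;   // convert non-quantized layers back to FP16
    }
}

int main() {
    std::vector<Layer> net = {{"Convolution", Precision::FP32, true},
                              {"Sigmoid",     Precision::FP32, false}};
    restoreFP16ForNonQuantized(net, /*fqFound=*/true, /*baselineIsFP16=*/true,
                               /*enable_fp16_for_quantized_models=*/true);
    for (const auto& l : net)
        std::cout << l.type << ": "
                  << (l.precision == Precision::FP16 ? "FP16" : "FP32") << "\n";
}
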
