Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RDO Postprocess #460

Draft
wants to merge 16 commits into
base: main
Choose a base branch
from
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
__pycache__
Scratch
Proto
.idea

# Precompiled reference binaries for comparison tests
bin
Expand All @@ -19,6 +20,7 @@ Binaries
# Build artifacts
astcenc
build*
cmake-build*

# General build artifacts
Test/DocOut
Expand Down
30 changes: 30 additions & 0 deletions Source/astcenc.h
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,36 @@ struct astcenc_config
*/
float tune_search_mode0_enable;

/**
* @brief Enable Rate Distortion Optimization (RDO) post-processing.
*/
bool rdo_enabled;

/**
* @brief RDO quality scalar (lambda).
*
* Set to 0.5 by astcenc_config_init(). Config validation clamps the value to the range
* [0.001, 50.0].
*/
float rdo_quality;

/**
* @brief RDO lookback size in blocks.
*
* Derived from the quality preset by astcenc_config_init(); the built-in preset nodes use 64, 128,
* or 256, and qualities between nodes are interpolated. Config validation clamps the value to the
* range [4, 4096].
*/
unsigned int rdo_lookback;

/**
* @brief RDO task partitions.
*
* NOTE(review): unlike the other RDO settings, no default assignment or range clamp for this
* field is visible in the config init/validation changes - confirm where it is set.
*/
unsigned int rdo_partitions;

/**
* @brief RDO max smooth block error scale.
*
* Set to 10.0 by astcenc_config_init(). Config validation clamps the value to the range
* [1.0, 300.0].
*/
float rdo_max_smooth_block_error_scale;

/**
* @brief RDO max smooth block standard deviation.
*
* Set to 18.0 by astcenc_config_init(). Config validation clamps the value to the range
* [0.01, 65536.0].
*/
float rdo_max_smooth_block_std_dev;

/**
* @brief The progress callback, can be @c nullptr.
*
Expand Down
51 changes: 33 additions & 18 deletions Source/astcenc_entry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ struct astcenc_preset_config
float tune_3partition_early_out_limit_factor;
float tune_2plane_early_out_limit_correlation;
float tune_search_mode0_enable;
unsigned int rdo_lookback;
};

/**
Expand All @@ -64,22 +65,22 @@ struct astcenc_preset_config
static const std::array<astcenc_preset_config, 6> preset_configs_high {{
{
ASTCENC_PRE_FASTEST,
2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f, 0.0f
2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f, 0.0f, 64
}, {
ASTCENC_PRE_FAST,
3, 18, 10, 8, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.90f, 0.0f
3, 18, 10, 8, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.90f, 0.0f, 128
}, {
ASTCENC_PRE_MEDIUM,
4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 2.5f, 1.1f, 1.05f, 0.95f, 0.0f
4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 2.5f, 1.1f, 1.05f, 0.95f, 0.0f, 256
}, {
ASTCENC_PRE_THOROUGH,
4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.35f, 1.15f, 0.97f, 0.0f
4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.35f, 1.15f, 0.97f, 0.0f, 256
}, {
ASTCENC_PRE_VERYTHOROUGH,
4, 256, 128, 64, 98, 4, 6, 8, 6, 4, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 0.0f
4, 256, 128, 64, 98, 4, 6, 8, 6, 4, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 0.0f, 256
}, {
ASTCENC_PRE_EXHAUSTIVE,
4, 512, 512, 512, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 0.0f
4, 512, 512, 512, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 0.0f, 256
}
}};

Expand All @@ -89,22 +90,22 @@ static const std::array<astcenc_preset_config, 6> preset_configs_high {{
static const std::array<astcenc_preset_config, 6> preset_configs_mid {{
{
ASTCENC_PRE_FASTEST,
2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.80f, 1.0f
2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.80f, 1.0f, 64
}, {
ASTCENC_PRE_FAST,
3, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f, 1.0f
3, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f, 1.0f, 128
}, {
ASTCENC_PRE_MEDIUM,
3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.0f, 1.1f, 1.05f, 0.90f, 1.0f
3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.0f, 1.1f, 1.05f, 0.90f, 1.0f, 256
}, {
ASTCENC_PRE_THOROUGH,
4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.4f, 1.2f, 0.95f, 0.0f
4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.4f, 1.2f, 0.95f, 0.0f, 256
}, {
ASTCENC_PRE_VERYTHOROUGH,
4, 256, 128, 64, 98, 4, 6, 8, 6, 3, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 0.0f
4, 256, 128, 64, 98, 4, 6, 8, 6, 3, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 0.0f, 256
}, {
ASTCENC_PRE_EXHAUSTIVE,
4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 0.0f
4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 0.0f, 256
}
}};

Expand All @@ -114,22 +115,22 @@ static const std::array<astcenc_preset_config, 6> preset_configs_mid {{
static const std::array<astcenc_preset_config, 6> preset_configs_low {{
{
ASTCENC_PRE_FASTEST,
2, 10, 6, 4, 40, 2, 2, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.80f, 1.0f
2, 10, 6, 4, 40, 2, 2, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.80f, 1.0f, 64
}, {
ASTCENC_PRE_FAST,
2, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.85f, 1.0f
2, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.85f, 1.0f, 128
}, {
ASTCENC_PRE_MEDIUM,
3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.5f, 1.1f, 1.05f, 0.90f, 1.0f
3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.5f, 1.1f, 1.05f, 0.90f, 1.0f, 256
}, {
ASTCENC_PRE_THOROUGH,
4, 82, 60, 30, 93, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.3f, 1.2f, 0.97f, 1.0f
4, 82, 60, 30, 93, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.3f, 1.2f, 0.97f, 1.0f, 256
}, {
ASTCENC_PRE_VERYTHOROUGH,
4, 256, 128, 64, 98, 4, 6, 8, 5, 2, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 1.0f
4, 256, 128, 64, 98, 4, 6, 8, 5, 2, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f, 1.0f, 256
}, {
ASTCENC_PRE_EXHAUSTIVE,
4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 1.0f
4, 256, 256, 256, 100, 4, 8, 8, 8, 8, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f, 1.0f, 256
}
}};

Expand Down Expand Up @@ -412,6 +413,11 @@ static astcenc_error validate_config(
config.tune_3partition_early_out_limit_factor = astc::max(config.tune_3partition_early_out_limit_factor, 0.0f);
config.tune_2plane_early_out_limit_correlation = astc::max(config.tune_2plane_early_out_limit_correlation, 0.0f);

config.rdo_quality = astc::clamp(config.rdo_quality, 0.001f, 50.0f);
config.rdo_lookback = astc::clamp(config.rdo_lookback, 4u, 4096u);
config.rdo_max_smooth_block_error_scale = astc::clamp(config.rdo_max_smooth_block_error_scale, 1.0f, 300.0f);
config.rdo_max_smooth_block_std_dev = astc::clamp(config.rdo_max_smooth_block_std_dev, 0.01f, 65536.0f);

// Specifying a zero weight color component is not allowed; force to small value
float max_weight = astc::max(astc::max(config.cw_r_weight, config.cw_g_weight),
astc::max(config.cw_b_weight, config.cw_a_weight));
Expand Down Expand Up @@ -528,6 +534,7 @@ astcenc_error astcenc_config_init(
config.tune_3partition_early_out_limit_factor = (*preset_configs)[start].tune_3partition_early_out_limit_factor;
config.tune_2plane_early_out_limit_correlation = (*preset_configs)[start].tune_2plane_early_out_limit_correlation;
config.tune_search_mode0_enable = (*preset_configs)[start].tune_search_mode0_enable;
config.rdo_lookback = (*preset_configs)[start].rdo_lookback;
}
// Start and end node are not the same - so interpolate between them
else
Expand Down Expand Up @@ -567,11 +574,16 @@ astcenc_error astcenc_config_init(
config.tune_3partition_early_out_limit_factor = LERP(tune_3partition_early_out_limit_factor);
config.tune_2plane_early_out_limit_correlation = LERP(tune_2plane_early_out_limit_correlation);
config.tune_search_mode0_enable = LERP(tune_search_mode0_enable);
config.rdo_lookback = LERPUI(rdo_lookback);
#undef LERP
#undef LERPI
#undef LERPUI
}

config.rdo_quality = 0.5f;
config.rdo_max_smooth_block_error_scale = 10.0f;
config.rdo_max_smooth_block_std_dev = 18.0f;

// Set heuristics to the defaults for each color profile
config.cw_r_weight = 1.0f;
config.cw_g_weight = 1.0f;
Expand Down Expand Up @@ -1099,6 +1111,8 @@ astcenc_error astcenc_compress_image(
// Only the first thread to arrive actually runs the term
ctxo->manage_compress.term(term_compress);

rate_distortion_optimize(*ctxo, image, *swizzle, data_out);

return ASTCENC_SUCCESS;
#endif
}
Expand All @@ -1119,6 +1133,7 @@ astcenc_error astcenc_compress_reset(

ctxo->manage_avg.reset();
ctxo->manage_compress.reset();
ctxo->manage_rdo.reset();
return ASTCENC_SUCCESS;
#endif
}
Expand Down
22 changes: 19 additions & 3 deletions Source/astcenc_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1231,6 +1231,8 @@ struct astcenc_contexti
#if !defined(ASTCENC_DECOMPRESS_ONLY)
/** @brief The pixel region and variance worker arguments. */
avg_args avg_preprocess_args;

/** @brief The rate-distortion optimization (RDO) context; allocation and ownership are not shown here - TODO confirm. */
struct astcenc_rdo_context* rdo_context;
#endif

#if defined(ASTCENC_DIAGNOSTICS)
Expand Down Expand Up @@ -1966,7 +1968,7 @@ unsigned int compute_ideal_endpoint_formats(
* @param pi The partition info for the current trial.
* @param di The weight grid decimation table.
* @param dec_weights_uquant The quantized weight set.
* @param[in,out] ep The color endpoints (modifed in place).
* @param[in,out] ep The color endpoints (modified in place).
* @param[out] rgbs_vectors The RGB+scale vectors for LDR blocks.
* @param[out] rgbo_vectors The RGB+offset vectors for HDR blocks.
*/
Expand All @@ -1990,7 +1992,7 @@ void recompute_ideal_colors_1plane(
* @param di The weight grid decimation table.
* @param dec_weights_uquant_plane1 The quantized weight set for plane 1.
* @param dec_weights_uquant_plane2 The quantized weight set for plane 2.
* @param[in,out] ep The color endpoints (modifed in place).
* @param[in,out] ep The color endpoints (modified in place).
* @param[out] rgbs_vector The RGB+scale color for LDR blocks.
* @param[out] rgbo_vector The RGB+offset color for HDR blocks.
* @param plane2_component The component assigned to plane 2.
Expand Down Expand Up @@ -2163,14 +2165,28 @@ void symbolic_to_physical(
* flagged as an error block if the encoding is invalid.
*
* @param bsd The block size information.
* @param pcb The physical compresesd block input.
* @param pcb The physical compressed block input.
* @param[out] scb The output symbolic representation.
*/
void physical_to_symbolic(
const block_size_descriptor& bsd,
const uint8_t pcb[16],
symbolic_compressed_block& scb);

/**
* @brief Rate-distortion optimization main entry.
*
* Invoked from astcenc_compress_image() after the compression stage's term step, operating on the
* compressed output buffer passed to that call.
*
* @param ctxo The compressor context and configuration.
* @param image The input image data.
* @param swizzle The swizzle applied on store.
* @param[in,out] buffer The compressed buffer to be optimized (modified in place).
*/
void rate_distortion_optimize(
astcenc_context& ctxo,
const astcenc_image& image,
const astcenc_swizzle& swizzle,
uint8_t* buffer);

/* ============================================================================
Platform-specific functions.
============================================================================ */
Expand Down
6 changes: 5 additions & 1 deletion Source/astcenc_internal_entry.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,13 +164,14 @@ class ParallelManager
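* @param init_func Callable which executes the stage initialization. It must return the
* total number of tasks in the stage.
* @param callback Optional progress callback. It is stored only by the call that actually
* performs the initialization, and only when it is not @c nullptr.
*/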
void init(std::function<unsigned int(void)> init_func)
void init(std::function<unsigned int(void)> init_func, astcenc_progress_callback callback = nullptr)
{
std::lock_guard<std::mutex> lck(m_lock);
if (!m_init_done)
{
m_task_count = init_func();
m_init_done = true;
if (callback) m_callback = callback;
}
}

Expand Down Expand Up @@ -322,6 +323,9 @@ struct astcenc_context

/** @brief The parallel manager for compression. */
ParallelManager manage_compress;

/** @brief The parallel manager for rate-distortion optimization. */
ParallelManager manage_rdo;
#endif

/** @brief The parallel manager for decompression. */
Expand Down
Loading