Skip to content

Commit

Permalink
astcenc: Update to 4.4.0
Browse files Browse the repository at this point in the history
> The 4.4.0 release is a minor release with image quality improvements,
> a small performance boost, a few new quality-of-life features, and a
> few minor fixes for uncommon build configurations.

https://github.com/ARM-software/astc-encoder/releases/tag/4.4.0
  • Loading branch information
akien-mga committed May 11, 2023
1 parent fd4a06c commit 5a3f955
Show file tree
Hide file tree
Showing 15 changed files with 659 additions and 953 deletions.
1 change: 0 additions & 1 deletion modules/astcenc/SCsub
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ thirdparty_sources = [
"astcenc_partition_tables.cpp",
"astcenc_percentile_tables.cpp",
"astcenc_pick_best_endpoint_format.cpp",
"astcenc_platform_isa_detection.cpp",
"astcenc_quantization.cpp",
"astcenc_symbolic_physical.cpp",
"astcenc_weight_align.cpp",
Expand Down
2 changes: 1 addition & 1 deletion thirdparty/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Files extracted from upstream source:
## astcenc

- Upstream: https://github.com/ARM-software/astc-encoder
- Version: 4.3.0 (ec83dda79fcefe07f69cdae7ed980d169bf2c4d4, 2023)
- Version: 4.4.0 (5a5b5a1ef60dd47c27c28c66c118d22c40e3197e, 2023)
- License: Apache 2.0

Files extracted from upstream source:
Expand Down
18 changes: 12 additions & 6 deletions thirdparty/astcenc/astcenc.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@
* for faster processing. The caller is responsible for creating the worker threads, and
* synchronizing between images.
*
* Extended instruction set support
* ================================
*
* This library supports use of extended instruction sets, such as SSE4.1 and AVX2. These are
* enabled at compile time when building the library. There is no runtime checking in the core
* library that the instruction sets used are actually available. Checking compatibility is the
* responsibility of the calling code.
*
* Threading
* =========
*
Expand Down Expand Up @@ -191,8 +199,6 @@ enum astcenc_error {
ASTCENC_ERR_OUT_OF_MEM,
/** @brief The call failed due to the build using fast math. */
ASTCENC_ERR_BAD_CPU_FLOAT,
/** @brief The call failed due to the build using an unsupported ISA. */
ASTCENC_ERR_BAD_CPU_ISA,
/** @brief The call failed due to an out-of-spec parameter. */
ASTCENC_ERR_BAD_PARAM,
/** @brief The call failed due to an out-of-spec block size. */
Expand Down Expand Up @@ -472,7 +478,7 @@ struct astcenc_config
/**
* @brief The number of trial candidates per mode search (-candidatelimit).
*
* Valid values are between 1 and TUNE_MAX_TRIAL_CANDIDATES (default 4).
* Valid values are between 1 and TUNE_MAX_TRIAL_CANDIDATES.
*/
unsigned int tune_candidate_limit;

Expand Down Expand Up @@ -520,21 +526,21 @@ struct astcenc_config
*
* This option is further scaled for normal maps, so it skips less often.
*/
float tune_2_partition_early_out_limit_factor;
float tune_2partition_early_out_limit_factor;

/**
* @brief The threshold for skipping 4.1 trials (-3partitionlimitfactor).
*
* This option is further scaled for normal maps, so it skips less often.
*/
float tune_3_partition_early_out_limit_factor;
float tune_3partition_early_out_limit_factor;

/**
* @brief The threshold for skipping two weight planes (-2planelimitcorrelation).
*
* This option is ineffective for normal maps.
*/
float tune_2_plane_early_out_limit_correlation;
float tune_2plane_early_out_limit_correlation;

#if defined(ASTCENC_DIAGNOSTICS)
/**
Expand Down
59 changes: 6 additions & 53 deletions thirdparty/astcenc/astcenc_averages_and_directions.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2022 Arm Limited
// Copyright 2011-2023 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
Expand Down Expand Up @@ -725,8 +725,7 @@ void compute_error_squared_rgba(
const image_block& blk,
const processed_line4 uncor_plines[BLOCK_MAX_PARTITIONS],
const processed_line4 samec_plines[BLOCK_MAX_PARTITIONS],
float uncor_lengths[BLOCK_MAX_PARTITIONS],
float samec_lengths[BLOCK_MAX_PARTITIONS],
float line_lengths[BLOCK_MAX_PARTITIONS],
float& uncor_error,
float& samec_error
) {
Expand All @@ -740,12 +739,6 @@ void compute_error_squared_rgba(
{
const uint8_t *texel_indexes = pi.texels_of_partition[partition];

float uncor_loparam = 1e10f;
float uncor_hiparam = -1e10f;

float samec_loparam = 1e10f;
float samec_hiparam = -1e10f;

processed_line4 l_uncor = uncor_plines[partition];
processed_line4 l_samec = samec_plines[partition];

Expand Down Expand Up @@ -773,9 +766,6 @@ void compute_error_squared_rgba(
vfloat uncor_loparamv(1e10f);
vfloat uncor_hiparamv(-1e10f);

vfloat samec_loparamv(1e10f);
vfloat samec_hiparamv(-1e10f);

vfloat ew_r(blk.channel_weight.lane<0>());
vfloat ew_g(blk.channel_weight.lane<1>());
vfloat ew_b(blk.channel_weight.lane<2>());
Expand Down Expand Up @@ -825,9 +815,6 @@ void compute_error_squared_rgba(
+ (data_b * l_samec_bs2)
+ (data_a * l_samec_bs3);

samec_loparamv = min(samec_param, samec_loparamv);
samec_hiparamv = max(samec_param, samec_hiparamv);

vfloat samec_dist0 = samec_param * l_samec_bs0 - data_r;
vfloat samec_dist1 = samec_param * l_samec_bs1 - data_g;
vfloat samec_dist2 = samec_param * l_samec_bs2 - data_b;
Expand All @@ -843,18 +830,9 @@ void compute_error_squared_rgba(
lane_ids += vint(ASTCENC_SIMD_WIDTH);
}

uncor_loparam = hmin_s(uncor_loparamv);
uncor_hiparam = hmax_s(uncor_hiparamv);

samec_loparam = hmin_s(samec_loparamv);
samec_hiparam = hmax_s(samec_hiparamv);

float uncor_linelen = uncor_hiparam - uncor_loparam;
float samec_linelen = samec_hiparam - samec_loparam;

// Turn very small numbers and NaNs into a small number
uncor_lengths[partition] = astc::max(uncor_linelen, 1e-7f);
samec_lengths[partition] = astc::max(samec_linelen, 1e-7f);
float uncor_linelen = hmax_s(uncor_hiparamv) - hmin_s(uncor_loparamv);
line_lengths[partition] = astc::max(uncor_linelen, 1e-7f);
}

uncor_error = hadd_s(uncor_errorsumv);
Expand Down Expand Up @@ -882,19 +860,9 @@ void compute_error_squared_rgb(
unsigned int texel_count = pi.partition_texel_count[partition];
promise(texel_count > 0);

float uncor_loparam = 1e10f;
float uncor_hiparam = -1e10f;

float samec_loparam = 1e10f;
float samec_hiparam = -1e10f;

processed_line3 l_uncor = pl.uncor_pline;
processed_line3 l_samec = pl.samec_pline;

// This implementation is an example vectorization of this function.
// It works - the codec is 2-4% faster than not vectorizing - but
// the benefit is limited by the use of gathers and register pressure

// Vectorize some useful scalar inputs
vfloat l_uncor_bs0(l_uncor.bs.lane<0>());
vfloat l_uncor_bs1(l_uncor.bs.lane<1>());
Expand All @@ -913,9 +881,6 @@ void compute_error_squared_rgb(
vfloat uncor_loparamv(1e10f);
vfloat uncor_hiparamv(-1e10f);

vfloat samec_loparamv(1e10f);
vfloat samec_hiparamv(-1e10f);

vfloat ew_r(blk.channel_weight.lane<0>());
vfloat ew_g(blk.channel_weight.lane<1>());
vfloat ew_b(blk.channel_weight.lane<2>());
Expand Down Expand Up @@ -958,9 +923,6 @@ void compute_error_squared_rgb(
+ (data_g * l_samec_bs1)
+ (data_b * l_samec_bs2);

samec_loparamv = min(samec_param, samec_loparamv);
samec_hiparamv = max(samec_param, samec_hiparamv);

vfloat samec_dist0 = samec_param * l_samec_bs0 - data_r;
vfloat samec_dist1 = samec_param * l_samec_bs1 - data_g;
vfloat samec_dist2 = samec_param * l_samec_bs2 - data_b;
Expand All @@ -974,18 +936,9 @@ void compute_error_squared_rgb(
lane_ids += vint(ASTCENC_SIMD_WIDTH);
}

uncor_loparam = hmin_s(uncor_loparamv);
uncor_hiparam = hmax_s(uncor_hiparamv);

samec_loparam = hmin_s(samec_loparamv);
samec_hiparam = hmax_s(samec_hiparamv);

float uncor_linelen = uncor_hiparam - uncor_loparam;
float samec_linelen = samec_hiparam - samec_loparam;

// Turn very small numbers and NaNs into a small number
pl.uncor_line_len = astc::max(uncor_linelen, 1e-7f);
pl.samec_line_len = astc::max(samec_linelen, 1e-7f);
float uncor_linelen = hmax_s(uncor_hiparamv) - hmin_s(uncor_loparamv);
pl.line_length = astc::max(uncor_linelen, 1e-7f);
}

uncor_error = hadd_s(uncor_errorsumv);
Expand Down
20 changes: 10 additions & 10 deletions thirdparty/astcenc/astcenc_block_sizes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -776,8 +776,8 @@ static void construct_dt_entry_2d(
assert(maxprec_1plane >= 0 || maxprec_2planes >= 0);
bsd.decimation_modes[index].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
bsd.decimation_modes[index].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
bsd.decimation_modes[index].refprec_1_plane = 0;
bsd.decimation_modes[index].refprec_2_planes = 0;
bsd.decimation_modes[index].refprec_1plane = 0;
bsd.decimation_modes[index].refprec_2planes = 0;
}

/**
Expand Down Expand Up @@ -934,11 +934,11 @@ static void construct_block_size_descriptor_2d(

if (is_dual_plane)
{
dm.set_ref_2_plane(bm.get_weight_quant_mode());
dm.set_ref_2plane(bm.get_weight_quant_mode());
}
else
{
dm.set_ref_1_plane(bm.get_weight_quant_mode());
dm.set_ref_1plane(bm.get_weight_quant_mode());
}

bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_bm_idx);
Expand Down Expand Up @@ -969,8 +969,8 @@ static void construct_block_size_descriptor_2d(
{
bsd.decimation_modes[i].maxprec_1plane = -1;
bsd.decimation_modes[i].maxprec_2planes = -1;
bsd.decimation_modes[i].refprec_1_plane = 0;
bsd.decimation_modes[i].refprec_2_planes = 0;
bsd.decimation_modes[i].refprec_1plane = 0;
bsd.decimation_modes[i].refprec_2planes = 0;
}

// Determine the texels to use for kmeans clustering.
Expand Down Expand Up @@ -1055,8 +1055,8 @@ static void construct_block_size_descriptor_3d(

bsd.decimation_modes[decimation_mode_count].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
bsd.decimation_modes[decimation_mode_count].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
bsd.decimation_modes[decimation_mode_count].refprec_1_plane = maxprec_1plane == -1 ? 0 : 0xFFFF;
bsd.decimation_modes[decimation_mode_count].refprec_2_planes = maxprec_2planes == -1 ? 0 : 0xFFFF;
bsd.decimation_modes[decimation_mode_count].refprec_1plane = maxprec_1plane == -1 ? 0 : 0xFFFF;
bsd.decimation_modes[decimation_mode_count].refprec_2planes = maxprec_2planes == -1 ? 0 : 0xFFFF;
decimation_mode_count++;
}
}
Expand All @@ -1067,8 +1067,8 @@ static void construct_block_size_descriptor_3d(
{
bsd.decimation_modes[i].maxprec_1plane = -1;
bsd.decimation_modes[i].maxprec_2planes = -1;
bsd.decimation_modes[i].refprec_1_plane = 0;
bsd.decimation_modes[i].refprec_2_planes = 0;
bsd.decimation_modes[i].refprec_1plane = 0;
bsd.decimation_modes[i].refprec_2planes = 0;
}

bsd.decimation_mode_count_always = 0; // Skipped for 3D modes
Expand Down
Loading

0 comments on commit 5a3f955

Please sign in to comment.