From e30a3fa4dbca29ad09d7aedd2b441cfe0d5c1c79 Mon Sep 17 00:00:00 2001 From: Libin Lu Date: Thu, 18 Jul 2024 08:19:09 -0400 Subject: [PATCH] =?UTF-8?q?Revert=20"Horner's=20rule=20for=20polynomial=20?= =?UTF-8?q?evaluation=20with=20symmetry=20idea=20diccussed=20=E2=80=A6"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 4ea00960faa40e8dfdb22d691a7c449b7f3d7a8b. --- src/spreadinterp.cpp | 85 +++++--------------------------------------- 1 file changed, 8 insertions(+), 77 deletions(-) diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index 3f03c79bb..4374c6e9d 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -24,8 +24,6 @@ namespace { // anonymous namespace for internal structs equivalent to declaring // static struct zip_low; struct zip_hi; -template struct reverse_index; -template struct shuffle_index; struct select_even; struct select_odd; // forward declaration to clean up the code and be able to use this everywhere in the file @@ -779,80 +777,23 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */ const FLT z = std::fma(FLT(2.0), x, FLT(w - 1)); // scale so local grid offset z in // [-1,1] if (opts.upsampfac == 2.0) { // floating point equality is fine here - using arch_t = typename simd_type::arch_type; - static constexpr auto alignment = arch_t::alignment(); + static constexpr auto alignment = simd_type::arch_type::alignment(); static constexpr auto simd_size = simd_type::size; static constexpr auto padded_ns = (w + simd_size - 1) & ~(simd_size - 1); static constexpr auto nc = nc200(); static constexpr auto horner_coeffs = get_horner_coeffs_200(); - static constexpr auto use_ker_sym = (simd_size < w); alignas(alignment) static constexpr auto padded_coeffs = pad_2D_array_with_zeros(horner_coeffs); - // use kernel symmetry trick if w > simd_size - if constexpr (use_ker_sym) { - static constexpr uint8_t tail = w % simd_size; - static constexpr uint8_t if_odd_degree = ((nc + 1) % 2); - static constexpr uint8_t offset_start = tail ? w - tail : w - simd_size; - static constexpr uint8_t end_idx = (w + (tail > 0)) / 2; - const simd_type zv{z}; - const auto z2v = zv * zv; - - // some xsimd constant for shuffle or inverse - static constexpr auto shuffle_batch = []() constexpr noexcept { - if constexpr (tail) { - return xsimd::make_batch_constant, arch_t, - shuffle_index>(); - } else { - return xsimd::make_batch_constant, arch_t, - reverse_index>(); - } - }(); - - // process simd vecs - simd_type k_prev, k_sym{0}; - for (uint8_t i{0}, offset = offset_start; i < end_idx; - i += simd_size, offset -= simd_size) { - auto k_odd = [i]() constexpr noexcept { - if constexpr (if_odd_degree) { - return simd_type::load_aligned(padded_coeffs[0].data() + i); - } else { - return simd_type{0}; - } - }(); - auto k_even = simd_type::load_aligned(padded_coeffs[if_odd_degree].data() + i); - for (uint8_t j{1 + if_odd_degree}; j < nc; j += 2) { - const auto cji_odd = simd_type::load_aligned(padded_coeffs[j].data() + i); - const auto cji_even = simd_type::load_aligned(padded_coeffs[j + 1].data() + i); - k_odd = xsimd::fma(k_odd, z2v, cji_odd); - k_even = xsimd::fma(k_even, z2v, cji_even); - } - // left part - xsimd::fma(k_odd, zv, k_even).store_aligned(ker + i); - // right part symmetric to the left part - if (offset >= end_idx) { - if constexpr (tail) { - // to use aligned store, we need shuffle the previous k_sym and current k_sym - k_prev = k_sym; - k_sym = xsimd::fnma(k_odd, zv, k_even); - xsimd::shuffle(k_sym, k_prev, shuffle_batch).store_aligned(ker + offset); - } else { - xsimd::swizzle(xsimd::fnma(k_odd, zv, k_even), shuffle_batch) - .store_aligned(ker + offset); - } - } - } - } else { - const simd_type zv(z); - for (uint8_t i = 0; i < w; i += simd_size) { - auto k = simd_type::load_aligned(padded_coeffs[0].data() + i); - for (uint8_t j = 1; j < nc; ++j) { - const auto cji = simd_type::load_aligned(padded_coeffs[j].data() + i); - k = xsimd::fma(k, zv, cji); - } - k.store_aligned(ker + i); + const simd_type zv(z); + for (uint8_t i = 0; i < w; i += simd_size) { + auto k = simd_type::load_aligned(padded_coeffs[0].data() + i); + for (uint8_t j = 1; j < nc; ++j) { + const auto cji = simd_type::load_aligned(padded_coeffs[j].data() + i); + k = xsimd::fma(k, zv, cji); } + k.store_aligned(ker + i); } return; } @@ -2227,16 +2168,6 @@ struct zip_hi { return (size + index) / 2; } }; -template struct reverse_index { - static constexpr unsigned get(unsigned index, const unsigned size) { - return index < cap ? (cap - 1 - index) : index; - } -}; -template struct shuffle_index { - static constexpr unsigned get(unsigned index, const unsigned size) { - return index < cap ? (cap - 1 - index) : size + size + cap - 1 - index; - } -}; struct select_even { static constexpr unsigned get(unsigned index, unsigned /*size*/) { return index * 2; }