Skip to content

Commit

Permalink
Add new Algorithms using explicit batch type
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelbacci committed Jun 24, 2021
1 parent e845404 commit 2be89a0
Show file tree
Hide file tree
Showing 3 changed files with 300 additions and 12 deletions.
40 changes: 40 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,46 @@ void mean(const vector_type& a, const vector_type& b, vector_type& res)
}
```
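Algorithms such as `xsimd::reduce` and `xsimd::transform` are also available in an explicit-batch form (`xsimd::reduce_batch`, `xsimd::transform_batch`), which takes one function for scalar values and a second one for batches: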
```cpp
// Sums the elements of `v`, counting every NaN as zero.
template <class C, class T = typename std::decay<decltype(*C().begin())>::type>
T nansum(const C& v)
{
    // Combiner handed to reduce_batch as the scalar function.
    auto add_scalars = [](auto lhs, auto rhs) {
        return (std::isnan(lhs) ? 0.0 : lhs) + (std::isnan(rhs) ? 0.0 : rhs);
    };

    // Combiner handed to reduce_batch as the batch function: NaN lanes are
    // replaced by zero on both sides before the lane-wise addition.
    auto add_batches = [](auto lhs, auto rhs) {
        static decltype(lhs) zero(0.0);
        return xsimd::select(xsimd::isnan(lhs), zero, lhs)
             + xsimd::select(xsimd::isnan(rhs), zero, rhs);
    };

    return xsimd::reduce_batch(v.begin(), v.end(), 0.0, add_scalars, add_batches);
}
```

To switch from `std::count_if` to `xsimd::count_if`:

```cpp
// v is an aligned vector of int type
// Reference result computed with the standard algorithm.
auto count_expected = std::count_if(v.begin(), v.end(),
[](auto x) {
return x >= 50 && x <= 70 ? 1 : 0;
});
// Same count with xsimd: the first lambda is the scalar predicate applied to
// single elements, the second one receives a whole batch `b` and returns how
// many of its lanes fall in [50, 70].
auto count = xsimd::count_if(v.begin(), v.end(),
[](auto x) {
return x >= 50 && x <= 70 ? 1 : 0;
},
[](auto b) {
static decltype(b) zero(0);
static decltype(b) one(1);
// Map the lane mask to 1/0 per lane, then add the lanes together.
return xsimd::hadd(xsimd::select(b >= 50 && b <= 70, one, zero));
});
assert(count_expected == count);
```
## Building and Running the Tests
Expand Down
119 changes: 108 additions & 11 deletions include/xsimd/stl/algorithms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@

namespace xsimd
{
template <class I1, class I2, class O1, class UF>
void transform(I1 first, I2 last, O1 out_first, UF&& f)
template <class I1, class I2, class O1, class UF, class UFB>
void transform_batch(I1 first, I2 last, O1 out_first, UF&& f, UFB&& fb)
{
using value_type = typename std::decay<decltype(*first)>::type;
using traits = simd_traits<value_type>;
Expand All @@ -43,7 +43,7 @@ namespace xsimd
for (std::size_t i = align_begin; i < align_end; i += simd_size)
{
xsimd::load_aligned(&first[i], batch);
xsimd::store_aligned(&out_first[i], f(batch));
xsimd::store_aligned(&out_first[i], fb(batch));
}

for (std::size_t i = align_end; i < size; ++i)
Expand All @@ -62,7 +62,7 @@ namespace xsimd
for (std::size_t i = align_begin; i < align_end; i += simd_size)
{
xsimd::load_aligned(&first[i], batch);
xsimd::store_unaligned(&out_first[i], f(batch));
xsimd::store_unaligned(&out_first[i], fb(batch));
}

for (std::size_t i = align_end; i < size; ++i)
Expand All @@ -72,8 +72,14 @@ namespace xsimd
}
}

template <class I1, class I2, class I3, class O1, class UF>
void transform(I1 first_1, I2 last_1, I3 first_2, O1 out_first, UF&& f)
// Single-functor overload of the unary transform: `f` is handed to
// transform_batch as both the scalar functor and the batch functor.
template <class I1, class I2, class O1, class UF>
void transform(I1 first, I2 last, O1 out_first, UF&& f)
{
    auto& op = f;
    transform_batch(first, last, out_first, op, op);
}

template <class I1, class I2, class I3, class O1, class UF, class UFB>
void transform_batch(I1 first_1, I2 last_1, I3 first_2, O1 out_first, UF&& f, UFB&& fb)
{
using value_type = typename std::decay<decltype(*first_1)>::type;
using traits = simd_traits<value_type>;
Expand Down Expand Up @@ -102,7 +108,7 @@ namespace xsimd
{ \
xsimd::A1(&first_1[i], batch_1); \
xsimd::A2(&first_2[i], batch_2); \
xsimd::A3(&out_first[i], f(batch_1, batch_2)); \
xsimd::A3(&out_first[i], fb(batch_1, batch_2)); \
} \
\
for (std::size_t i = align_end; i < size; ++i) \
Expand Down Expand Up @@ -130,6 +136,11 @@ namespace xsimd
#undef XSIMD_LOOP_MACRO
}

// Single-functor overload of the binary transform: `f` is handed to
// transform_batch as both the scalar functor and the batch functor.
template <class I1, class I2, class I3, class O1, class UF>
void transform(I1 first_1, I2 last_1, I3 first_2, O1 out_first, UF&& f)
{
    auto& op = f;
    transform_batch(first_1, last_1, first_2, out_first, op, op);
}

// TODO: Remove this once we drop C++11 support
namespace detail
Expand All @@ -141,9 +152,8 @@ namespace xsimd
};
}


template <class Iterator1, class Iterator2, class Init, class BinaryFunction = detail::plus>
Init reduce(Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun = detail::plus{})
template <class Iterator1, class Iterator2, class Init, class BinaryFunction, class BinaryFunctionBatch>
Init reduce_batch(Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun, BinaryFunctionBatch&& binfun_batch)
{
using value_type = typename std::decay<decltype(*first)>::type;
using traits = simd_traits<value_type>;
Expand Down Expand Up @@ -180,7 +190,7 @@ namespace xsimd
for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size)
{
xsimd::load_aligned(ptr, batch);
batch_init = binfun(batch_init, batch);
batch_init = binfun_batch(batch_init, batch);
}

// reduce across batch
Expand All @@ -197,6 +207,93 @@ namespace xsimd
return init;
}

// Single-functor reduction: `binfun` (detail::plus when omitted) is handed to
// reduce_batch as both the scalar and the batch binary function.
template <class Iterator1, class Iterator2, class Init, class BinaryFunction = detail::plus>
Init reduce(Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun = detail::plus{})
{
    auto& op = binfun;
    return reduce_batch(first, last, init, op, op);
}

namespace detail
{
    /**
     * Batch predicate that reports how many lanes of a batch compare equal
     * to a stored value.
     *
     * The special member functions are left implicit (Rule of Zero): the
     * only state is a single `T`.
     *
     * @tparam T scalar type of the value being searched for.
     */
    template <class T>
    struct count_batch
    {
        /// Stores the value every lane will be compared against.
        count_batch(T value)
            : value(value)
        {}

        /**
         * @param b batch to inspect.
         * @return result of hadd over a batch holding 1 in every lane where
         *         `b == value` and 0 elsewhere, converted to std::size_t.
         *
         * Declared const because the call does not modify the functor, so it
         * can also be invoked through a const object or reference.
         */
        template <class B>
        std::size_t operator()(const B& b) const
        {
            // Plain locals instead of function-local statics: avoids the
            // static-initialization guard check on every call.
            const B zero = B(T(0));
            const B one = B(T(1));
            return static_cast<std::size_t>(xsimd::hadd(xsimd::select(b == value, one, zero)));
        }

    private:
        T value;
    };
}

/**
 * Counts the elements of [first, last) that satisfy a predicate.
 *
 * Elements before the first aligned address and after the last full batch
 * are tested one at a time with `predicate`; the aligned middle part is
 * loaded batch by batch and handed to `predicate_batch`.
 *
 * @param first           start of the range.
 * @param last            end of the range.
 * @param predicate       scalar predicate; its result is converted to bool,
 *                        and each true result counts as exactly one match
 *                        (same convention as std::count_if).
 * @param predicate_batch called with one batch; must return the number of
 *                        matching lanes in that batch.
 * @return total number of matches.
 *
 * NOTE(review): the range is addressed through `&(*first)` and `first[i]`,
 * so this presumably requires random-access iterators over contiguous
 * storage — confirm against callers.
 */
template <class Iterator1, class Iterator2, class UnaryPredicate, class UnaryPredicateBatch>
std::size_t count_if(Iterator1 first, Iterator2 last, UnaryPredicate&& predicate, UnaryPredicateBatch&& predicate_batch)
{
    using value_type = typename std::decay<decltype(*first)>::type;
    using traits = simd_traits<value_type>;
    using batch_type = typename traits::type;

    const std::size_t size = static_cast<std::size_t>(std::distance(first, last));
    constexpr std::size_t simd_size = traits::size;

    std::size_t counter(0);

    // Fewer elements than one batch: purely scalar count.
    if (size < simd_size)
    {
        for (; first != last; ++first)
        {
            // Count matches, not raw return values: a predicate returning a
            // truthy value other than 1 must still add exactly one.
            counter += predicate(*first) ? 1 : 0;
        }
        return counter;
    }

    const auto* const ptr_begin = &(*first);

    const std::size_t align_begin = xsimd::get_alignment_offset(ptr_begin, size, simd_size);
    // Round the remaining length down to a whole number of batches
    // (the mask assumes simd_size is a power of two).
    const std::size_t align_end = align_begin + ((size - align_begin) & ~(simd_size - 1));

    // count initial unaligned part
    for (std::size_t i = 0; i < align_begin; ++i)
    {
        counter += predicate(first[i]) ? 1 : 0;
    }

    // count aligned part, one batch at a time
    batch_type batch;
    auto ptr = ptr_begin + align_begin;
    for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size)
    {
        xsimd::load_aligned(ptr, batch);
        counter += predicate_batch(batch);
    }

    // count final part that does not fill a whole batch
    for (std::size_t i = align_end; i < size; ++i)
    {
        counter += predicate(first[i]) ? 1 : 0;
    }

    return counter;
}

template <class Iterator1, class Iterator2, class T>
std::size_t count(Iterator1 first, Iterator2 last, const T& value)
{
return count_if(first, last,
[&value](const T& x) { return value == x; }, detail::count_batch<T>{value});
}

}

#endif
Loading

0 comments on commit 2be89a0

Please sign in to comment.