diff --git a/src/lib/pubkey/pqcrystals/info.txt b/src/lib/pubkey/pqcrystals/info.txt
new file mode 100644
index 00000000000..5a2ab539091
--- /dev/null
+++ b/src/lib/pubkey/pqcrystals/info.txt
@@ -0,0 +1,18 @@
+
+PQCRYSTALS -> 20240228
+
+
+
+name -> "CRYSTALS"
+brief -> "Base utilities for CRYSTALS-Kyber/ML-KEM and CRYSTALS-Dilithium/ML-DSA. CRYptographic SuiTe for Algebraic LatticeS"
+type -> "Internal"
+
+
+
+
+
+
+pqcrystals.h
+pqcrystals_encoding.h
+pqcrystals_helpers.h
+
diff --git a/src/lib/pubkey/pqcrystals/pqcrystals.h b/src/lib/pubkey/pqcrystals/pqcrystals.h
new file mode 100644
index 00000000000..3940e5c66ba
--- /dev/null
+++ b/src/lib/pubkey/pqcrystals/pqcrystals.h
@@ -0,0 +1,661 @@
+/*
+ * PQ CRYSTALS Common Structures
+ *
+ * Further changes
+ * (C) 2021-2024 Jack Lloyd
+ * (C) 2021-2022 Manuel Glaser and Michael Boric, Rohde & Schwarz Cybersecurity
+ * (C) 2021-2022 René Meusel and Hannes Rantzsch, neXenio GmbH
+ * (C) 2024 René Meusel, Rohde & Schwarz Cybersecurity
+ *
+ * Botan is released under the Simplified BSD License (see license.txt)
+ */
+
+#ifndef BOTAN_PQ_CRYSTALS_H_
+#define BOTAN_PQ_CRYSTALS_H_
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+namespace Botan::CRYSTALS {
+
+enum class Domain { Normal, NTT };
+
+template
+concept crystals_constants =
+ std::signed_integral && std::integral && std::integral &&
+ std::integral && std::unsigned_integral &&
+ std::integral;
+
+/**
+ * This implements basic polynomial operations for Kyber and Dilithium
+ * based on the given algorithm constants (@p ConstantsT) and back-
+ * references some of the operations to the actual implementation
+ * into the derived class (CRTP @p DerivedT).
+ *
+ * Polynomial parameters are passed as spans of coefficients for maximum
+ * flexibility. Certain
+ *
+ * It is assumed that this is subclassed with the actual implementation
+ * with establishing a CRTP back-reference.
+ */
+template
+class Trait_Base {
+ public:
+ using T = typename ConstantsT::T;
+ static constexpr T N = ConstantsT::N;
+ static constexpr T Q = ConstantsT::Q;
+
+ protected:
+ using T2 = next_longer_int_t;
+
+ /// \name Pre-computed algorithm constants
+ /// @{
+
+ static constexpr T Q_inverse = modular_inverse(Q);
+ static constexpr T MONTY = montgomery_R(Q);
+ static constexpr T MONTY_SQUARED = montgomery_R2(Q);
+
+ // Contains the constant f from Algorithm 36 multiplied two times by
+ // the montgomery parameter, i.e. 2^(2*32) mod q. The first montgomery
+ // factor is then removed by the reduction in the loop. The second one
+ // is required to eliminate factors 2^(-32) mod q in coeffs introduced
+ // by previous montgomery multiplications in a single vector/matrix
+ // multiplication operation.
+ static constexpr T F_WITH_MONTY_SQUARED = (static_cast(ConstantsT::F) * MONTY_SQUARED) % Q;
+
+ static constexpr auto zetas = precompute_zetas(Q, MONTY, ConstantsT::ROOT_OF_UNITY);
+
+ /// @}
+
+ protected:
+ /// @returns the number of polynomials in the polynomial vector @p polyvec.
+ static constexpr size_t polys_in_polyvec(std::span polyvec) {
+ BOTAN_DEBUG_ASSERT(polyvec.size() % N == 0);
+ return polyvec.size() / N;
+ }
+
+ /// @returns the @p index-th polynomial in the polynomial vector @p polyvec.
+ template
+ requires(std::same_as || std::same_as)
+ static constexpr std::span poly_in_polyvec(std::span polyvec, size_t index) {
+ BOTAN_DEBUG_ASSERT(polyvec.size() % N == 0);
+ BOTAN_DEBUG_ASSERT(polyvec.size() / N > index);
+ auto polyspan = polyvec.subspan(index * N, N);
+ return std::span{polyspan.data(), polyspan.size()};
+ }
+
+ static constexpr T fqmul(T a, T b) { return DerivedT::montgomery_reduce_coefficient(static_cast(a) * b); }
+
+ public:
+ static constexpr void poly_add(std::span result, std::span lhs, std::span rhs) {
+ for(size_t i = 0; i < N; ++i) {
+ result[i] = lhs[i] + rhs[i];
+ }
+ }
+
+ static constexpr void poly_sub(std::span result, std::span lhs, std::span rhs) {
+ for(size_t i = 0; i < N; ++i) {
+ result[i] = lhs[i] - rhs[i];
+ }
+ }
+
+ /// Adds Q if the coefficient is negative.
+ static constexpr void poly_cadd_q(std::span coeffs) {
+ for(auto& coeff : coeffs) {
+ using unsigned_T = std::make_unsigned_t;
+ const auto is_negative = CT::Mask::expand_top_bit(static_cast(coeff));
+ coeff += is_negative.if_set_return(Q);
+ }
+ }
+
+ static constexpr T to_montgomery(T a) { return fqmul(a, MONTY_SQUARED); }
+
+ constexpr static void barrett_reduce(std::span poly) {
+ for(auto& coeff : poly) {
+ coeff = DerivedT::barrett_reduce_coefficient(coeff);
+ }
+ }
+
+ /// Multiplication and accumulation of 2 polynomial vectors @p u and @p v.
+ static constexpr void polyvec_pointwise_acc_montgomery(std::span w,
+ std::span u,
+ std::span v) {
+ clear_mem(w);
+ std::array t;
+ for(size_t i = 0; i < polys_in_polyvec(u); ++i) {
+ DerivedT::poly_pointwise_montgomery(t, poly_in_polyvec(u, i), poly_in_polyvec(v, i));
+ poly_add(w, w, t);
+ }
+ barrett_reduce(w);
+ }
+};
+
+template
+concept crystals_trait =
+ std::signed_integral && sizeof(typename T::T) <= 4 && std::integral &&
+ T::N % 2 == 0 &&
+ requires(std::span polyspan, std::span polyvecspan, typename T::T coeff) {
+ { T::to_montgomery(coeff) };
+ { T::barrett_reduce(polyspan) };
+ { T::poly_cadd_q(polyspan) };
+ { T::ntt(polyspan) };
+ { T::inverse_ntt(polyspan) };
+ { T::poly_pointwise_montgomery(polyspan, polyspan, polyspan) };
+ { T::polyvec_pointwise_acc_montgomery(polyspan, polyvecspan, polyvecspan) };
+ };
+
+namespace detail {
+
+/**
+ * Converts polynomials or polynomial vectors from one domain to another.
+ */
+template class StructureT, crystals_trait Trait, Domain From>
+ requires(To != From)
+StructureT domain_cast(StructureT&& p) {
+ // The public factory method `from_domain_cast` is just a workaround for
+ // Xcode and NDK not understanding the friend declaration to allow this
+ // to directly call the private constructor.
+ return StructureT::from_domain_cast(std::move(p));
+}
+
+/**
+ * Ensures that all values in the @p range are within the range [min, max]
+ * using constant-time operations.
+ *
+ * @returns true if all values are within the range, false otherwise.
+ */
+template
+constexpr static bool ct_all_within_range(std::span range, T min, T max)
+ requires(sizeof(T) <= 4)
+{
+ BOTAN_DEBUG_ASSERT(min < max);
+
+ using unsigned_T = std::make_unsigned_t;
+ auto map = [](T v) -> unsigned_T {
+ if constexpr(std::signed_integral) {
+ constexpr int64_t offset = -static_cast(std::numeric_limits::min());
+ return static_cast(static_cast(v) + offset);
+ } else {
+ return v;
+ }
+ };
+
+ const auto umin = map(min);
+ const auto umax = map(max);
+
+ auto mask = CT::Mask::set();
+ for(const T c : range) {
+ mask &= CT::Mask::is_within_range(map(c), umin, umax);
+ }
+ return mask.as_bool();
+}
+
+} // namespace detail
+
+/**
+ * Represents a polynomial with Trait::N coefficients of type Trait::T.
+ * The domain of the polynomial can be either Domain::Normal or Domain::NTT and
+ * this information is represented in the C++ type system.
+ *
+ * Polynomials may either own their storage of piggy-back on external storage
+ * when they are part of a PolynomialVector.
+ */
+template
+class Polynomial {
+ private:
+ using ThisPolynomial = Polynomial;
+ using T = typename Trait::T;
+
+ private:
+ // TODO: perhaps secure vector
+ std::vector m_coeffs_storage;
+ std::span m_coeffs;
+
+ private:
+ template
+ friend class Polynomial;
+
+ template class StructureT, crystals_trait C, Domain From>
+ requires(To != From)
+ friend StructureT detail::domain_cast(StructureT&&);
+
+ /**
+ * This constructor is used to convert a Polynomial from one domain to another.
+ * The friend declarations above facilitate this.
+ */
+ template
+ requires(D != OtherD)
+ explicit Polynomial(Polynomial&& other) noexcept :
+ m_coeffs_storage(std::move(other.m_coeffs_storage)),
+ m_coeffs(owns_storage() ? std::span(m_coeffs_storage) : other.m_coeffs) {}
+
+ public:
+ // Workaround, because Xcode and NDK don't understand the
+ // `detail::domain_cast` friend declaration.
+ //
+ // TODO: Try to remove this and use the c'tor directly in
+ // `detail::domain_cast` after updating the compilers.
+ template
+ requires(D != OtherD)
+ static Polynomial from_domain_cast(Polynomial&& p) {
+ return Polynomial(std::move(p));
+ }
+
+ public:
+ Polynomial() : m_coeffs_storage(Trait::N), m_coeffs(m_coeffs_storage) { BOTAN_DEBUG_ASSERT(owns_storage()); }
+
+ explicit Polynomial(std::span coeffs) : m_coeffs(coeffs) { BOTAN_DEBUG_ASSERT(!owns_storage()); }
+
+ Polynomial(const ThisPolynomial& other) = delete;
+
+ Polynomial(ThisPolynomial&& other) noexcept :
+ m_coeffs_storage(std::move(other.m_coeffs_storage)), m_coeffs(other.m_coeffs) {}
+
+ ThisPolynomial& operator=(const ThisPolynomial& other) = delete;
+
+ ThisPolynomial& operator=(ThisPolynomial&& other) noexcept {
+ if(this != &other) {
+ BOTAN_ASSERT_NOMSG(owns_storage());
+ m_coeffs_storage = std::move(other.m_coeffs_storage);
+ m_coeffs = std::span(m_coeffs_storage);
+ }
+ return *this;
+ }
+
+ ~Polynomial() = default;
+
+ constexpr size_t size() const { return m_coeffs.size(); }
+
+ constexpr Domain domain() const noexcept { return D; }
+
+ ThisPolynomial clone() const {
+ ThisPolynomial res;
+ copy_mem(res.m_coeffs_storage, m_coeffs);
+ res.m_coeffs = std::span(res.m_coeffs_storage);
+ BOTAN_DEBUG_ASSERT(res.owns_storage());
+ return res;
+ }
+
+ /// @returns true if all coefficients are within the range [min, max]
+ constexpr bool ct_validate_value_range(T min, T max) const noexcept {
+ return detail::ct_all_within_range(coefficients(), min, max);
+ }
+
+ /// @returns the number of non-zero coefficients in the polynomial
+ constexpr size_t hamming_weight() const noexcept {
+ size_t weight = 0;
+ for(const auto c : m_coeffs) {
+ weight += (c != 0);
+ }
+ return weight;
+ }
+
+ std::span coefficients() { return m_coeffs; }
+
+ std::span coefficients() const { return m_coeffs; }
+
+ T& operator[](size_t i) { return m_coeffs[i]; }
+
+ T operator[](size_t i) const { return m_coeffs[i]; }
+
+ decltype(auto) begin() { return m_coeffs.begin(); }
+
+ decltype(auto) begin() const { return m_coeffs.begin(); }
+
+ decltype(auto) end() { return m_coeffs.end(); }
+
+ decltype(auto) end() const { return m_coeffs.end(); }
+
+ constexpr bool owns_storage() const { return !m_coeffs_storage.empty(); }
+
+ ThisPolynomial& reduce() {
+ Trait::barrett_reduce(m_coeffs);
+ return *this;
+ }
+
+ ThisPolynomial& conditional_add_q() {
+ Trait::poly_cadd_q(m_coeffs);
+ return *this;
+ }
+
+ /**
+ * Adds two polynomials element-wise. Does not perform a reduction after the addition.
+ * Therefore this operation might cause an integer overflow.
+ */
+ decltype(auto) operator+=(const ThisPolynomial& other) {
+ Trait::poly_add(m_coeffs, m_coeffs, other.m_coeffs);
+ return *this;
+ }
+
+ /**
+ * Subtracts two polynomials element-wise. Does not perform a reduction after the subtraction.
+ * Therefore this operation might cause an integer underflow.
+ */
+ decltype(auto) operator-=(const ThisPolynomial& other) {
+ Trait::poly_sub(m_coeffs, m_coeffs, other.m_coeffs);
+ return *this;
+ }
+};
+
+template
+class PolynomialVector {
+ private:
+ using ThisPolynomialVector = PolynomialVector;
+ using T = typename Trait::T;
+
+ private:
+ std::vector m_polys_storage;
+ std::vector> m_vec;
+
+ private:
+ template
+ friend class PolynomialVector;
+
+ template class StructureT, crystals_trait C, Domain From>
+ requires(To != From)
+ friend StructureT detail::domain_cast(StructureT&&);
+
+ /**
+ * This constructor is used to convert a PolynomialVector from one domain to another.
+ * The friend declarations above facilitate this.
+ */
+ template
+ requires(D != OtherD)
+ explicit PolynomialVector(PolynomialVector&& other) noexcept :
+ m_polys_storage(std::move(other.m_polys_storage)) {
+ BOTAN_DEBUG_ASSERT(m_polys_storage.size() % Trait::N == 0);
+ const size_t vecsize = m_polys_storage.size() / Trait::N;
+ for(size_t i = 0; i < vecsize; ++i) {
+ m_vec.emplace_back(
+ Polynomial(std::span{m_polys_storage}.subspan(i * Trait::N).template first()));
+ }
+ }
+
+ public:
+ // Workaround, because Xcode and NDK don't understand the
+ // `detail::domain_cast` friend declaration above.
+ //
+ // TODO: Try to remove this and use the c'tor directly in
+ // `detail::domain_cast` after updating the compilers.
+ template
+ requires(D != OtherD)
+ static PolynomialVector from_domain_cast(PolynomialVector&& other) {
+ return PolynomialVector(std::move(other));
+ }
+
+ public:
+ PolynomialVector(size_t vecsize) : m_polys_storage(vecsize * Trait::N) {
+ for(size_t i = 0; i < vecsize; ++i) {
+ m_vec.emplace_back(
+ Polynomial(std::span{m_polys_storage}.subspan(i * Trait::N).template first()));
+ }
+ }
+
+ PolynomialVector(const ThisPolynomialVector& other) = delete;
+ PolynomialVector(ThisPolynomialVector&& other) noexcept = default;
+ ThisPolynomialVector& operator=(const ThisPolynomialVector& other) = delete;
+ ThisPolynomialVector& operator=(ThisPolynomialVector&& other) noexcept = default;
+ ~PolynomialVector() = default;
+
+ size_t size() const { return m_vec.size(); }
+
+ constexpr Domain domain() const noexcept { return D; }
+
+ ThisPolynomialVector clone() const {
+ ThisPolynomialVector res(size());
+
+ // The default-constructed PolynomialVector has set up res.m_vec to
+ // point to res.m_polys_storage. Therefore we can just copy the data
+ // into res.m_polys_storage to fill the non-owning polynomials.
+ copy_mem(res.m_polys_storage, m_polys_storage);
+
+ return res;
+ }
+
+ /// @returns the number of non-zero coefficients in the polynomial vector
+ size_t hamming_weight() const noexcept {
+ size_t weight = 0;
+ for(const auto c : m_polys_storage) {
+ weight += (c != 0);
+ }
+ return weight;
+ }
+
+ /// @returns true if all coefficients are within the range [min, max]
+ constexpr bool ct_validate_value_range(T min, T max) const noexcept {
+ return detail::ct_all_within_range(coefficients(), min, max);
+ }
+
+ std::span coefficients() { return m_polys_storage; }
+
+ std::span coefficients() const { return m_polys_storage; }
+
+ ThisPolynomialVector& operator+=(const ThisPolynomialVector& other) {
+ BOTAN_ASSERT(m_vec.size() == other.m_vec.size(), "cannot add polynomial vectors of differing lengths");
+ for(size_t i = 0; i < m_vec.size(); ++i) {
+ Trait::poly_add(m_vec[i].coefficients(), m_vec[i].coefficients(), other.m_vec[i].coefficients());
+ }
+ return *this;
+ }
+
+ ThisPolynomialVector& operator-=(const ThisPolynomialVector& other) {
+ BOTAN_ASSERT(m_vec.size() == other.m_vec.size(), "cannot subtract polynomial vectors of differing lengths");
+ for(size_t i = 0; i < m_vec.size(); ++i) {
+ Trait::poly_sub(m_vec[i].coefficients(), m_vec[i].coefficients(), other.m_vec[i].coefficients());
+ }
+ return *this;
+ }
+
+ ThisPolynomialVector& reduce() {
+ for(auto& p : m_vec) {
+ Trait::barrett_reduce(p.coefficients());
+ }
+ return *this;
+ }
+
+ ThisPolynomialVector& conditional_add_q() {
+ for(auto& v : m_vec) {
+ Trait::poly_cadd_q(v.coefficients());
+ }
+ return *this;
+ }
+
+ Polynomial& operator[](size_t i) { return m_vec[i]; }
+
+ const Polynomial& operator[](size_t i) const { return m_vec[i]; }
+
+ decltype(auto) begin() { return m_vec.begin(); }
+
+ decltype(auto) begin() const { return m_vec.begin(); }
+
+ decltype(auto) end() { return m_vec.end(); }
+
+ decltype(auto) end() const { return m_vec.end(); }
+};
+
+template
+class PolynomialMatrix {
+ private:
+ using ThisPolynomialMatrix = PolynomialMatrix;
+
+ private:
+ std::vector> m_mat;
+
+ public:
+ PolynomialMatrix(std::vector> mat) : m_mat(std::move(mat)) {}
+
+ PolynomialMatrix(const ThisPolynomialMatrix& other) = delete;
+ PolynomialMatrix(ThisPolynomialMatrix&& other) noexcept = default;
+ ThisPolynomialMatrix& operator=(const ThisPolynomialMatrix& other) = delete;
+ ThisPolynomialMatrix& operator=(ThisPolynomialMatrix&& other) noexcept = default;
+ ~PolynomialMatrix() = default;
+
+ size_t size() const { return m_mat.size(); }
+
+ PolynomialMatrix(size_t rows, size_t cols) {
+ m_mat.reserve(rows);
+ for(size_t i = 0; i < rows; ++i) {
+ m_mat.emplace_back(cols);
+ }
+ }
+
+ PolynomialVector& operator[](size_t i) { return m_mat[i]; }
+
+ const PolynomialVector& operator[](size_t i) const { return m_mat[i]; }
+
+ decltype(auto) begin() { return m_mat.begin(); }
+
+ decltype(auto) begin() const { return m_mat.begin(); }
+
+ decltype(auto) end() { return m_mat.end(); }
+
+ decltype(auto) end() const { return m_mat.end(); }
+};
+
+namespace detail {
+
+template
+void montgomery(Polynomial& p) {
+ for(auto& c : p) {
+ c = Trait::to_montgomery(c);
+ }
+}
+
+template
+void pointwise_multiply(Polynomial& out,
+ const PolynomialVector& a,
+ const PolynomialVector& b) {
+ BOTAN_ASSERT(a.size() == b.size(), "Pointwise vector multiplication requires equally sized PolynomialVectors");
+ for(size_t i = 0; i < a.size(); ++i) {
+ out += a[i] * b[i];
+ }
+ out.reduce();
+}
+
+} // namespace detail
+
+template
+Polynomial ntt(Polynomial p) {
+ auto p_ntt = detail::domain_cast(std::move(p));
+ Trait::ntt(p_ntt.coefficients());
+ return p_ntt;
+}
+
+template
+Polynomial inverse_ntt(Polynomial p_ntt) {
+ auto p = detail::domain_cast(std::move(p_ntt));
+ Trait::inverse_ntt(p.coefficients());
+ return p;
+}
+
+template
+PolynomialVector ntt(PolynomialVector polyvec) {
+ auto polyvec_ntt = detail::domain_cast(std::move(polyvec));
+ for(auto& poly : polyvec_ntt) {
+ Trait::ntt(poly.coefficients());
+ }
+ return polyvec_ntt;
+}
+
+template
+PolynomialVector inverse_ntt(PolynomialVector polyvec_ntt) {
+ auto polyvec = detail::domain_cast(std::move(polyvec_ntt));
+ for(auto& poly : polyvec) {
+ Trait::inverse_ntt(poly.coefficients());
+ }
+ return polyvec;
+}
+
+template
+Polynomial montgomery(Polynomial p) {
+ detail::montgomery(p);
+ return p;
+}
+
+template
+PolynomialVector montgomery(PolynomialVector polyvec) {
+ for(auto& p : polyvec) {
+ detail::montgomery(p);
+ }
+ return polyvec;
+}
+
+template
+PolynomialMatrix montgomery(PolynomialMatrix m) {
+ for(auto& polyvec : m) {
+ for(auto& poly : polyvec) {
+ detail::montgomery(poly);
+ }
+ }
+ return m;
+}
+
+template
+PolynomialVector operator+(const PolynomialVector& a,
+ const PolynomialVector& b) {
+ BOTAN_DEBUG_ASSERT(a.size() == b.size());
+ PolynomialVector result(a.size());
+ for(size_t i = 0; i < a.size(); ++i) {
+ Trait::poly_add(result[i].coefficients(), a[i].coefficients(), b[i].coefficients());
+ }
+ return result;
+}
+
+template
+PolynomialVector operator*(const PolynomialMatrix& mat,
+ const PolynomialVector& vec) {
+ PolynomialVector result(mat.size());
+ for(size_t i = 0; i < mat.size(); ++i) {
+ Trait::polyvec_pointwise_acc_montgomery(result[i].coefficients(), mat[i].coefficients(), vec.coefficients());
+ }
+ return result;
+}
+
+template
+Polynomial operator*(const PolynomialVector& a,
+ const PolynomialVector& b) {
+ Polynomial result;
+ detail::pointwise_multiply(result, a, b);
+ return result;
+}
+
+template
+PolynomialVector operator*(const Polynomial& p,
+ const PolynomialVector& pv) {
+ PolynomialVector result(pv.size());
+ for(size_t i = 0; i < pv.size(); ++i) {
+ Trait::poly_pointwise_montgomery(result[i].coefficients(), p.coefficients(), pv[i].coefficients());
+ }
+ return result;
+}
+
+template
+Polynomial operator*(const Polynomial& a,
+ const Polynomial& b) {
+ Polynomial result;
+ Trait::poly_pointwise_montgomery(result.coefficients(), a.coefficients(), b.coefficients());
+ return result;
+}
+
+template
+PolynomialVector operator<<(const PolynomialVector& pv, size_t shift) {
+ BOTAN_ASSERT_NOMSG(shift < sizeof(typename Trait::T) * 8);
+ PolynomialVector result(pv.size());
+ for(size_t i = 0; i < pv.size(); ++i) {
+ for(size_t j = 0; j < Trait::N; ++j) {
+ result[i][j] = pv[i][j] << shift;
+ }
+ }
+ return result;
+}
+
+} // namespace Botan::CRYSTALS
+
+#endif
diff --git a/src/lib/pubkey/pqcrystals/pqcrystals_encoding.h b/src/lib/pubkey/pqcrystals/pqcrystals_encoding.h
new file mode 100644
index 00000000000..7a4bcfe95af
--- /dev/null
+++ b/src/lib/pubkey/pqcrystals/pqcrystals_encoding.h
@@ -0,0 +1,220 @@
+/*
+ * PQ CRYSTALS Encoding Helpers
+ *
+ * Further changes
+ * (C) 2024 Jack Lloyd
+ * (C) 2024 René Meusel, Rohde & Schwarz Cybersecurity
+ *
+ * Botan is released under the Simplified BSD License (see license.txt)
+ */
+
+#ifndef BOTAN_PQ_CRYSTALS_ENCODING_H_
+#define BOTAN_PQ_CRYSTALS_ENCODING_H_
+
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+
+namespace Botan::CRYSTALS {
+
+namespace detail {
+
+constexpr auto as_byte_source(BufferSlicer& slicer) {
+ return [&](std::span out) { slicer.copy_into(out); };
+}
+
+constexpr auto as_byte_source(Botan::XOF& xof) {
+ return [&](std::span out) { xof.output(out); };
+}
+
+} // namespace detail
+
+template
+concept byte_source =
+ requires(T& t) { requires std::invocable>; };
+
+template
+concept coeff_map_fn = std::signed_integral && requires(T fn, PolyCoeffT coeff) {
+ { fn(coeff) } -> std::same_as>;
+};
+
+template
+concept coeff_unmap_fn =
+ std::signed_integral && requires(T fn, std::make_unsigned_t coeff_value) {
+ { fn(coeff_value) } -> std::same_as;
+ };
+
+/**
+ * Helper for base implementations of NIST FIPS 204 IPD, Algorithms 10-13 and
+ * NIST FIPS Algorithms 4-5. It pre-computes generic values to bit-(un)pack
+ * polynomial coefficients at compile-time.
+ *
+ * The base implementations are also templated with the @p range parameter
+ * forcing the compiler to generate specialized code for each supported range.
+ */
+template
+struct BitPackingTrait final {
+ using T = typename PolyTrait::T;
+ using unsigned_T = std::make_unsigned_t;
+ using sink_t = uint64_t;
+
+ static_assert(range <= std::numeric_limits::max());
+
+ constexpr static size_t bits_in_collector = sizeof(sink_t) * 8;
+ constexpr static size_t bits_per_coeff = bitlen(range);
+
+ /// The smallest number of coefficents that must be bit-packed to
+ /// obtain a span of bytes without any padding. Note that this may be more
+ /// than 64 bits.
+ constexpr static size_t bits_per_pack = lcm(bits_per_coeff, size_t(8));
+ constexpr static size_t bytes_per_pack = bits_per_pack / 8;
+ constexpr static size_t coeffs_per_pack = bits_per_pack / bits_per_coeff;
+ constexpr static size_t collectors_per_pack = (bytes_per_pack + sizeof(sink_t) - 1) / sizeof(sink_t);
+ constexpr static size_t collector_bytes_per_pack = collectors_per_pack * sizeof(sink_t);
+ constexpr static sink_t value_mask = (1 << bits_per_coeff) - 1;
+
+ using collector_array = std::array;
+ using collector_bytearray = std::array;
+
+ static_assert(PolyTrait::N % coeffs_per_pack == 0);
+};
+
+/**
+ * Base implementation of NIST FIPS 203 IPD Algorithm 4 (ByteEncode) and NIST
+ * FIPS 204 Algorithms 10 (SimpleBitPack) and 11 (BitPack).
+ *
+ * This takes a polynomial @p p and packs its coefficients into the buffer
+ * represented by @p stuffer. Optionally, the coefficients can be transformed
+ * using the @p map function before packing them. Kyber uses @p map to compress
+ * the coefficients as needed, Dilithium to transform coefficients to unsigned.
+ *
+ * The implementation assumes that the values returned from the custom @p map
+ * transformation are in the range [0, range]. No assumption is made about the
+ * value range of the coefficients in the polynomial @p p.
+ *
+ * Note that this bit-packing algorithm is inefficient if the bit-length of the
+ * coefficients is a multiple of 8. In that case, a byte-level encoding (that
+ * might need to take endianess into account) would be more efficient.
+ *
+ * @tparam range the upper bound of the coefficient range.
+ */
+template MapFnT>
+constexpr void pack(const Polynomial& p, BufferStuffer& stuffer, MapFnT map) {
+ using trait = BitPackingTrait;
+
+ BOTAN_DEBUG_ASSERT(stuffer.remaining_capacity() >= p.size() * trait::bits_per_coeff / 8);
+
+ // Bit-packing example that shows a coefficients' bit-pack that spills across
+ // more than one 64-bit collectors. This illustrates the algorithm below.
+ //
+ // 0 64 128
+ // Collectors (64 bits): | collectors[0] | collectors[1] |
+ // | | |
+ // Coefficients (11 bits): | c[0] | c[1] | c[2] | c[3] | c[4] | c[5] | c[6] | c[7] | | | | | ...
+ // | | |
+ // | < byte-aligned coefficient pack > | < byte-aligned pad. > |
+ // | (one inner loop iteration) |
+ // 0 88 (divisible by 8)
+
+ for(size_t i = 0; i < p.size(); i += trait::coeffs_per_pack) {
+ // The collectors array is filled with bit-packed coefficients to produce
+ // a byte-aligned pack of coefficients. When coefficients fall onto the
+ // boundary of two collectors, their bits must be split.
+ typename trait::collector_array collectors = {0};
+ for(size_t j = 0, bit_offset = 0, c = 0; j < trait::coeffs_per_pack; ++j) {
+ // Transform p[i] via a custom map function (that may be a NOOP).
+ const typename trait::unsigned_T mapped_coeff = map(p[i + j]);
+ const auto coeff_value = static_cast(mapped_coeff);
+ BOTAN_DEBUG_ASSERT(coeff_value <= range);
+
+ // Bit-pack the coefficient into the collectors array and keep track of
+ // the bit-offset within the current collector. Note that this might
+ // shift some high-bits of the coefficient out of the current collector.
+ collectors[c] |= coeff_value << bit_offset;
+ bit_offset += trait::bits_per_coeff;
+
+ // If the bit-offset now exceeds the collector's bit-width, we fill the
+ // next collector with the high-bits that didn't fit into the previous.
+ // The bit-offset is adjusted to now point into the new collector.
+ if(bit_offset > trait::bits_in_collector) {
+ bit_offset = bit_offset - trait::bits_in_collector;
+ collectors[++c] = coeff_value >> (trait::bits_per_coeff - bit_offset);
+ }
+ }
+
+ // One byte-aligned pack of bit-packed coefficients is now stored in the
+ // collectors and can be written to an output buffer. Note that we might
+ // have to remove some padding bytes of unused collector space.
+ const auto bytes = store_le(collectors);
+ stuffer.append(std::span{bytes}.template first());
+ }
+}
+
+/**
+ * Base implementation of NIST FIPS 203 IPD Algorithm 5 (ByteDecode) and NIST
+ * FIPS 204 Algorithms 12 (SimpleBitUnpack) and 13 (BitUnpack).
+ *
+ * This takes a byte sequence represented by @p byte_source and unpacks its
+ * coefficients into the polynomial @p p. Optionally, the coefficients can be
+ * transformed using the @p unmap function after unpacking them. Note that the
+ * provided range is assumed for the coefficients _before_ the transformation.
+ *
+ * Kyber uses @p unmap to decompress the coefficients as needed, Dilithium uses
+ * it to convert the coefficients back to signed integers.
+ *
+ * @tparam range the upper bound of the coefficient range.
+ */
+template UnmapFnT>
+constexpr void unpack(Polynomial& p, ByteSourceT& byte_source, UnmapFnT unmap) {
+ using trait = BitPackingTrait;
+
+ auto get_bytes = detail::as_byte_source(byte_source);
+ typename trait::collector_bytearray bytes = {0};
+
+ // This is the inverse operation of the bit-packing algorithm above. Please
+ // refer to the comments there for a detailed explanation of the algorithm.
+ for(size_t i = 0; i < p.size(); i += trait::coeffs_per_pack) {
+ get_bytes(std::span{bytes}.template first());
+ const auto collectors = load_le(bytes);
+
+ for(size_t j = 0, bit_offset = 0, c = 0; j < trait::coeffs_per_pack; ++j) {
+ typename trait::sink_t coeff_value = collectors[c] >> bit_offset;
+ bit_offset += trait::bits_per_coeff;
+ if(bit_offset > trait::bits_in_collector) {
+ bit_offset = bit_offset - trait::bits_in_collector;
+ coeff_value |= collectors[++c] << (trait::bits_per_coeff - bit_offset);
+ }
+ const auto mapped_coeff = static_cast(coeff_value & trait::value_mask);
+ BOTAN_DEBUG_ASSERT(mapped_coeff <= range);
+ p[i + j] = unmap(mapped_coeff);
+ }
+ }
+}
+
+/// Overload for packing polynomials with a NOOP map function
+template
+constexpr void pack(const Polynomial& p, BufferStuffer& stuffer) {
+ using unsigned_T = std::make_unsigned_t;
+ pack(p, stuffer, [](typename PolyTrait::T x) { return static_cast(x); });
+}
+
+/// Overload for unpacking polynomials with a NOOP unmap function
+template
+constexpr void unpack(Polynomial& p, ByteSourceT& byte_source) {
+ using unsigned_T = std::make_unsigned_t;
+ unpack(p, byte_source, [](unsigned_T x) { return static_cast(x); });
+}
+
+} // namespace Botan::CRYSTALS
+
+#endif
diff --git a/src/lib/pubkey/pqcrystals/pqcrystals_helpers.h b/src/lib/pubkey/pqcrystals/pqcrystals_helpers.h
new file mode 100644
index 00000000000..9d1fbc54126
--- /dev/null
+++ b/src/lib/pubkey/pqcrystals/pqcrystals_helpers.h
@@ -0,0 +1,143 @@
+/*
+ * PQ CRYSTALS Common Helpers
+ *
+ * Further changes
+ * (C) 2024 Jack Lloyd
+ * (C) 2024 René Meusel, Rohde & Schwarz Cybersecurity
+ *
+ * Botan is released under the Simplified BSD License (see license.txt)
+ */
+
+#ifndef BOTAN_PQ_CRYSTALS_HELPERS_H_
+#define BOTAN_PQ_CRYSTALS_HELPERS_H_
+
+#include
+#include
+#include
+
+#include
+
+namespace Botan {
+
+// clang-format off
+
+template
+ requires(sizeof(T) <= 4)
+using next_longer_uint_t =
+ std::conditional_t>>;
+
+template
+ requires(sizeof(T) <= 4)
+using next_longer_int_t =
+ std::conditional_t>>;
+
+// clang-format on
+
+template
+ requires(size_t(sizeof(T)) <= 4)
+constexpr T montgomery_R(T q) {
+ using T_unsigned = std::make_unsigned_t;
+ using T2 = next_longer_uint_t;
+ return (T2(1) << (sizeof(T) * 8)) % q;
+}
+
+template
+ requires(size_t(sizeof(T)) <= 4)
+constexpr T montgomery_R2(T q) {
+ using T2 = next_longer_int_t;
+ return (static_cast(montgomery_R(q)) * static_cast(montgomery_R(q))) % q;
+}
+
+template
+struct eea_result {
+ T gcd;
+ T u;
+ T v;
+};
+
+/**
+ * Run the extended Euclidean algorithm to find the greatest common divisor of a
+ * and b and the Bézout coefficients, u and v.
+ */
+template
+constexpr eea_result extended_euclidean_algorithm(T a, T b) {
+ if(a > b) {
+ std::swap(a, b);
+ }
+
+ T u1 = 0, v1 = 1, u2 = 1, v2 = 0;
+
+ if(a != b) {
+ while(a != 0) {
+ const T q = b / a;
+ std::tie(a, b) = std::make_tuple(b - q * a, a);
+ std::tie(u1, v1, u2, v2) = std::make_tuple(u2, v2, u1 - q * u2, v1 - q * v2);
+ }
+ }
+
+ return {.gcd = b, .u = u1, .v = v1};
+}
+
+/**
+ * Calculate the modular multiplacative inverse of q modulo m.
+ * By default, this assumes m to be 2^bitlength of T for application in a
+ * Montgomery reduction.
+ */
+template >
+ requires(sizeof(T) <= 4)
+constexpr T modular_inverse(T q, T2 m = T2(1) << sizeof(T) * 8) {
+ return static_cast(extended_euclidean_algorithm(q, m).u);
+}
+
+template
+constexpr T lcm(T a, T b) {
+ return a / extended_euclidean_algorithm(a, b).gcd * b;
+}
+
+constexpr auto bitlen(size_t x) {
+ return ceil_log2(x + 1);
+};
+
+/**
+ * Precompute the zeta-values for the NTT. Note that the pre-computed values
+ * contain the Montgomery factor for either Kyber or Dilithium.
+ */
+template
+consteval static auto precompute_zetas(T q, T monty, T root_of_unity) {
+ using T2 = next_longer_int_t;
+
+ std::array result = {0};
+
+ auto bitreverse = [](size_t k) -> size_t {
+ size_t r = 0;
+ const auto l = ceil_log2(degree);
+ for(size_t i = 0; i < l; ++i) {
+ r |= ((k >> i) & 1) << (l - 1 - i);
+ }
+ return r;
+ };
+
+ auto pow = [q](T base, size_t exp) -> T2 {
+ T2 res = 1;
+ for(size_t i = 0; i < exp; ++i) {
+ res = (res * base) % q;
+ }
+ return res;
+ };
+
+ auto csubq = [q](T a) -> T { return a <= q / 2 ? a : a - q; };
+
+ for(size_t i = 0; i < result.size(); ++i) {
+ result[i] = csubq(pow(root_of_unity, bitreverse(i)) * monty % q);
+ }
+
+ return result;
+}
+
+} // namespace Botan
+
+#endif
diff --git a/src/lib/rng/rng.h b/src/lib/rng/rng.h
index b96e2f4633e..a85ee42f68c 100644
--- a/src/lib/rng/rng.h
+++ b/src/lib/rng/rng.h
@@ -14,6 +14,7 @@
#include
#include
+#include
#include
#include
#include
diff --git a/src/tests/test_crystals.cpp b/src/tests/test_crystals.cpp
new file mode 100644
index 00000000000..d5c91da6755
--- /dev/null
+++ b/src/tests/test_crystals.cpp
@@ -0,0 +1,432 @@
+/*
+ * Tests for PQ Crystals
+ * (C) 2024 Jack Lloyd
+ * (C) 2024 René Meusel, Rohde & Schwarz Cybersecurity
+ *
+ * Botan is released under the Simplified BSD License (see license.txt)
+ */
+
+#include "tests.h"
+
+#if defined(BOTAN_HAS_PQCRYSTALS)
+ #include
+
+ #include
+ #include
+ #include
+ #include
+ #include
+
+namespace Botan_Tests {
+
+namespace {
+
+Test::Result test_extended_euclidean_algorithm() {
+ Test::Result res("Extended Euclidean Algorithm");
+
+ res.test_is_eq("gcd(1337, 1337)", Botan::extended_euclidean_algorithm(1337, 1337).gcd, 1337);
+ res.test_is_eq("gcd(350, 294)", Botan::extended_euclidean_algorithm(350, 294).gcd, 14);
+ res.test_is_eq