Skip to content

Commit

Permalink
merged
Browse files Browse the repository at this point in the history
  • Loading branch information
trigpolynom committed Jan 13, 2024
2 parents 15b76a2 + eb4b71d commit 3f1af57
Show file tree
Hide file tree
Showing 25 changed files with 612 additions and 179 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: MIT

cmake_minimum_required (VERSION 3.5)
cmake_minimum_required (VERSION 3.15)
# option() honors normal variables.
# see: https://cmake.org/cmake/help/git-stage/policy/CMP0077.html
if(POLICY CMP0077)
Expand Down
2 changes: 1 addition & 1 deletion PLATFORMS.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ In this policy, the words "must" and "must not" specify absolute requirements th
- x86_64/amd64/x64 for Windows 2022
- armeabi-v7a, arm64-v8a, x86, x86_64 for Android
- aarch64 for Apple iOS and tvOS (CMake `-DPLATFORM=OS64` and `TVOS`)
- arm64, armv7, x86, riscv32, riscv64 for Zephyr
- arm64, arm (32 bit), x86, x86_64, riscv32, riscv64 for Zephyr

### Tier 3

Expand Down
4 changes: 2 additions & 2 deletions docs/algorithms/kem/kyber.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
- **Authors' website**: https://pq-crystals.org/
- **Specification version**: NIST Round 3 submission.
- **Primary Source**<a name="primary-source"></a>:
- **Source**: https://github.com/pq-crystals/kyber/commit/dda29cc63af721981ee2c831cf00822e69be3220 with copy_from_upstream patches
- **Source**: https://github.com/pq-crystals/kyber/commit/b628ba78711bc28327dc7d2d5c074a00f061884e with copy_from_upstream patches
- **Implementation license (SPDX-Identifier)**: CC0-1.0 or Apache-2.0
- **Optimized Implementation sources**: https://github.com/pq-crystals/kyber/commit/dda29cc63af721981ee2c831cf00822e69be3220 with copy_from_upstream patches
- **Optimized Implementation sources**: https://github.com/pq-crystals/kyber/commit/b628ba78711bc28327dc7d2d5c074a00f061884e with copy_from_upstream patches
- **oldpqclean-aarch64**:<a name="oldpqclean-aarch64"></a>
- **Source**: https://github.com/PQClean/PQClean/commit/8e220a87308154d48fdfac40abbb191ac7fce06a with copy_from_upstream patches
- **Implementation license (SPDX-Identifier)**: CC0-1.0 and (CC0-1.0 or Apache-2.0) and (CC0-1.0 or MIT) and MIT
Expand Down
2 changes: 1 addition & 1 deletion docs/algorithms/kem/kyber.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ website: https://pq-crystals.org/
nist-round: 3
spec-version: NIST Round 3 submission
primary-upstream:
source: https://github.com/pq-crystals/kyber/commit/dda29cc63af721981ee2c831cf00822e69be3220
source: https://github.com/pq-crystals/kyber/commit/b628ba78711bc28327dc7d2d5c074a00f061884e
with copy_from_upstream patches
spdx-license-identifier: CC0-1.0 or Apache-2.0
optimized-upstreams:
Expand Down
4 changes: 2 additions & 2 deletions scripts/copy_from_upstream/copy_from_upstream.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ upstreams:
sig_meta_path: 'crypto_sign/{pqclean_scheme}/META.yml'
kem_scheme_path: 'crypto_kem/{pqclean_scheme}'
sig_scheme_path: 'crypto_sign/{pqclean_scheme}'
patches: [pqclean-dilithium-arm-randomized-signing.patch, pqclean-kyber-armneon-shake-fixes.patch, pqclean-kyber-armneon-768-1024-fixes.patch]
patches: [pqclean-dilithium-arm-randomized-signing.patch, pqclean-kyber-armneon-shake-fixes.patch, pqclean-kyber-armneon-768-1024-fixes.patch, pqclean-kyber-armneon-variable-timing-fix.patch]
ignore: pqclean_sphincs-shake-256s-simple_aarch64, pqclean_sphincs-shake-256s-simple_aarch64, pqclean_sphincs-shake-256f-simple_aarch64, pqclean_sphincs-shake-192s-simple_aarch64, pqclean_sphincs-shake-192f-simple_aarch64, pqclean_sphincs-shake-128s-simple_aarch64, pqclean_sphincs-shake-128f-simple_aarch64
-
name: pqclean
Expand All @@ -25,7 +25,7 @@ upstreams:
name: pqcrystals-kyber
git_url: https://github.com/pq-crystals/kyber.git
git_branch: master
git_commit: dda29cc63af721981ee2c831cf00822e69be3220
git_commit: b628ba78711bc28327dc7d2d5c074a00f061884e
kem_meta_path: '{pretty_name_full}_META.yml'
kem_scheme_path: '.'
patches: [pqcrystals-kyber-yml.patch, pqcrystals-kyber-ref-shake-aes.patch, pqcrystals-kyber-avx2-shake-aes.patch]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
8f8d63fd708e00cc941ade03f405de21fdf17410
diff --git a/crypto_kem/kyber1024/aarch64/poly.c b/crypto_kem/kyber1024/aarch64/poly.c
index 1dfa52c..3115d1c 100644
--- a/crypto_kem/kyber1024/aarch64/poly.c
+++ b/crypto_kem/kyber1024/aarch64/poly.c
@@ -51,6 +51,7 @@
void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const int16_t a[KYBER_N]) {
unsigned int i, j;
int16_t u;
+ uint32_t d0;
uint8_t t[8];

for (i = 0; i < KYBER_N / 8; i++) {
@@ -58,7 +59,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const int16_t a[KYBER_N
// map to positive standard representatives
u = a[8 * i + j];
u += (u >> 15) & KYBER_Q;
- t[j] = ((((uint32_t)u << 5) + KYBER_Q / 2) / KYBER_Q) & 31;
+ // t[j] = ((((uint32_t)u << 5) + KYBER_Q / 2) / KYBER_Q) & 31;
+ d0 = u << 5;
+ d0 += 1664;
+ d0 *= 40318;
+ d0 >>= 27;
+ t[j] = d0 & 0x1f;
}

r[0] = (t[0] >> 0) | (t[1] << 5);
@@ -207,14 +213,19 @@ void poly_frommsg(int16_t r[KYBER_N], const uint8_t msg[KYBER_INDCPA_MSGBYTES])
**************************************************/
void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const int16_t a[KYBER_N]) {
unsigned int i, j;
- uint16_t t;
+ uint32_t t;

for (i = 0; i < KYBER_N / 8; i++) {
msg[i] = 0;
for (j = 0; j < 8; j++) {
t = a[8 * i + j];
- t += ((int16_t)t >> 15) & KYBER_Q;
- t = (((t << 1) + KYBER_Q / 2) / KYBER_Q) & 1;
+ // t += ((int16_t)t >> 15) & KYBER_Q;
+ // t = (((t << 1) + KYBER_Q/2)/KYBER_Q) & 1;
+ t <<= 1;
+ t += 1665;
+ t *= 80635;
+ t >>= 28;
+ t &= 1;
msg[i] |= t << j;
}
}
diff --git a/crypto_kem/kyber1024/aarch64/polyvec.c b/crypto_kem/kyber1024/aarch64/polyvec.c
index d400348..f9a1ebf 100644
--- a/crypto_kem/kyber1024/aarch64/polyvec.c
+++ b/crypto_kem/kyber1024/aarch64/polyvec.c
@@ -21,6 +21,7 @@
**************************************************/
void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K][KYBER_N]) {
unsigned int i, j, k;
+ uint64_t d0;

#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352))
uint16_t t[8];
@@ -29,7 +30,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K
for (k = 0; k < 8; k++) {
t[k] = a[i][8 * j + k];
t[k] += ((int16_t)t[k] >> 15) & KYBER_Q;
- t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q / 2) / KYBER_Q) & 0x7ff;
+ // t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q / 2) / KYBER_Q) & 0x7ff;
+ d0 = t[k];
+ d0 <<= 11;
+ d0 += 1664;
+ d0 *= 645084;
+ d0 >>= 31;
+ t[k] = d0 & 0x7ff;
}

r[ 0] = (t[0] >> 0);
@@ -53,7 +60,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K
for (k = 0; k < 4; k++) {
t[k] = a[i][4 * j + k];
t[k] += ((int16_t)t[k] >> 15) & KYBER_Q;
- t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff;
+ // t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff;
+ d0 = t[k];
+ d0 <<= 10;
+ d0 += 1665;
+ d0 *= 1290167;
+ d0 >>= 32;
+ t[k] = d0 & 0x3ff;
}

r[0] = (t[0] >> 0);
diff --git a/crypto_kem/kyber512/aarch64/poly.c b/crypto_kem/kyber512/aarch64/poly.c
index dffc655..361ce89 100644
--- a/crypto_kem/kyber512/aarch64/poly.c
+++ b/crypto_kem/kyber512/aarch64/poly.c
@@ -51,6 +51,7 @@
void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const int16_t a[KYBER_N]) {
unsigned int i, j;
int16_t u;
+ uint32_t d0;
uint8_t t[8];

for (i = 0; i < KYBER_N / 8; i++) {
@@ -58,7 +59,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const int16_t a[KYBER_N
// map to positive standard representatives
u = a[8 * i + j];
u += (u >> 15) & KYBER_Q;
- t[j] = ((((uint16_t)u << 4) + KYBER_Q / 2) / KYBER_Q) & 15;
+ // t[j] = ((((uint16_t)u << 4) + KYBER_Q / 2) / KYBER_Q) & 15;
+ d0 = u << 4;
+ d0 += 1665;
+ d0 *= 80635;
+ d0 >>= 28;
+ t[j] = d0 & 0xf;
}

r[0] = t[0] | (t[1] << 4);
@@ -194,14 +200,19 @@ void poly_frommsg(int16_t r[KYBER_N], const uint8_t msg[KYBER_INDCPA_MSGBYTES])
**************************************************/
void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const int16_t a[KYBER_N]) {
unsigned int i, j;
- uint16_t t;
+ uint32_t t;

for (i = 0; i < KYBER_N / 8; i++) {
msg[i] = 0;
for (j = 0; j < 8; j++) {
t = a[8 * i + j];
- t += ((int16_t)t >> 15) & KYBER_Q;
- t = (((t << 1) + KYBER_Q / 2) / KYBER_Q) & 1;
+ // t += ((int16_t)t >> 15) & KYBER_Q;
+ // t = (((t << 1) + KYBER_Q/2)/KYBER_Q) & 1;
+ t <<= 1;
+ t += 1665;
+ t *= 80635;
+ t >>= 28;
+ t &= 1;
msg[i] |= t << j;
}
}
diff --git a/crypto_kem/kyber512/aarch64/polyvec.c b/crypto_kem/kyber512/aarch64/polyvec.c
index d400348..f9a1ebf 100644
--- a/crypto_kem/kyber512/aarch64/polyvec.c
+++ b/crypto_kem/kyber512/aarch64/polyvec.c
@@ -21,6 +21,7 @@
**************************************************/
void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K][KYBER_N]) {
unsigned int i, j, k;
+ uint64_t d0;

#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352))
uint16_t t[8];
@@ -29,7 +30,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K
for (k = 0; k < 8; k++) {
t[k] = a[i][8 * j + k];
t[k] += ((int16_t)t[k] >> 15) & KYBER_Q;
- t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q / 2) / KYBER_Q) & 0x7ff;
+ // t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q / 2) / KYBER_Q) & 0x7ff;
+ d0 = t[k];
+ d0 <<= 11;
+ d0 += 1664;
+ d0 *= 645084;
+ d0 >>= 31;
+ t[k] = d0 & 0x7ff;
}

r[ 0] = (t[0] >> 0);
@@ -53,7 +60,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K
for (k = 0; k < 4; k++) {
t[k] = a[i][4 * j + k];
t[k] += ((int16_t)t[k] >> 15) & KYBER_Q;
- t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff;
+ // t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff;
+ d0 = t[k];
+ d0 <<= 10;
+ d0 += 1665;
+ d0 *= 1290167;
+ d0 >>= 32;
+ t[k] = d0 & 0x3ff;
}

r[0] = (t[0] >> 0);
diff --git a/crypto_kem/kyber768/aarch64/poly.c b/crypto_kem/kyber768/aarch64/poly.c
index dffc655..361ce89 100644
--- a/crypto_kem/kyber768/aarch64/poly.c
+++ b/crypto_kem/kyber768/aarch64/poly.c
@@ -51,6 +51,7 @@
void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const int16_t a[KYBER_N]) {
unsigned int i, j;
int16_t u;
+ uint32_t d0;
uint8_t t[8];

for (i = 0; i < KYBER_N / 8; i++) {
@@ -58,7 +59,12 @@ void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const int16_t a[KYBER_N
// map to positive standard representatives
u = a[8 * i + j];
u += (u >> 15) & KYBER_Q;
- t[j] = ((((uint16_t)u << 4) + KYBER_Q / 2) / KYBER_Q) & 15;
+ // t[j] = ((((uint16_t)u << 4) + KYBER_Q / 2) / KYBER_Q) & 15;
+ d0 = u << 4;
+ d0 += 1665;
+ d0 *= 80635;
+ d0 >>= 28;
+ t[j] = d0 & 0xf;
}

r[0] = t[0] | (t[1] << 4);
@@ -194,14 +200,19 @@ void poly_frommsg(int16_t r[KYBER_N], const uint8_t msg[KYBER_INDCPA_MSGBYTES])
**************************************************/
void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const int16_t a[KYBER_N]) {
unsigned int i, j;
- uint16_t t;
+ uint32_t t;

for (i = 0; i < KYBER_N / 8; i++) {
msg[i] = 0;
for (j = 0; j < 8; j++) {
t = a[8 * i + j];
- t += ((int16_t)t >> 15) & KYBER_Q;
- t = (((t << 1) + KYBER_Q / 2) / KYBER_Q) & 1;
+ // t += ((int16_t)t >> 15) & KYBER_Q;
+ // t = (((t << 1) + KYBER_Q/2)/KYBER_Q) & 1;
+ t <<= 1;
+ t += 1665;
+ t *= 80635;
+ t >>= 28;
+ t &= 1;
msg[i] |= t << j;
}
}
diff --git a/crypto_kem/kyber768/aarch64/polyvec.c b/crypto_kem/kyber768/aarch64/polyvec.c
index d400348..f9a1ebf 100644
--- a/crypto_kem/kyber768/aarch64/polyvec.c
+++ b/crypto_kem/kyber768/aarch64/polyvec.c
@@ -21,6 +21,7 @@
**************************************************/
void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K][KYBER_N]) {
unsigned int i, j, k;
+ uint64_t d0;

#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352))
uint16_t t[8];
@@ -29,7 +30,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K
for (k = 0; k < 8; k++) {
t[k] = a[i][8 * j + k];
t[k] += ((int16_t)t[k] >> 15) & KYBER_Q;
- t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q / 2) / KYBER_Q) & 0x7ff;
+ // t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q / 2) / KYBER_Q) & 0x7ff;
+ d0 = t[k];
+ d0 <<= 11;
+ d0 += 1664;
+ d0 *= 645084;
+ d0 >>= 31;
+ t[k] = d0 & 0x7ff;
}

r[ 0] = (t[0] >> 0);
@@ -53,7 +60,13 @@ void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], int16_t a[KYBER_K
for (k = 0; k < 4; k++) {
t[k] = a[i][4 * j + k];
t[k] += ((int16_t)t[k] >> 15) & KYBER_Q;
- t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff;
+ // t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff;
+ d0 = t[k];
+ d0 <<= 10;
+ d0 += 1665;
+ d0 *= 1290167;
+ d0 >>= 32;
+ t[k] = d0 & 0x3ff;
}

r[0] = (t[0] >> 0);
2 changes: 1 addition & 1 deletion scripts/copy_from_upstream/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
attrs==20.3.0
GitPython==3.1.37
GitPython==3.1.41
importlib-metadata==3.7.0
Jinja2==2.11.3
markdown-it-py==2.2.0
Expand Down
1 change: 0 additions & 1 deletion src/kem/bike/additional_r4/decode_portable.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ rotr_big(OUT syndrome_t *out, IN const syndrome_t *in, IN size_t qw_num)
_INLINE_ void
rotr_small(OUT syndrome_t *out, IN const syndrome_t *in, IN const size_t bits)
{
bike_static_assert(bits < 64, rotr_small_err);
bike_static_assert(sizeof(*out) > (8 * R_QWORDS), rotr_small_qw_err);

// Convert |bits| to 0/1 by using !!bits; then create a mask of 0 or
Expand Down
17 changes: 13 additions & 4 deletions src/kem/bike/additional_r4/gf2x_mul_base_portable.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,23 @@ void gf2x_mul_base_port(OUT uint64_t *c,
u[7] = u[6] ^ b0m;

// Step 2: Multiply two elements in parallel in positions i, i+s
l = u[LSB3(a0)] ^ (u[LSB3(a0 >> 3)] << 3);
h = (u[LSB3(a0 >> 3)] >> 61);
for (size_t i = 0; i < 8; ++i) {
// use a mask for secret-independent memory access
l ^= u[i] & secure_cmpeq64_mask(LSB3(a0), i);
l ^= (u[i] << 3) & secure_cmpeq64_mask(LSB3(a0 >> 3), i);
h ^= (u[i] >> 61) & secure_cmpeq64_mask(LSB3(a0 >> 3), i);
}

for(size_t i = (2 * s); i < w; i += (2 * s)) {
const size_t i2 = (i + s);

g1 = u[LSB3(a0 >> i)];
g2 = u[LSB3(a0 >> i2)];
g1 = 0;
g2 = 0;
for (size_t j = 0; j < 8; ++j) {
// use a mask for secret-independent memory access
g1 ^= u[j] & secure_cmpeq64_mask(LSB3(a0 >> i), j);
g2 ^= u[j] & secure_cmpeq64_mask(LSB3(a0 >> i2), j);
}

l ^= (g1 << i) ^ (g2 << i2);
h ^= (g1 >> (w - i)) ^ (g2 >> (w - i2));
Expand Down
5 changes: 5 additions & 0 deletions src/kem/bike/additional_r4/utilities.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,11 @@ _INLINE_ uint32_t secure_l32_mask(IN const uint32_t v1, IN const uint32_t v2)
#endif
}

// Return (-1) if v1 == v2, 0 otherwise
_INLINE_ uint64_t secure_cmpeq64_mask(IN const uint64_t v1, IN const uint64_t v2) {
return -(1 - ((uint64_t)((v1-v2) | (v2-v1)) >> 63));
}

// bike_memcpy avoids the undefined behaviour of memcpy when byte_len=0
_INLINE_ void *bike_memcpy(void *dst, const void *src, size_t byte_len)
{
Expand Down
Loading

0 comments on commit 3f1af57

Please sign in to comment.