From 8b10ecf26b3bb0ce4af6c18e0a5088c2e3dff209 Mon Sep 17 00:00:00 2001 From: Alex Weibel Date: Fri, 24 Nov 2023 11:23:39 -0800 Subject: [PATCH] Remove s2n's internal Kyber512 implementation, and rely on AWS-LC for Kyber support (#4283) --------- Co-authored-by: Lindsay Stewart --- CMakeLists.txt | 60 +- LICENSE | 22 - Makefile | 6 +- bindings/rust/generate.sh | 1 - bindings/rust/s2n-tls-sys/build.rs | 2 + codebuild/bin/grep_simple_mistakes.sh | 4 +- compliance/generate_report.sh | 1 - {pq-crypto => crypto}/s2n_kyber_evp.c | 27 +- {pq-crypto => crypto}/s2n_kyber_evp.h | 0 pq-crypto/s2n_pq_random.h => crypto/s2n_pq.c | 22 +- {pq-crypto => crypto}/s2n_pq.h | 6 - error/s2n_errno.c | 1 - error/s2n_errno.h | 1 - lib/Makefile | 2 +- pq-crypto/Makefile | 41 - pq-crypto/README.md | 87 -- .../KeccakP-1600-times4-SIMD256_avx2.c | 1284 ----------------- .../kyber_r3/KeccakP-1600-times4-SnP_avx2.h | 63 - .../kyber_r3/KeccakP-SIMD256-config_avx2.h | 3 - pq-crypto/kyber_r3/KeccakP-align_avx2.h | 31 - pq-crypto/kyber_r3/KeccakP-brg_endian_avx2.h | 139 -- pq-crypto/kyber_r3/Makefile | 79 - pq-crypto/kyber_r3/kyber512r3_align_avx2.h | 19 - pq-crypto/kyber_r3/kyber512r3_basemul_avx2.S | 105 -- pq-crypto/kyber_r3/kyber512r3_cbd.c | 106 -- pq-crypto/kyber_r3/kyber512r3_cbd.h | 11 - pq-crypto/kyber_r3/kyber512r3_cbd_avx2.c | 137 -- pq-crypto/kyber_r3/kyber512r3_cbd_avx2.h | 15 - pq-crypto/kyber_r3/kyber512r3_consts_avx2.c | 122 -- pq-crypto/kyber_r3/kyber512r3_consts_avx2.h | 43 - pq-crypto/kyber_r3/kyber512r3_fips202.c | 544 ------- pq-crypto/kyber_r3/kyber512r3_fips202.h | 68 - .../kyber_r3/kyber512r3_fips202x4_avx2.c | 210 --- .../kyber_r3/kyber512r3_fips202x4_avx2.h | 70 - pq-crypto/kyber_r3/kyber512r3_fq_avx2.S | 122 -- pq-crypto/kyber_r3/kyber512r3_indcpa.c | 325 ----- pq-crypto/kyber_r3/kyber512r3_indcpa.h | 15 - pq-crypto/kyber_r3/kyber512r3_indcpa_avx2.c | 363 ----- pq-crypto/kyber_r3/kyber512r3_indcpa_avx2.h | 25 - pq-crypto/kyber_r3/kyber512r3_invntt_avx2.S | 255 ---- 
pq-crypto/kyber_r3/kyber512r3_kem.c | 163 --- pq-crypto/kyber_r3/kyber512r3_ntt.c | 124 -- pq-crypto/kyber_r3/kyber512r3_ntt.h | 19 - pq-crypto/kyber_r3/kyber512r3_ntt_avx2.S | 218 --- pq-crypto/kyber_r3/kyber512r3_ntt_avx2.h | 28 - pq-crypto/kyber_r3/kyber512r3_params.h | 33 - pq-crypto/kyber_r3/kyber512r3_poly.c | 302 ---- pq-crypto/kyber_r3/kyber512r3_poly.h | 61 - pq-crypto/kyber_r3/kyber512r3_poly_avx2.c | 453 ------ pq-crypto/kyber_r3/kyber512r3_poly_avx2.h | 80 - pq-crypto/kyber_r3/kyber512r3_polyvec.c | 188 --- pq-crypto/kyber_r3/kyber512r3_polyvec.h | 40 - pq-crypto/kyber_r3/kyber512r3_polyvec_avx2.c | 227 --- pq-crypto/kyber_r3/kyber512r3_polyvec_avx2.h | 39 - pq-crypto/kyber_r3/kyber512r3_reduce.c | 62 - pq-crypto/kyber_r3/kyber512r3_reduce.h | 15 - pq-crypto/kyber_r3/kyber512r3_reduce_avx2.h | 13 - .../kyber_r3/kyber512r3_rejsample_avx2.c | 420 ------ .../kyber_r3/kyber512r3_rejsample_avx2.h | 14 - pq-crypto/kyber_r3/kyber512r3_shuffle_avx2.S | 272 ---- .../kyber_r3/kyber512r3_symmetric-shake.c | 49 - pq-crypto/kyber_r3/kyber512r3_symmetric.h | 17 - pq-crypto/s2n_pq.c | 135 -- pq-crypto/s2n_pq_asm.h | 29 - pq-crypto/s2n_pq_asm.mk | 37 - pq-crypto/s2n_pq_random.c | 46 - s2n.mk | 5 - .../S2N_KYBER512R3_AVX2_BMI2_SUPPORTED.c | 18 - .../S2N_KYBER512R3_AVX2_BMI2_SUPPORTED.flags | 1 - ...S2N_KYBER512R3_M256_INTRINSICS_SUPPORTED.c | 27 - ...KYBER512R3_M256_INTRINSICS_SUPPORTED.flags | 0 tests/fuzz/allowed_coverage_failures.cfg | 5 + tests/fuzz/calcTotalCov.sh | 2 +- tests/fuzz/runFuzzTest.sh | 10 +- .../s2n_hybrid_ecdhe_kyber_r3_fuzz_test.c | 2 +- .../s2n_kyber_r3_recv_ciphertext_fuzz_test.c | 1 - .../s2n_kyber_r3_recv_public_key_fuzz_test.c | 7 +- tests/integrationv2/common.py | 4 +- tests/saw/Makefile | 19 +- tests/testlib/s2n_kem_fuzz_testlib.c | 2 +- tests/testlib/s2n_pq_kat_test_utils.c | 116 +- tests/unit/s2n_cipher_suite_match_test.c | 2 +- tests/unit/s2n_client_extensions_test.c | 2 +- ...n_client_hello_get_supported_groups_test.c | 2 +- 
tests/unit/s2n_client_hello_retry_test.c | 4 +- .../s2n_client_key_share_extension_pq_test.c | 4 +- tests/unit/s2n_client_pq_kem_extension_test.c | 2 +- ...n_client_supported_groups_extension_test.c | 2 +- tests/unit/s2n_config_test.c | 2 +- tests/unit/s2n_kem_preferences_test.c | 2 +- tests/unit/s2n_kem_test.c | 2 +- tests/unit/s2n_kex_with_kem_test.c | 6 +- tests/unit/s2n_pq_kem_kat_kyber_r3_test.c | 52 - tests/unit/s2n_pq_kem_test.c | 74 +- tests/unit/s2n_security_policies_test.c | 2 +- .../s2n_server_key_share_extension_test.c | 6 +- .../s2n_tls13_hybrid_shared_secret_test.c | 2 +- tests/unit/s2n_tls13_pq_handshake_test.c | 2 +- tls/extensions/s2n_client_key_share.c | 4 +- tls/extensions/s2n_client_pq_kem.c | 2 +- tls/extensions/s2n_client_supported_groups.c | 2 +- tls/extensions/s2n_server_key_share.c | 8 +- tls/s2n_cipher_suites.c | 2 +- tls/s2n_config.c | 2 +- tls/s2n_kem.c | 25 +- tls/s2n_kex.c | 4 +- tls/s2n_server_hello_retry.c | 4 +- utils/s2n_init.c | 2 - 108 files changed, 114 insertions(+), 7849 deletions(-) rename {pq-crypto => crypto}/s2n_kyber_evp.c (80%) rename {pq-crypto => crypto}/s2n_kyber_evp.h (100%) rename pq-crypto/s2n_pq_random.h => crypto/s2n_pq.c (58%) rename {pq-crypto => crypto}/s2n_pq.h (77%) delete mode 100644 pq-crypto/Makefile delete mode 100644 pq-crypto/README.md delete mode 100644 pq-crypto/kyber_r3/KeccakP-1600-times4-SIMD256_avx2.c delete mode 100644 pq-crypto/kyber_r3/KeccakP-1600-times4-SnP_avx2.h delete mode 100644 pq-crypto/kyber_r3/KeccakP-SIMD256-config_avx2.h delete mode 100644 pq-crypto/kyber_r3/KeccakP-align_avx2.h delete mode 100644 pq-crypto/kyber_r3/KeccakP-brg_endian_avx2.h delete mode 100644 pq-crypto/kyber_r3/Makefile delete mode 100644 pq-crypto/kyber_r3/kyber512r3_align_avx2.h delete mode 100644 pq-crypto/kyber_r3/kyber512r3_basemul_avx2.S delete mode 100644 pq-crypto/kyber_r3/kyber512r3_cbd.c delete mode 100644 pq-crypto/kyber_r3/kyber512r3_cbd.h delete mode 100644 pq-crypto/kyber_r3/kyber512r3_cbd_avx2.c 
delete mode 100644 pq-crypto/kyber_r3/kyber512r3_cbd_avx2.h delete mode 100644 pq-crypto/kyber_r3/kyber512r3_consts_avx2.c delete mode 100644 pq-crypto/kyber_r3/kyber512r3_consts_avx2.h delete mode 100644 pq-crypto/kyber_r3/kyber512r3_fips202.c delete mode 100644 pq-crypto/kyber_r3/kyber512r3_fips202.h delete mode 100644 pq-crypto/kyber_r3/kyber512r3_fips202x4_avx2.c delete mode 100644 pq-crypto/kyber_r3/kyber512r3_fips202x4_avx2.h delete mode 100644 pq-crypto/kyber_r3/kyber512r3_fq_avx2.S delete mode 100644 pq-crypto/kyber_r3/kyber512r3_indcpa.c delete mode 100644 pq-crypto/kyber_r3/kyber512r3_indcpa.h delete mode 100644 pq-crypto/kyber_r3/kyber512r3_indcpa_avx2.c delete mode 100644 pq-crypto/kyber_r3/kyber512r3_indcpa_avx2.h delete mode 100644 pq-crypto/kyber_r3/kyber512r3_invntt_avx2.S delete mode 100644 pq-crypto/kyber_r3/kyber512r3_kem.c delete mode 100644 pq-crypto/kyber_r3/kyber512r3_ntt.c delete mode 100644 pq-crypto/kyber_r3/kyber512r3_ntt.h delete mode 100644 pq-crypto/kyber_r3/kyber512r3_ntt_avx2.S delete mode 100644 pq-crypto/kyber_r3/kyber512r3_ntt_avx2.h delete mode 100644 pq-crypto/kyber_r3/kyber512r3_params.h delete mode 100644 pq-crypto/kyber_r3/kyber512r3_poly.c delete mode 100644 pq-crypto/kyber_r3/kyber512r3_poly.h delete mode 100644 pq-crypto/kyber_r3/kyber512r3_poly_avx2.c delete mode 100644 pq-crypto/kyber_r3/kyber512r3_poly_avx2.h delete mode 100644 pq-crypto/kyber_r3/kyber512r3_polyvec.c delete mode 100644 pq-crypto/kyber_r3/kyber512r3_polyvec.h delete mode 100644 pq-crypto/kyber_r3/kyber512r3_polyvec_avx2.c delete mode 100644 pq-crypto/kyber_r3/kyber512r3_polyvec_avx2.h delete mode 100644 pq-crypto/kyber_r3/kyber512r3_reduce.c delete mode 100644 pq-crypto/kyber_r3/kyber512r3_reduce.h delete mode 100644 pq-crypto/kyber_r3/kyber512r3_reduce_avx2.h delete mode 100644 pq-crypto/kyber_r3/kyber512r3_rejsample_avx2.c delete mode 100644 pq-crypto/kyber_r3/kyber512r3_rejsample_avx2.h delete mode 100644 pq-crypto/kyber_r3/kyber512r3_shuffle_avx2.S 
delete mode 100644 pq-crypto/kyber_r3/kyber512r3_symmetric-shake.c delete mode 100644 pq-crypto/kyber_r3/kyber512r3_symmetric.h delete mode 100644 pq-crypto/s2n_pq.c delete mode 100644 pq-crypto/s2n_pq_asm.h delete mode 100644 pq-crypto/s2n_pq_asm.mk delete mode 100644 pq-crypto/s2n_pq_random.c delete mode 100644 tests/features/S2N_KYBER512R3_AVX2_BMI2_SUPPORTED.c delete mode 100644 tests/features/S2N_KYBER512R3_AVX2_BMI2_SUPPORTED.flags delete mode 100644 tests/features/S2N_KYBER512R3_M256_INTRINSICS_SUPPORTED.c delete mode 100644 tests/features/S2N_KYBER512R3_M256_INTRINSICS_SUPPORTED.flags create mode 100644 tests/fuzz/allowed_coverage_failures.cfg delete mode 100644 tests/unit/s2n_pq_kem_kat_kyber_r3_test.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 0fa445dcf49..83e49be202f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,10 +19,6 @@ set(VERSION_MAJOR 1) set(VERSION_MINOR 0) set(VERSION_PATCH 0) -option(S2N_NO_PQ "Disables all Post Quantum Crypto code. You likely want this -for older compilers or uncommon platforms." OFF) -option(S2N_NO_PQ_ASM "Turns off the ASM for PQ Crypto even if it's available for the toolchain. -You likely want this on older compilers." OFF) option(SEARCH_LIBCRYPTO "Set this if you want to let S2N search libcrypto for you, otherwise a crypto target needs to be defined." ON) option(UNSAFE_TREAT_WARNINGS_AS_ERRORS "Compiler warnings are treated as errors. 
Warnings may @@ -62,47 +58,25 @@ file(GLOB_RECURSE TLS_SRC "tls/*.c") file(GLOB UTILS_HEADERS "utils/*.h") file(GLOB UTILS_SRC "utils/*.c") -# Always include the top-level pq-crypto/ files -file(GLOB PQ_HEADERS "pq-crypto/*.h") -file(GLOB PQ_SRC "pq-crypto/*.c") - message(STATUS "Detected CMAKE_SYSTEM_PROCESSOR as ${CMAKE_SYSTEM_PROCESSOR}") if(CMAKE_SIZEOF_VOID_P EQUAL 4) - message(STATUS "Detected 32-Bit system - disabling PQ crypto assembly optimizations") - set(S2N_NO_PQ_ASM ON) + message(STATUS "Detected 32-Bit system") else() message(STATUS "Detected 64-Bit system") endif() -if(S2N_NO_PQ) - # PQ is disabled, so we do not include any PQ crypto code - message(STATUS "S2N_NO_PQ flag was detected - disabling PQ crypto") - set(S2N_NO_PQ_ASM ON) -else() - # PQ is enabled, so include all of the PQ crypto code - file(GLOB PQ_HEADERS - "pq-crypto/*.h" - "pq-crypto/kyber_r3/*.h") - - file(GLOB PQ_SRC - "pq-crypto/*.c" - "pq-crypto/kyber_r3/*.c") -endif() - ##be nice to visual studio users if(MSVC) source_group("Header Files\\s2n\\api" FILES ${API_HEADERS} ${API_UNSTABLE_HEADERS}) source_group("Header Files\\s2n\\crypto" FILES ${CRYPTO_HEADERS}) source_group("Header Files\\s2n\\error" FILES ${ERROR_HEADERS}) - source_group("Header Files\\s2n\\pq-crypto" FILES ${PQ_HEADERS}) source_group("Header Files\\s2n\\stuffer" FILES ${STUFFER_HEADERS}) source_group("Header Files\\s2n\\tls" FILES ${TLS_HEADERS}) source_group("Header Files\\s2n\\utils" FILES ${UTILS_HEADERS}) source_group("Source Files\\crypto" FILES ${CRYPTO_SRC}) source_group("Source Files\\error" FILES ${ERROR_SRC}) - source_group("Source Files\\pq-crypto" FILES ${PQ_SRC}) source_group("Source Files\\stuffer" FILES ${STUFFER_SRC}) source_group("Source Files\\tls" FILES ${TLS_SRC}) source_group("Source Files\\utils" FILES ${UTILS_SRC}) @@ -135,7 +109,6 @@ file(GLOB S2N_HEADERS ${API_UNSTABLE_HEADERS} ${CRYPTO_HEADERS} ${ERROR_HEADERS} - ${PQ_HEADERS} ${STUFFER_HEADERS} ${TLS_HEADERS} ${UTILS_HEADERS} @@ -144,7 
+117,6 @@ file(GLOB S2N_HEADERS file(GLOB S2N_SRC ${CRYPTO_SRC} ${ERROR_SRC} - ${PQ_SRC} ${STUFFER_SRC} ${TLS_SRC} ${UTILS_SRC} @@ -186,10 +158,6 @@ if(NOT APPLE) set(CMAKE_SHARED_LINKER_FLAGS -Wl,-z,noexecstack,-z,relro,-z,now) endif() -if(S2N_NO_PQ) - add_definitions(-DS2N_NO_PQ) -endif() - # Whether to fail the build when compiling s2n's portable C code with non-portable assembly optimizations. Doing this # can lead to runtime crashes if build artifacts are built on modern hardware, but deployed to older hardware without # newer CPU instructions. s2n, by default, should be backwards compatible with older CPU types so this flag should be @@ -367,32 +335,6 @@ if (NOT S2N_EXECINFO_AVAILABLE) endif() feature_probe_result(S2N_STACKTRACE ${S2N_STACKTRACE}) -set(S2N_KYBER512R3_AVX2_BMI2 FALSE) -if(NOT S2N_NO_PQ_ASM) - # Kyber Round-3 code has several different optimizations which require - # specific compiler flags to be supported by the compiler. - # So for each needed instruction set extension we check if the compiler - # supports it and set proper compiler flags to be added later to the - # Kyber compilation units. - if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|amd64|AMD64)$") - # Some platforms support -mavx2 flag but not m256 intrinsics required to use them. Only enable Kyber assembly - # optimizations if both are supported. See https://github.com/aws/s2n-tls/pull/3005 for more info. 
- if(S2N_KYBER512R3_AVX2_BMI2_SUPPORTED AND S2N_KYBER512R3_M256_INTRINSICS_SUPPORTED) - set(S2N_KYBER512R3_AVX2_BMI2 TRUE) - enable_language(ASM) - - # add the assembly files to the project - FILE(GLOB KYBER512R3_AVX2_BMI2_ASM_SRCS "pq-crypto/kyber_r3/*_avx2.S") - target_sources(${PROJECT_NAME} PRIVATE ${KYBER512R3_AVX2_BMI2_ASM_SRCS}) - - # compile the C files with avx flags - FILE(GLOB KYBER512R3_AVX2_BMI2_SRCS "pq-crypto/kyber_r3/*_avx2.c") - set_source_files_properties(${KYBER512R3_AVX2_BMI2_SRCS} PROPERTIES COMPILE_FLAGS ${S2N_KYBER512R3_AVX2_BMI2_SUPPORTED_FLAGS}) - endif() - endif() -endif() -feature_probe_result(S2N_KYBER512R3_AVX2_BMI2 ${S2N_KYBER512R3_AVX2_BMI2}) - if (S2N_INTERN_LIBCRYPTO) # Check if the AWS::crypto target has beeen added and handle it diff --git a/LICENSE b/LICENSE index fb388fd4731..d6456956733 100644 --- a/LICENSE +++ b/LICENSE @@ -200,25 +200,3 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. - - -============================================================================ - S2N SUBCOMPONENTS: - - The s2n Project contains subcomponents with separate copyright notices - and license terms. Your use of the source code for these subcomponents is - subject to the terms and conditions of the following licenses. - - -======================================================================== -Third party MIT licenses -======================================================================== - -The following components are provided under the MIT License. See project link for details. 
- - - SIKE - -> s2n/pq-crypto/sike_r1/LICENSE.txt - - - diff --git a/Makefile b/Makefile index 5b0e7279e02..e3f99c0e659 100644 --- a/Makefile +++ b/Makefile @@ -47,7 +47,6 @@ include s2n.mk .PHONY : libs libs: - $(MAKE) -C pq-crypto $(MAKE) -C utils $(MAKE) -C error $(MAKE) -C stuffer @@ -101,12 +100,11 @@ run-lcov: $(MAKE) -C bin lcov $(MAKE) -C crypto lcov $(MAKE) -C error lcov - $(MAKE) -C pq-crypto run-lcov $(MAKE) -C stuffer lcov $(MAKE) -C tests lcov $(MAKE) -C tls run-lcov $(MAKE) -C utils lcov - lcov -a crypto/coverage.info -a error/coverage.info -a pq-crypto/coverage.info -a pq-crypto/kyber_r3/coverage.info -a stuffer/coverage.info -a tls/coverage.info -a $(wildcard tls/*/coverage.info) -a utils/coverage.info --output ${COVERAGE_DIR}/all_coverage.info + lcov -a crypto/coverage.info -a error/coverage.info -a stuffer/coverage.info -a tls/coverage.info -a $(wildcard tls/*/coverage.info) -a utils/coverage.info --output ${COVERAGE_DIR}/all_coverage.info .PHONY : run-genhtml run-genhtml: @@ -115,7 +113,6 @@ run-genhtml: .PHONY : indent indent: - $(MAKE) -C pq-crypto indentsource $(MAKE) -C tests indentsource $(MAKE) -C stuffer indentsource $(MAKE) -C crypto indentsource @@ -147,7 +144,6 @@ uninstall: .PHONY : clean clean: - $(MAKE) -C pq-crypto clean $(MAKE) -C tests clean $(MAKE) -C stuffer decruft $(MAKE) -C crypto decruft diff --git a/bindings/rust/generate.sh b/bindings/rust/generate.sh index be87352c59a..a45635c32b7 100755 --- a/bindings/rust/generate.sh +++ b/bindings/rust/generate.sh @@ -19,7 +19,6 @@ cp -r \ ../../api \ ../../crypto \ ../../error \ - ../../pq-crypto \ ../../stuffer \ ../../tls \ ../../utils \ diff --git a/bindings/rust/s2n-tls-sys/build.rs b/bindings/rust/s2n-tls-sys/build.rs index 1925cfd72c3..8710c97b942 100644 --- a/bindings/rust/s2n-tls-sys/build.rs +++ b/bindings/rust/s2n-tls-sys/build.rs @@ -93,6 +93,8 @@ fn build_vendored() { let mut build = builder(&libcrypto); + // TODO: update rust bindings to handle no pq-crypto dir + let pq = 
option_env("CARGO_FEATURE_PQ").is_some(); // TODO each pq section needs to be built separately since it diff --git a/codebuild/bin/grep_simple_mistakes.sh b/codebuild/bin/grep_simple_mistakes.sh index 1dbe9fe9208..84c3810b8f6 100755 --- a/codebuild/bin/grep_simple_mistakes.sh +++ b/codebuild/bin/grep_simple_mistakes.sh @@ -18,7 +18,7 @@ FAILED=0 # Grep for any instances of raw memcpy() function. s2n code should instead be # using one of the *_ENSURE_MEMCPY macros. ############################################# -S2N_FILES_ASSERT_NOT_USING_MEMCPY=$(find "$PWD" -type f -name "s2n*.[ch]" -not -path "*/tests/*" -not -path "*/pq-crypto/*") +S2N_FILES_ASSERT_NOT_USING_MEMCPY=$(find "$PWD" -type f -name "s2n*.[ch]" -not -path "*/tests/*") for file in $S2N_FILES_ASSERT_NOT_USING_MEMCPY; do RESULT_NUM_LINES=`grep 'memcpy(' $file | wc -l` if [ "${RESULT_NUM_LINES}" != 0 ]; then @@ -180,7 +180,7 @@ done ## Assert that there are no new uses of S2N_ERROR_IF # TODO add crypto, tls (see https://github.com/aws/s2n-tls/issues/2635) ############################################# -S2N_ERROR_IF_FREE="bin error pq-crypto scram stuffer utils tests" +S2N_ERROR_IF_FREE="bin error scram stuffer utils tests" for dir in $S2N_ERROR_IF_FREE; do files=$(find "$dir" -type f -name "*.c" -path "*") for file in $files; do diff --git a/compliance/generate_report.sh b/compliance/generate_report.sh index d46176fed30..cac3c51299e 100755 --- a/compliance/generate_report.sh +++ b/compliance/generate_report.sh @@ -15,7 +15,6 @@ duvet \ --source-pattern '(*=,*#)bin/**/*.[ch]' \ --source-pattern '(*=,*#)crypto/**/*.[ch]' \ --source-pattern '(*=,*#)error/**/*.[ch]' \ - --source-pattern '(*=,*#)pq-crypto/**/*.[ch]' \ --source-pattern '(*=,*#)stuffer/**/*.[ch]' \ --source-pattern '(*=,*#)tests/**/*.[ch]' \ --source-pattern '(*=,*#)tls/**/*.[ch]' \ diff --git a/pq-crypto/s2n_kyber_evp.c b/crypto/s2n_kyber_evp.c similarity index 80% rename from pq-crypto/s2n_kyber_evp.c rename to crypto/s2n_kyber_evp.c index 
457e33ddc98..277ffc88e39 100644 --- a/pq-crypto/s2n_kyber_evp.c +++ b/crypto/s2n_kyber_evp.c @@ -16,13 +16,13 @@ #include #include +#include "crypto/s2n_pq.h" #include "error/s2n_errno.h" -#include "pq-crypto/s2n_pq.h" #include "tls/s2n_kem.h" #include "utils/s2n_safety.h" #include "utils/s2n_safety_macros.h" -#if defined(S2N_LIBCRYPTO_SUPPORTS_KYBER) && !defined(S2N_NO_PQ) +#if defined(S2N_LIBCRYPTO_SUPPORTS_KYBER) DEFINE_POINTER_CLEANUP_FUNC(EVP_PKEY *, EVP_PKEY_free); DEFINE_POINTER_CLEANUP_FUNC(EVP_PKEY_CTX *, EVP_PKEY_CTX_free); @@ -30,7 +30,6 @@ DEFINE_POINTER_CLEANUP_FUNC(EVP_PKEY_CTX *, EVP_PKEY_CTX_free); int s2n_kyber_evp_generate_keypair(IN const struct s2n_kem *kem, OUT uint8_t *public_key, OUT uint8_t *secret_key) { - POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_PQ_DISABLED); DEFER_CLEANUP(EVP_PKEY_CTX *kyber_pkey_ctx = EVP_PKEY_CTX_new_id(EVP_PKEY_KEM, NULL), EVP_PKEY_CTX_free_pointer); POSIX_GUARD_PTR(kyber_pkey_ctx); POSIX_GUARD_OSSL(EVP_PKEY_CTX_kem_set_params(kyber_pkey_ctx, kem->kem_nid), S2N_ERR_PQ_CRYPTO); @@ -53,7 +52,6 @@ int s2n_kyber_evp_generate_keypair(IN const struct s2n_kem *kem, OUT uint8_t *pu int s2n_kyber_evp_encapsulate(IN const struct s2n_kem *kem, OUT uint8_t *ciphertext, OUT uint8_t *shared_secret, IN const uint8_t *public_key) { - POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_PQ_DISABLED); DEFER_CLEANUP(EVP_PKEY *kyber_pkey = EVP_PKEY_kem_new_raw_public_key(kem->kem_nid, public_key, kem->public_key_length), EVP_PKEY_free_pointer); POSIX_GUARD_PTR(kyber_pkey); @@ -74,7 +72,6 @@ int s2n_kyber_evp_encapsulate(IN const struct s2n_kem *kem, OUT uint8_t *ciphert int s2n_kyber_evp_decapsulate(IN const struct s2n_kem *kem, OUT uint8_t *shared_secret, IN const uint8_t *ciphertext, IN const uint8_t *private_key) { - POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_PQ_DISABLED); DEFER_CLEANUP(EVP_PKEY *kyber_pkey = EVP_PKEY_kem_new_raw_secret_key(kem->kem_nid, private_key, kem->private_key_length), EVP_PKEY_free_pointer); POSIX_GUARD_PTR(kyber_pkey); 
@@ -90,36 +87,24 @@ int s2n_kyber_evp_decapsulate(IN const struct s2n_kem *kem, OUT uint8_t *shared_ return S2N_SUCCESS; } -#elif !defined(S2N_NO_PQ) /* Use interned Kyber512 implementation, otherwise bail. */ +#else /* If !S2N_LIBCRYPTO_SUPPORTS_KYBER, we won't have a Kyber impl so define relevant stubs here. */ int s2n_kyber_evp_generate_keypair(IN const struct s2n_kem *kem, OUT uint8_t *public_key, OUT uint8_t *secret_key) { - POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_PQ_DISABLED); - if (kem == &s2n_kyber_512_r3) { - return s2n_kyber_512_r3_crypto_kem_keypair(kem, public_key, secret_key); - } - POSIX_BAIL(S2N_ERR_UNIMPLEMENTED); + POSIX_BAIL(S2N_ERR_NO_SUPPORTED_LIBCRYPTO_API); } int s2n_kyber_evp_encapsulate(IN const struct s2n_kem *kem, OUT uint8_t *ciphertext, OUT uint8_t *shared_secret, IN const uint8_t *public_key) { - POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_PQ_DISABLED); - if (kem == &s2n_kyber_512_r3) { - return s2n_kyber_512_r3_crypto_kem_enc(kem, ciphertext, shared_secret, public_key); - } - POSIX_BAIL(S2N_ERR_UNIMPLEMENTED); + POSIX_BAIL(S2N_ERR_NO_SUPPORTED_LIBCRYPTO_API); } int s2n_kyber_evp_decapsulate(IN const struct s2n_kem *kem, OUT uint8_t *shared_secret, IN const uint8_t *ciphertext, IN const uint8_t *secret_key) { - POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_PQ_DISABLED); - if (kem == &s2n_kyber_512_r3) { - return s2n_kyber_512_r3_crypto_kem_dec(kem, shared_secret, ciphertext, secret_key); - } - POSIX_BAIL(S2N_ERR_UNIMPLEMENTED); + POSIX_BAIL(S2N_ERR_NO_SUPPORTED_LIBCRYPTO_API); } #endif diff --git a/pq-crypto/s2n_kyber_evp.h b/crypto/s2n_kyber_evp.h similarity index 100% rename from pq-crypto/s2n_kyber_evp.h rename to crypto/s2n_kyber_evp.h diff --git a/pq-crypto/s2n_pq_random.h b/crypto/s2n_pq.c similarity index 58% rename from pq-crypto/s2n_pq_random.h rename to crypto/s2n_pq.c index 04dcd9cd93d..2f00663142d 100644 --- a/pq-crypto/s2n_pq_random.h +++ b/crypto/s2n_pq.c @@ -13,11 +13,23 @@ * permissions and limitations under the License. 
*/ -#pragma once +#include "s2n_pq.h" -#include "utils/s2n_result.h" +#include "crypto/s2n_openssl.h" -typedef S2N_RESULT (*s2n_get_random_bytes_callback)(uint8_t *buffer, uint32_t num_bytes); +bool s2n_libcrypto_supports_kyber() +{ + /* S2N_LIBCRYPTO_SUPPORTS_KYBER will be auto-detected and #defined if + * ./tests/features/S2N_LIBCRYPTO_SUPPORTS_KYBER.c successfully compiles + */ +#if defined(S2N_LIBCRYPTO_SUPPORTS_KYBER) + return true; +#else + return false; +#endif +} -S2N_RESULT s2n_get_random_bytes(uint8_t *buffer, uint32_t num_bytes); -S2N_RESULT s2n_set_rand_bytes_callback_for_testing(s2n_get_random_bytes_callback rand_bytes_callback); +bool s2n_pq_is_enabled() +{ + return s2n_libcrypto_supports_kyber(); +} diff --git a/pq-crypto/s2n_pq.h b/crypto/s2n_pq.h similarity index 77% rename from pq-crypto/s2n_pq.h rename to crypto/s2n_pq.h index edba33138f9..cd7b59ffa4b 100644 --- a/pq-crypto/s2n_pq.h +++ b/crypto/s2n_pq.h @@ -18,14 +18,8 @@ #include #include "crypto/s2n_fips.h" -#include "pq-crypto/s2n_pq_asm.h" #include "utils/s2n_result.h" #include "utils/s2n_safety.h" -bool s2n_kyber512r3_is_avx2_bmi2_enabled(void); -S2N_RESULT s2n_try_enable_kyber512r3_opt_avx2_bmi2(void); -S2N_RESULT s2n_disable_kyber512r3_opt_avx2_bmi2(void); - bool s2n_pq_is_enabled(void); bool s2n_libcrypto_supports_kyber(void); -S2N_RESULT s2n_pq_init(void); diff --git a/error/s2n_errno.c b/error/s2n_errno.c index fbe52416b5a..f71c734aac2 100644 --- a/error/s2n_errno.c +++ b/error/s2n_errno.c @@ -263,7 +263,6 @@ static const char *no_such_error = "Internal s2n error"; ERR_ENTRY(S2N_ERR_INVALID_STATE, "Invalid state, this is the result of invalid use of an API. 
Check the API documentation for the function that raised this error for more info") \ ERR_ENTRY(S2N_ERR_UNSUPPORTED_WITH_QUIC, "Functionality not supported when running with QUIC support enabled") \ ERR_ENTRY(S2N_ERR_PQ_CRYPTO, "An error occurred in a post-quantum crypto function") \ - ERR_ENTRY(S2N_ERR_PQ_DISABLED, "Post-quantum crypto is disabled") \ ERR_ENTRY(S2N_ERR_DUPLICATE_PSK_IDENTITIES, "The list of pre-shared keys provided contains duplicate psk identities") \ ERR_ENTRY(S2N_ERR_OFFERED_PSKS_TOO_LONG, "The total pre-shared key data is too long to send over the wire") \ ERR_ENTRY(S2N_ERR_INVALID_SESSION_TICKET, "Session ticket data is not valid") \ diff --git a/error/s2n_errno.h b/error/s2n_errno.h index 1ff5070717e..c51881fb12e 100644 --- a/error/s2n_errno.h +++ b/error/s2n_errno.h @@ -222,7 +222,6 @@ typedef enum { S2N_ERR_ASYNC_CALLBACK_FAILED, S2N_ERR_ASYNC_MORE_THAN_ONE, S2N_ERR_PQ_CRYPTO, - S2N_ERR_PQ_DISABLED, S2N_ERR_INVALID_CERT_STATE, S2N_ERR_INVALID_EARLY_DATA_STATE, S2N_ERR_PKEY_CTX_INIT, diff --git a/lib/Makefile b/lib/Makefile index e8c66ff5369..bea08e291b0 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -13,7 +13,7 @@ # permissions and limitations under the License. # -OBJS = $(wildcard ../utils/*.o ../stuffer/*.o ../tls/*.o ../tls/*/*.o ../iana/*.o ../crypto/*.o ../error/*.o ../pq-crypto/*.o ../pq-crypto/kyber_r3/*.o) +OBJS = $(wildcard ../utils/*.o ../stuffer/*.o ../tls/*.o ../tls/*/*.o ../iana/*.o ../crypto/*.o ../error/*.o) .PHONY : all all: libs2n.a libs2n.so libs2n.dylib diff --git a/pq-crypto/Makefile b/pq-crypto/Makefile deleted file mode 100644 index d0a95386ab2..00000000000 --- a/pq-crypto/Makefile +++ /dev/null @@ -1,41 +0,0 @@ -# -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. 
-# A copy of the License is located at -# -# http://aws.amazon.com/apache2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. -# -SRCS=$(wildcard *.c) -OBJS=$(SRCS:.c=.o) - -BCS_11=s2n_pq_random.bc s2n_pq.bc -BCS1=$(addprefix $(BITCODE_DIR), $(BCS_11)) - -ifdef S2N_NO_PQ -# If S2N_NO_PQ is defined, we build only the top-level pq-crypto directory, and exclude all the PQ crypto code -S2N_NO_PQ_ASM = 1 -.PHONY : all -all: $(OBJS) -else -# Explicitly specify `try_include_asm` target for KEMs that attempt to include assembly -.PHONY : all -all: $(OBJS) - $(MAKE) -C kyber_r3 -endif - -.PHONY : run-lcov -run-lcov: lcov - $(MAKE) -C kyber_r3 lcov - -.PHONY : clean -clean: decruft - $(MAKE) -C kyber_r3 decruft - -include ../s2n.mk diff --git a/pq-crypto/README.md b/pq-crypto/README.md deleted file mode 100644 index 3e1c0d05436..00000000000 --- a/pq-crypto/README.md +++ /dev/null @@ -1,87 +0,0 @@ -# Post-quantum cryptography for s2n -This directory contains code for new post-quantum key exchange mechanisms. There are no known computationally feasible -attacks (classical or quantum) against these algorithms when used with the recommended key lengths. - -## Quantum computers -Quantum computers use the properties of quantum mechanics to evaluate quantum algorithms. These algorithms can solve some -classically hard (exponential time) problems quickly (polynomial time). Shor's algorithm is one such algorithm which can -factor large integers, thus breaking RSA encryption and digital signature, and another quantum algorithm can solve the -discrete logarithm problem over arbitrary groups thus breaking Diffie–Hellman and elliptic curve Diffie–Hellman key -exchange. 
- -## Post-quantum cryptography -Post-quantum public-key cryptographic algorithms run on a classical computer and are conjectured secure against both -classical and quantum attacks. NIST is in the process of reviewing submissions and standardizing them, -see more info on the [NIST website](https://csrc.nist.gov/Projects/Post-Quantum-Cryptography/Post-Quantum-Cryptography-Standardization). -Until the review and standardization is complete the post-quantum key exchanges in s2n **must not** be used for key -establishment by themselves. Instead they should only be used as part of a hybrid key exchange, which combines a -post-quantum key exchange scheme and a classical key exchange scheme. - -## Hybrid key exchange -A hybrid key exchange combines both the high assurance of classical key exchange with the conjectured quantum-resistance -of newly proposed key exchanges. For hybrid TLS 1.2, s2n implements the hybrid specification from [this RFC](https://tools.ietf.org/html/draft-campagna-tls-bike-sike-hybrid-01). -See [this s2n issue](https://github.com/awslabs/s2n/issues/904) for more up-to-date information. For hybrid TLS 1.3, s2n -implements the hybrid specification from [this draft RFC](https://tools.ietf.org/html/draft-stebila-tls-hybrid-design). -See also [this doc](https://docs.google.com/spreadsheets/d/12YarzaNv3XQNLnvDsWLlRKwtZFhRrDdWf36YlzwrPeg/edit#gid=0) that -defines hybrid group values for interoperability. - -## How to disable optimized assembly code for PQ Crypto -Certain post-quantum KEM algorithms included in s2n use optimized assembly code for efficient computation. When compiling s2n on compatible toolchains, -the optimized assembly code will significantly improve performance of the post-quantum cryptographic operations. s2n attempts to detect whether or not -the architecture is compatible with the assembly code, and falls back to the portable C implementation if it detects incompatibility. 
However, some users -may wish to manually force s2n to use the portable C implementation. To do so, simply `export S2N_NO_PQ_ASM=1` as an environment variable before compiling. - -## How to disable all PQ Crypto -Users may have need to compile s2n without any PQ crypto support whatsoever. To so do, `export S2N_NO_PQ=1` as an environment -variable before compiling. - -## How to add a new PQ KEM family for use in hybrid TLS 1.2 -1. Add the code to `pq-crypto/KEM_NAME/` - 1. Update `pq-crypto/Makefile` to build that directory - 1. Update `lib/Makefile` to also include that directory - 1. Update the KEM code to include `pq-crypto/s2n_pq_random.h` and use the function `s2n_get_random_bytes` for any random data the KEM needs - 1. Create a `pq-crypto/KEM_NAME/KEM_NAME.h` with the size of objects and method definitions -1. Define the new cipher suite value and KEM extension value in `tls/s2n_tls_parameters.h` -1. Create the `KEM_NAME` `s2n_kem` struct in `tls/s2n_kem.c` - 1. Create the `supported_KEM_NAME_params` array in `tls/s2n_kem.c` - 1. Add the new kem to the `kem_mapping` with the correct cipher suite value -1. Add known answer tests using `s2n_test_kem_with_kat()` in `tests/unit/s2n_KEM_NAME_kat_test.c` -1. Add fuzz testing in `tests/fuzz/s2n_KEM_NAME_fuzz_test.c` -1. Add formal verification in `tests/saw/KEM_NAME/verify.saw` -1. Create a new `s2n_cipher_suite` in `tls/s2n_cipher_suites.c` -1. Create a new `s2n_cipher_preferences` in `tls/s2n_cipher_preferences.c` that uses the new cipher suite - 1. Once this change is made, the KEM will be available for use in TLS handshakes; ensure that all testing/verification has been completed - -## How to add a new variant to an existing PQ KEM family for use in hybrid TLS 1.2 -1. Add the code to `pq-crypto/KEM_NAME/` - 1. Update `pq-crypto/Makefile` to build that directory - 1. Update `lib/Makefile` to also include that directory - 1. 
Update the KEM code to include `pq-crypto/s2n_pq_random.h` and use the function `s2n_get_random_bytes` for any random data the KEM needs - 1. Create a `pq-crypto/KEM_NAME/KEM_NAME.h` with the size of objects and method definitions -1. Define the KEM extension value in `tls/s2n_tls_parameters.h` -1. Create the `KEM_NAME` `s2n_kem` struct in `tls/s2n_kem.c` -1. Add known answer tests using `s2n_test_kem_with_kat()` in `tests/unit/s2n_KEM_NAME_kat_test.c` -1. Add fuzz testing in `tests/fuzz/s2n_KEM_NAME_fuzz_test.c` -1. Add formal verification in `tests/saw/KEM_NAME/verify.saw` -1. Update the appropriate `supported_KEM_NAME_params` array in `tls/s2n_kem.c` - 1. Once this change is made, the KEM extension will be available for use in TLS handshakes; ensure that all testing/verification has been completed - -## How to use PQ cipher suites for hybrid TLS 1.2 -1. Checkout s2n `git clone https://github.com/awslabs/s2n.git` -1. Following the docs/USAGE-GUIDE.md build s2n -1. Use the sample server and client in the bin directory: -```bash -# Terminal 1 -# Use the s2nd CLI tool to start a TLS daemon with the KMS-PQ-TLS-1-0-2019-06 cipher preferences listening on port 8888 -export PATH_TO_S2N=/path/to/s2n -export LD_LIBRARY_PATH=${PATH_TO_S2N}/test-deps/openssl-1.1.1/lib:${PATH_TO_S2N}/test-deps/openssl-1.1.1/lib:${PATH_TO_S2N}/lib:${PATH_TO_S2N}/bin -export PATH=${PATH_TO_S2N}/bin:$PATH -s2nd --cert ${PATH_TO_S2N}/tests/pems/rsa_2048_sha256_wildcard_cert.pem --key ${PATH_TO_S2N}/tests/pems/rsa_2048_sha256_wildcard_key.pem --negotiate --ciphers KMS-PQ-TLS-1-0-2019-06 0.0.0.0 8888 - -# Terminal 2 -# Use the s2nc TLS CLI client to connect to the TLS server daemon started in Terminal 1 on port 8888 -export PATH_TO_S2N=/path/to/s2n -export LD_LIBRARY_PATH=${PATH_TO_S2N}/test-deps/openssl-1.1.1/lib:${PATH_TO_S2N}/test-deps/openssl-1.1.1/lib:${PATH_TO_S2N}/lib:${PATH_TO_S2N}/bin -export PATH=${PATH_TO_S2N}/bin:$PATH -s2nc -i --ciphers KMS-PQ-TLS-1-0-2019-06 0.0.0.0 8888 -``` diff 
--git a/pq-crypto/kyber_r3/KeccakP-1600-times4-SIMD256_avx2.c b/pq-crypto/kyber_r3/KeccakP-1600-times4-SIMD256_avx2.c deleted file mode 100644 index 349442f65c5..00000000000 --- a/pq-crypto/kyber_r3/KeccakP-1600-times4-SIMD256_avx2.c +++ /dev/null @@ -1,1284 +0,0 @@ -/* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". - -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -#include -#include -#include -// extra headers are removed: smmintrin.h, wmmintrin.h and emmintrin.h - -#if defined(S2N_KYBER512R3_AVX2_BMI2) -#include -#include "KeccakP-align_avx2.h" -#include "KeccakP-1600-times4-SnP_avx2.h" -#include "KeccakP-SIMD256-config_avx2.h" - -#include "KeccakP-brg_endian_avx2.h" -#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) -#error Expecting a little-endian platform -#endif - -typedef unsigned char UINT8; -typedef unsigned long long int UINT64; -typedef __m128i V128; -typedef __m256i V256; - -#define laneIndex(instanceIndex, lanePosition) ((lanePosition)*4 + instanceIndex) - -#if defined(KeccakP1600times4_useAVX2) - #define ANDnu256(a, b) _mm256_andnot_si256(a, b) - // correcting cast-align error - // old version: #define CONST256(a) _mm256_load_si256((const V256 *)&(a)) - #define CONST256(a) _mm256_load_si256((const void *)&(a)) - #define CONST256_64(a) (V256)_mm256_broadcast_sd((const double*)(&a)) - #define LOAD256(a) _mm256_load_si256((const V256 *)&(a)) - // correcting cast-align error - // old version: #define LOAD256u(a) _mm256_loadu_si256((const V256 *)&(a)) - #define LOAD256u(a) _mm256_loadu_si256((const void *)&(a)) - 
#define LOAD4_64(a, b, c, d) _mm256_set_epi64x((UINT64)(a), (UINT64)(b), (UINT64)(c), (UINT64)(d)) - #define ROL64in256(d, a, o) d = _mm256_or_si256(_mm256_slli_epi64(a, o), _mm256_srli_epi64(a, 64-(o))) - #define ROL64in256_8(d, a) d = _mm256_shuffle_epi8(a, CONST256(rho8)) - #define ROL64in256_56(d, a) d = _mm256_shuffle_epi8(a, CONST256(rho56)) -static const UINT64 rho8[4] = {0x0605040302010007, 0x0E0D0C0B0A09080F, 0x1615141312111017, 0x1E1D1C1B1A19181F}; -static const UINT64 rho56[4] = {0x0007060504030201, 0x080F0E0D0C0B0A09, 0x1017161514131211, 0x181F1E1D1C1B1A19}; - #define STORE256(a, b) _mm256_store_si256((V256 *)&(a), b) - // correcting cast-align error - // old version: #define STORE256u(a, b) _mm256_storeu_si256((V256 *)&(a), b) - #define STORE256u(a, b) _mm256_storeu_si256((void *)&(a), b) - #define STORE2_128(ah, al, v) _mm256_storeu2_m128d((V128*)&(ah), (V128*)&(al), v) - #define XOR256(a, b) _mm256_xor_si256(a, b) - #define XOReq256(a, b) a = _mm256_xor_si256(a, b) - #define UNPACKL( a, b ) _mm256_unpacklo_epi64((a), (b)) - #define UNPACKH( a, b ) _mm256_unpackhi_epi64((a), (b)) - #define PERM128( a, b, c ) (V256)_mm256_permute2f128_ps((__m256)(a), (__m256)(b), c) - #define SHUFFLE64( a, b, c ) (V256)_mm256_shuffle_pd((__m256d)(a), (__m256d)(b), c) - - #define UNINTLEAVE() lanesL01 = UNPACKL( lanes0, lanes1 ), \ - lanesH01 = UNPACKH( lanes0, lanes1 ), \ - lanesL23 = UNPACKL( lanes2, lanes3 ), \ - lanesH23 = UNPACKH( lanes2, lanes3 ), \ - lanes0 = PERM128( lanesL01, lanesL23, 0x20 ), \ - lanes2 = PERM128( lanesL01, lanesL23, 0x31 ), \ - lanes1 = PERM128( lanesH01, lanesH23, 0x20 ), \ - lanes3 = PERM128( lanesH01, lanesH23, 0x31 ) - - #define INTLEAVE() lanesL01 = PERM128( lanes0, lanes2, 0x20 ), \ - lanesH01 = PERM128( lanes1, lanes3, 0x20 ), \ - lanesL23 = PERM128( lanes0, lanes2, 0x31 ), \ - lanesH23 = PERM128( lanes1, lanes3, 0x31 ), \ - lanes0 = SHUFFLE64( lanesL01, lanesH01, 0x00 ), \ - lanes1 = SHUFFLE64( lanesL01, lanesH01, 0x0F ), \ - lanes2 = 
SHUFFLE64( lanesL23, lanesH23, 0x00 ), \ - lanes3 = SHUFFLE64( lanesL23, lanesH23, 0x0F ) - -#endif - -#define SnP_laneLengthInBytes 8 - -void KeccakP1600times4_InitializeAll(void *states) -{ - memset(states, 0, KeccakP1600times4_statesSizeInBytes); -} - -void KeccakP1600times4_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length) -{ - unsigned int sizeLeft = length; - unsigned int lanePosition = offset/SnP_laneLengthInBytes; - unsigned int offsetInLane = offset%SnP_laneLengthInBytes; - const unsigned char *curData = data; - UINT64 *statesAsLanes = (UINT64 *)states; - - if ((sizeLeft > 0) && (offsetInLane != 0)) { - unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane; - UINT64 lane = 0; - if (bytesInLane > sizeLeft) - bytesInLane = sizeLeft; - memcpy((unsigned char*)&lane + offsetInLane, curData, bytesInLane); - statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane; - sizeLeft -= bytesInLane; - lanePosition++; - curData += bytesInLane; - } - - while(sizeLeft >= SnP_laneLengthInBytes) { - // correcting cast-align error - // old version: UINT64 lane = *((const UINT64*)curData); - UINT64 lane = *((const UINT64*)(const void *)curData); - statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane; - sizeLeft -= SnP_laneLengthInBytes; - lanePosition++; - curData += SnP_laneLengthInBytes; - } - - if (sizeLeft > 0) { - UINT64 lane = 0; - memcpy(&lane, curData, sizeLeft); - statesAsLanes[laneIndex(instanceIndex, lanePosition)] ^= lane; - } -} - -void KeccakP1600times4_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset) -{ - V256 *stateAsLanes = (V256 *)states; - unsigned int i; - // correcting cast-align errors - // old version: const UINT64 *curData0 = (const UINT64 *)data; - const UINT64 *curData0 = (const void *)data; - // old version: const UINT64 *curData1 = (const UINT64 *)(data+laneOffset*SnP_laneLengthInBytes); - const UINT64 
*curData1 = (const void *)(data+laneOffset*SnP_laneLengthInBytes); - // old version: const UINT64 *curData2 = (const UINT64 *)(data+laneOffset*2*SnP_laneLengthInBytes); - const UINT64 *curData2 = (const void *)(data+laneOffset*2*SnP_laneLengthInBytes); - // old version: const UINT64 *curData3 = (const UINT64 *)(data+laneOffset*3*SnP_laneLengthInBytes); - const UINT64 *curData3 = (const void *)(data+laneOffset*3*SnP_laneLengthInBytes); - V256 lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23; - - #define Xor_In( argIndex ) XOReq256(stateAsLanes[argIndex], LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex])) - - #define Xor_In4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\ - lanes1 = LOAD256u( curData1[argIndex]),\ - lanes2 = LOAD256u( curData2[argIndex]),\ - lanes3 = LOAD256u( curData3[argIndex]),\ - INTLEAVE(),\ - XOReq256( stateAsLanes[argIndex+0], lanes0 ),\ - XOReq256( stateAsLanes[argIndex+1], lanes1 ),\ - XOReq256( stateAsLanes[argIndex+2], lanes2 ),\ - XOReq256( stateAsLanes[argIndex+3], lanes3 ) - - if ( laneCount >= 16 ) { - Xor_In4( 0 ); - Xor_In4( 4 ); - Xor_In4( 8 ); - Xor_In4( 12 ); - if ( laneCount >= 20 ) { - Xor_In4( 16 ); - for(i=20; i 0) && (offsetInLane != 0)) { - unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane; - if (bytesInLane > sizeLeft) - bytesInLane = sizeLeft; - memcpy( ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, curData, bytesInLane); - sizeLeft -= bytesInLane; - lanePosition++; - curData += bytesInLane; - } - - while(sizeLeft >= SnP_laneLengthInBytes) { - // correcting cast-align error - // old version: UINT64 lane = *((const UINT64*)curData); - UINT64 lane = *((const UINT64*)(const void*)curData); - statesAsLanes[laneIndex(instanceIndex, lanePosition)] = lane; - sizeLeft -= SnP_laneLengthInBytes; - lanePosition++; - curData += SnP_laneLengthInBytes; - } - - if (sizeLeft > 0) { - 
memcpy(&statesAsLanes[laneIndex(instanceIndex, lanePosition)], curData, sizeLeft); - } -} - -void KeccakP1600times4_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset) -{ - V256 *stateAsLanes = (V256 *)states; - unsigned int i; - // correcting cast-align errors - // old version: const UINT64 *curData0 = (const UINT64 *)data; - const UINT64 *curData0 = (const void *)data; - // old version: const UINT64 *curData1 = (const UINT64 *)(data+laneOffset*SnP_laneLengthInBytes); - const UINT64 *curData1 = (const void *)(data+laneOffset*SnP_laneLengthInBytes); - // old version: const UINT64 *curData2 = (const UINT64 *)(data+laneOffset*2*SnP_laneLengthInBytes); - const UINT64 *curData2 = (const void *)(data+laneOffset*2*SnP_laneLengthInBytes); - // old version: const UINT64 *curData3 = (const UINT64 *)(data+laneOffset*3*SnP_laneLengthInBytes); - const UINT64 *curData3 = (const void *)(data+laneOffset*3*SnP_laneLengthInBytes); - V256 lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23; - - #define OverWr( argIndex ) STORE256(stateAsLanes[argIndex], LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex])) - - #define OverWr4( argIndex ) lanes0 = LOAD256u( curData0[argIndex]),\ - lanes1 = LOAD256u( curData1[argIndex]),\ - lanes2 = LOAD256u( curData2[argIndex]),\ - lanes3 = LOAD256u( curData3[argIndex]),\ - INTLEAVE(),\ - STORE256( stateAsLanes[argIndex+0], lanes0 ),\ - STORE256( stateAsLanes[argIndex+1], lanes1 ),\ - STORE256( stateAsLanes[argIndex+2], lanes2 ),\ - STORE256( stateAsLanes[argIndex+3], lanes3 ) - - if ( laneCount >= 16 ) { - OverWr4( 0 ); - OverWr4( 4 ); - OverWr4( 8 ); - OverWr4( 12 ); - if ( laneCount >= 20 ) { - OverWr4( 16 ); - for(i=20; i= SnP_laneLengthInBytes) { - statesAsLanes[laneIndex(instanceIndex, lanePosition)] = 0; - sizeLeft -= SnP_laneLengthInBytes; - lanePosition++; - } - - if (sizeLeft > 0) { - memset(&statesAsLanes[laneIndex(instanceIndex, 
lanePosition)], 0, sizeLeft); - } -} - -void KeccakP1600times4_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length) -{ - unsigned int sizeLeft = length; - unsigned int lanePosition = offset/SnP_laneLengthInBytes; - unsigned int offsetInLane = offset%SnP_laneLengthInBytes; - unsigned char *curData = data; - const UINT64 *statesAsLanes = (const UINT64 *)states; - - if ((sizeLeft > 0) && (offsetInLane != 0)) { - unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane; - if (bytesInLane > sizeLeft) - bytesInLane = sizeLeft; - // correcting cast-qual error - // old version: memcpy( curData, ((unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, bytesInLane); - memcpy( curData, ((const unsigned char *)&statesAsLanes[laneIndex(instanceIndex, lanePosition)]) + offsetInLane, bytesInLane); - sizeLeft -= bytesInLane; - lanePosition++; - curData += bytesInLane; - } - - while(sizeLeft >= SnP_laneLengthInBytes) { - // correcting cast-align error - // old version: *(UINT64*)curData = statesAsLanes[laneIndex(instanceIndex, lanePosition)]; - *(UINT64*)(void*)curData = statesAsLanes[laneIndex(instanceIndex, lanePosition)]; - sizeLeft -= SnP_laneLengthInBytes; - lanePosition++; - curData += SnP_laneLengthInBytes; - } - - if (sizeLeft > 0) { - memcpy( curData, &statesAsLanes[laneIndex(instanceIndex, lanePosition)], sizeLeft); - } -} - -void KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset) -{ - // correcting cast-align errors - // old version: UINT64 *curData0 = (UINT64 *)data; - UINT64 *curData0 = (void *)data; - // old version: UINT64 *curData1 = (UINT64 *)(data+laneOffset*1*SnP_laneLengthInBytes); - UINT64 *curData1 = (void *)(data+laneOffset*1*SnP_laneLengthInBytes); - // old version: UINT64 *curData2 = (UINT64 *)(data+laneOffset*2*SnP_laneLengthInBytes); - UINT64 *curData2 = (void 
*)(data+laneOffset*2*SnP_laneLengthInBytes); - // old version: UINT64 *curData3 = (UINT64 *)(data+laneOffset*3*SnP_laneLengthInBytes); - UINT64 *curData3 = (void *)(data+laneOffset*3*SnP_laneLengthInBytes); - - const V256 *stateAsLanes = (const V256 *)states; - const UINT64 *stateAsLanes64 = (const UINT64*)states; - V256 lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23; - unsigned int i; - - #define Extr( argIndex ) curData0[argIndex] = stateAsLanes64[4*(argIndex)], \ - curData1[argIndex] = stateAsLanes64[4*(argIndex)+1], \ - curData2[argIndex] = stateAsLanes64[4*(argIndex)+2], \ - curData3[argIndex] = stateAsLanes64[4*(argIndex)+3] - - #define Extr4( argIndex ) lanes0 = LOAD256( stateAsLanes[argIndex+0] ), \ - lanes1 = LOAD256( stateAsLanes[argIndex+1] ), \ - lanes2 = LOAD256( stateAsLanes[argIndex+2] ), \ - lanes3 = LOAD256( stateAsLanes[argIndex+3] ), \ - UNINTLEAVE(), \ - STORE256u( curData0[argIndex], lanes0 ), \ - STORE256u( curData1[argIndex], lanes1 ), \ - STORE256u( curData2[argIndex], lanes2 ), \ - STORE256u( curData3[argIndex], lanes3 ) - - if ( laneCount >= 16 ) { - Extr4( 0 ); - Extr4( 4 ); - Extr4( 8 ); - Extr4( 12 ); - if ( laneCount >= 20 ) { - Extr4( 16 ); - for(i=20; i 0) && (offsetInLane != 0)) { - unsigned int bytesInLane = SnP_laneLengthInBytes - offsetInLane; - UINT64 lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)] >> (8 * offsetInLane); - if (bytesInLane > sizeLeft) - bytesInLane = sizeLeft; - sizeLeft -= bytesInLane; - do { - *(curOutput++) = *(curInput++) ^ (unsigned char)lane; - lane >>= 8; - } while ( --bytesInLane != 0); - lanePosition++; - } - - while(sizeLeft >= SnP_laneLengthInBytes) { - // correcting cast-align and cast-qual errors - // old version: *((UINT64*)curOutput) = *((UINT64*)curInput) ^ statesAsLanes[laneIndex(instanceIndex, lanePosition)]; - *((UINT64*)(void*)curOutput) = *((const UINT64*)(const void*)curInput) ^ statesAsLanes[laneIndex(instanceIndex, lanePosition)]; - sizeLeft -= 
SnP_laneLengthInBytes; - lanePosition++; - curInput += SnP_laneLengthInBytes; - curOutput += SnP_laneLengthInBytes; - } - - if (sizeLeft != 0) { - UINT64 lane = statesAsLanes[laneIndex(instanceIndex, lanePosition)]; - do { - *(curOutput++) = *(curInput++) ^ (unsigned char)lane; - lane >>= 8; - } while ( --sizeLeft != 0); - } -} - -void KeccakP1600times4_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset) -{ - // correcting cast-align and cast-qual errors - // old version: const UINT64 *curInput0 = (UINT64 *)input; - const UINT64 *curInput0 = (const void *)input; - // old version: const UINT64 *curInput1 = (UINT64 *)(input+laneOffset*1*SnP_laneLengthInBytes); - const UINT64 *curInput1 = (const void *)(input+laneOffset*1*SnP_laneLengthInBytes); - // old version: const UINT64 *curInput2 = (UINT64 *)(input+laneOffset*2*SnP_laneLengthInBytes); - const UINT64 *curInput2 = (const void *)(input+laneOffset*2*SnP_laneLengthInBytes); - // old version: const UINT64 *curInput3 = (UINT64 *)(input+laneOffset*3*SnP_laneLengthInBytes); - const UINT64 *curInput3 = (const void *)(input+laneOffset*3*SnP_laneLengthInBytes); - // correcting cast-align errors - // old version: UINT64 *curOutput0 = (UINT64 *)output; - UINT64 *curOutput0 = (void *)output; - // old version: UINT64 *curOutput1 = (UINT64 *)(output+laneOffset*1*SnP_laneLengthInBytes); - UINT64 *curOutput1 = (void *)(output+laneOffset*1*SnP_laneLengthInBytes); - // old version: UUINT64 *curOutput2 = (UINT64 *)(output+laneOffset*2*SnP_laneLengthInBytes); - UINT64 *curOutput2 = (void *)(output+laneOffset*2*SnP_laneLengthInBytes); - // old version: UINT64 *curOutput3 = (UINT64 *)(output+laneOffset*3*SnP_laneLengthInBytes); - UINT64 *curOutput3 = (void *)(output+laneOffset*3*SnP_laneLengthInBytes); - - const V256 *stateAsLanes = (const V256 *)states; - const UINT64 *stateAsLanes64 = (const UINT64*)states; - V256 lanes0, lanes1, lanes2, lanes3, 
lanesL01, lanesL23, lanesH01, lanesH23; - unsigned int i; - - #define ExtrXor( argIndex ) \ - curOutput0[argIndex] = curInput0[argIndex] ^ stateAsLanes64[4*(argIndex)],\ - curOutput1[argIndex] = curInput1[argIndex] ^ stateAsLanes64[4*(argIndex)+1],\ - curOutput2[argIndex] = curInput2[argIndex] ^ stateAsLanes64[4*(argIndex)+2],\ - curOutput3[argIndex] = curInput3[argIndex] ^ stateAsLanes64[4*(argIndex)+3] - - #define ExtrXor4( argIndex ) \ - lanes0 = LOAD256( stateAsLanes[argIndex+0] ),\ - lanes1 = LOAD256( stateAsLanes[argIndex+1] ),\ - lanes2 = LOAD256( stateAsLanes[argIndex+2] ),\ - lanes3 = LOAD256( stateAsLanes[argIndex+3] ),\ - UNINTLEAVE(),\ - lanesL01 = LOAD256u( curInput0[argIndex]),\ - lanesH01 = LOAD256u( curInput1[argIndex]),\ - lanesL23 = LOAD256u( curInput2[argIndex]),\ - lanesH23 = LOAD256u( curInput3[argIndex]),\ - XOReq256( lanes0, lanesL01 ),\ - XOReq256( lanes1, lanesH01 ),\ - XOReq256( lanes2, lanesL23 ),\ - XOReq256( lanes3, lanesH23 ),\ - STORE256u( curOutput0[argIndex], lanes0 ),\ - STORE256u( curOutput1[argIndex], lanes1 ),\ - STORE256u( curOutput2[argIndex], lanes2 ),\ - STORE256u( curOutput3[argIndex], lanes3 ) - - if ( laneCount >= 16 ) { - ExtrXor4( 0 ); - ExtrXor4( 4 ); - ExtrXor4( 8 ); - ExtrXor4( 12 ); - if ( laneCount >= 20 ) { - ExtrXor4( 16 ); - for(i=20; i= (laneOffsetParallel*3 + laneCount)*8) { - V256 *stateAsLanes = (V256 *)states; - V256 lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23; - #define Xor_In( argIndex ) \ - XOReq256(stateAsLanes[argIndex], LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex])) - #define Xor_In4( argIndex ) \ - lanes0 = LOAD256u( curData0[argIndex]),\ - lanes1 = LOAD256u( curData1[argIndex]),\ - lanes2 = LOAD256u( curData2[argIndex]),\ - lanes3 = LOAD256u( curData3[argIndex]),\ - INTLEAVE(),\ - XOReq256( stateAsLanes[argIndex+0], lanes0 ),\ - XOReq256( stateAsLanes[argIndex+1], lanes1 ),\ - XOReq256( stateAsLanes[argIndex+2], lanes2 ),\ - 
XOReq256( stateAsLanes[argIndex+3], lanes3 ) - Xor_In4( 0 ); - Xor_In4( 4 ); - Xor_In4( 8 ); - Xor_In4( 12 ); - Xor_In4( 16 ); - Xor_In( 20 ); - #undef Xor_In - #undef Xor_In4 - KeccakP1600times4_PermuteAll_24rounds(states); - curData0 += laneOffsetSerial; - curData1 += laneOffsetSerial; - curData2 += laneOffsetSerial; - curData3 += laneOffsetSerial; - dataByteLen -= laneOffsetSerial*8; - } - return (const unsigned char *)curData0 - dataStart; -#else -// unsigned int i; - const unsigned char *dataStart = data; - // correcting cast-align errors - // old version: const UINT64 *curData0 = (const UINT64 *)data; - const UINT64 *curData0 = (const void *)data; - // old version: const UINT64 *curData1 = (const UINT64 *)(data+laneOffsetParallel*1*SnP_laneLengthInBytes); - const UINT64 *curData1 = (const void *)(data+laneOffsetParallel*1*SnP_laneLengthInBytes); - // old version: const UINT64 *curData2 = (const UINT64 *)(data+laneOffsetParallel*2*SnP_laneLengthInBytes); - const UINT64 *curData2 = (const void *)(data+laneOffsetParallel*2*SnP_laneLengthInBytes); - // old version: const UINT64 *curData3 = (const UINT64 *)(data+laneOffsetParallel*3*SnP_laneLengthInBytes); - const UINT64 *curData3 = (const void *)(data+laneOffsetParallel*3*SnP_laneLengthInBytes); - V256 *statesAsLanes = (V256 *)states; - declareABCDE - - copyFromState(A, statesAsLanes) - while(dataByteLen >= (laneOffsetParallel*3 + laneCount)*8) { - #define XOR_In( Xxx, argIndex ) \ - XOReq256(Xxx, LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex])) - XOR_In( Aba, 0 ); - XOR_In( Abe, 1 ); - XOR_In( Abi, 2 ); - XOR_In( Abo, 3 ); - XOR_In( Abu, 4 ); - XOR_In( Aga, 5 ); - XOR_In( Age, 6 ); - XOR_In( Agi, 7 ); - XOR_In( Ago, 8 ); - XOR_In( Agu, 9 ); - XOR_In( Aka, 10 ); - XOR_In( Ake, 11 ); - XOR_In( Aki, 12 ); - XOR_In( Ako, 13 ); - XOR_In( Aku, 14 ); - XOR_In( Ama, 15 ); - XOR_In( Ame, 16 ); - XOR_In( Ami, 17 ); - XOR_In( Amo, 18 ); - XOR_In( Amu, 19 ); - XOR_In( Asa, 20 ); - 
#undef XOR_In - rounds24 - curData0 += laneOffsetSerial; - curData1 += laneOffsetSerial; - curData2 += laneOffsetSerial; - curData3 += laneOffsetSerial; - dataByteLen -= laneOffsetSerial*8; - } - copyToState(statesAsLanes, A) - return (const unsigned char *)curData0 - dataStart; -#endif - } - else { -// unsigned int i; - const unsigned char *dataStart = data; - - while(dataByteLen >= (laneOffsetParallel*3 + laneCount)*8) { - KeccakP1600times4_AddLanesAll(states, data, laneCount, laneOffsetParallel); - KeccakP1600times4_PermuteAll_24rounds(states); - data += laneOffsetSerial*8; - dataByteLen -= laneOffsetSerial*8; - } - return data - dataStart; - } -} - -size_t KeccakP1600times4_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen) -{ - if (laneCount == 21) { -#if 0 - const unsigned char *dataStart = data; - const UINT64 *curData0 = (const UINT64 *)data; - const UINT64 *curData1 = (const UINT64 *)(data+laneOffsetParallel*1*SnP_laneLengthInBytes); - const UINT64 *curData2 = (const UINT64 *)(data+laneOffsetParallel*2*SnP_laneLengthInBytes); - const UINT64 *curData3 = (const UINT64 *)(data+laneOffsetParallel*3*SnP_laneLengthInBytes); - - while(dataByteLen >= (laneOffsetParallel*3 + laneCount)*8) { - V256 *stateAsLanes = states; - V256 lanes0, lanes1, lanes2, lanes3, lanesL01, lanesL23, lanesH01, lanesH23; - #define Xor_In( argIndex ) \ - XOReq256(stateAsLanes[argIndex], LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex])) - #define Xor_In4( argIndex ) \ - lanes0 = LOAD256u( curData0[argIndex]),\ - lanes1 = LOAD256u( curData1[argIndex]),\ - lanes2 = LOAD256u( curData2[argIndex]),\ - lanes3 = LOAD256u( curData3[argIndex]),\ - INTLEAVE(),\ - XOReq256( stateAsLanes[argIndex+0], lanes0 ),\ - XOReq256( stateAsLanes[argIndex+1], lanes1 ),\ - XOReq256( stateAsLanes[argIndex+2], lanes2 ),\ - XOReq256( 
stateAsLanes[argIndex+3], lanes3 ) - Xor_In4( 0 ); - Xor_In4( 4 ); - Xor_In4( 8 ); - Xor_In4( 12 ); - Xor_In4( 16 ); - Xor_In( 20 ); - #undef Xor_In - #undef Xor_In4 - KeccakP1600times4_PermuteAll_12rounds(states); - curData0 += laneOffsetSerial; - curData1 += laneOffsetSerial; - curData2 += laneOffsetSerial; - curData3 += laneOffsetSerial; - dataByteLen -= laneOffsetSerial*8; - } - return (const unsigned char *)curData0 - dataStart; -#else -// unsigned int i; - const unsigned char *dataStart = data; - // correcting cast-align errors - // old version: const UINT64 *curData0 = (const UINT64 *)data; - const UINT64 *curData0 = (const void *)data; - // old version: const UINT64 *curData1 = (const UINT64 *)(data+laneOffsetParallel*1*SnP_laneLengthInBytes); - const UINT64 *curData1 = (const void *)(data+laneOffsetParallel*1*SnP_laneLengthInBytes); - // old version: const UINT64 *curData2 = (const UINT64 *)(data+laneOffsetParallel*2*SnP_laneLengthInBytes); - const UINT64 *curData2 = (const void *)(data+laneOffsetParallel*2*SnP_laneLengthInBytes); - // old version: const UINT64 *curData3 = (const UINT64 *)(data+laneOffsetParallel*3*SnP_laneLengthInBytes); - const UINT64 *curData3 = (const void *)(data+laneOffsetParallel*3*SnP_laneLengthInBytes); - V256 *statesAsLanes = states; - declareABCDE - - copyFromState(A, statesAsLanes) - while(dataByteLen >= (laneOffsetParallel*3 + laneCount)*8) { - #define XOR_In( Xxx, argIndex ) \ - XOReq256(Xxx, LOAD4_64(curData3[argIndex], curData2[argIndex], curData1[argIndex], curData0[argIndex])) - XOR_In( Aba, 0 ); - XOR_In( Abe, 1 ); - XOR_In( Abi, 2 ); - XOR_In( Abo, 3 ); - XOR_In( Abu, 4 ); - XOR_In( Aga, 5 ); - XOR_In( Age, 6 ); - XOR_In( Agi, 7 ); - XOR_In( Ago, 8 ); - XOR_In( Agu, 9 ); - XOR_In( Aka, 10 ); - XOR_In( Ake, 11 ); - XOR_In( Aki, 12 ); - XOR_In( Ako, 13 ); - XOR_In( Aku, 14 ); - XOR_In( Ama, 15 ); - XOR_In( Ame, 16 ); - XOR_In( Ami, 17 ); - XOR_In( Amo, 18 ); - XOR_In( Amu, 19 ); - XOR_In( Asa, 20 ); - #undef XOR_In - 
rounds12 - curData0 += laneOffsetSerial; - curData1 += laneOffsetSerial; - curData2 += laneOffsetSerial; - curData3 += laneOffsetSerial; - dataByteLen -= laneOffsetSerial*8; - } - copyToState(statesAsLanes, A) - return (const unsigned char *)curData0 - dataStart; -#endif - } - else { -// unsigned int i; - const unsigned char *dataStart = data; - - while(dataByteLen >= (laneOffsetParallel*3 + laneCount)*8) { - KeccakP1600times4_AddLanesAll(states, data, laneCount, laneOffsetParallel); - KeccakP1600times4_PermuteAll_12rounds(states); - data += laneOffsetSerial*8; - dataByteLen -= laneOffsetSerial*8; - } - return data - dataStart; - } -} -#endif diff --git a/pq-crypto/kyber_r3/KeccakP-1600-times4-SnP_avx2.h b/pq-crypto/kyber_r3/KeccakP-1600-times4-SnP_avx2.h deleted file mode 100644 index 2640191779b..00000000000 --- a/pq-crypto/kyber_r3/KeccakP-1600-times4-SnP_avx2.h +++ /dev/null @@ -1,63 +0,0 @@ -/* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". - -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -#pragma once - -/** For the documentation, see PlSnP-documentation.h. 
- */ - -#include "KeccakP-SIMD256-config_avx2.h" -#include "kyber512r3_params.h" -#include "kyber512r3_fips202x4_avx2.h" - -#define KeccakP1600times4_implementation "256-bit SIMD implementation (" KeccakP1600times4_implementation_config ")" -#define KeccakP1600times4_statesSizeInBytes 800 -#define KeccakP1600times4_statesAlignment 32 -#define KeccakF1600times4_FastLoop_supported -#define KeccakP1600times4_12rounds_FastLoop_supported - -#include - -#define KeccakP1600times4_StaticInitialize() -#define KeccakP1600times4_InitializeAll S2N_KYBER_512_R3_NAMESPACE(KeccakP1600times4_InitializeAll) -void KeccakP1600times4_InitializeAll(void *states); -#define KeccakP1600times4_AddByte(states, instanceIndex, byte, offset) \ - ((unsigned char*)(states))[(instanceIndex)*8 + ((offset)/8)*4*8 + (offset)%8] ^= (byte) -#define KeccakP1600times4_AddBytes S2N_KYBER_512_R3_NAMESPACE(KeccakP1600times4_AddBytes) -void KeccakP1600times4_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length); -#define KeccakP1600times4_AddLanesAll S2N_KYBER_512_R3_NAMESPACE(KeccakP1600times4_AddLanesAll) -void KeccakP1600times4_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset); -#define KeccakP1600times4_OverwriteBytes S2N_KYBER_512_R3_NAMESPACE(KeccakP1600times4_OverwriteBytes) -void KeccakP1600times4_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length); -#define KeccakP1600times4_OverwriteLanesAll S2N_KYBER_512_R3_NAMESPACE(KeccakP1600times4_OverwriteLanesAll) -void KeccakP1600times4_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset); -#define KeccakP1600times4_OverwriteWithZeroes S2N_KYBER_512_R3_NAMESPACE(KeccakP1600times4_OverwriteWithZeroes) -void KeccakP1600times4_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount); 
-#define KeccakP1600times4_PermuteAll_12rounds S2N_KYBER_512_R3_NAMESPACE(KeccakP1600times4_PermuteAll_12rounds) -void KeccakP1600times4_PermuteAll_12rounds(void *states); -#define KeccakP1600times4_PermuteAll_24rounds S2N_KYBER_512_R3_NAMESPACE(KeccakP1600times4_PermuteAll_24rounds) -void KeccakP1600times4_PermuteAll_24rounds(void *states); -#define KeccakP1600times4_ExtractBytes S2N_KYBER_512_R3_NAMESPACE(KeccakP1600times4_ExtractBytes) -void KeccakP1600times4_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length); -#define KeccakP1600times4_ExtractLanesAll S2N_KYBER_512_R3_NAMESPACE(KeccakP1600times4_ExtractLanesAll) -void KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset); -#define KeccakP1600times4_ExtractAndAddBytes S2N_KYBER_512_R3_NAMESPACE(KeccakP1600times4_ExtractAndAddBytes) -void KeccakP1600times4_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length); -#define KeccakP1600times4_ExtractAndAddLanesAll S2N_KYBER_512_R3_NAMESPACE(KeccakP1600times4_ExtractAndAddLanesAll) -void KeccakP1600times4_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset); -#define KeccakF1600times4_FastLoop_Absorb S2N_KYBER_512_R3_NAMESPACE(KeccakF1600times4_FastLoop_Absorb) -size_t KeccakF1600times4_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen); -#define KeccakP1600times4_12rounds_FastLoop_Absorb S2N_KYBER_512_R3_NAMESPACE(KeccakP1600times4_12rounds_FastLoop_Absorb) -size_t KeccakP1600times4_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned 
char *data, size_t dataByteLen); diff --git a/pq-crypto/kyber_r3/KeccakP-SIMD256-config_avx2.h b/pq-crypto/kyber_r3/KeccakP-SIMD256-config_avx2.h deleted file mode 100644 index 1c65fe29b49..00000000000 --- a/pq-crypto/kyber_r3/KeccakP-SIMD256-config_avx2.h +++ /dev/null @@ -1,3 +0,0 @@ -#define KeccakP1600times4_implementation_config "AVX2, all rounds unrolled" -#define KeccakP1600times4_fullUnrolling -#define KeccakP1600times4_useAVX2 diff --git a/pq-crypto/kyber_r3/KeccakP-align_avx2.h b/pq-crypto/kyber_r3/KeccakP-align_avx2.h deleted file mode 100644 index be08e84af21..00000000000 --- a/pq-crypto/kyber_r3/KeccakP-align_avx2.h +++ /dev/null @@ -1,31 +0,0 @@ -/* -Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, -Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby -denoted as "the implementer". - -For more information, feedback or questions, please refer to our websites: -http://keccak.noekeon.org/ -http://keyak.noekeon.org/ -http://ketje.noekeon.org/ - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -#pragma once - -/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. */ -#ifdef ALIGN -#undef ALIGN -#endif - -#if defined(__GNUC__) -#define ALIGN(x) __attribute__ ((aligned(x))) -#elif defined(_MSC_VER) -#define ALIGN(x) __declspec(align(x)) -#elif defined(__ARMCC_VERSION) -#define ALIGN(x) __align(x) -#else -#define ALIGN(x) -#endif diff --git a/pq-crypto/kyber_r3/KeccakP-brg_endian_avx2.h b/pq-crypto/kyber_r3/KeccakP-brg_endian_avx2.h deleted file mode 100644 index 8e8b73cf2a2..00000000000 --- a/pq-crypto/kyber_r3/KeccakP-brg_endian_avx2.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - --------------------------------------------------------------------------- - Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. 
All rights reserved. - - LICENSE TERMS - - The redistribution and use of this software (with or without changes) - is allowed without the payment of fees or royalties provided that: - - 1. source code distributions include the above copyright notice, this - list of conditions and the following disclaimer; - - 2. binary distributions include the above copyright notice, this list - of conditions and the following disclaimer in their documentation; - - 3. the name of the copyright holder is not used to endorse products - built using this software without specific written permission. - - DISCLAIMER - - This software is provided 'as is' with no explicit or implied warranties - in respect of its properties, including, but not limited to, correctness - and/or fitness for purpose. - --------------------------------------------------------------------------- - Issue Date: 20/12/2007 - Changes for ARM 9/9/2010 -*/ - -#pragma once - -#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ -#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ - -#if 0 -/* Include files where endian defines and byteswap functions may reside */ -#if defined( __sun ) -# include -#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ ) -# include -#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \ - defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ ) -# include -#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ ) -# if !defined( __MINGW32__ ) && !defined( _AIX ) -# include -# if !defined( __BEOS__ ) -# include -# endif -# endif -#endif -#endif - -/* Now attempt to set the define for platform byte order using any */ -/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ -/* seem to encompass most endian symbol definitions */ - -#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) -# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN -# define PLATFORM_BYTE_ORDER 
IS_BIG_ENDIAN -# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN -# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN -# endif -#elif defined( BIG_ENDIAN ) -# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN -#elif defined( LITTLE_ENDIAN ) -# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN -#endif - -#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) -# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN -# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN -# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN -# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN -# endif -#elif defined( _BIG_ENDIAN ) -# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN -#elif defined( _LITTLE_ENDIAN ) -# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN -#endif - -#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN ) -# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN -# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN -# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN -# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN -# endif -#elif defined( __BIG_ENDIAN ) -# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN -#elif defined( __LITTLE_ENDIAN ) -# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN -#endif - -#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) -# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ -# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN -# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ -# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN -# endif -#elif defined( __BIG_ENDIAN__ ) -# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN -#elif defined( __LITTLE_ENDIAN__ ) -# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN -#endif - -/* if the platform byte order could not be determined, then try to */ -/* set this define using common machine defines */ -#if !defined(PLATFORM_BYTE_ORDER) - -#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ - defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ - defined( __OS2__ ) || defined( 
sun386 ) || defined( __TURBOC__ ) || \ - defined( vax ) || defined( vms ) || defined( VMS ) || \ - defined( __VMS ) || defined( _M_X64 ) -# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN - -#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ - defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ - defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ - defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ - defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ - defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ - defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) -# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN - -#elif defined(__arm__) -# ifdef __BIG_ENDIAN -# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN -# else -# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN -# endif -#elif 1 /* **** EDIT HERE IF NECESSARY **** */ -# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN -#elif 0 /* **** EDIT HERE IF NECESSARY **** */ -# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN -#else -# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order -#endif - -#endif diff --git a/pq-crypto/kyber_r3/Makefile b/pq-crypto/kyber_r3/Makefile deleted file mode 100644 index 426551785e6..00000000000 --- a/pq-crypto/kyber_r3/Makefile +++ /dev/null @@ -1,79 +0,0 @@ -# -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://aws.amazon.com/apache2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. 
-# - -.DEFAULT_GOAL := all - -include ../../s2n.mk -include ../s2n_pq_asm.mk - -# Kyber Round-3 code has several different optimizations -# which require specific compiler flags to be supported -# by the compiler. The flags are set in the s2n_pq_asm.mk -# file and used here to compile the optimized code files. - -SRCS=kyber512r3_cbd.c kyber512r3_fips202.c kyber512r3_indcpa.c kyber512r3_kem.c kyber512r3_ntt.c kyber512r3_poly.c kyber512r3_polyvec.c kyber512r3_reduce.c kyber512r3_symmetric-shake.c -OBJS=$(SRCS:.c=.o) - -AVX2_SRCS=kyber512r3_cbd_avx2.c kyber512r3_consts_avx2.c kyber512r3_fips202x4_avx2.c kyber512r3_indcpa_avx2.c kyber512r3_poly_avx2.c kyber512r3_polyvec_avx2.c kyber512r3_rejsample_avx2.c KeccakP-1600-times4-SIMD256_avx2.c -AVX2_ASM_SRCS=kyber512r3_basemul_avx2.S kyber512r3_fq_avx2.S kyber512r3_invntt_avx2.S kyber512r3_ntt_avx2.S kyber512r3_shuffle_avx2.S - - -ifeq ($(KYBER512R3_AVX2_BMI2_SUPPORTED), 0) - CFLAGS += -DS2N_KYBER512R3_AVX2_BMI2 - CFLAGS_LLVM += -DS2N_KYBER512R3_AVX2_BMI2 - - AVX2_OBJS=$(AVX2_SRCS:.c=.o) - AVX2_ASM_OBJS=$(AVX2_ASM_SRCS:.S=.o) - - KYBER512R3_AVX2_BMI2_FLAGS= -mavx2 -mbmi2 - $(AVX2_SRCS): CFLAGS += $(KYBER512R3_AVX2_BMI2_FLAGS) - $(AVX2_OBJS): CFLAGS += $(KYBER512R3_AVX2_BMI2_FLAGS) - $(AVX2_ASM_OBJS): CFLAGS += $(KYBER512R3_AVX2_BMI2_FLAGS) -endif - -#WA for GCC 4.8.5 bug. 
-CFLAGS += -Wno-missing-braces -Wno-missing-field-initializers -I../../ - -ifeq ($(KYBER512R3_AVX2_BMI2_SUPPORTED), 0) - .PHONY : all - all: $(OBJS) $(AVX2_OBJS) $(AVX2_ASM_OBJS) -else - .PHONY : all - all: $(OBJS) -endif - -CFLAGS_LLVM = -emit-llvm -c -g \ - -std=c99 -fgnu89-inline -D_POSIX_C_SOURCE=200809L -D_FORTIFY_SOURCE=2 \ - -I$(LIBCRYPTO_ROOT)/include/ -I../../api/ -I../../ - -BCS=$(addprefix $(BITCODE_DIR), $(SRCS:.c=.bc)) -ifeq ($(KYBER512R3_AVX2_BMI2_SUPPORTED), 0) - AVX2_BCS=$(addprefix $(BITCODE_DIR), $(AVX2_SRCS:.c=.bc)) - AVX2_ASM_BCS=$(addprefix $(BITCODE_DIR), $(AVX2_ASM_SRCS:.S=.bc)) -endif - -$(BCS): CFLAGS_LLVM += $(KYBER512R3_AVX2_BMI2_FLAGS) -ifeq ($(KYBER512R3_AVX2_BMI2_SUPPORTED), 0) - $(AVX2_BCS): CFLAGS_LLVM += $(KYBER512R3_AVX2_BMI2_FLAGS) - $(AVX2_ASM_BCS): CFLAGS_LLVM += $(KYBER512R3_AVX2_BMI2_FLAGS) -endif - -ifeq ($(KYBER512R3_AVX2_BMI2_SUPPORTED), 0) - .PHONY : bc - bc: $(BCS) $(AVX2_BCS) $(AVX2_ASM_BCS) -else - .PHONY : bc - bc: $(BCS) -endif diff --git a/pq-crypto/kyber_r3/kyber512r3_align_avx2.h b/pq-crypto/kyber_r3/kyber512r3_align_avx2.h deleted file mode 100644 index 79e6d9ec0c2..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_align_avx2.h +++ /dev/null @@ -1,19 +0,0 @@ -#pragma once - -#include - -#if defined(S2N_KYBER512R3_AVX2_BMI2) -#include - -#define ALIGNED_UINT8(N) \ - union { \ - uint8_t coeffs[N]; \ - __m256i vec[(N+31)/32]; \ - } - -#define ALIGNED_INT16(N) \ - union { \ - int16_t coeffs[N]; \ - __m256i vec[(N+15)/16]; \ - } -#endif diff --git a/pq-crypto/kyber_r3/kyber512r3_basemul_avx2.S b/pq-crypto/kyber_r3/kyber512r3_basemul_avx2.S deleted file mode 100644 index ed2a65be203..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_basemul_avx2.S +++ /dev/null @@ -1,105 +0,0 @@ -#include "kyber512r3_consts_avx2.h" - -.macro schoolbook off -vmovdqa _16XQINV*2(%rcx),%ymm0 -vmovdqa (64*\off+ 0)*2(%rsi),%ymm1 # a0 -vmovdqa (64*\off+16)*2(%rsi),%ymm2 # b0 -vmovdqa (64*\off+32)*2(%rsi),%ymm3 # a1 -vmovdqa 
(64*\off+48)*2(%rsi),%ymm4 # b1 - -vpmullw %ymm0,%ymm1,%ymm9 # a0.lo -vpmullw %ymm0,%ymm2,%ymm10 # b0.lo -vpmullw %ymm0,%ymm3,%ymm11 # a1.lo -vpmullw %ymm0,%ymm4,%ymm12 # b1.lo - -vmovdqa (64*\off+ 0)*2(%rdx),%ymm5 # c0 -vmovdqa (64*\off+16)*2(%rdx),%ymm6 # d0 - -vpmulhw %ymm5,%ymm1,%ymm13 # a0c0.hi -vpmulhw %ymm6,%ymm1,%ymm1 # a0d0.hi -vpmulhw %ymm5,%ymm2,%ymm14 # b0c0.hi -vpmulhw %ymm6,%ymm2,%ymm2 # b0d0.hi - -vmovdqa (64*\off+32)*2(%rdx),%ymm7 # c1 -vmovdqa (64*\off+48)*2(%rdx),%ymm8 # d1 - -vpmulhw %ymm7,%ymm3,%ymm15 # a1c1.hi -vpmulhw %ymm8,%ymm3,%ymm3 # a1d1.hi -vpmulhw %ymm7,%ymm4,%ymm0 # b1c1.hi -vpmulhw %ymm8,%ymm4,%ymm4 # b1d1.hi - -vmovdqa %ymm13,(%rsp) - -vpmullw %ymm5,%ymm9,%ymm13 # a0c0.lo -vpmullw %ymm6,%ymm9,%ymm9 # a0d0.lo -vpmullw %ymm5,%ymm10,%ymm5 # b0c0.lo -vpmullw %ymm6,%ymm10,%ymm10 # b0d0.lo - -vpmullw %ymm7,%ymm11,%ymm6 # a1c1.lo -vpmullw %ymm8,%ymm11,%ymm11 # a1d1.lo -vpmullw %ymm7,%ymm12,%ymm7 # b1c1.lo -vpmullw %ymm8,%ymm12,%ymm12 # b1d1.lo - -vmovdqa _16XQ*2(%rcx),%ymm8 -vpmulhw %ymm8,%ymm13,%ymm13 -vpmulhw %ymm8,%ymm9,%ymm9 -vpmulhw %ymm8,%ymm5,%ymm5 -vpmulhw %ymm8,%ymm10,%ymm10 -vpmulhw %ymm8,%ymm6,%ymm6 -vpmulhw %ymm8,%ymm11,%ymm11 -vpmulhw %ymm8,%ymm7,%ymm7 -vpmulhw %ymm8,%ymm12,%ymm12 - -vpsubw (%rsp),%ymm13,%ymm13 # -a0c0 -vpsubw %ymm9,%ymm1,%ymm9 # a0d0 -vpsubw %ymm5,%ymm14,%ymm5 # b0c0 -vpsubw %ymm10,%ymm2,%ymm10 # b0d0 - -vpsubw %ymm6,%ymm15,%ymm6 # a1c1 -vpsubw %ymm11,%ymm3,%ymm11 # a1d1 -vpsubw %ymm7,%ymm0,%ymm7 # b1c1 -vpsubw %ymm12,%ymm4,%ymm12 # b1d1 - -vmovdqa (%r9),%ymm0 -vmovdqa 32(%r9),%ymm1 -vpmullw %ymm0,%ymm10,%ymm2 -vpmullw %ymm0,%ymm12,%ymm3 -vpmulhw %ymm1,%ymm10,%ymm10 -vpmulhw %ymm1,%ymm12,%ymm12 -vpmulhw %ymm8,%ymm2,%ymm2 -vpmulhw %ymm8,%ymm3,%ymm3 -vpsubw %ymm2,%ymm10,%ymm10 # rb0d0 -vpsubw %ymm3,%ymm12,%ymm12 # rb1d1 - -vpaddw %ymm5,%ymm9,%ymm9 -vpaddw %ymm7,%ymm11,%ymm11 -vpsubw %ymm13,%ymm10,%ymm13 -vpsubw %ymm12,%ymm6,%ymm6 - -vmovdqa %ymm13,(64*\off+ 0)*2(%rdi) -vmovdqa %ymm9,(64*\off+16)*2(%rdi) -vmovdqa 
%ymm6,(64*\off+32)*2(%rdi) -vmovdqa %ymm11,(64*\off+48)*2(%rdi) -.endm - -.text -.global cdecl(basemul_avx2_asm) -cdecl(basemul_avx2_asm): -mov %rsp,%r8 -and $-32,%rsp -sub $32,%rsp - -lea (_ZETAS_EXP+176)*2(%rcx),%r9 -schoolbook 0 - -add $32*2,%r9 -schoolbook 1 - -add $192*2,%r9 -schoolbook 2 - -add $32*2,%r9 -schoolbook 3 - -mov %r8,%rsp -ret diff --git a/pq-crypto/kyber_r3/kyber512r3_cbd.c b/pq-crypto/kyber_r3/kyber512r3_cbd.c deleted file mode 100644 index 82acdb30f87..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_cbd.c +++ /dev/null @@ -1,106 +0,0 @@ -#include -#include "kyber512r3_params.h" -#include "kyber512r3_cbd.h" - -S2N_ENSURE_PORTABLE_OPTIMIZATIONS - -/************************************************* -* Name: load32_littleendian -* -* Description: load 4 bytes into a 32-bit integer -* in little-endian order -* -* Arguments: - const uint8_t *x: pointer to input byte array -* -* Returns 32-bit unsigned integer loaded from x -**************************************************/ -static uint32_t load32_littleendian(const uint8_t x[4]) { - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - r |= (uint32_t)x[3] << 24; - return r; -} - -/************************************************* -* Name: load24_littleendian -* -* Description: load 3 bytes into a 32-bit integer -* in little-endian order -* This function is only needed for Kyber-512 -* -* Arguments: - const uint8_t *x: pointer to input byte array -* -* Returns 32-bit unsigned integer loaded from x (most significant byte is zero) -**************************************************/ -static uint32_t load24_littleendian(const uint8_t x[3]) { - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - return r; -} - - -/************************************************* -* Name: cbd2 -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial 
distribution with parameter eta=2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *buf: pointer to input byte array -**************************************************/ -static void cbd2(poly *r, const uint8_t buf[2 * S2N_KYBER_512_R3_N / 4]) { - unsigned int i, j; - - for (i = 0; i < S2N_KYBER_512_R3_N / 8; i++) { - uint32_t t = load32_littleendian(buf + 4 * i); - uint32_t d = t & 0x55555555; - d += (t >> 1) & 0x55555555; - - for (j = 0; j < 8; j++) { - int16_t a = (d >> (4 * j + 0)) & 0x3; - int16_t b = (d >> (4 * j + 2)) & 0x3; - r->coeffs[8 * i + j] = a - b; - } - } -} - -/************************************************* -* Name: cbd3 -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter eta=3 -* This function is only needed for Kyber-512 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *buf: pointer to input byte array -**************************************************/ -static void cbd3(poly *r, const uint8_t buf[3 * S2N_KYBER_512_R3_N / 4]) { - unsigned int i, j; - - for (i = 0; i < S2N_KYBER_512_R3_N / 4; i++) { - uint32_t t = load24_littleendian(buf + 3 * i); - uint32_t d = t & 0x00249249; - d += (t >> 1) & 0x00249249; - d += (t >> 2) & 0x00249249; - - for (j = 0; j < 4; j++) { - int16_t a = (d >> (6 * j + 0)) & 0x7; - int16_t b = (d >> (6 * j + 3)) & 0x7; - r->coeffs[4 * i + j] = a - b; - } - } -} - -void cbd_eta1(poly *r, const uint8_t buf[S2N_KYBER_512_R3_ETA1 * S2N_KYBER_512_R3_N / 4]) { - cbd3(r, buf); -} - -void cbd_eta2(poly *r, const uint8_t buf[S2N_KYBER_512_R3_ETA2 * S2N_KYBER_512_R3_N / 4]) { - cbd2(r, buf); -} diff --git a/pq-crypto/kyber_r3/kyber512r3_cbd.h b/pq-crypto/kyber_r3/kyber512r3_cbd.h deleted file mode 100644 index 631821956c6..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_cbd.h +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once - -#include -#include 
"kyber512r3_params.h" -#include "kyber512r3_poly.h" - -#define cbd_eta1 S2N_KYBER_512_R3_NAMESPACE(cbd_eta1) -void cbd_eta1(poly *r, const uint8_t buf[S2N_KYBER_512_R3_ETA1 * S2N_KYBER_512_R3_N / 4]); - -#define cbd_eta2 S2N_KYBER_512_R3_NAMESPACE(cbd_eta2) -void cbd_eta2(poly *r, const uint8_t buf[S2N_KYBER_512_R3_ETA2 * S2N_KYBER_512_R3_N / 4]); diff --git a/pq-crypto/kyber_r3/kyber512r3_cbd_avx2.c b/pq-crypto/kyber_r3/kyber512r3_cbd_avx2.c deleted file mode 100644 index a922bd220f4..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_cbd_avx2.c +++ /dev/null @@ -1,137 +0,0 @@ -#include -#include "kyber512r3_params.h" -#include "kyber512r3_cbd_avx2.h" - -#if defined(S2N_KYBER512R3_AVX2_BMI2) -/************************************************* -* Name: cbd2 -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter eta=2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const __m256i *buf: pointer to aligned input byte array -**************************************************/ -static void cbd2(poly * restrict r, const __m256i buf[2*S2N_KYBER_512_R3_N/128]) -{ - unsigned int i; - __m256i f0, f1, f2, f3; - const __m256i mask55 = _mm256_set1_epi32(0x55555555); - const __m256i mask33 = _mm256_set1_epi32(0x33333333); - const __m256i mask03 = _mm256_set1_epi32(0x03030303); - const __m256i mask0F = _mm256_set1_epi32(0x0F0F0F0F); - - for(i = 0; i < S2N_KYBER_512_R3_N/64; i++) { - f0 = _mm256_load_si256(&buf[i]); - - f1 = _mm256_srli_epi16(f0, 1); - f0 = _mm256_and_si256(mask55, f0); - f1 = _mm256_and_si256(mask55, f1); - f0 = _mm256_add_epi8(f0, f1); - - f1 = _mm256_srli_epi16(f0, 2); - f0 = _mm256_and_si256(mask33, f0); - f1 = _mm256_and_si256(mask33, f1); - f0 = _mm256_add_epi8(f0, mask33); - f0 = _mm256_sub_epi8(f0, f1); - - f1 = _mm256_srli_epi16(f0, 4); - f0 = _mm256_and_si256(mask0F, f0); - f1 = _mm256_and_si256(mask0F, f1); - f0 = 
_mm256_sub_epi8(f0, mask03); - f1 = _mm256_sub_epi8(f1, mask03); - - f2 = _mm256_unpacklo_epi8(f0, f1); - f3 = _mm256_unpackhi_epi8(f0, f1); - - f0 = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(f2)); - f1 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(f2,1)); - f2 = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(f3)); - f3 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(f3,1)); - - _mm256_store_si256(&r->vec[4*i+0], f0); - _mm256_store_si256(&r->vec[4*i+1], f2); - _mm256_store_si256(&r->vec[4*i+2], f1); - _mm256_store_si256(&r->vec[4*i+3], f3); - } -} - -/************************************************* -* Name: cbd3 -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter eta=3 -* This function is only needed for Kyber-512 -* -* Arguments: - poly *r: pointer to output polynomial -* - const __m256i *buf: pointer to aligned input byte array -**************************************************/ -static void cbd3(poly * restrict r, const uint8_t buf[3*S2N_KYBER_512_R3_N/4+8]) -{ - unsigned int i; - __m256i f0, f1, f2, f3; - const __m256i mask249 = _mm256_set1_epi32(0x249249); - const __m256i mask6DB = _mm256_set1_epi32(0x6DB6DB); - const __m256i mask07 = _mm256_set1_epi32(7); - const __m256i mask70 = _mm256_set1_epi32(7 << 16); - const __m256i mask3 = _mm256_set1_epi16(3); - const __m256i shufbidx = _mm256_set_epi8(-1,15,14,13,-1,12,11,10,-1, 9, 8, 7,-1, 6, 5, 4, - -1,11,10, 9,-1, 8, 7, 6,-1, 5, 4, 3,-1, 2, 1, 0); - - for(i = 0; i < S2N_KYBER_512_R3_N/32; i++) { - // correcting cast-align and cast-qual errors - // old version: f0 = _mm256_loadu_si256((__m256i *)&buf[24*i]); - f0 = _mm256_loadu_si256((const void *)&buf[24*i]); - f0 = _mm256_permute4x64_epi64(f0,0x94); - f0 = _mm256_shuffle_epi8(f0,shufbidx); - - f1 = _mm256_srli_epi32(f0,1); - f2 = _mm256_srli_epi32(f0,2); - f0 = _mm256_and_si256(mask249,f0); - f1 = _mm256_and_si256(mask249,f1); - f2 = 
_mm256_and_si256(mask249,f2); - f0 = _mm256_add_epi32(f0,f1); - f0 = _mm256_add_epi32(f0,f2); - - f1 = _mm256_srli_epi32(f0,3); - f0 = _mm256_add_epi32(f0,mask6DB); - f0 = _mm256_sub_epi32(f0,f1); - - f1 = _mm256_slli_epi32(f0,10); - f2 = _mm256_srli_epi32(f0,12); - f3 = _mm256_srli_epi32(f0, 2); - f0 = _mm256_and_si256(f0,mask07); - f1 = _mm256_and_si256(f1,mask70); - f2 = _mm256_and_si256(f2,mask07); - f3 = _mm256_and_si256(f3,mask70); - f0 = _mm256_add_epi16(f0,f1); - f1 = _mm256_add_epi16(f2,f3); - f0 = _mm256_sub_epi16(f0,mask3); - f1 = _mm256_sub_epi16(f1,mask3); - - f2 = _mm256_unpacklo_epi32(f0,f1); - f3 = _mm256_unpackhi_epi32(f0,f1); - - f0 = _mm256_permute2x128_si256(f2,f3,0x20); - f1 = _mm256_permute2x128_si256(f2,f3,0x31); - - _mm256_store_si256(&r->vec[2*i+0], f0); - _mm256_store_si256(&r->vec[2*i+1], f1); - } -} - -/* buf 32 bytes longer for cbd3 */ -void poly_cbd_eta1_avx2(poly *r, const __m256i buf[S2N_KYBER_512_R3_ETA1*S2N_KYBER_512_R3_N/128+1]) -{ - // correcting cast-align and cast-qual errors - // old version: cbd3(r, (uint8_t *)buf); - cbd3(r, (const void *)buf); -} - -void poly_cbd_eta2_avx2(poly *r, const __m256i buf[S2N_KYBER_512_R3_ETA2*S2N_KYBER_512_R3_N/128]) -{ - cbd2(r, buf); -} -#endif diff --git a/pq-crypto/kyber_r3/kyber512r3_cbd_avx2.h b/pq-crypto/kyber_r3/kyber512r3_cbd_avx2.h deleted file mode 100644 index 972c71fbf5d..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_cbd_avx2.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include -#include "kyber512r3_params.h" -#include "kyber512r3_poly_avx2.h" - -#if defined(S2N_KYBER512R3_AVX2_BMI2) -#include - -#define poly_cbd_eta1_avx2 S2N_KYBER_512_R3_NAMESPACE(poly_cbd_eta1_avx2) -void poly_cbd_eta1_avx2(poly *r, const __m256i buf[S2N_KYBER_512_R3_ETA1*S2N_KYBER_512_R3_N/128+1]); - -#define poly_cbd_eta2_avx2 S2N_KYBER_512_R3_NAMESPACE(poly_cbd_eta2_avx2) -void poly_cbd_eta2_avx2(poly *r, const __m256i buf[S2N_KYBER_512_R3_ETA2*S2N_KYBER_512_R3_N/128]); -#endif diff --git 
a/pq-crypto/kyber_r3/kyber512r3_consts_avx2.c b/pq-crypto/kyber_r3/kyber512r3_consts_avx2.c deleted file mode 100644 index cdc0b817df0..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_consts_avx2.c +++ /dev/null @@ -1,122 +0,0 @@ -#include "kyber512r3_align_avx2.h" -#include "kyber512r3_consts_avx2.h" - -#if defined(S2N_KYBER512R3_AVX2_BMI2) -#define Q S2N_KYBER_512_R3_Q -#define MONT -1044 // 2^16 mod q -#define QINV -3327 // q^-1 mod 2^16 -#define V 20159 // floor(2^26/q + 0.5) -#define FHI 1441 // mont^2/128 -#define FLO -10079 // qinv*FHI -#define MONTSQHI 1353 // mont^2 -#define MONTSQLO 20553 // qinv*MONTSQHI -#define MASK 4095 -#define SHIFT 32 - -const qdata_t qdata = {{ -#define _16XQ 0 - Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, - -#define _16XQINV 16 - QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, - QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV, - -#define _16XV 32 - V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, - -#define _16XFLO 48 - FLO, FLO, FLO, FLO, FLO, FLO, FLO, FLO, - FLO, FLO, FLO, FLO, FLO, FLO, FLO, FLO, - -#define _16XFHI 64 - FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI, - FHI, FHI, FHI, FHI, FHI, FHI, FHI, FHI, - -#define _16XMONTSQLO 80 - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - MONTSQLO, MONTSQLO, MONTSQLO, MONTSQLO, - -#define _16XMONTSQHI 96 - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - MONTSQHI, MONTSQHI, MONTSQHI, MONTSQHI, - -#define _16XMASK 112 - MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK, - MASK, MASK, MASK, MASK, MASK, MASK, MASK, MASK, - -#define _REVIDXB 128 - 3854, 3340, 2826, 2312, 1798, 1284, 770, 256, - 3854, 3340, 2826, 2312, 1798, 1284, 770, 256, - -#define _REVIDXD 144 - 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0, 0, - -#define _ZETAS_EXP 160 - 31498, 31498, 31498, 31498, -758, -758, -758, -758, - 5237, 5237, 5237, 5237, 1397, 1397, 1397, 1397, - 
14745, 14745, 14745, 14745, 14745, 14745, 14745, 14745, - 14745, 14745, 14745, 14745, 14745, 14745, 14745, 14745, - -359, -359, -359, -359, -359, -359, -359, -359, - -359, -359, -359, -359, -359, -359, -359, -359, - 13525, 13525, 13525, 13525, 13525, 13525, 13525, 13525, - -12402, -12402, -12402, -12402, -12402, -12402, -12402, -12402, - 1493, 1493, 1493, 1493, 1493, 1493, 1493, 1493, - 1422, 1422, 1422, 1422, 1422, 1422, 1422, 1422, - -20907, -20907, -20907, -20907, 27758, 27758, 27758, 27758, - -3799, -3799, -3799, -3799, -15690, -15690, -15690, -15690, - -171, -171, -171, -171, 622, 622, 622, 622, - 1577, 1577, 1577, 1577, 182, 182, 182, 182, - -5827, -5827, 17363, 17363, -26360, -26360, -29057, -29057, - 5571, 5571, -1102, -1102, 21438, 21438, -26242, -26242, - 573, 573, -1325, -1325, 264, 264, 383, 383, - -829, -829, 1458, 1458, -1602, -1602, -130, -130, - -5689, -6516, 1496, 30967, -23565, 20179, 20710, 25080, - -12796, 26616, 16064, -12442, 9134, -650, -25986, 27837, - 1223, 652, -552, 1015, -1293, 1491, -282, -1544, - 516, -8, -320, -666, -1618, -1162, 126, 1469, - -335, -11477, -32227, 20494, -27738, 945, -14883, 6182, - 32010, 10631, 29175, -28762, -18486, 17560, -14430, -5276, - -1103, 555, -1251, 1550, 422, 177, -291, 1574, - -246, 1159, -777, -602, -1590, -872, 418, -156, - 11182, 13387, -14233, -21655, 13131, -4587, 23092, 5493, - -32502, 30317, -18741, 12639, 20100, 18525, 19529, -12619, - 430, 843, 871, 105, 587, -235, -460, 1653, - 778, -147, 1483, 1119, 644, 349, 329, -75, - 787, 787, 787, 787, 787, 787, 787, 787, - 787, 787, 787, 787, 787, 787, 787, 787, - -1517, -1517, -1517, -1517, -1517, -1517, -1517, -1517, - -1517, -1517, -1517, -1517, -1517, -1517, -1517, -1517, - 28191, 28191, 28191, 28191, 28191, 28191, 28191, 28191, - -16694, -16694, -16694, -16694, -16694, -16694, -16694, -16694, - 287, 287, 287, 287, 287, 287, 287, 287, - 202, 202, 202, 202, 202, 202, 202, 202, - 10690, 10690, 10690, 10690, 1358, 1358, 1358, 1358, - -11202, -11202, 
-11202, -11202, 31164, 31164, 31164, 31164, - 962, 962, 962, 962, -1202, -1202, -1202, -1202, - -1474, -1474, -1474, -1474, 1468, 1468, 1468, 1468, - -28073, -28073, 24313, 24313, -10532, -10532, 8800, 8800, - 18426, 18426, 8859, 8859, 26675, 26675, -16163, -16163, - -681, -681, 1017, 1017, 732, 732, 608, 608, - -1542, -1542, 411, 411, -205, -205, -1571, -1571, - 19883, -28250, -15887, -8898, -28309, 9075, -30199, 18249, - 13426, 14017, -29156, -12757, 16832, 4311, -24155, -17915, - -853, -90, -271, 830, 107, -1421, -247, -951, - -398, 961, -1508, -725, 448, -1065, 677, -1275, - -31183, 25435, -7382, 24391, -20927, 10946, 24214, 16989, - 10335, -7934, -22502, 10906, 31636, 28644, 23998, -17422, - 817, 603, 1322, -1465, -1215, 1218, -874, -1187, - -1185, -1278, -1510, -870, -108, 996, 958, 1522, - 20297, 2146, 15355, -32384, -6280, -14903, -11044, 14469, - -21498, -20198, 23210, -17442, -23860, -20257, 7756, 23132, - 1097, 610, -1285, 384, -136, -1335, 220, -1659, - -1530, 794, -854, 478, -308, 991, -1460, 1628, - -#define _16XSHIFT 624 - SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, - SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT, SHIFT -}}; -#endif diff --git a/pq-crypto/kyber_r3/kyber512r3_consts_avx2.h b/pq-crypto/kyber_r3/kyber512r3_consts_avx2.h deleted file mode 100644 index 1983ba44d6f..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_consts_avx2.h +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once - -#include "kyber512r3_params.h" - -#define _16XQ 0 -#define _16XQINV 16 -#define _16XV 32 -#define _16XFLO 48 -#define _16XFHI 64 -#define _16XMONTSQLO 80 -#define _16XMONTSQHI 96 -#define _16XMASK 112 -#define _REVIDXB 128 -#define _REVIDXD 144 -#define _ZETAS_EXP 160 -#define _16XSHIFT 624 - -/* The C ABI on MacOS exports all symbols with a leading - * underscore. This means that any symbols we refer to from - * C files (functions) can't be found, and all symbols we - * refer to from ASM also can't be found. 
- * - * This define helps us get around this - */ -#ifdef __ASSEMBLER__ -#if defined(__WIN32__) || defined(__APPLE__) -#define decorate(s) _##s -#define cdecl2(s) decorate(s) -#define cdecl(s) cdecl2(S2N_KYBER_512_R3_NAMESPACE(##s)) -#else -#define cdecl(s) S2N_KYBER_512_R3_NAMESPACE(##s) -#endif -#endif - - -#if defined(S2N_KYBER512R3_AVX2_BMI2) -#ifndef __ASSEMBLER__ -#include "kyber512r3_align_avx2.h" -typedef ALIGNED_INT16(640) qdata_t; -#define qdata S2N_KYBER_512_R3_NAMESPACE(qdata) -extern const qdata_t qdata; -#endif -#endif diff --git a/pq-crypto/kyber_r3/kyber512r3_fips202.c b/pq-crypto/kyber_r3/kyber512r3_fips202.c deleted file mode 100644 index 3632963ed81..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_fips202.c +++ /dev/null @@ -1,544 +0,0 @@ -/* Based on the public domain implementation in - * crypto_hash/keccakc512/simple/ from http://bench.cr.yp.to/supercop.html - * by Ronny Van Keer - * and the public domain "TweetFips202" implementation - * from https://twitter.com/tweetfips202 - * by Gilles Van Assche, Daniel J. 
Bernstein, and Peter Schwabe */ - -#include -#include - -#include "kyber512r3_params.h" -#include "kyber512r3_fips202.h" - -#define NROUNDS 24 -#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64 - (offset)))) - -S2N_ENSURE_PORTABLE_OPTIMIZATIONS - -/************************************************* - * Name: load64 - * - * Description: Load 8 bytes into uint64_t in little-endian order - * - * Arguments: - const uint8_t *x: pointer to input byte array - * - * Returns the loaded 64-bit unsigned integer - **************************************************/ -static uint64_t load64(const uint8_t *x) { - uint64_t r = 0; - for (size_t i = 0; i < 8; ++i) { - r |= (uint64_t)x[i] << 8 * i; - } - - return r; -} - -/************************************************* - * Name: store64 - * - * Description: Store a 64-bit integer to a byte array in little-endian order - * - * Arguments: - uint8_t *x: pointer to the output byte array - * - uint64_t u: input 64-bit unsigned integer - **************************************************/ -static void store64(uint8_t *x, uint64_t u) { - for (size_t i = 0; i < 8; ++i) { - x[i] = (uint8_t) (u >> 8 * i); - } -} - -/* Keccak round constants */ -static const uint64_t KeccakF_RoundConstants[NROUNDS] = { - 0x0000000000000001ULL, 0x0000000000008082ULL, - 0x800000000000808aULL, 0x8000000080008000ULL, - 0x000000000000808bULL, 0x0000000080000001ULL, - 0x8000000080008081ULL, 0x8000000000008009ULL, - 0x000000000000008aULL, 0x0000000000000088ULL, - 0x0000000080008009ULL, 0x000000008000000aULL, - 0x000000008000808bULL, 0x800000000000008bULL, - 0x8000000000008089ULL, 0x8000000000008003ULL, - 0x8000000000008002ULL, 0x8000000000000080ULL, - 0x000000000000800aULL, 0x800000008000000aULL, - 0x8000000080008081ULL, 0x8000000000008080ULL, - 0x0000000080000001ULL, 0x8000000080008008ULL, -}; - -/************************************************* - * Name: KeccakF1600_StatePermute - * - * Description: The Keccak F1600 Permutation - * - * Arguments: - uint64_t 
*state: pointer to input/output Keccak state - **************************************************/ -static void KeccakF1600_StatePermute(uint64_t *state) { - int round; - - uint64_t Aba, Abe, Abi, Abo, Abu; - uint64_t Aga, Age, Agi, Ago, Agu; - uint64_t Aka, Ake, Aki, Ako, Aku; - uint64_t Ama, Ame, Ami, Amo, Amu; - uint64_t Asa, Ase, Asi, Aso, Asu; - - /* copyFromState(A, state) */ - Aba = state[0]; - Abe = state[1]; - Abi = state[2]; - Abo = state[3]; - Abu = state[4]; - Aga = state[5]; - Age = state[6]; - Agi = state[7]; - Ago = state[8]; - Agu = state[9]; - Aka = state[10]; - Ake = state[11]; - Aki = state[12]; - Ako = state[13]; - Aku = state[14]; - Ama = state[15]; - Ame = state[16]; - Ami = state[17]; - Amo = state[18]; - Amu = state[19]; - Asa = state[20]; - Ase = state[21]; - Asi = state[22]; - Aso = state[23]; - Asu = state[24]; - - for (round = 0; round < NROUNDS; round += 2) { - uint64_t BCa, BCe, BCi, BCo, BCu; - uint64_t Da, De, Di, Do, Du; - uint64_t Eba, Ebe, Ebi, Ebo, Ebu; - uint64_t Ega, Ege, Egi, Ego, Egu; - uint64_t Eka, Eke, Eki, Eko, Eku; - uint64_t Ema, Eme, Emi, Emo, Emu; - uint64_t Esa, Ese, Esi, Eso, Esu; - - /* prepareTheta */ - BCa = Aba ^ Aga ^ Aka ^ Ama ^ Asa; - BCe = Abe ^ Age ^ Ake ^ Ame ^ Ase; - BCi = Abi ^ Agi ^ Aki ^ Ami ^ Asi; - BCo = Abo ^ Ago ^ Ako ^ Amo ^ Aso; - BCu = Abu ^ Agu ^ Aku ^ Amu ^ Asu; - - /* thetaRhoPiChiIotaPrepareTheta(round , A, E) */ - Da = BCu ^ ROL(BCe, 1); - De = BCa ^ ROL(BCi, 1); - Di = BCe ^ ROL(BCo, 1); - Do = BCi ^ ROL(BCu, 1); - Du = BCo ^ ROL(BCa, 1); - - Aba ^= Da; - BCa = Aba; - Age ^= De; - BCe = ROL(Age, 44); - Aki ^= Di; - BCi = ROL(Aki, 43); - Amo ^= Do; - BCo = ROL(Amo, 21); - Asu ^= Du; - BCu = ROL(Asu, 14); - Eba = BCa ^ ((~BCe) & BCi); - Eba ^= KeccakF_RoundConstants[round]; - Ebe = BCe ^ ((~BCi) & BCo); - Ebi = BCi ^ ((~BCo) & BCu); - Ebo = BCo ^ ((~BCu) & BCa); - Ebu = BCu ^ ((~BCa) & BCe); - - Abo ^= Do; - BCa = ROL(Abo, 28); - Agu ^= Du; - BCe = ROL(Agu, 20); - Aka ^= Da; - BCi = ROL(Aka, 
3); - Ame ^= De; - BCo = ROL(Ame, 45); - Asi ^= Di; - BCu = ROL(Asi, 61); - Ega = BCa ^ ((~BCe) & BCi); - Ege = BCe ^ ((~BCi) & BCo); - Egi = BCi ^ ((~BCo) & BCu); - Ego = BCo ^ ((~BCu) & BCa); - Egu = BCu ^ ((~BCa) & BCe); - - Abe ^= De; - BCa = ROL(Abe, 1); - Agi ^= Di; - BCe = ROL(Agi, 6); - Ako ^= Do; - BCi = ROL(Ako, 25); - Amu ^= Du; - BCo = ROL(Amu, 8); - Asa ^= Da; - BCu = ROL(Asa, 18); - Eka = BCa ^ ((~BCe) & BCi); - Eke = BCe ^ ((~BCi) & BCo); - Eki = BCi ^ ((~BCo) & BCu); - Eko = BCo ^ ((~BCu) & BCa); - Eku = BCu ^ ((~BCa) & BCe); - - Abu ^= Du; - BCa = ROL(Abu, 27); - Aga ^= Da; - BCe = ROL(Aga, 36); - Ake ^= De; - BCi = ROL(Ake, 10); - Ami ^= Di; - BCo = ROL(Ami, 15); - Aso ^= Do; - BCu = ROL(Aso, 56); - Ema = BCa ^ ((~BCe) & BCi); - Eme = BCe ^ ((~BCi) & BCo); - Emi = BCi ^ ((~BCo) & BCu); - Emo = BCo ^ ((~BCu) & BCa); - Emu = BCu ^ ((~BCa) & BCe); - - Abi ^= Di; - BCa = ROL(Abi, 62); - Ago ^= Do; - BCe = ROL(Ago, 55); - Aku ^= Du; - BCi = ROL(Aku, 39); - Ama ^= Da; - BCo = ROL(Ama, 41); - Ase ^= De; - BCu = ROL(Ase, 2); - Esa = BCa ^ ((~BCe) & BCi); - Ese = BCe ^ ((~BCi) & BCo); - Esi = BCi ^ ((~BCo) & BCu); - Eso = BCo ^ ((~BCu) & BCa); - Esu = BCu ^ ((~BCa) & BCe); - - /* prepareTheta */ - BCa = Eba ^ Ega ^ Eka ^ Ema ^ Esa; - BCe = Ebe ^ Ege ^ Eke ^ Eme ^ Ese; - BCi = Ebi ^ Egi ^ Eki ^ Emi ^ Esi; - BCo = Ebo ^ Ego ^ Eko ^ Emo ^ Eso; - BCu = Ebu ^ Egu ^ Eku ^ Emu ^ Esu; - - /* thetaRhoPiChiIotaPrepareTheta(round+1, E, A) */ - Da = BCu ^ ROL(BCe, 1); - De = BCa ^ ROL(BCi, 1); - Di = BCe ^ ROL(BCo, 1); - Do = BCi ^ ROL(BCu, 1); - Du = BCo ^ ROL(BCa, 1); - - Eba ^= Da; - BCa = Eba; - Ege ^= De; - BCe = ROL(Ege, 44); - Eki ^= Di; - BCi = ROL(Eki, 43); - Emo ^= Do; - BCo = ROL(Emo, 21); - Esu ^= Du; - BCu = ROL(Esu, 14); - Aba = BCa ^ ((~BCe) & BCi); - Aba ^= KeccakF_RoundConstants[round + 1]; - Abe = BCe ^ ((~BCi) & BCo); - Abi = BCi ^ ((~BCo) & BCu); - Abo = BCo ^ ((~BCu) & BCa); - Abu = BCu ^ ((~BCa) & BCe); - - Ebo ^= Do; - BCa = ROL(Ebo, 28); - Egu 
^= Du; - BCe = ROL(Egu, 20); - Eka ^= Da; - BCi = ROL(Eka, 3); - Eme ^= De; - BCo = ROL(Eme, 45); - Esi ^= Di; - BCu = ROL(Esi, 61); - Aga = BCa ^ ((~BCe) & BCi); - Age = BCe ^ ((~BCi) & BCo); - Agi = BCi ^ ((~BCo) & BCu); - Ago = BCo ^ ((~BCu) & BCa); - Agu = BCu ^ ((~BCa) & BCe); - - Ebe ^= De; - BCa = ROL(Ebe, 1); - Egi ^= Di; - BCe = ROL(Egi, 6); - Eko ^= Do; - BCi = ROL(Eko, 25); - Emu ^= Du; - BCo = ROL(Emu, 8); - Esa ^= Da; - BCu = ROL(Esa, 18); - Aka = BCa ^ ((~BCe) & BCi); - Ake = BCe ^ ((~BCi) & BCo); - Aki = BCi ^ ((~BCo) & BCu); - Ako = BCo ^ ((~BCu) & BCa); - Aku = BCu ^ ((~BCa) & BCe); - - Ebu ^= Du; - BCa = ROL(Ebu, 27); - Ega ^= Da; - BCe = ROL(Ega, 36); - Eke ^= De; - BCi = ROL(Eke, 10); - Emi ^= Di; - BCo = ROL(Emi, 15); - Eso ^= Do; - BCu = ROL(Eso, 56); - Ama = BCa ^ ((~BCe) & BCi); - Ame = BCe ^ ((~BCi) & BCo); - Ami = BCi ^ ((~BCo) & BCu); - Amo = BCo ^ ((~BCu) & BCa); - Amu = BCu ^ ((~BCa) & BCe); - - Ebi ^= Di; - BCa = ROL(Ebi, 62); - Ego ^= Do; - BCe = ROL(Ego, 55); - Eku ^= Du; - BCi = ROL(Eku, 39); - Ema ^= Da; - BCo = ROL(Ema, 41); - Ese ^= De; - BCu = ROL(Ese, 2); - Asa = BCa ^ ((~BCe) & BCi); - Ase = BCe ^ ((~BCi) & BCo); - Asi = BCi ^ ((~BCo) & BCu); - Aso = BCo ^ ((~BCu) & BCa); - Asu = BCu ^ ((~BCa) & BCe); - } - - /* copyToState(state, A) */ - state[0] = Aba; - state[1] = Abe; - state[2] = Abi; - state[3] = Abo; - state[4] = Abu; - state[5] = Aga; - state[6] = Age; - state[7] = Agi; - state[8] = Ago; - state[9] = Agu; - state[10] = Aka; - state[11] = Ake; - state[12] = Aki; - state[13] = Ako; - state[14] = Aku; - state[15] = Ama; - state[16] = Ame; - state[17] = Ami; - state[18] = Amo; - state[19] = Amu; - state[20] = Asa; - state[21] = Ase; - state[22] = Asi; - state[23] = Aso; - state[24] = Asu; -} - -/************************************************* - * Name: keccak_absorb - * - * Description: Absorb step of Keccak; - * non-incremental, starts by zeroeing the state. 
- * - * Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state - * - uint32_t r: rate in bytes (e.g., 168 for SHAKE128) - * - const uint8_t *m: pointer to input to be absorbed into s - * - size_t mlen: length of input in bytes - * - uint8_t p: domain-separation byte for different - * Keccak-derived functions - **************************************************/ -static void keccak_absorb(uint64_t *s, uint32_t r, const uint8_t *m, size_t mlen, uint8_t p) { - size_t i; - uint8_t t[200]; - - /* Zero state */ - for (i = 0; i < 25; ++i) { - s[i] = 0; - } - - while (mlen >= r) { - for (i = 0; i < r / 8; ++i) { - s[i] ^= load64(m + 8 * i); - } - - KeccakF1600_StatePermute(s); - mlen -= r; - m += r; - } - - for (i = 0; i < r; ++i) { - t[i] = 0; - } - for (i = 0; i < mlen; ++i) { - t[i] = m[i]; - } - t[i] = p; - t[r - 1] |= 128; - for (i = 0; i < r / 8; ++i) { - s[i] ^= load64(t + 8 * i); - } -} - -/************************************************* - * Name: keccak_squeezeblocks - * - * Description: Squeeze step of Keccak. Squeezes full blocks of r bytes each. - * Modifies the state. Can be called multiple times to keep - * squeezing, i.e., is incremental. - * - * Arguments: - uint8_t *h: pointer to output blocks - * - size_t nblocks: number of blocks to be - * squeezed (written to h) - * - uint64_t *s: pointer to input/output Keccak state - * - uint32_t r: rate in bytes (e.g., 168 for SHAKE128) - **************************************************/ -static void keccak_squeezeblocks(uint8_t *h, size_t nblocks, uint64_t *s, uint32_t r) { - while (nblocks > 0) { - KeccakF1600_StatePermute(s); - for (size_t i = 0; i < (r >> 3); i++) { - store64(h + 8 * i, s[i]); - } - h += r; - nblocks--; - } -} - -/************************************************* - * Name: shake128_absorb - * - * Description: Absorb step of the SHAKE128 XOF. - * non-incremental, starts by zeroeing the state. 
- * - * Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state - * - const uint8_t *input: pointer to input to be absorbed - * into s - * - size_t inlen: length of input in bytes - **************************************************/ -void shake128_absorb(shake128ctx *state, const uint8_t *input, size_t inlen) { - keccak_absorb(state->ctx, S2N_KYBER_512_R3_SHAKE128_RATE, input, inlen, 0x1F); -} - -/************************************************* - * Name: shake128_squeezeblocks - * - * Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of - * SHAKE128_RATE bytes each. Modifies the state. Can be called - * multiple times to keep squeezing, i.e., is incremental. - * - * Arguments: - uint8_t *output: pointer to output blocks - * - size_t nblocks: number of blocks to be squeezed - * (written to output) - * - shake128ctx *state: pointer to input/output Keccak state - **************************************************/ -void shake128_squeezeblocks(uint8_t *output, size_t nblocks, shake128ctx *state) { - keccak_squeezeblocks(output, nblocks, state->ctx, S2N_KYBER_512_R3_SHAKE128_RATE); -} - -/************************************************* - * Name: shake256_absorb - * - * Description: Absorb step of the SHAKE256 XOF. - * non-incremental, starts by zeroeing the state. - * - * Arguments: - shake256ctx *state: pointer to (uninitialized) output Keccak state - * - const uint8_t *input: pointer to input to be absorbed - * into s - * - size_t inlen: length of input in bytes - **************************************************/ -void shake256_absorb(shake256ctx *state, const uint8_t *input, size_t inlen) { - keccak_absorb(state->ctx, S2N_KYBER_512_R3_SHAKE256_RATE, input, inlen, 0x1F); -} - -/************************************************* - * Name: shake256_squeezeblocks - * - * Description: Squeeze step of SHAKE256 XOF. Squeezes full blocks of - * SHAKE256_RATE bytes each. Modifies the state. 
Can be called - * multiple times to keep squeezing, i.e., is incremental. - * - * Arguments: - uint8_t *output: pointer to output blocks - * - size_t nblocks: number of blocks to be squeezed - * (written to output) - * - shake256ctx *state: pointer to input/output Keccak state - **************************************************/ -void shake256_squeezeblocks(uint8_t *output, size_t nblocks, shake256ctx *state) { - keccak_squeezeblocks(output, nblocks, state->ctx, S2N_KYBER_512_R3_SHAKE256_RATE); -} - -/************************************************* - * Name: shake256 - * - * Description: SHAKE256 XOF with non-incremental API - * - * Arguments: - uint8_t *output: pointer to output - * - size_t outlen: requested output length in bytes - * - const uint8_t *input: pointer to input - * - size_t inlen: length of input in bytes - **************************************************/ -void shake256(uint8_t *output, size_t outlen, const uint8_t *input, size_t inlen) { - size_t nblocks = outlen / S2N_KYBER_512_R3_SHAKE256_RATE; - uint8_t t[S2N_KYBER_512_R3_SHAKE256_RATE]; - shake256ctx s; - - shake256_absorb(&s, input, inlen); - shake256_squeezeblocks(output, nblocks, &s); - - output += nblocks * S2N_KYBER_512_R3_SHAKE256_RATE; - outlen -= nblocks * S2N_KYBER_512_R3_SHAKE256_RATE; - - if (outlen) { - shake256_squeezeblocks(t, 1, &s); - for (size_t i = 0; i < outlen; ++i) { - output[i] = t[i]; - } - } -} - -/************************************************* - * Name: sha3_256 - * - * Description: SHA3-256 with non-incremental API - * - * Arguments: - uint8_t *output: pointer to output - * - const uint8_t *input: pointer to input - * - size_t inlen: length of input in bytes - **************************************************/ -void sha3_256(uint8_t *output, const uint8_t *input, size_t inlen) { - uint64_t s[25]; - uint8_t t[S2N_KYBER_512_R3_SHA3_256_RATE]; - - /* Absorb input */ - keccak_absorb(s, S2N_KYBER_512_R3_SHA3_256_RATE, input, inlen, 0x06); - - /* Squeeze output */ 
- keccak_squeezeblocks(t, 1, s, S2N_KYBER_512_R3_SHA3_256_RATE); - - for (size_t i = 0; i < 32; i++) { - output[i] = t[i]; - } -} - -/************************************************* - * Name: sha3_512 - * - * Description: SHA3-512 with non-incremental API - * - * Arguments: - uint8_t *output: pointer to output - * - const uint8_t *input: pointer to input - * - size_t inlen: length of input in bytes - **************************************************/ -void sha3_512(uint8_t *output, const uint8_t *input, size_t inlen) { - uint64_t s[25]; - uint8_t t[S2N_KYBER_512_R3_SHA3_512_RATE]; - - /* Absorb input */ - keccak_absorb(s, S2N_KYBER_512_R3_SHA3_512_RATE, input, inlen, 0x06); - - /* Squeeze output */ - keccak_squeezeblocks(t, 1, s, S2N_KYBER_512_R3_SHA3_512_RATE); - - for (size_t i = 0; i < 64; i++) { - output[i] = t[i]; - } -} diff --git a/pq-crypto/kyber_r3/kyber512r3_fips202.h b/pq-crypto/kyber_r3/kyber512r3_fips202.h deleted file mode 100644 index 1f4f395f72d..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_fips202.h +++ /dev/null @@ -1,68 +0,0 @@ -#pragma once - -#include -#include -#include "kyber512r3_params.h" - -#define S2N_KYBER_512_R3_SHAKE128_RATE 168 -#define S2N_KYBER_512_R3_SHAKE256_RATE 136 -#define S2N_KYBER_512_R3_SHA3_256_RATE 136 -#define S2N_KYBER_512_R3_SHA3_384_RATE 104 -#define S2N_KYBER_512_R3_SHA3_512_RATE 72 - -#define S2N_KYBER_512_R3_PQC_SHAKECTX_SIZE 25 - -/* Context for non-incremental API */ -#define shake128ctx S2N_KYBER_512_R3_NAMESPACE(shake128ctx) -typedef struct { - uint64_t ctx[S2N_KYBER_512_R3_PQC_SHAKECTX_SIZE]; -} shake128ctx; - -/* Context for non-incremental API */ -#define shake256ctx S2N_KYBER_512_R3_NAMESPACE(shake256ctx) -typedef struct { - uint64_t ctx[S2N_KYBER_512_R3_PQC_SHAKECTX_SIZE]; -} shake256ctx; - -/* Initialize the state and absorb the provided input. - * - * This function does not support being called multiple times - * with the same state. 
- */ -#define shake128_absorb S2N_KYBER_512_R3_NAMESPACE(shake128_absorb) -void shake128_absorb(shake128ctx *state, const uint8_t *input, size_t inlen); -/* Squeeze output out of the sponge. - * - * Supports being called multiple times - */ -#define shake128_squeezeblocks S2N_KYBER_512_R3_NAMESPACE(shake128_squeezeblocks) -void shake128_squeezeblocks(uint8_t *output, size_t nblocks, shake128ctx *state); - -/* Copy the state. */ -#define shake128_ctx_clone S2N_KYBER_512_R3_NAMESPACE(shake128_ctx_clone) -void shake128_ctx_clone(shake128ctx *dest, const shake128ctx *src); - -/* Initialize the state and absorb the provided input. - * - * This function does not support being called multiple times - * with the same state. - */ -#define shake256_absorb S2N_KYBER_512_R3_NAMESPACE(shake256_absorb) -void shake256_absorb(shake256ctx *state, const uint8_t *input, size_t inlen); -/* Squeeze output out of the sponge. - * - * Supports being called multiple times - */ -#define shake256_squeezeblocks S2N_KYBER_512_R3_NAMESPACE(shake256_squeezeblocks) -void shake256_squeezeblocks(uint8_t *output, size_t nblocks, shake256ctx *state); - -/* One-stop SHAKE256 call */ -#define shake256 S2N_KYBER_512_R3_NAMESPACE(shake256) -void shake256(uint8_t *output, size_t outlen, const uint8_t *input, size_t inlen); - -#define sha3_256 S2N_KYBER_512_R3_NAMESPACE(sha3_256) -void sha3_256(uint8_t *output, const uint8_t *input, size_t inlen); - -/* One-stop SHA3-512 shop */ -#define sha3_512 S2N_KYBER_512_R3_NAMESPACE(sha3_512) -void sha3_512(uint8_t *output, const uint8_t *input, size_t inlen); diff --git a/pq-crypto/kyber_r3/kyber512r3_fips202x4_avx2.c b/pq-crypto/kyber_r3/kyber512r3_fips202x4_avx2.c deleted file mode 100644 index 5f07fb44a39..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_fips202x4_avx2.c +++ /dev/null @@ -1,210 +0,0 @@ -#include -#include -#include -#include "kyber512r3_fips202.h" -#include "kyber512r3_fips202x4_avx2.h" - -#if defined(S2N_KYBER512R3_AVX2_BMI2) -#include - 
-#define KeccakF1600_StatePermute4x S2N_KYBER_512_R3_NAMESPACE(KeccakP1600times4_PermuteAll_24rounds) -extern void KeccakF1600_StatePermute4x(__m256i *s); - -/* Implementation is used from Crystal Kyber Repository - * See for more details: https://github.com/XKCP/XKCP */ - -static void keccakx4_absorb_once(__m256i s[25], - unsigned int r, - const uint8_t *in0, - const uint8_t *in1, - const uint8_t *in2, - const uint8_t *in3, - size_t inlen, - uint8_t p) -{ - size_t i; - uint64_t pos = 0; - __m256i t, idx; - - for(i = 0; i < 25; ++i) - s[i] = _mm256_setzero_si256(); - - idx = _mm256_set_epi64x((long long)in3, (long long)in2, (long long)in1, (long long)in0); - while(inlen >= r) { - for(i = 0; i < r/8; ++i) { - t = _mm256_i64gather_epi64((long long *)pos, idx, 1); - s[i] = _mm256_xor_si256(s[i], t); - pos += 8; - } - inlen -= r; - - KeccakF1600_StatePermute4x(s); - } - - for(i = 0; i < inlen/8; ++i) { - t = _mm256_i64gather_epi64((long long *)pos, idx, 1); - s[i] = _mm256_xor_si256(s[i], t); - pos += 8; - } - inlen -= 8*i; - - if(inlen) { - t = _mm256_i64gather_epi64((long long *)pos, idx, 1); - idx = _mm256_set1_epi64x((1ULL << (8*inlen)) - 1); - t = _mm256_and_si256(t, idx); - s[i] = _mm256_xor_si256(s[i], t); - } - - t = _mm256_set1_epi64x((uint64_t)p << 8*inlen); - s[i] = _mm256_xor_si256(s[i], t); - t = _mm256_set1_epi64x(1ULL << 63); - s[r/8 - 1] = _mm256_xor_si256(s[r/8 - 1], t); -} - -static void keccakx4_squeezeblocks(uint8_t *out0, - uint8_t *out1, - uint8_t *out2, - uint8_t *out3, - size_t nblocks, - unsigned int r, - __m256i s[25]) -{ - unsigned int i; - __m128d t; - - while(nblocks > 0) { - KeccakF1600_StatePermute4x(s); - for(i=0; i < r/8; ++i) { - t = _mm_castsi128_pd(_mm256_castsi256_si128(s[i])); - // correcting cast-align errors - // old version: _mm_storel_pd((__attribute__((__may_alias__)) double *)&out0[8*i], t); - _mm_storel_pd((__attribute__((__may_alias__)) void *)&out0[8*i], t); - // old version: _mm_storeh_pd((__attribute__((__may_alias__)) 
double *)&out1[8*i], t); - _mm_storeh_pd((__attribute__((__may_alias__)) void *)&out1[8*i], t); - t = _mm_castsi128_pd(_mm256_extracti128_si256(s[i],1)); - // old version: _mm_storel_pd((__attribute__((__may_alias__)) double *)&out2[8*i], t); - _mm_storel_pd((__attribute__((__may_alias__)) void *)&out2[8*i], t); - // old version: _mm_storeh_pd((__attribute__((__may_alias__)) double *)&out3[8*i], t); - _mm_storeh_pd((__attribute__((__may_alias__)) void *)&out3[8*i], t); - } - - out0 += r; - out1 += r; - out2 += r; - out3 += r; - --nblocks; - } -} - -void shake128x4_absorb_once(keccakx4_state *state, - const uint8_t *in0, - const uint8_t *in1, - const uint8_t *in2, - const uint8_t *in3, - size_t inlen) -{ - keccakx4_absorb_once(state->s, S2N_KYBER_512_R3_SHAKE128_RATE, in0, in1, in2, in3, inlen, 0x1F); -} - -void shake128x4_squeezeblocks(uint8_t *out0, - uint8_t *out1, - uint8_t *out2, - uint8_t *out3, - size_t nblocks, - keccakx4_state *state) -{ - keccakx4_squeezeblocks(out0, out1, out2, out3, nblocks, S2N_KYBER_512_R3_SHAKE128_RATE, state->s); -} - -void shake256x4_absorb_once(keccakx4_state *state, - const uint8_t *in0, - const uint8_t *in1, - const uint8_t *in2, - const uint8_t *in3, - size_t inlen) -{ - keccakx4_absorb_once(state->s, S2N_KYBER_512_R3_SHAKE256_RATE, in0, in1, in2, in3, inlen, 0x1F); -} - -void shake256x4_squeezeblocks(uint8_t *out0, - uint8_t *out1, - uint8_t *out2, - uint8_t *out3, - size_t nblocks, - keccakx4_state *state) -{ - keccakx4_squeezeblocks(out0, out1, out2, out3, nblocks, S2N_KYBER_512_R3_SHAKE256_RATE, state->s); -} - -void shake128x4(uint8_t *out0, - uint8_t *out1, - uint8_t *out2, - uint8_t *out3, - size_t outlen, - const uint8_t *in0, - const uint8_t *in1, - const uint8_t *in2, - const uint8_t *in3, - size_t inlen) -{ - unsigned int i; - size_t nblocks = outlen/S2N_KYBER_512_R3_SHAKE128_RATE; - uint8_t t[4][S2N_KYBER_512_R3_SHAKE128_RATE]; - keccakx4_state state; - - shake128x4_absorb_once(&state, in0, in1, in2, in3, inlen); - 
shake128x4_squeezeblocks(out0, out1, out2, out3, nblocks, &state); - - out0 += nblocks*S2N_KYBER_512_R3_SHAKE128_RATE; - out1 += nblocks*S2N_KYBER_512_R3_SHAKE128_RATE; - out2 += nblocks*S2N_KYBER_512_R3_SHAKE128_RATE; - out3 += nblocks*S2N_KYBER_512_R3_SHAKE128_RATE; - outlen -= nblocks*S2N_KYBER_512_R3_SHAKE128_RATE; - - if(outlen) { - shake128x4_squeezeblocks(t[0], t[1], t[2], t[3], 1, &state); - for(i = 0; i < outlen; ++i) { - out0[i] = t[0][i]; - out1[i] = t[1][i]; - out2[i] = t[2][i]; - out3[i] = t[3][i]; - } - } -} - -void shake256x4(uint8_t *out0, - uint8_t *out1, - uint8_t *out2, - uint8_t *out3, - size_t outlen, - const uint8_t *in0, - const uint8_t *in1, - const uint8_t *in2, - const uint8_t *in3, - size_t inlen) -{ - unsigned int i; - size_t nblocks = outlen/S2N_KYBER_512_R3_SHAKE256_RATE; - uint8_t t[4][S2N_KYBER_512_R3_SHAKE256_RATE]; - keccakx4_state state; - - shake256x4_absorb_once(&state, in0, in1, in2, in3, inlen); - shake256x4_squeezeblocks(out0, out1, out2, out3, nblocks, &state); - - out0 += nblocks*S2N_KYBER_512_R3_SHAKE256_RATE; - out1 += nblocks*S2N_KYBER_512_R3_SHAKE256_RATE; - out2 += nblocks*S2N_KYBER_512_R3_SHAKE256_RATE; - out3 += nblocks*S2N_KYBER_512_R3_SHAKE256_RATE; - outlen -= nblocks*S2N_KYBER_512_R3_SHAKE256_RATE; - - if(outlen) { - shake256x4_squeezeblocks(t[0], t[1], t[2], t[3], 1, &state); - for(i = 0; i < outlen; ++i) { - out0[i] = t[0][i]; - out1[i] = t[1][i]; - out2[i] = t[2][i]; - out3[i] = t[3][i]; - } - } -} -#endif diff --git a/pq-crypto/kyber_r3/kyber512r3_fips202x4_avx2.h b/pq-crypto/kyber_r3/kyber512r3_fips202x4_avx2.h deleted file mode 100644 index 8c4896724ca..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_fips202x4_avx2.h +++ /dev/null @@ -1,70 +0,0 @@ -#pragma once - -#include -#include -#include "kyber512r3_params.h" - -#if defined(S2N_KYBER512R3_AVX2_BMI2) -#include - -#define keccakx4_state S2N_KYBER_512_R3_NAMESPACE(keccakx4_state) -typedef struct { - __m256i s[25]; -} keccakx4_state; - -#define 
shake128x4_absorb_once S2N_KYBER_512_R3_NAMESPACE(shake128x4_absorb_once) -void shake128x4_absorb_once(keccakx4_state *state, - const uint8_t *in0, - const uint8_t *in1, - const uint8_t *in2, - const uint8_t *in3, - size_t inlen); - -#define shake128x4_squeezeblocks S2N_KYBER_512_R3_NAMESPACE(shake128x4_squeezeblocks) -void shake128x4_squeezeblocks(uint8_t *out0, - uint8_t *out1, - uint8_t *out2, - uint8_t *out3, - size_t nblocks, - keccakx4_state *state); - -#define shake256x4_absorb_once S2N_KYBER_512_R3_NAMESPACE(shake256x4_absorb_once) -void shake256x4_absorb_once(keccakx4_state *state, - const uint8_t *in0, - const uint8_t *in1, - const uint8_t *in2, - const uint8_t *in3, - size_t inlen); - -#define shake256x4_squeezeblocks S2N_KYBER_512_R3_NAMESPACE(shake256x4_squeezeblocks) -void shake256x4_squeezeblocks(uint8_t *out0, - uint8_t *out1, - uint8_t *out2, - uint8_t *out3, - size_t nblocks, - keccakx4_state *state); - -#define shake128x4 S2N_KYBER_512_R3_NAMESPACE(shake128x4) -void shake128x4(uint8_t *out0, - uint8_t *out1, - uint8_t *out2, - uint8_t *out3, - size_t outlen, - const uint8_t *in0, - const uint8_t *in1, - const uint8_t *in2, - const uint8_t *in3, - size_t inlen); - -#define shake256x4 S2N_KYBER_512_R3_NAMESPACE(shake256x4) -void shake256x4(uint8_t *out0, - uint8_t *out1, - uint8_t *out2, - uint8_t *out3, - size_t outlen, - const uint8_t *in0, - const uint8_t *in1, - const uint8_t *in2, - const uint8_t *in3, - size_t inlen); -#endif diff --git a/pq-crypto/kyber_r3/kyber512r3_fq_avx2.S b/pq-crypto/kyber_r3/kyber512r3_fq_avx2.S deleted file mode 100644 index 3492489a670..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_fq_avx2.S +++ /dev/null @@ -1,122 +0,0 @@ -#include "kyber512r3_consts_avx2.h" - -// The small macros (.inc files) are combined with .S files directly -/*****.include "fq.inc"*****/ -/***************************/ -.macro red16 r,rs=0,x=12 -vpmulhw %ymm1,%ymm\r,%ymm\x -.if \rs -vpmulhrsw %ymm\rs,%ymm\x,%ymm\x -.else -vpsraw 
$10,%ymm\x,%ymm\x -.endif -vpmullw %ymm0,%ymm\x,%ymm\x -vpsubw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro csubq r,x=12 -vpsubw %ymm0,%ymm\r,%ymm\r -vpsraw $15,%ymm\r,%ymm\x -vpand %ymm0,%ymm\x,%ymm\x -vpaddw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro caddq r,x=12 -vpsraw $15,%ymm\r,%ymm\x -vpand %ymm0,%ymm\x,%ymm\x -vpaddw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro fqmulprecomp al,ah,b,x=12 -vpmullw %ymm\al,%ymm\b,%ymm\x -vpmulhw %ymm\ah,%ymm\b,%ymm\b -vpmulhw %ymm0,%ymm\x,%ymm\x -vpsubw %ymm\x,%ymm\b,%ymm\b -.endm -/***************************/ - -.text -reduce128_avx: -#load -vmovdqa (%rdi),%ymm2 -vmovdqa 32(%rdi),%ymm3 -vmovdqa 64(%rdi),%ymm4 -vmovdqa 96(%rdi),%ymm5 -vmovdqa 128(%rdi),%ymm6 -vmovdqa 160(%rdi),%ymm7 -vmovdqa 192(%rdi),%ymm8 -vmovdqa 224(%rdi),%ymm9 - -red16 2 -red16 3 -red16 4 -red16 5 -red16 6 -red16 7 -red16 8 -red16 9 - -#store -vmovdqa %ymm2,(%rdi) -vmovdqa %ymm3,32(%rdi) -vmovdqa %ymm4,64(%rdi) -vmovdqa %ymm5,96(%rdi) -vmovdqa %ymm6,128(%rdi) -vmovdqa %ymm7,160(%rdi) -vmovdqa %ymm8,192(%rdi) -vmovdqa %ymm9,224(%rdi) - -ret - -.global cdecl(reduce_avx2_asm) -cdecl(reduce_avx2_asm): -#consts -vmovdqa _16XQ*2(%rsi),%ymm0 -vmovdqa _16XV*2(%rsi),%ymm1 -call reduce128_avx -add $256,%rdi -call reduce128_avx -ret - -tomont128_avx: -#load -vmovdqa (%rdi),%ymm3 -vmovdqa 32(%rdi),%ymm4 -vmovdqa 64(%rdi),%ymm5 -vmovdqa 96(%rdi),%ymm6 -vmovdqa 128(%rdi),%ymm7 -vmovdqa 160(%rdi),%ymm8 -vmovdqa 192(%rdi),%ymm9 -vmovdqa 224(%rdi),%ymm10 - -fqmulprecomp 1,2,3,11 -fqmulprecomp 1,2,4,12 -fqmulprecomp 1,2,5,13 -fqmulprecomp 1,2,6,14 -fqmulprecomp 1,2,7,15 -fqmulprecomp 1,2,8,11 -fqmulprecomp 1,2,9,12 -fqmulprecomp 1,2,10,13 - -#store -vmovdqa %ymm3,(%rdi) -vmovdqa %ymm4,32(%rdi) -vmovdqa %ymm5,64(%rdi) -vmovdqa %ymm6,96(%rdi) -vmovdqa %ymm7,128(%rdi) -vmovdqa %ymm8,160(%rdi) -vmovdqa %ymm9,192(%rdi) -vmovdqa %ymm10,224(%rdi) - -ret - -.global cdecl(tomont_avx2_asm) -cdecl(tomont_avx2_asm): -#consts -vmovdqa _16XQ*2(%rsi),%ymm0 -vmovdqa _16XMONTSQLO*2(%rsi),%ymm1 -vmovdqa 
_16XMONTSQHI*2(%rsi),%ymm2 -call tomont128_avx -add $256,%rdi -call tomont128_avx -ret diff --git a/pq-crypto/kyber_r3/kyber512r3_indcpa.c b/pq-crypto/kyber_r3/kyber512r3_indcpa.c deleted file mode 100644 index bbb77259df4..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_indcpa.c +++ /dev/null @@ -1,325 +0,0 @@ -#include -#include -#include "kyber512r3_params.h" -#include "kyber512r3_indcpa.h" -#include "kyber512r3_poly.h" -#include "kyber512r3_polyvec.h" -#include "kyber512r3_fips202.h" -#include "kyber512r3_symmetric.h" -#include "pq-crypto/s2n_pq_random.h" -#include "utils/s2n_safety.h" - -S2N_ENSURE_PORTABLE_OPTIMIZATIONS - -/************************************************* -* Name: pack_pk -* -* Description: Serialize the public key as concatenation of the -* serialized vector of polynomials pk -* and the public seed used to generate the matrix A. -* -* Arguments: uint8_t *r: pointer to the output serialized public key -* polyvec *pk: pointer to the input public-key polyvec -* const uint8_t *seed: pointer to the input public seed -**************************************************/ -static void pack_pk(uint8_t r[S2N_KYBER_512_R3_INDCPA_PUBLICKEYBYTES], polyvec *pk, const uint8_t seed[S2N_KYBER_512_R3_SYMBYTES]) { - polyvec_tobytes(r, pk); - for (size_t i = 0; i < S2N_KYBER_512_R3_SYMBYTES; i++) { - r[i + S2N_KYBER_512_R3_POLYVECBYTES] = seed[i]; - } -} - -/************************************************* -* Name: unpack_pk -* -* Description: De-serialize public key from a byte array; -* approximate inverse of pack_pk -* -* Arguments: - polyvec *pk: pointer to output public-key -* polynomial vector -* - uint8_t *seed: pointer to output seed to generate -* matrix A -* - const uint8_t *packedpk: pointer to input serialized public key -**************************************************/ -static void unpack_pk(polyvec *pk, uint8_t seed[S2N_KYBER_512_R3_SYMBYTES], const uint8_t packedpk[S2N_KYBER_512_R3_INDCPA_PUBLICKEYBYTES]) { - polyvec_frombytes(pk, packedpk); 
- for (size_t i = 0; i < S2N_KYBER_512_R3_SYMBYTES; i++) { - seed[i] = packedpk[i + S2N_KYBER_512_R3_POLYVECBYTES]; - } -} - -/************************************************* -* Name: pack_sk -* -* Description: Serialize the secret key -* -* Arguments: - uint8_t *r: pointer to output serialized secret key -* - polyvec *sk: pointer to input vector of polynomials (secret key) -**************************************************/ -static void pack_sk(uint8_t r[S2N_KYBER_512_R3_INDCPA_SECRETKEYBYTES], polyvec *sk) { - polyvec_tobytes(r, sk); -} - -/************************************************* -* Name: unpack_sk -* -* Description: De-serialize the secret key; -* inverse of pack_sk -* -* Arguments: - polyvec *sk: pointer to output vector of -* polynomials (secret key) -* - const uint8_t *packedsk: pointer to input serialized secret key -**************************************************/ -static void unpack_sk(polyvec *sk, const uint8_t packedsk[S2N_KYBER_512_R3_INDCPA_SECRETKEYBYTES]) { - polyvec_frombytes(sk, packedsk); -} - -/************************************************* -* Name: pack_ciphertext -* -* Description: Serialize the ciphertext as concatenation of the -* compressed and serialized vector of polynomials b -* and the compressed and serialized polynomial v -* -* Arguments: uint8_t *r: pointer to the output serialized ciphertext -* poly *pk: pointer to the input vector of polynomials b -* poly *v: pointer to the input polynomial v -**************************************************/ -static void pack_ciphertext(uint8_t r[S2N_KYBER_512_R3_INDCPA_BYTES], polyvec *b, poly *v) { - polyvec_compress(r, b); - poly_compress(r + S2N_KYBER_512_R3_POLYVECCOMPRESSEDBYTES, v); -} - -/************************************************* -* Name: unpack_ciphertext -* -* Description: De-serialize and decompress ciphertext from a byte array; -* approximate inverse of pack_ciphertext -* -* Arguments: - polyvec *b: pointer to the output vector of polynomials b -* - poly *v: 
pointer to the output polynomial v -* - const uint8_t *c: pointer to the input serialized ciphertext -**************************************************/ -static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t c[S2N_KYBER_512_R3_INDCPA_BYTES]) { - polyvec_decompress(b, c); - poly_decompress(v, c + S2N_KYBER_512_R3_POLYVECCOMPRESSEDBYTES); -} - -/************************************************* -* Name: rej_uniform -* -* Description: Run rejection sampling on uniform random bytes to generate -* uniform random integers mod q -* -* Arguments: - int16_t *r: pointer to output buffer -* - unsigned int len: requested number of 16-bit integers -* (uniform mod q) -* - const uint8_t *buf: pointer to input buffer -* (assumed to be uniform random bytes) -* - unsigned int buflen: length of input buffer in bytes -* -* Returns number of sampled 16-bit integers (at most len) -**************************************************/ -static unsigned int rej_uniform(int16_t *r, unsigned int len, const uint8_t *buf, unsigned int buflen) { - unsigned int ctr, pos; - - ctr = pos = 0; - while (ctr < len && pos + 3 <= buflen) { - uint16_t val0 = ((buf[pos + 0] >> 0) | ((uint16_t)buf[pos + 1] << 8)) & 0xFFF; - uint16_t val1 = ((buf[pos + 1] >> 4) | ((uint16_t)buf[pos + 2] << 4)) & 0xFFF; - pos += 3; - - if (val0 < S2N_KYBER_512_R3_Q) { - r[ctr++] = val0; - } - if (ctr < len && val1 < S2N_KYBER_512_R3_Q) { - r[ctr++] = val1; - } - } - - return ctr; -} - -/************************************************* -* Name: gen_matrix -* -* Description: Deterministically generate matrix A (or the transpose of A) -* from a seed. Entries of the matrix are polynomials that look -* uniformly random. 
Performs rejection sampling on output of -* a XOF -* -* Arguments: - polyvec *a: pointer to ouptput matrix A -* - const uint8_t *seed: pointer to input seed -* - int transposed: boolean deciding whether A or A^T -* is generated -**************************************************/ -#define XOF_BLOCKBYTES 168 -#define GEN_MATRIX_NBLOCKS ((12*S2N_KYBER_512_R3_N/8*(1 << 12)/S2N_KYBER_512_R3_Q + XOF_BLOCKBYTES)/XOF_BLOCKBYTES) -static void gen_matrix(polyvec *a, const uint8_t seed[S2N_KYBER_512_R3_SYMBYTES], int transposed) { - unsigned int ctr, buflen, off; - uint8_t buf[GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES + 2]; - xof_state state; - - for (unsigned int i = 0; i < S2N_KYBER_512_R3_K; i++) { - for (unsigned int j = 0; j < S2N_KYBER_512_R3_K; j++) { - if (transposed) { - kyber_shake128_absorb(&state, seed, i, j); - } else { - kyber_shake128_absorb(&state, seed, j, i); - } - - shake128_squeezeblocks(buf, GEN_MATRIX_NBLOCKS, &state); - buflen = GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES; - ctr = rej_uniform(a[i].vec[j].coeffs, S2N_KYBER_512_R3_N, buf, buflen); - - while (ctr < S2N_KYBER_512_R3_N) { - off = buflen % 3; - for (unsigned int k = 0; k < off; k++) { - buf[k] = buf[buflen - off + k]; - } - shake128_squeezeblocks(buf + off, 1, &state); - buflen = off + XOF_BLOCKBYTES; - ctr += rej_uniform(a[i].vec[j].coeffs + ctr, S2N_KYBER_512_R3_N - ctr, buf, buflen); - } - } - } -} - -/************************************************* -* Name: indcpa_keypair -* -* Description: Generates public and private key for the CPA-secure -* public-key encryption scheme underlying Kyber -* -* Arguments: - uint8_t *pk: pointer to output public key -* (of length S2N_KYBER_512_R3_INDCPA_PUBLICKEYBYTES bytes) -* - uint8_t *sk: pointer to output private key -* (of length S2N_KYBER_512_R3_INDCPA_SECRETKEYBYTES bytes) -* -* Returns: 0 on success -* !0 on failure -**************************************************/ -int indcpa_keypair(uint8_t pk[S2N_KYBER_512_R3_INDCPA_PUBLICKEYBYTES], uint8_t 
sk[S2N_KYBER_512_R3_INDCPA_SECRETKEYBYTES]) { - uint8_t buf[2 * S2N_KYBER_512_R3_SYMBYTES]; - const uint8_t *publicseed = buf; - const uint8_t *noiseseed = buf + S2N_KYBER_512_R3_SYMBYTES; - uint8_t nonce = 0; - polyvec a[S2N_KYBER_512_R3_K], e, pkpv, skpv; - - POSIX_GUARD_RESULT(s2n_get_random_bytes(buf, S2N_KYBER_512_R3_SYMBYTES)); - sha3_512(buf, buf, S2N_KYBER_512_R3_SYMBYTES); - - gen_matrix(a, publicseed, 0); - - for (unsigned int i = 0; i < S2N_KYBER_512_R3_K; i++) { - poly_getnoise_eta1(&skpv.vec[i], noiseseed, nonce++); - } - for (unsigned int i = 0; i < S2N_KYBER_512_R3_K; i++) { - poly_getnoise_eta1(&e.vec[i], noiseseed, nonce++); - } - - polyvec_ntt(&skpv); - polyvec_ntt(&e); - - //* matrix-vector multiplication */ - for (unsigned int i = 0; i < S2N_KYBER_512_R3_K; i++) { - polyvec_pointwise_acc_montgomery(&pkpv.vec[i], &a[i], &skpv); - poly_tomont(&pkpv.vec[i]); - } - - polyvec_add(&pkpv, &pkpv, &e); - polyvec_reduce(&pkpv); - - pack_sk(sk, &skpv); - pack_pk(pk, &pkpv, publicseed); - - return 0; -} - -/************************************************* -* Name: indcpa_enc -* -* Description: Encryption function of the CPA-secure -* public-key encryption scheme underlying Kyber. 
-* -* Arguments: - uint8_t *c: pointer to output ciphertext -* (of length S2N_KYBER_512_R3_INDCPA_BYTES bytes) -* - const uint8_t *m: pointer to input message -* (of length S2N_KYBER_512_R3_INDCPA_MSGBYTES bytes) -* - const uint8_t *pk: pointer to input public key -* (of length S2N_KYBER_512_R3_INDCPA_PUBLICKEYBYTES) -* - const uint8_t *coins: pointer to input random coins -* used as seed (of length S2N_KYBER_512_R3_SYMBYTES) -* to deterministically generate all -* randomness -**************************************************/ -void indcpa_enc(uint8_t c[S2N_KYBER_512_R3_INDCPA_BYTES], const uint8_t m[S2N_KYBER_512_R3_INDCPA_MSGBYTES], - const uint8_t pk[S2N_KYBER_512_R3_INDCPA_PUBLICKEYBYTES], const uint8_t coins[S2N_KYBER_512_R3_SYMBYTES]) { - uint8_t seed[S2N_KYBER_512_R3_SYMBYTES]; - uint8_t nonce = 0; - polyvec sp, pkpv, ep, at[S2N_KYBER_512_R3_K], bp; - poly v, k, epp; - - unpack_pk(&pkpv, seed, pk); - poly_frommsg(&k, m); - gen_matrix(at, seed, 1); - - for (unsigned int i = 0; i < S2N_KYBER_512_R3_K; i++) { - poly_getnoise_eta1(sp.vec + i, coins, nonce++); - } - for (unsigned int i = 0; i < S2N_KYBER_512_R3_K; i++) { - poly_getnoise_eta2(ep.vec + i, coins, nonce++); - } - poly_getnoise_eta2(&epp, coins, nonce++); - - polyvec_ntt(&sp); - - /* matrix-vector multiplication */ - for (unsigned int i = 0; i < S2N_KYBER_512_R3_K; i++) { - polyvec_pointwise_acc_montgomery(&bp.vec[i], &at[i], &sp); - } - - polyvec_pointwise_acc_montgomery(&v, &pkpv, &sp); - - polyvec_invntt_tomont(&bp); - poly_invntt_tomont(&v); - - polyvec_add(&bp, &bp, &ep); - poly_add(&v, &v, &epp); - poly_add(&v, &v, &k); - polyvec_reduce(&bp); - poly_reduce(&v); - - pack_ciphertext(c, &bp, &v); -} - -/************************************************* -* Name: indcpa_dec -* -* Description: Decryption function of the CPA-secure -* public-key encryption scheme underlying Kyber. 
-* -* Arguments: - uint8_t *m: pointer to output decrypted message -* (of length S2N_KYBER_512_R3_INDCPA_MSGBYTES) -* - const uint8_t *c: pointer to input ciphertext -* (of length S2N_KYBER_512_R3_INDCPA_BYTES) -* - const uint8_t *sk: pointer to input secret key -* (of length S2N_KYBER_512_R3_INDCPA_SECRETKEYBYTES) -**************************************************/ -void indcpa_dec(uint8_t m[S2N_KYBER_512_R3_INDCPA_MSGBYTES], const uint8_t c[S2N_KYBER_512_R3_INDCPA_BYTES], - const uint8_t sk[S2N_KYBER_512_R3_INDCPA_SECRETKEYBYTES]) { - polyvec bp, skpv; - poly v, mp; - - unpack_ciphertext(&bp, &v, c); - unpack_sk(&skpv, sk); - - polyvec_ntt(&bp); - polyvec_pointwise_acc_montgomery(&mp, &skpv, &bp); - poly_invntt_tomont(&mp); - - poly_sub(&mp, &v, &mp); - poly_reduce(&mp); - - poly_tomsg(m, &mp); -} diff --git a/pq-crypto/kyber_r3/kyber512r3_indcpa.h b/pq-crypto/kyber_r3/kyber512r3_indcpa.h deleted file mode 100644 index f8b9e401a06..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_indcpa.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include -#include "kyber512r3_params.h" - -#define indcpa_keypair S2N_KYBER_512_R3_NAMESPACE(indcpa_keypair) -int indcpa_keypair(uint8_t pk[S2N_KYBER_512_R3_INDCPA_PUBLICKEYBYTES], uint8_t sk[S2N_KYBER_512_R3_INDCPA_SECRETKEYBYTES]); - -#define indcpa_enc S2N_KYBER_512_R3_NAMESPACE(indcpa_enc) -void indcpa_enc(uint8_t c[S2N_KYBER_512_R3_INDCPA_BYTES], const uint8_t m[S2N_KYBER_512_R3_INDCPA_MSGBYTES], - const uint8_t pk[S2N_KYBER_512_R3_INDCPA_PUBLICKEYBYTES], const uint8_t coins[S2N_KYBER_512_R3_SYMBYTES]); - -#define indcpa_dec S2N_KYBER_512_R3_NAMESPACE(indcpa_dec) -void indcpa_dec(uint8_t m[S2N_KYBER_512_R3_INDCPA_MSGBYTES], const uint8_t c[S2N_KYBER_512_R3_INDCPA_BYTES], - const uint8_t sk[S2N_KYBER_512_R3_INDCPA_SECRETKEYBYTES]); diff --git a/pq-crypto/kyber_r3/kyber512r3_indcpa_avx2.c b/pq-crypto/kyber_r3/kyber512r3_indcpa_avx2.c deleted file mode 100644 index 91e75138816..00000000000 --- 
a/pq-crypto/kyber_r3/kyber512r3_indcpa_avx2.c +++ /dev/null @@ -1,363 +0,0 @@ -#include -#include -#include -#include "kyber512r3_align_avx2.h" -#include "kyber512r3_params.h" -#include "kyber512r3_indcpa_avx2.h" -#include "kyber512r3_polyvec_avx2.h" -#include "kyber512r3_poly_avx2.h" -#include "kyber512r3_rejsample_avx2.h" -#include "kyber512r3_fips202.h" -#include "kyber512r3_fips202x4_avx2.h" -#include "pq-crypto/s2n_pq_random.h" -#include "utils/s2n_safety.h" - -#if defined(S2N_KYBER512R3_AVX2_BMI2) -#include - -/************************************************* -* Name: pack_pk -* -* Description: Serialize the public key as concatenation of the -* serialized vector of polynomials pk and the -* public seed used to generate the matrix A. -* The polynomial coefficients in pk are assumed to -* lie in the invertal [0,q], i.e. pk must be reduced -* by polyvec_reduce_avx2(). -* -* Arguments: uint8_t *r: pointer to the output serialized public key -* polyvec *pk: pointer to the input public-key polyvec -* const uint8_t *seed: pointer to the input public seed -**************************************************/ -static void pack_pk(uint8_t r[S2N_KYBER_512_R3_INDCPA_PUBLICKEYBYTES], - polyvec *pk, - const uint8_t seed[S2N_KYBER_512_R3_SYMBYTES]) -{ - polyvec_tobytes_avx2(r, pk); - memcpy(r+S2N_KYBER_512_R3_POLYVECBYTES, seed, S2N_KYBER_512_R3_SYMBYTES); -} - -/************************************************* -* Name: unpack_pk -* -* Description: De-serialize public key from a byte array; -* approximate inverse of pack_pk -* -* Arguments: - polyvec *pk: pointer to output public-key polynomial vector -* - uint8_t *seed: pointer to output seed to generate matrix A -* - const uint8_t *packedpk: pointer to input serialized public key -**************************************************/ -static void unpack_pk(polyvec *pk, - uint8_t seed[S2N_KYBER_512_R3_SYMBYTES], - const uint8_t packedpk[S2N_KYBER_512_R3_INDCPA_PUBLICKEYBYTES]) -{ - polyvec_frombytes_avx2(pk, packedpk); - 
memcpy(seed, packedpk+S2N_KYBER_512_R3_POLYVECBYTES, S2N_KYBER_512_R3_SYMBYTES); -} - -/************************************************* -* Name: pack_sk -* -* Description: Serialize the secret key. -* The polynomial coefficients in sk are assumed to -* lie in the invertal [0,q], i.e. sk must be reduced -* by polyvec_reduce_avx2(). -* -* Arguments: - uint8_t *r: pointer to output serialized secret key -* - polyvec *sk: pointer to input vector of polynomials (secret key) -**************************************************/ -static void pack_sk(uint8_t r[S2N_KYBER_512_R3_INDCPA_SECRETKEYBYTES], polyvec *sk) -{ - polyvec_tobytes_avx2(r, sk); -} - -/************************************************* -* Name: unpack_sk -* -* Description: De-serialize the secret key; inverse of pack_sk -* -* Arguments: - polyvec *sk: pointer to output vector of polynomials (secret key) -* - const uint8_t *packedsk: pointer to input serialized secret key -**************************************************/ -static void unpack_sk(polyvec *sk, const uint8_t packedsk[S2N_KYBER_512_R3_INDCPA_SECRETKEYBYTES]) -{ - polyvec_frombytes_avx2(sk, packedsk); -} - -/************************************************* -* Name: pack_ciphertext -* -* Description: Serialize the ciphertext as concatenation of the -* compressed and serialized vector of polynomials b -* and the compressed and serialized polynomial v. -* The polynomial coefficients in b and v are assumed to -* lie in the invertal [0,q], i.e. b and v must be reduced -* by polyvec_reduce_avx2() and poly_reduce_avx2(), respectively. 
-* -* Arguments: uint8_t *r: pointer to the output serialized ciphertext -* poly *pk: pointer to the input vector of polynomials b -* poly *v: pointer to the input polynomial v -**************************************************/ -static void pack_ciphertext(uint8_t r[S2N_KYBER_512_R3_INDCPA_BYTES], polyvec *b, poly *v) -{ - polyvec_compress_avx2(r, b); - poly_compress_avx2(r+S2N_KYBER_512_R3_POLYVECCOMPRESSEDBYTES, v); -} - -/************************************************* -* Name: unpack_ciphertext -* -* Description: De-serialize and decompress ciphertext from a byte array; -* approximate inverse of pack_ciphertext -* -* Arguments: - polyvec *b: pointer to the output vector of polynomials b -* - poly *v: pointer to the output polynomial v -* - const uint8_t *c: pointer to the input serialized ciphertext -**************************************************/ -static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t c[S2N_KYBER_512_R3_INDCPA_BYTES]) -{ - polyvec_decompress_avx2(b, c); - poly_decompress_avx2(v, c+S2N_KYBER_512_R3_POLYVECCOMPRESSEDBYTES); -} - -/************************************************* -* Name: rej_uniform -* -* Description: Run rejection sampling on uniform random bytes to generate -* uniform random integers mod q -* -* Arguments: - int16_t *r: pointer to output array -* - unsigned int len: requested number of 16-bit integers (uniform mod q) -* - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes) -* - unsigned int buflen: length of input buffer in bytes -* -* Returns number of sampled 16-bit integers (at most len) -**************************************************/ -static unsigned int rej_uniform(int16_t *r, - unsigned int len, - const uint8_t *buf, - unsigned int buflen) -{ - unsigned int ctr, pos; - uint16_t val0, val1; - - ctr = pos = 0; - while(ctr < len && pos <= buflen - 3) { // buflen is always at least 3 - val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; - val1 = 
((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)) & 0xFFF; - pos += 3; - - if(val0 < S2N_KYBER_512_R3_Q) - r[ctr++] = val0; - if(ctr < len && val1 < S2N_KYBER_512_R3_Q) - r[ctr++] = val1; - } - - return ctr; -} - -#define gen_a(A,B) gen_matrix_avx2(A,B,0) -#define gen_at(A,B) gen_matrix_avx2(A,B,1) - -/************************************************* -* Name: gen_matrix_avx2 -* -* Description: Deterministically generate matrix A (or the transpose of A) -* from a seed. Entries of the matrix are polynomials that look -* uniformly random. Performs rejection sampling on output of -* a XOF -* -* Arguments: - polyvec *a: pointer to ouptput matrix A -* - const uint8_t *seed: pointer to input seed -* - int transposed: boolean deciding whether A or A^T is generated -**************************************************/ -void gen_matrix_avx2(polyvec *a, const uint8_t seed[32], int transposed) -{ - unsigned int ctr0, ctr1, ctr2, ctr3; - ALIGNED_UINT8(S2N_KYBER_512_R3_REJ_UNIFORM_AVX_NBLOCKS*S2N_KYBER_512_R3_SHAKE128_RATE) buf[4]; - __m256i f; - keccakx4_state state; - - // correcting cast-align and cast-qual errors - // old version: f = _mm256_loadu_si256((__m256i *)seed); - f = _mm256_loadu_si256((const void *)seed); - _mm256_store_si256(buf[0].vec, f); - _mm256_store_si256(buf[1].vec, f); - _mm256_store_si256(buf[2].vec, f); - _mm256_store_si256(buf[3].vec, f); - - if(transposed) { - buf[0].coeffs[32] = 0; - buf[0].coeffs[33] = 0; - buf[1].coeffs[32] = 0; - buf[1].coeffs[33] = 1; - buf[2].coeffs[32] = 1; - buf[2].coeffs[33] = 0; - buf[3].coeffs[32] = 1; - buf[3].coeffs[33] = 1; - } - else { - buf[0].coeffs[32] = 0; - buf[0].coeffs[33] = 0; - buf[1].coeffs[32] = 1; - buf[1].coeffs[33] = 0; - buf[2].coeffs[32] = 0; - buf[2].coeffs[33] = 1; - buf[3].coeffs[32] = 1; - buf[3].coeffs[33] = 1; - } - - shake128x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 34); - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 
S2N_KYBER_512_R3_REJ_UNIFORM_AVX_NBLOCKS, &state); - - ctr0 = rej_uniform_avx2(a[0].vec[0].coeffs, buf[0].coeffs); - ctr1 = rej_uniform_avx2(a[0].vec[1].coeffs, buf[1].coeffs); - ctr2 = rej_uniform_avx2(a[1].vec[0].coeffs, buf[2].coeffs); - ctr3 = rej_uniform_avx2(a[1].vec[1].coeffs, buf[3].coeffs); - - while(ctr0 < S2N_KYBER_512_R3_N || ctr1 < S2N_KYBER_512_R3_N || ctr2 < S2N_KYBER_512_R3_N || ctr3 < S2N_KYBER_512_R3_N) { - shake128x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 1, &state); - - ctr0 += rej_uniform(a[0].vec[0].coeffs + ctr0, S2N_KYBER_512_R3_N - ctr0, buf[0].coeffs, S2N_KYBER_512_R3_SHAKE128_RATE); - ctr1 += rej_uniform(a[0].vec[1].coeffs + ctr1, S2N_KYBER_512_R3_N - ctr1, buf[1].coeffs, S2N_KYBER_512_R3_SHAKE128_RATE); - ctr2 += rej_uniform(a[1].vec[0].coeffs + ctr2, S2N_KYBER_512_R3_N - ctr2, buf[2].coeffs, S2N_KYBER_512_R3_SHAKE128_RATE); - ctr3 += rej_uniform(a[1].vec[1].coeffs + ctr3, S2N_KYBER_512_R3_N - ctr3, buf[3].coeffs, S2N_KYBER_512_R3_SHAKE128_RATE); - } - - poly_nttunpack_avx2(&a[0].vec[0]); - poly_nttunpack_avx2(&a[0].vec[1]); - poly_nttunpack_avx2(&a[1].vec[0]); - poly_nttunpack_avx2(&a[1].vec[1]); -} - -/************************************************* -* Name: indcpa_keypair_avx2 -* -* Description: Generates public and private key for the CPA-secure -* public-key encryption scheme underlying Kyber -* -* Arguments: - uint8_t *pk: pointer to output public key -* (of length S2N_KYBER_512_R3_INDCPA_PUBLICKEYBYTES bytes) -* - uint8_t *sk: pointer to output private key - (of length S2N_KYBER_512_R3_INDCPA_SECRETKEYBYTES bytes) -**************************************************/ -int indcpa_keypair_avx2(uint8_t pk[S2N_KYBER_512_R3_INDCPA_PUBLICKEYBYTES], - uint8_t sk[S2N_KYBER_512_R3_INDCPA_SECRETKEYBYTES]) -{ - unsigned int i; - uint8_t buf[2*S2N_KYBER_512_R3_SYMBYTES]; - const uint8_t *publicseed = buf; - const uint8_t *noiseseed = buf + S2N_KYBER_512_R3_SYMBYTES; - polyvec a[S2N_KYBER_512_R3_K], e, pkpv, 
skpv; - - POSIX_GUARD_RESULT(s2n_get_random_bytes(buf, S2N_KYBER_512_R3_SYMBYTES)); - sha3_512(buf, buf, S2N_KYBER_512_R3_SYMBYTES); - - gen_a(a, publicseed); - - poly_getnoise_eta1_4x(skpv.vec+0, skpv.vec+1, e.vec+0, e.vec+1, noiseseed, 0, 1, 2, 3); - - polyvec_ntt_avx2(&skpv); - polyvec_reduce_avx2(&skpv); - polyvec_ntt_avx2(&e); - - // matrix-vector multiplication - for(i=0;i -#include "kyber512r3_params.h" -#include "kyber512r3_polyvec_avx2.h" - -#if defined(S2N_KYBER512R3_AVX2_BMI2) -#define gen_matrix_avx2 S2N_KYBER_512_R3_NAMESPACE(gen_matrix_avx2) -void gen_matrix_avx2(polyvec *a, const uint8_t seed[S2N_KYBER_512_R3_SYMBYTES], int transposed); - -#define indcpa_keypair_avx2 S2N_KYBER_512_R3_NAMESPACE(indcpa_keypair_avx2) -int indcpa_keypair_avx2(uint8_t pk[S2N_KYBER_512_R3_INDCPA_PUBLICKEYBYTES], - uint8_t sk[S2N_KYBER_512_R3_INDCPA_SECRETKEYBYTES]); - -#define indcpa_enc_avx2 S2N_KYBER_512_R3_NAMESPACE(indcpa_enc_avx2) -void indcpa_enc_avx2(uint8_t c[S2N_KYBER_512_R3_INDCPA_BYTES], - const uint8_t m[S2N_KYBER_512_R3_INDCPA_MSGBYTES], - const uint8_t pk[S2N_KYBER_512_R3_INDCPA_PUBLICKEYBYTES], - const uint8_t coins[S2N_KYBER_512_R3_SYMBYTES]); - -#define indcpa_dec_avx2 S2N_KYBER_512_R3_NAMESPACE(indcpa_dec_avx2) -void indcpa_dec_avx2(uint8_t m[S2N_KYBER_512_R3_INDCPA_MSGBYTES], - const uint8_t c[S2N_KYBER_512_R3_INDCPA_BYTES], - const uint8_t sk[S2N_KYBER_512_R3_INDCPA_SECRETKEYBYTES]); -#endif diff --git a/pq-crypto/kyber_r3/kyber512r3_invntt_avx2.S b/pq-crypto/kyber_r3/kyber512r3_invntt_avx2.S deleted file mode 100644 index 8f131668ff7..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_invntt_avx2.S +++ /dev/null @@ -1,255 +0,0 @@ -#include "kyber512r3_consts_avx2.h" - -// The small macros (.inc files) are combined with .S files directly -/*****.include "shuffle.inc"*****/ -/********************************/ -.macro shuffle8 r0,r1,r2,r3 -vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2 -vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3 -.endm - -.macro shuffle4 r0,r1,r2,r3 
-vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2 -vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3 -.endm - -.macro shuffle2 r0,r1,r2,r3 -#vpsllq $32,%ymm\r1,%ymm\r2 -vmovsldup %ymm\r1,%ymm\r2 -vpblendd $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 -vpsrlq $32,%ymm\r0,%ymm\r0 -#vmovshdup %ymm\r0,%ymm\r0 -vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 -.endm - -.macro shuffle1 r0,r1,r2,r3 -vpslld $16,%ymm\r1,%ymm\r2 -vpblendw $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 -vpsrld $16,%ymm\r0,%ymm\r0 -vpblendw $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 -.endm -/********************************/ - -/*****.include "fq.inc"*****/ -/***************************/ -.macro red16 r,rs=0,x=12 -vpmulhw %ymm1,%ymm\r,%ymm\x -.if \rs -vpmulhrsw %ymm\rs,%ymm\x,%ymm\x -.else -vpsraw $10,%ymm\x,%ymm\x -.endif -vpmullw %ymm0,%ymm\x,%ymm\x -vpsubw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro csubq r,x=12 -vpsubw %ymm0,%ymm\r,%ymm\r -vpsraw $15,%ymm\r,%ymm\x -vpand %ymm0,%ymm\x,%ymm\x -vpaddw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro caddq r,x=12 -vpsraw $15,%ymm\r,%ymm\x -vpand %ymm0,%ymm\x,%ymm\x -vpaddw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro fqmulprecomp al,ah,b,x=12 -vpmullw %ymm\al,%ymm\b,%ymm\x -vpmulhw %ymm\ah,%ymm\b,%ymm\b -vpmulhw %ymm0,%ymm\x,%ymm\x -vpsubw %ymm\x,%ymm\b,%ymm\b -.endm -/***************************/ - -.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,zl0=2,zl1=2,zh0=3,zh1=3 -vpsubw %ymm\rl0,%ymm\rh0,%ymm12 -vpaddw %ymm\rh0,%ymm\rl0,%ymm\rl0 -vpsubw %ymm\rl1,%ymm\rh1,%ymm13 - -vpmullw %ymm\zl0,%ymm12,%ymm\rh0 -vpaddw %ymm\rh1,%ymm\rl1,%ymm\rl1 -vpsubw %ymm\rl2,%ymm\rh2,%ymm14 - -vpmullw %ymm\zl0,%ymm13,%ymm\rh1 -vpaddw %ymm\rh2,%ymm\rl2,%ymm\rl2 -vpsubw %ymm\rl3,%ymm\rh3,%ymm15 - -vpmullw %ymm\zl1,%ymm14,%ymm\rh2 -vpaddw %ymm\rh3,%ymm\rl3,%ymm\rl3 -vpmullw %ymm\zl1,%ymm15,%ymm\rh3 - -vpmulhw %ymm\zh0,%ymm12,%ymm12 -vpmulhw %ymm\zh0,%ymm13,%ymm13 - -vpmulhw %ymm\zh1,%ymm14,%ymm14 -vpmulhw %ymm\zh1,%ymm15,%ymm15 - -vpmulhw %ymm0,%ymm\rh0,%ymm\rh0 - -vpmulhw %ymm0,%ymm\rh1,%ymm\rh1 - -vpmulhw %ymm0,%ymm\rh2,%ymm\rh2 -vpmulhw %ymm0,%ymm\rh3,%ymm\rh3 - -# 
- -# - -vpsubw %ymm\rh0,%ymm12,%ymm\rh0 - -vpsubw %ymm\rh1,%ymm13,%ymm\rh1 - -vpsubw %ymm\rh2,%ymm14,%ymm\rh2 -vpsubw %ymm\rh3,%ymm15,%ymm\rh3 -.endm - -.macro intt_levels0t5 off -/* level 0 */ -vmovdqa _16XFLO*2(%rsi),%ymm2 -vmovdqa _16XFHI*2(%rsi),%ymm3 - -vmovdqa (128*\off+ 0)*2(%rdi),%ymm4 -vmovdqa (128*\off+ 32)*2(%rdi),%ymm6 -vmovdqa (128*\off+ 16)*2(%rdi),%ymm5 -vmovdqa (128*\off+ 48)*2(%rdi),%ymm7 - -fqmulprecomp 2,3,4 -fqmulprecomp 2,3,6 -fqmulprecomp 2,3,5 -fqmulprecomp 2,3,7 - -vmovdqa (128*\off+ 64)*2(%rdi),%ymm8 -vmovdqa (128*\off+ 96)*2(%rdi),%ymm10 -vmovdqa (128*\off+ 80)*2(%rdi),%ymm9 -vmovdqa (128*\off+112)*2(%rdi),%ymm11 - -fqmulprecomp 2,3,8 -fqmulprecomp 2,3,10 -fqmulprecomp 2,3,9 -fqmulprecomp 2,3,11 - -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+208)*2(%rsi),%ymm15 -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+176)*2(%rsi),%ymm1 -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+224)*2(%rsi),%ymm2 -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+192)*2(%rsi),%ymm3 -vmovdqa _REVIDXB*2(%rsi),%ymm12 -vpshufb %ymm12,%ymm15,%ymm15 -vpshufb %ymm12,%ymm1,%ymm1 -vpshufb %ymm12,%ymm2,%ymm2 -vpshufb %ymm12,%ymm3,%ymm3 - -butterfly 4,5,8,9,6,7,10,11,15,1,2,3 - -/* level 1 */ -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+144)*2(%rsi),%ymm2 -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+160)*2(%rsi),%ymm3 -vmovdqa _REVIDXB*2(%rsi),%ymm1 -vpshufb %ymm1,%ymm2,%ymm2 -vpshufb %ymm1,%ymm3,%ymm3 - -butterfly 4,5,6,7,8,9,10,11,2,2,3,3 - -shuffle1 4,5,3,5 -shuffle1 6,7,4,7 -shuffle1 8,9,6,9 -shuffle1 10,11,8,11 - -/* level 2 */ -vmovdqa _REVIDXD*2(%rsi),%ymm12 -vpermd (_ZETAS_EXP+(1-\off)*224+112)*2(%rsi),%ymm12,%ymm2 -vpermd (_ZETAS_EXP+(1-\off)*224+128)*2(%rsi),%ymm12,%ymm10 - -butterfly 3,4,6,8,5,7,9,11,2,2,10,10 - -vmovdqa _16XV*2(%rsi),%ymm1 -red16 3 - -shuffle2 3,4,10,4 -shuffle2 6,8,3,8 -shuffle2 5,7,6,7 -shuffle2 9,11,5,11 - -/* level 3 */ -vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+80)*2(%rsi),%ymm2 -vpermq $0x1B,(_ZETAS_EXP+(1-\off)*224+96)*2(%rsi),%ymm9 - -butterfly 10,3,6,5,4,8,7,11,2,2,9,9 - -shuffle4 
10,3,9,3 -shuffle4 6,5,10,5 -shuffle4 4,8,6,8 -shuffle4 7,11,4,11 - -/* level 4 */ -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+48)*2(%rsi),%ymm2 -vpermq $0x4E,(_ZETAS_EXP+(1-\off)*224+64)*2(%rsi),%ymm7 - -butterfly 9,10,6,4,3,5,8,11,2,2,7,7 - -red16 9 - -shuffle8 9,10,7,10 -shuffle8 6,4,9,4 -shuffle8 3,5,6,5 -shuffle8 8,11,3,11 - -/* level 5 */ -vmovdqa (_ZETAS_EXP+(1-\off)*224+16)*2(%rsi),%ymm2 -vmovdqa (_ZETAS_EXP+(1-\off)*224+32)*2(%rsi),%ymm8 - -butterfly 7,9,6,3,10,4,5,11,2,2,8,8 - -vmovdqa %ymm7,(128*\off+ 0)*2(%rdi) -vmovdqa %ymm9,(128*\off+ 16)*2(%rdi) -vmovdqa %ymm6,(128*\off+ 32)*2(%rdi) -vmovdqa %ymm3,(128*\off+ 48)*2(%rdi) -vmovdqa %ymm10,(128*\off+ 64)*2(%rdi) -vmovdqa %ymm4,(128*\off+ 80)*2(%rdi) -vmovdqa %ymm5,(128*\off+ 96)*2(%rdi) -vmovdqa %ymm11,(128*\off+112)*2(%rdi) -.endm - -.macro intt_level6 off -/* level 6 */ -vmovdqa (64*\off+ 0)*2(%rdi),%ymm4 -vmovdqa (64*\off+128)*2(%rdi),%ymm8 -vmovdqa (64*\off+ 16)*2(%rdi),%ymm5 -vmovdqa (64*\off+144)*2(%rdi),%ymm9 -vpbroadcastq (_ZETAS_EXP+0)*2(%rsi),%ymm2 - -vmovdqa (64*\off+ 32)*2(%rdi),%ymm6 -vmovdqa (64*\off+160)*2(%rdi),%ymm10 -vmovdqa (64*\off+ 48)*2(%rdi),%ymm7 -vmovdqa (64*\off+176)*2(%rdi),%ymm11 -vpbroadcastq (_ZETAS_EXP+4)*2(%rsi),%ymm3 - -butterfly 4,5,6,7,8,9,10,11 - -.if \off == 0 -red16 4 -.endif - -vmovdqa %ymm4,(64*\off+ 0)*2(%rdi) -vmovdqa %ymm5,(64*\off+ 16)*2(%rdi) -vmovdqa %ymm6,(64*\off+ 32)*2(%rdi) -vmovdqa %ymm7,(64*\off+ 48)*2(%rdi) -vmovdqa %ymm8,(64*\off+128)*2(%rdi) -vmovdqa %ymm9,(64*\off+144)*2(%rdi) -vmovdqa %ymm10,(64*\off+160)*2(%rdi) -vmovdqa %ymm11,(64*\off+176)*2(%rdi) -.endm - -.text -.global cdecl(invntt_avx2_asm) -cdecl(invntt_avx2_asm): -vmovdqa _16XQ*2(%rsi),%ymm0 - -intt_levels0t5 0 -intt_levels0t5 1 - -intt_level6 0 -intt_level6 1 -ret diff --git a/pq-crypto/kyber_r3/kyber512r3_kem.c b/pq-crypto/kyber_r3/kyber512r3_kem.c deleted file mode 100644 index c04fabeee9d..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_kem.c +++ /dev/null @@ -1,163 +0,0 @@ -#include 
-#include - -#include "kyber512r3_indcpa.h" -#include "kyber512r3_indcpa_avx2.h" -#include "kyber512r3_params.h" -#include "kyber512r3_symmetric.h" -#include "pq-crypto/s2n_pq.h" -#include "pq-crypto/s2n_pq_random.h" -#include "tls/s2n_kem.h" -#include "utils/s2n_safety.h" - -S2N_ENSURE_PORTABLE_OPTIMIZATIONS - -/************************************************* -* Name: crypto_kem_keypair -* -* Description: Generates public and private key -* for CCA-secure Kyber key encapsulation mechanism -* -* Arguments: - unsigned char *pk: pointer to output public key -* (an already allocated array of S2N_KYBER_512_R3_PUBLIC_KEY_BYTES bytes) -* - unsigned char *sk: pointer to output private key -* (an already allocated array of S2N_KYBER_512_R3_SECRET_KEY_BYTES bytes) -* -* Returns 0 (success) -**************************************************/ -int s2n_kyber_512_r3_crypto_kem_keypair(const struct s2n_kem *kem, uint8_t *pk, uint8_t *sk) -{ - POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_PQ_DISABLED); -#if defined(S2N_KYBER512R3_AVX2_BMI2) - if (s2n_kyber512r3_is_avx2_bmi2_enabled()) { - POSIX_GUARD(indcpa_keypair_avx2(pk, sk)); - } else -#endif - { - POSIX_GUARD(indcpa_keypair(pk, sk)); - } - - for (size_t i = 0; i < S2N_KYBER_512_R3_INDCPA_PUBLICKEYBYTES; i++) { - sk[i + S2N_KYBER_512_R3_INDCPA_SECRETKEYBYTES] = pk[i]; - } - sha3_256(sk + S2N_KYBER_512_R3_SECRET_KEY_BYTES - 2 * S2N_KYBER_512_R3_SYMBYTES, pk, S2N_KYBER_512_R3_PUBLIC_KEY_BYTES); - /* Value z for pseudo-random output on reject */ - POSIX_GUARD_RESULT(s2n_get_random_bytes(sk + S2N_KYBER_512_R3_SECRET_KEY_BYTES - S2N_KYBER_512_R3_SYMBYTES, S2N_KYBER_512_R3_SYMBYTES)); - return S2N_SUCCESS; -} - -/************************************************* -* Name: crypto_kem_enc -* -* Description: Generates cipher text and shared -* secret for given public key -* -* Arguments: - unsigned char *ct: pointer to output cipher text -* (an already allocated array of S2N_KYBER_512_R3_CIPHERTEXT_BYTES bytes) -* - unsigned char *ss: 
pointer to output shared secret -* (an already allocated array of S2N_KYBER_512_R3_SHARED_SECRET_BYTES bytes) -* - const unsigned char *pk: pointer to input public key -* (an already allocated array of S2N_KYBER_512_R3_PUBLIC_KEY_BYTES bytes) -* -* Returns 0 (success) -**************************************************/ -int s2n_kyber_512_r3_crypto_kem_enc(const struct s2n_kem *kem, uint8_t *ct, uint8_t *ss, - const uint8_t *pk) -{ - POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_PQ_DISABLED); - uint8_t buf[2 * S2N_KYBER_512_R3_SYMBYTES]; - /* Will contain key, coins */ - uint8_t kr[2 * S2N_KYBER_512_R3_SYMBYTES]; - - POSIX_GUARD_RESULT(s2n_get_random_bytes(buf, S2N_KYBER_512_R3_SYMBYTES)); - /* Don't release system RNG output */ - sha3_256(buf, buf, S2N_KYBER_512_R3_SYMBYTES); - - /* Multitarget countermeasure for coins + contributory KEM */ - sha3_256(buf + S2N_KYBER_512_R3_SYMBYTES, pk, S2N_KYBER_512_R3_PUBLIC_KEY_BYTES); - sha3_512(kr, buf, 2 * S2N_KYBER_512_R3_SYMBYTES); - - /* coins are in kr+S2N_KYBER_512_R3_SYMBYTES */ -#if defined(S2N_KYBER512R3_AVX2_BMI2) - if (s2n_kyber512r3_is_avx2_bmi2_enabled()) { - indcpa_enc_avx2(ct, buf, pk, kr + S2N_KYBER_512_R3_SYMBYTES); - } else -#endif - { - indcpa_enc(ct, buf, pk, kr + S2N_KYBER_512_R3_SYMBYTES); - } - - /* overwrite coins in kr with H(c) */ - sha3_256(kr + S2N_KYBER_512_R3_SYMBYTES, ct, S2N_KYBER_512_R3_CIPHERTEXT_BYTES); - /* hash concatenation of pre-k and H(c) to k */ - shake256(ss, S2N_KYBER_512_R3_SSBYTES, kr, 2 * S2N_KYBER_512_R3_SYMBYTES); - return S2N_SUCCESS; -} - -/************************************************* -* Name: crypto_kem_dec -* -* Description: Generates shared secret for given -* cipher text and private key -* -* Arguments: - unsigned char *ss: pointer to output shared secret -* (an already allocated array of S2N_KYBER_512_R3_SHARED_SECRET_BYTES bytes) -* - const unsigned char *ct: pointer to input cipher text -* (an already allocated array of S2N_KYBER_512_R3_CIPHERTEXT_BYTES bytes) -* - 
const unsigned char *sk: pointer to input private key -* (an already allocated array of S2N_KYBER_512_R3_SECRET_KEY_BYTES bytes) -* -* Returns 0. -* -* On failure, ss will contain a pseudo-random value. -**************************************************/ -int s2n_kyber_512_r3_crypto_kem_dec(const struct s2n_kem *kem, uint8_t *ss, const uint8_t *ct, - const uint8_t *sk) -{ - POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_PQ_DISABLED); - uint8_t buf[2 * S2N_KYBER_512_R3_SYMBYTES]; - /* Will contain key, coins */ - uint8_t kr[2 * S2N_KYBER_512_R3_SYMBYTES]; - uint8_t cmp[S2N_KYBER_512_R3_CIPHERTEXT_BYTES]; - const uint8_t *pk = sk + S2N_KYBER_512_R3_INDCPA_SECRETKEYBYTES; - -#if defined(S2N_KYBER512R3_AVX2_BMI2) - if (s2n_kyber512r3_is_avx2_bmi2_enabled()) { - indcpa_dec_avx2(buf, ct, sk); - } else -#endif - { - indcpa_dec(buf, ct, sk); - } - - /* Multitarget countermeasure for coins + contributory KEM */ - for (size_t i = 0; i < S2N_KYBER_512_R3_SYMBYTES; i++) { - buf[S2N_KYBER_512_R3_SYMBYTES + i] = sk[S2N_KYBER_512_R3_SECRET_KEY_BYTES - 2 * S2N_KYBER_512_R3_SYMBYTES + i]; - } - sha3_512(kr, buf, 2 * S2N_KYBER_512_R3_SYMBYTES); - - /* coins are in kr+S2N_KYBER_512_R3_SYMBYTES */ -#if defined(S2N_KYBER512R3_AVX2_BMI2) - if (s2n_kyber512r3_is_avx2_bmi2_enabled()) { - indcpa_enc_avx2(cmp, buf, pk, kr + S2N_KYBER_512_R3_SYMBYTES); - } else -#endif - { - indcpa_enc(cmp, buf, pk, kr + S2N_KYBER_512_R3_SYMBYTES); - } - - /* If ct and cmp are equal (dont_copy = 1), decryption has succeeded and we do NOT overwrite pre-k below. - * If ct and cmp are not equal (dont_copy = 0), decryption fails and we do overwrite pre-k. 
*/ - int dont_copy = s2n_constant_time_equals(ct, cmp, S2N_KYBER_512_R3_CIPHERTEXT_BYTES); - - /* overwrite coins in kr with H(c) */ - sha3_256(kr + S2N_KYBER_512_R3_SYMBYTES, ct, S2N_KYBER_512_R3_CIPHERTEXT_BYTES); - - /* Overwrite pre-k with z on re-encryption failure */ - POSIX_GUARD(s2n_constant_time_copy_or_dont(kr, sk + S2N_KYBER_512_R3_SECRET_KEY_BYTES - S2N_KYBER_512_R3_SYMBYTES, - S2N_KYBER_512_R3_SYMBYTES, dont_copy)); - - /* hash concatenation of pre-k and H(c) to k */ - shake256(ss, S2N_KYBER_512_R3_SSBYTES, kr, 2 * S2N_KYBER_512_R3_SYMBYTES); - return S2N_SUCCESS; -} diff --git a/pq-crypto/kyber_r3/kyber512r3_ntt.c b/pq-crypto/kyber_r3/kyber512r3_ntt.c deleted file mode 100644 index c38147bafe6..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_ntt.c +++ /dev/null @@ -1,124 +0,0 @@ -#include -#include "kyber512r3_params.h" -#include "kyber512r3_ntt.h" -#include "kyber512r3_reduce.h" - -S2N_ENSURE_PORTABLE_OPTIMIZATIONS - -const int16_t zetas[128] = { - 2285, 2571, 2970, 1812, 1493, 1422, 287, 202, 3158, 622, 1577, 182, 962, - 2127, 1855, 1468, 573, 2004, 264, 383, 2500, 1458, 1727, 3199, 2648, 1017, - 732, 608, 1787, 411, 3124, 1758, 1223, 652, 2777, 1015, 2036, 1491, 3047, - 1785, 516, 3321, 3009, 2663, 1711, 2167, 126, 1469, 2476, 3239, 3058, 830, - 107, 1908, 3082, 2378, 2931, 961, 1821, 2604, 448, 2264, 677, 2054, 2226, - 430, 555, 843, 2078, 871, 1550, 105, 422, 587, 177, 3094, 3038, 2869, 1574, - 1653, 3083, 778, 1159, 3182, 2552, 1483, 2727, 1119, 1739, 644, 2457, 349, - 418, 329, 3173, 3254, 817, 1097, 603, 610, 1322, 2044, 1864, 384, 2114, 3193, - 1218, 1994, 2455, 220, 2142, 1670, 2144, 1799, 2051, 794, 1819, 2475, 2459, - 478, 3221, 3021, 996, 991, 958, 1869, 1522, 1628 -}; - -const int16_t zetas_inv[128] = { - 1701, 1807, 1460, 2371, 2338, 2333, 308, 108, 2851, 870, 854, 1510, 2535, - 1278, 1530, 1185, 1659, 1187, 3109, 874, 1335, 2111, 136, 1215, 2945, 1465, - 1285, 2007, 2719, 2726, 2232, 2512, 75, 156, 3000, 2911, 2980, 872, 2685, - 1590, 
2210, 602, 1846, 777, 147, 2170, 2551, 246, 1676, 1755, 460, 291, 235, - 3152, 2742, 2907, 3224, 1779, 2458, 1251, 2486, 2774, 2899, 1103, 1275, 2652, - 1065, 2881, 725, 1508, 2368, 398, 951, 247, 1421, 3222, 2499, 271, 90, 853, - 1860, 3203, 1162, 1618, 666, 320, 8, 2813, 1544, 282, 1838, 1293, 2314, 552, - 2677, 2106, 1571, 205, 2918, 1542, 2721, 2597, 2312, 681, 130, 1602, 1871, - 829, 2946, 3065, 1325, 2756, 1861, 1474, 1202, 2367, 3147, 1752, 2707, 171, - 3127, 3042, 1907, 1836, 1517, 359, 758, 1441 -}; - -/************************************************* -* Name: fqmul -* -* Description: Multiplication followed by Montgomery reduction -* -* Arguments: - int16_t a: first factor -* - int16_t b: second factor -* -* Returns 16-bit integer congruent to a*b*R^{-1} mod q -**************************************************/ -static int16_t fqmul(int16_t a, int16_t b) { - return montgomery_reduce((int32_t)a * b); -} - -/************************************************* -* Name: ntt -* -* Description: Inplace number-theoretic transform (NTT) in Rq -* input is in standard order, output is in bitreversed order -* -* Arguments: - int16_t r[256]: pointer to input/output vector of elements -* of Zq -**************************************************/ -void ntt(int16_t r[256]) { - unsigned int len, start, j, k; - int16_t t, zeta; - - k = 1; - for (len = 128; len >= 2; len >>= 1) { - for (start = 0; start < 256; start = j + len) { - zeta = zetas[k++]; - for (j = start; j < start + len; ++j) { - t = fqmul(zeta, r[j + len]); - r[j + len] = r[j] - t; - r[j] = r[j] + t; - } - } - } -} - -/************************************************* -* Name: invntt_tomont -* -* Description: Inplace inverse number-theoretic transform in Rq and -* multiplication by Montgomery factor 2^16. 
-* Input is in bitreversed order, output is in standard order -* -* Arguments: - int16_t r[256]: pointer to input/output vector of elements -* of Zq -**************************************************/ -void invntt(int16_t r[256]) { - unsigned int start, len, j, k; - int16_t t, zeta; - - k = 0; - for (len = 2; len <= 128; len <<= 1) { - for (start = 0; start < 256; start = j + len) { - zeta = zetas_inv[k++]; - for (j = start; j < start + len; ++j) { - t = r[j]; - r[j] = barrett_reduce(t + r[j + len]); - r[j + len] = t - r[j + len]; - r[j + len] = fqmul(zeta, r[j + len]); - } - } - } - - for (j = 0; j < 256; ++j) { - r[j] = fqmul(r[j], zetas_inv[127]); - } -} - -/************************************************* -* Name: basemul -* -* Description: Multiplication of polynomials in Zq[X]/(X^2-zeta) -* used for multiplication of elements in Rq in NTT domain -* -* Arguments: - int16_t r[2]: pointer to the output polynomial -* - const int16_t a[2]: pointer to the first factor -* - const int16_t b[2]: pointer to the second factor -* - int16_t zeta: integer defining the reduction polynomial -**************************************************/ -void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta) { - r[0] = fqmul(a[1], b[1]); - r[0] = fqmul(r[0], zeta); - r[0] += fqmul(a[0], b[0]); - - r[1] = fqmul(a[0], b[1]); - r[1] += fqmul(a[1], b[0]); -} diff --git a/pq-crypto/kyber_r3/kyber512r3_ntt.h b/pq-crypto/kyber_r3/kyber512r3_ntt.h deleted file mode 100644 index 98d6235764b..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_ntt.h +++ /dev/null @@ -1,19 +0,0 @@ -#pragma once - -#include -#include "kyber512r3_params.h" - -#define zetas S2N_KYBER_512_R3_NAMESPACE(zetas) -extern const int16_t zetas[128]; - -#define zetas_inv S2N_KYBER_512_R3_NAMESPACE(zetas_inv) -extern const int16_t zetas_inv[128]; - -#define ntt S2N_KYBER_512_R3_NAMESPACE(ntt) -void ntt(int16_t poly[256]); - -#define invntt S2N_KYBER_512_R3_NAMESPACE(invntt) -void invntt(int16_t 
poly[256]); - -#define basemul S2N_KYBER_512_R3_NAMESPACE(basemul) -void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta); diff --git a/pq-crypto/kyber_r3/kyber512r3_ntt_avx2.S b/pq-crypto/kyber_r3/kyber512r3_ntt_avx2.S deleted file mode 100644 index dc80086cb16..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_ntt_avx2.S +++ /dev/null @@ -1,218 +0,0 @@ -#include "kyber512r3_consts_avx2.h" - -// The small macros (.inc files) are combined with .S files directly -/*****.include "shuffle.inc"*****/ -/********************************/ -.macro shuffle8 r0,r1,r2,r3 -vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2 -vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3 -.endm - -.macro shuffle4 r0,r1,r2,r3 -vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2 -vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3 -.endm - -.macro shuffle2 r0,r1,r2,r3 -#vpsllq $32,%ymm\r1,%ymm\r2 -vmovsldup %ymm\r1,%ymm\r2 -vpblendd $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 -vpsrlq $32,%ymm\r0,%ymm\r0 -#vmovshdup %ymm\r0,%ymm\r0 -vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 -.endm - -.macro shuffle1 r0,r1,r2,r3 -vpslld $16,%ymm\r1,%ymm\r2 -vpblendw $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 -vpsrld $16,%ymm\r0,%ymm\r0 -vpblendw $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 -.endm -/********************************/ - -.macro mul rh0,rh1,rh2,rh3,zl0=15,zl1=15,zh0=2,zh1=2 -vpmullw %ymm\zl0,%ymm\rh0,%ymm12 -vpmullw %ymm\zl0,%ymm\rh1,%ymm13 - -vpmullw %ymm\zl1,%ymm\rh2,%ymm14 -vpmullw %ymm\zl1,%ymm\rh3,%ymm15 - -vpmulhw %ymm\zh0,%ymm\rh0,%ymm\rh0 -vpmulhw %ymm\zh0,%ymm\rh1,%ymm\rh1 - -vpmulhw %ymm\zh1,%ymm\rh2,%ymm\rh2 -vpmulhw %ymm\zh1,%ymm\rh3,%ymm\rh3 -.endm - -.macro reduce -vpmulhw %ymm0,%ymm12,%ymm12 -vpmulhw %ymm0,%ymm13,%ymm13 - -vpmulhw %ymm0,%ymm14,%ymm14 -vpmulhw %ymm0,%ymm15,%ymm15 -.endm - -.macro update rln,rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 -vpaddw %ymm\rh0,%ymm\rl0,%ymm\rln -vpsubw %ymm\rh0,%ymm\rl0,%ymm\rh0 -vpaddw %ymm\rh1,%ymm\rl1,%ymm\rl0 - -vpsubw %ymm\rh1,%ymm\rl1,%ymm\rh1 -vpaddw %ymm\rh2,%ymm\rl2,%ymm\rl1 -vpsubw %ymm\rh2,%ymm\rl2,%ymm\rh2 - 
-vpaddw %ymm\rh3,%ymm\rl3,%ymm\rl2 -vpsubw %ymm\rh3,%ymm\rl3,%ymm\rh3 - -vpsubw %ymm12,%ymm\rln,%ymm\rln -vpaddw %ymm12,%ymm\rh0,%ymm\rh0 -vpsubw %ymm13,%ymm\rl0,%ymm\rl0 - -vpaddw %ymm13,%ymm\rh1,%ymm\rh1 -vpsubw %ymm14,%ymm\rl1,%ymm\rl1 -vpaddw %ymm14,%ymm\rh2,%ymm\rh2 - -vpsubw %ymm15,%ymm\rl2,%ymm\rl2 -vpaddw %ymm15,%ymm\rh3,%ymm\rh3 -.endm - -.macro level0 off -vpbroadcastq (_ZETAS_EXP+0)*2(%rsi),%ymm15 -vmovdqa (64*\off+128)*2(%rdi),%ymm8 -vmovdqa (64*\off+144)*2(%rdi),%ymm9 -vmovdqa (64*\off+160)*2(%rdi),%ymm10 -vmovdqa (64*\off+176)*2(%rdi),%ymm11 -vpbroadcastq (_ZETAS_EXP+4)*2(%rsi),%ymm2 - -mul 8,9,10,11 - -vmovdqa (64*\off+ 0)*2(%rdi),%ymm4 -vmovdqa (64*\off+ 16)*2(%rdi),%ymm5 -vmovdqa (64*\off+ 32)*2(%rdi),%ymm6 -vmovdqa (64*\off+ 48)*2(%rdi),%ymm7 - -reduce -update 3,4,5,6,7,8,9,10,11 - -vmovdqa %ymm3,(64*\off+ 0)*2(%rdi) -vmovdqa %ymm4,(64*\off+ 16)*2(%rdi) -vmovdqa %ymm5,(64*\off+ 32)*2(%rdi) -vmovdqa %ymm6,(64*\off+ 48)*2(%rdi) -vmovdqa %ymm8,(64*\off+128)*2(%rdi) -vmovdqa %ymm9,(64*\off+144)*2(%rdi) -vmovdqa %ymm10,(64*\off+160)*2(%rdi) -vmovdqa %ymm11,(64*\off+176)*2(%rdi) -.endm - -.macro levels1t6 off -/* level 1 */ -vmovdqa (_ZETAS_EXP+224*\off+16)*2(%rsi),%ymm15 -vmovdqa (128*\off+ 64)*2(%rdi),%ymm8 -vmovdqa (128*\off+ 80)*2(%rdi),%ymm9 -vmovdqa (128*\off+ 96)*2(%rdi),%ymm10 -vmovdqa (128*\off+112)*2(%rdi),%ymm11 -vmovdqa (_ZETAS_EXP+224*\off+32)*2(%rsi),%ymm2 - -mul 8,9,10,11 - -vmovdqa (128*\off+ 0)*2(%rdi),%ymm4 -vmovdqa (128*\off+ 16)*2(%rdi),%ymm5 -vmovdqa (128*\off+ 32)*2(%rdi),%ymm6 -vmovdqa (128*\off+ 48)*2(%rdi),%ymm7 - -reduce -update 3,4,5,6,7,8,9,10,11 - -/* level 2 */ -shuffle8 5,10,7,10 -shuffle8 6,11,5,11 - -vmovdqa (_ZETAS_EXP+224*\off+48)*2(%rsi),%ymm15 -vmovdqa (_ZETAS_EXP+224*\off+64)*2(%rsi),%ymm2 - -mul 7,10,5,11 - -shuffle8 3,8,6,8 -shuffle8 4,9,3,9 - -reduce -update 4,6,8,3,9,7,10,5,11 - -/* level 3 */ -shuffle4 8,5,9,5 -shuffle4 3,11,8,11 - -vmovdqa (_ZETAS_EXP+224*\off+80)*2(%rsi),%ymm15 -vmovdqa 
(_ZETAS_EXP+224*\off+96)*2(%rsi),%ymm2 - -mul 9,5,8,11 - -shuffle4 4,7,3,7 -shuffle4 6,10,4,10 - -reduce -update 6,3,7,4,10,9,5,8,11 - -/* level 4 */ -shuffle2 7,8,10,8 -shuffle2 4,11,7,11 - -vmovdqa (_ZETAS_EXP+224*\off+112)*2(%rsi),%ymm15 -vmovdqa (_ZETAS_EXP+224*\off+128)*2(%rsi),%ymm2 - -mul 10,8,7,11 - -shuffle2 6,9,4,9 -shuffle2 3,5,6,5 - -reduce -update 3,4,9,6,5,10,8,7,11 - -/* level 5 */ -shuffle1 9,7,5,7 -shuffle1 6,11,9,11 - -vmovdqa (_ZETAS_EXP+224*\off+144)*2(%rsi),%ymm15 -vmovdqa (_ZETAS_EXP+224*\off+160)*2(%rsi),%ymm2 - -mul 5,7,9,11 - -shuffle1 3,10,6,10 -shuffle1 4,8,3,8 - -reduce -update 4,6,10,3,8,5,7,9,11 - -/* level 6 */ -vmovdqa (_ZETAS_EXP+224*\off+176)*2(%rsi),%ymm14 -vmovdqa (_ZETAS_EXP+224*\off+208)*2(%rsi),%ymm15 -vmovdqa (_ZETAS_EXP+224*\off+192)*2(%rsi),%ymm8 -vmovdqa (_ZETAS_EXP+224*\off+224)*2(%rsi),%ymm2 - -mul 10,3,9,11,14,15,8,2 - -reduce -update 8,4,6,5,7,10,3,9,11 - -vmovdqa %ymm8,(128*\off+ 0)*2(%rdi) -vmovdqa %ymm4,(128*\off+ 16)*2(%rdi) -vmovdqa %ymm10,(128*\off+ 32)*2(%rdi) -vmovdqa %ymm3,(128*\off+ 48)*2(%rdi) -vmovdqa %ymm6,(128*\off+ 64)*2(%rdi) -vmovdqa %ymm5,(128*\off+ 80)*2(%rdi) -vmovdqa %ymm9,(128*\off+ 96)*2(%rdi) -vmovdqa %ymm11,(128*\off+112)*2(%rdi) -.endm - -.text -.global cdecl(ntt_avx2_asm) -cdecl(ntt_avx2_asm): -vmovdqa _16XQ*2(%rsi),%ymm0 - -level0 0 -level0 1 - -levels1t6 0 -levels1t6 1 - -ret diff --git a/pq-crypto/kyber_r3/kyber512r3_ntt_avx2.h b/pq-crypto/kyber_r3/kyber512r3_ntt_avx2.h deleted file mode 100644 index 36161323580..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_ntt_avx2.h +++ /dev/null @@ -1,28 +0,0 @@ -#pragma once - -#include - -#if defined(S2N_KYBER512R3_AVX2_BMI2) -#include - -#define ntt_avx2_asm S2N_KYBER_512_R3_NAMESPACE(ntt_avx2_asm) -void ntt_avx2_asm(__m256i *r, const __m256i *qdata); - -#define invntt_avx2_asm S2N_KYBER_512_R3_NAMESPACE(invntt_avx2_asm) -void invntt_avx2_asm(__m256i *r, const __m256i *qdata); - -#define nttunpack_avx2_asm 
S2N_KYBER_512_R3_NAMESPACE(nttunpack_avx2_asm) -void nttunpack_avx2_asm(__m256i *r, const __m256i *qdata); - -#define basemul_avx2_asm S2N_KYBER_512_R3_NAMESPACE(basemul_avx2_asm) -void basemul_avx2_asm(__m256i *r, - const __m256i *a, - const __m256i *b, - const __m256i *qdata); - -#define ntttobytes_avx2_asm S2N_KYBER_512_R3_NAMESPACE(ntttobytes_avx2_asm) -void ntttobytes_avx2_asm(uint8_t *r, const __m256i *a, const __m256i *qdata); - -#define nttfrombytes_avx2_asm S2N_KYBER_512_R3_NAMESPACE(nttfrombytes_avx2_asm) -void nttfrombytes_avx2_asm(__m256i *r, const uint8_t *a, const __m256i *qdata); -#endif diff --git a/pq-crypto/kyber_r3/kyber512r3_params.h b/pq-crypto/kyber_r3/kyber512r3_params.h deleted file mode 100644 index 76c1578584b..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_params.h +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once - -#include "pq-crypto/s2n_pq_asm.h" - -/* All kyber512r3 functions and global variables in the pq-crypto/kyber_r3 directory - * should be defined using the namespace macro to avoid symbol collisions. 
For example, - * in foo.h, declare a function as follows: - * - * #define foo_function S2N_KYBER_512_R3_NAMESPACE(foo_function) - * int foo_function(int foo_argument); */ -#define S2N_KYBER_512_R3_NAMESPACE(s) s2n_kyber_512_r3_##s - -#define S2N_KYBER_512_R3_K 2 - -#define S2N_KYBER_512_R3_N 256 -#define S2N_KYBER_512_R3_Q 3329 - -#define S2N_KYBER_512_R3_SYMBYTES 32 /* size in bytes of hashes, and seeds */ -#define S2N_KYBER_512_R3_SSBYTES 32 /* size in bytes of shared key */ - -#define S2N_KYBER_512_R3_POLYBYTES 384 -#define S2N_KYBER_512_R3_POLYVECBYTES (S2N_KYBER_512_R3_K * S2N_KYBER_512_R3_POLYBYTES) - -#define S2N_KYBER_512_R3_ETA1 3 -#define S2N_KYBER_512_R3_POLYCOMPRESSEDBYTES 128 -#define S2N_KYBER_512_R3_POLYVECCOMPRESSEDBYTES 640 - -#define S2N_KYBER_512_R3_ETA2 2 - -#define S2N_KYBER_512_R3_INDCPA_MSGBYTES S2N_KYBER_512_R3_SYMBYTES -#define S2N_KYBER_512_R3_INDCPA_PUBLICKEYBYTES (S2N_KYBER_512_R3_POLYVECBYTES + S2N_KYBER_512_R3_SYMBYTES) -#define S2N_KYBER_512_R3_INDCPA_SECRETKEYBYTES (S2N_KYBER_512_R3_POLYVECBYTES) -#define S2N_KYBER_512_R3_INDCPA_BYTES (S2N_KYBER_512_R3_POLYVECCOMPRESSEDBYTES + S2N_KYBER_512_R3_POLYCOMPRESSEDBYTES) diff --git a/pq-crypto/kyber_r3/kyber512r3_poly.c b/pq-crypto/kyber_r3/kyber512r3_poly.c deleted file mode 100644 index 61a725ec44b..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_poly.c +++ /dev/null @@ -1,302 +0,0 @@ -#include -#include "kyber512r3_params.h" -#include "kyber512r3_poly.h" -#include "kyber512r3_ntt.h" -#include "kyber512r3_reduce.h" -#include "kyber512r3_cbd.h" -#include "kyber512r3_symmetric.h" - -S2N_ENSURE_PORTABLE_OPTIMIZATIONS - -/************************************************* -* Name: poly_compress -* -* Description: Compression and subsequent serialization of a polynomial -* -* Arguments: - uint8_t *r: pointer to output byte array -* (of length S2N_KYBER_512_R3_POLYCOMPRESSEDBYTES) -* - poly *a: pointer to input polynomial -**************************************************/ -void 
poly_compress(uint8_t r[S2N_KYBER_512_R3_POLYCOMPRESSEDBYTES], poly *a) { - unsigned int i, j; - uint8_t t[8]; - - poly_csubq(a); - - for (i = 0; i < S2N_KYBER_512_R3_N / 8; i++) { - for (j = 0; j < 8; j++) { - t[j] = ((((uint16_t)a->coeffs[8 * i + j] << 4) + S2N_KYBER_512_R3_Q / 2) / S2N_KYBER_512_R3_Q) & 15; - } - - r[0] = t[0] | (t[1] << 4); - r[1] = t[2] | (t[3] << 4); - r[2] = t[4] | (t[5] << 4); - r[3] = t[6] | (t[7] << 4); - r += 4; - } -} - -/************************************************* -* Name: poly_decompress -* -* Description: De-serialization and subsequent decompression of a polynomial; -* approximate inverse of poly_compress -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *a: pointer to input byte array -* (of length S2N_KYBER_512_R3_POLYCOMPRESSEDBYTES bytes) -**************************************************/ -void poly_decompress(poly *r, const uint8_t a[S2N_KYBER_512_R3_POLYCOMPRESSEDBYTES]) { - unsigned int i; - - for (i = 0; i < S2N_KYBER_512_R3_N / 2; i++) { - r->coeffs[2 * i + 0] = (((uint16_t)(a[0] & 15) * S2N_KYBER_512_R3_Q) + 8) >> 4; - r->coeffs[2 * i + 1] = (((uint16_t)(a[0] >> 4) * S2N_KYBER_512_R3_Q) + 8) >> 4; - a += 1; - } -} - -/************************************************* -* Name: poly_tobytes -* -* Description: Serialization of a polynomial -* -* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for S2N_KYBER_512_R3_POLYBYTES bytes) -* - poly *a: pointer to input polynomial -**************************************************/ -void poly_tobytes(uint8_t r[S2N_KYBER_512_R3_POLYBYTES], poly *a) { - unsigned int i; - - poly_csubq(a); - - for (i = 0; i < S2N_KYBER_512_R3_N / 2; i++) { - uint16_t t0 = a->coeffs[2 * i]; - uint16_t t1 = a->coeffs[2 * i + 1]; - r[3 * i + 0] = (t0 >> 0); - r[3 * i + 1] = (t0 >> 8) | (t1 << 4); - r[3 * i + 2] = (t1 >> 4); - } -} - -/************************************************* -* Name: poly_frombytes -* -* Description: De-serialization of a 
polynomial; -* inverse of poly_tobytes -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *a: pointer to input byte array -* (of S2N_KYBER_512_R3_POLYBYTES bytes) -**************************************************/ -void poly_frombytes(poly *r, const uint8_t a[S2N_KYBER_512_R3_POLYBYTES]) { - unsigned int i; - for (i = 0; i < S2N_KYBER_512_R3_N / 2; i++) { - r->coeffs[2 * i] = ((a[3 * i + 0] >> 0) | ((uint16_t)a[3 * i + 1] << 8)) & 0xFFF; - r->coeffs[2 * i + 1] = ((a[3 * i + 1] >> 4) | ((uint16_t)a[3 * i + 2] << 4)) & 0xFFF; - } -} - -/************************************************* -* Name: poly_frommsg -* -* Description: Convert 32-byte message to polynomial -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *msg: pointer to input message -**************************************************/ -void poly_frommsg(poly *r, const uint8_t msg[S2N_KYBER_512_R3_INDCPA_MSGBYTES]) { - unsigned int i, j; - int16_t mask; - - for (i = 0; i < S2N_KYBER_512_R3_N / 8; i++) { - for (j = 0; j < 8; j++) { - mask = -(int16_t)((msg[i] >> j) & 1); - r->coeffs[8 * i + j] = mask & ((S2N_KYBER_512_R3_Q + 1) / 2); - } - } -} - -/************************************************* -* Name: poly_tomsg -* -* Description: Convert polynomial to 32-byte message -* -* Arguments: - uint8_t *msg: pointer to output message -* - poly *a: pointer to input polynomial -**************************************************/ -void poly_tomsg(uint8_t msg[S2N_KYBER_512_R3_INDCPA_MSGBYTES], poly *a) { - unsigned int i, j; - uint16_t t; - - poly_csubq(a); - - for (i = 0; i < S2N_KYBER_512_R3_N / 8; i++) { - msg[i] = 0; - for (j = 0; j < 8; j++) { - t = ((((uint16_t)a->coeffs[8 * i + j] << 1) + S2N_KYBER_512_R3_Q / 2) / S2N_KYBER_512_R3_Q) & 1; - msg[i] |= t << j; - } - } -} - -/************************************************* -* Name: poly_getnoise_eta1 -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial 
close to centered binomial distribution -* with parameter S2N_KYBER_512_R3_ETA1 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *seed: pointer to input seed -* (of length S2N_KYBER_512_R3_SYMBYTES bytes) -* - uint8_t nonce: one-byte input nonce -**************************************************/ -void poly_getnoise_eta1(poly *r, const uint8_t seed[S2N_KYBER_512_R3_SYMBYTES], uint8_t nonce) { - uint8_t buf[S2N_KYBER_512_R3_ETA1 * S2N_KYBER_512_R3_N / 4]; - shake256_prf(buf, sizeof(buf), seed, nonce); - cbd_eta1(r, buf); -} - -/************************************************* -* Name: poly_getnoise_eta2 -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter S2N_KYBER_512_R3_ETA2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *seed: pointer to input seed -* (of length S2N_KYBER_512_R3_SYMBYTES bytes) -* - uint8_t nonce: one-byte input nonce -**************************************************/ -void poly_getnoise_eta2(poly *r, const uint8_t seed[S2N_KYBER_512_R3_SYMBYTES], uint8_t nonce) { - uint8_t buf[S2N_KYBER_512_R3_ETA2 * S2N_KYBER_512_R3_N / 4]; - shake256_prf(buf, sizeof(buf), seed, nonce); - cbd_eta2(r, buf); -} - - -/************************************************* -* Name: poly_ntt -* -* Description: Computes negacyclic number-theoretic transform (NTT) of -* a polynomial in place; -* inputs assumed to be in normal order, output in bitreversed order -* -* Arguments: - uint16_t *r: pointer to in/output polynomial -**************************************************/ -void poly_ntt(poly *r) { - ntt(r->coeffs); - poly_reduce(r); -} - -/************************************************* -* Name: poly_invntt_tomont -* -* Description: Computes inverse of negacyclic number-theoretic transform (NTT) -* of a polynomial in place; -* inputs assumed to be in bitreversed order, output in normal order -* -* 
Arguments: - uint16_t *a: pointer to in/output polynomial -**************************************************/ -void poly_invntt_tomont(poly *r) { - invntt(r->coeffs); -} - -/************************************************* -* Name: poly_basemul_montgomery -* -* Description: Multiplication of two polynomials in NTT domain -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_basemul_montgomery(poly *r, const poly *a, const poly *b) { - unsigned int i; - for (i = 0; i < S2N_KYBER_512_R3_N / 4; i++) { - basemul(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i], zetas[64 + i]); - basemul(&r->coeffs[4 * i + 2], &a->coeffs[4 * i + 2], &b->coeffs[4 * i + 2], - -zetas[64 + i]); - } -} - -/************************************************* -* Name: poly_tomont -* -* Description: Inplace conversion of all coefficients of a polynomial -* from normal domain to Montgomery domain -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_tomont(poly *r) { - unsigned int i; - const int16_t f = (1ULL << 32) % S2N_KYBER_512_R3_Q; - for (i = 0; i < S2N_KYBER_512_R3_N; i++) { - r->coeffs[i] = montgomery_reduce((int32_t)r->coeffs[i] * f); - } -} - -/************************************************* -* Name: poly_reduce -* -* Description: Applies Barrett reduction to all coefficients of a polynomial -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_reduce(poly *r) { - unsigned int i; - for (i = 0; i < S2N_KYBER_512_R3_N; i++) { - r->coeffs[i] = barrett_reduce(r->coeffs[i]); - } -} - -/************************************************* -* Name: poly_csubq -* -* Description: Applies conditional 
subtraction of q to each coefficient -* of a polynomial. For details of conditional subtraction -* of q see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_csubq(poly *r) { - unsigned int i; - for (i = 0; i < S2N_KYBER_512_R3_N; i++) { - r->coeffs[i] = csubq(r->coeffs[i]); - } -} - -/************************************************* -* Name: poly_add -* -* Description: Add two polynomials -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_add(poly *r, const poly *a, const poly *b) { - unsigned int i; - for (i = 0; i < S2N_KYBER_512_R3_N; i++) { - r->coeffs[i] = a->coeffs[i] + b->coeffs[i]; - } -} - -/************************************************* -* Name: poly_sub -* -* Description: Subtract two polynomials -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_sub(poly *r, const poly *a, const poly *b) { - unsigned int i; - for (i = 0; i < S2N_KYBER_512_R3_N; i++) { - r->coeffs[i] = a->coeffs[i] - b->coeffs[i]; - } -} diff --git a/pq-crypto/kyber_r3/kyber512r3_poly.h b/pq-crypto/kyber_r3/kyber512r3_poly.h deleted file mode 100644 index da43766e51b..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_poly.h +++ /dev/null @@ -1,61 +0,0 @@ -#pragma once - -#include -#include "kyber512r3_params.h" - -/* - * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial - * coeffs[0] + X*coeffs[1] + X^2*xoeffs[2] + ... 
+ X^{n-1}*coeffs[n-1] - */ -#define poly S2N_KYBER_512_R3_NAMESPACE(poly) -typedef struct { - int16_t coeffs[S2N_KYBER_512_R3_N]; -} poly; - -#define poly_compress S2N_KYBER_512_R3_NAMESPACE(poly_compress) -void poly_compress(uint8_t r[S2N_KYBER_512_R3_POLYCOMPRESSEDBYTES], poly *a); - -#define poly_decompress S2N_KYBER_512_R3_NAMESPACE(poly_decompress) -void poly_decompress(poly *r, const uint8_t a[S2N_KYBER_512_R3_POLYCOMPRESSEDBYTES]); - -#define poly_tobytes S2N_KYBER_512_R3_NAMESPACE(poly_tobytes) -void poly_tobytes(uint8_t r[S2N_KYBER_512_R3_POLYBYTES], poly *a); - -#define poly_frombytes S2N_KYBER_512_R3_NAMESPACE(poly_frombytes) -void poly_frombytes(poly *r, const uint8_t a[S2N_KYBER_512_R3_POLYBYTES]); - -#define poly_frommsg S2N_KYBER_512_R3_NAMESPACE(poly_frommsg) -void poly_frommsg(poly *r, const uint8_t msg[S2N_KYBER_512_R3_INDCPA_MSGBYTES]); - -#define poly_tomsg S2N_KYBER_512_R3_NAMESPACE(poly_tomsg) -void poly_tomsg(uint8_t msg[S2N_KYBER_512_R3_INDCPA_MSGBYTES], poly *r); - -#define poly_getnoise_eta1 S2N_KYBER_512_R3_NAMESPACE(poly_getnoise_eta1) -void poly_getnoise_eta1(poly *r, const uint8_t seed[S2N_KYBER_512_R3_SYMBYTES], uint8_t nonce); - -#define poly_getnoise_eta2 S2N_KYBER_512_R3_NAMESPACE(poly_getnoise_eta2) -void poly_getnoise_eta2(poly *r, const uint8_t seed[S2N_KYBER_512_R3_SYMBYTES], uint8_t nonce); - -#define poly_ntt S2N_KYBER_512_R3_NAMESPACE(poly_ntt) -void poly_ntt(poly *r); - -#define poly_invntt_tomont S2N_KYBER_512_R3_NAMESPACE(poly_invntt_tomont) -void poly_invntt_tomont(poly *r); - -#define poly_basemul_montgomery S2N_KYBER_512_R3_NAMESPACE(poly_basemul_montgomery) -void poly_basemul_montgomery(poly *r, const poly *a, const poly *b); - -#define poly_tomont S2N_KYBER_512_R3_NAMESPACE(poly_tomont) -void poly_tomont(poly *r); - -#define poly_reduce S2N_KYBER_512_R3_NAMESPACE(poly_reduce) -void poly_reduce(poly *r); - -#define poly_csubq S2N_KYBER_512_R3_NAMESPACE(poly_csubq) -void poly_csubq(poly *r); - -#define poly_add 
S2N_KYBER_512_R3_NAMESPACE(poly_add) -void poly_add(poly *r, const poly *a, const poly *b); - -#define poly_sub S2N_KYBER_512_R3_NAMESPACE(poly_sub) -void poly_sub(poly *r, const poly *a, const poly *b); diff --git a/pq-crypto/kyber_r3/kyber512r3_poly_avx2.c b/pq-crypto/kyber_r3/kyber512r3_poly_avx2.c deleted file mode 100644 index aa961ff4030..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_poly_avx2.c +++ /dev/null @@ -1,453 +0,0 @@ -#include -#include -#include "kyber512r3_align_avx2.h" -#include "kyber512r3_consts_avx2.h" -#include "kyber512r3_poly_avx2.h" -#include "kyber512r3_ntt_avx2.h" -#include "kyber512r3_reduce_avx2.h" -#include "kyber512r3_cbd_avx2.h" -#include "kyber512r3_fips202.h" -#include "kyber512r3_fips202x4_avx2.h" -#include "kyber512r3_symmetric.h" - -#if defined(S2N_KYBER512R3_AVX2_BMI2) -#include - -/************************************************* -* Name: poly_compress_avx2 -* -* Description: Compression and subsequent serialization of a polynomial. -* The coefficients of the input polynomial are assumed to -* lie in the invertal [0,q], i.e. the polynomial must be reduced -* by poly_reduce_avx2(). 
-* -* Arguments: - uint8_t *r: pointer to output byte array -* (of length S2N_KYBER_512_R3_POLYCOMPRESSEDBYTES) -* - const poly *a: pointer to input polynomial -**************************************************/ -void poly_compress_avx2(uint8_t r[128], const poly * restrict a) -{ - unsigned int i; - __m256i f0, f1, f2, f3; - const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); - const __m256i shift1 = _mm256_set1_epi16(1 << 9); - const __m256i mask = _mm256_set1_epi16(15); - const __m256i shift2 = _mm256_set1_epi16((16 << 8) + 1); - const __m256i permdidx = _mm256_set_epi32(7,3,6,2,5,1,4,0); - - for(i=0;ivec[4*i+0]); - f1 = _mm256_load_si256(&a->vec[4*i+1]); - f2 = _mm256_load_si256(&a->vec[4*i+2]); - f3 = _mm256_load_si256(&a->vec[4*i+3]); - f0 = _mm256_mulhi_epi16(f0,v); - f1 = _mm256_mulhi_epi16(f1,v); - f2 = _mm256_mulhi_epi16(f2,v); - f3 = _mm256_mulhi_epi16(f3,v); - f0 = _mm256_mulhrs_epi16(f0,shift1); - f1 = _mm256_mulhrs_epi16(f1,shift1); - f2 = _mm256_mulhrs_epi16(f2,shift1); - f3 = _mm256_mulhrs_epi16(f3,shift1); - f0 = _mm256_and_si256(f0,mask); - f1 = _mm256_and_si256(f1,mask); - f2 = _mm256_and_si256(f2,mask); - f3 = _mm256_and_si256(f3,mask); - f0 = _mm256_packus_epi16(f0,f1); - f2 = _mm256_packus_epi16(f2,f3); - f0 = _mm256_maddubs_epi16(f0,shift2); - f2 = _mm256_maddubs_epi16(f2,shift2); - f0 = _mm256_packus_epi16(f0,f2); - f0 = _mm256_permutevar8x32_epi32(f0,permdidx); - // correcting cast-align error - // old version: _mm256_storeu_si256((__m256i *)&r[32*i],f0); - _mm256_storeu_si256((void *)&r[32*i],f0); - } -} - -void poly_decompress_avx2(poly * restrict r, const uint8_t a[128]) -{ - unsigned int i; - __m128i t; - __m256i f; - const __m256i q = _mm256_load_si256(&qdata.vec[_16XQ/16]); - const __m256i shufbidx = _mm256_set_epi8(7,7,7,7,6,6,6,6,5,5,5,5,4,4,4,4, - 3,3,3,3,2,2,2,2,1,1,1,1,0,0,0,0); - const __m256i mask = _mm256_set1_epi32(0x00F0000F); - const __m256i shift = _mm256_set1_epi32((128 << 16) + 2048); - - for(i=0;ivec[i],f); - } 
-} - -/************************************************* -* Name: poly_tobytes_avx2 -* -* Description: Serialization of a polynomial in NTT representation. -* The coefficients of the input polynomial are assumed to -* lie in the invertal [0,q], i.e. the polynomial must be reduced -* by poly_reduce_avx2(). The coefficients are orderd as output by -* poly_ntt_avx2(); the serialized output coefficients are in bitreversed -* order. -* -* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for S2N_KYBER_512_R3_POLYBYTES bytes) -* - poly *a: pointer to input polynomial -**************************************************/ -void poly_tobytes_avx2(uint8_t r[S2N_KYBER_512_R3_POLYBYTES], const poly *a) -{ - ntttobytes_avx2_asm(r, a->vec, qdata.vec); -} - -/************************************************* -* Name: poly_frombytes_avx2 -* -* Description: De-serialization of a polynomial; -* inverse of poly_tobytes_avx2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *a: pointer to input byte array -* (of S2N_KYBER_512_R3_POLYBYTES bytes) -**************************************************/ -void poly_frombytes_avx2(poly *r, const uint8_t a[S2N_KYBER_512_R3_POLYBYTES]) -{ - nttfrombytes_avx2_asm(r->vec, a, qdata.vec); -} - -/************************************************* -* Name: poly_frommsg_avx2 -* -* Description: Convert 32-byte message to polynomial -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *msg: pointer to input message -**************************************************/ -void poly_frommsg_avx2(poly * restrict r, const uint8_t msg[S2N_KYBER_512_R3_INDCPA_MSGBYTES]) -{ - __m256i f, g0, g1, g2, g3, h0, h1, h2, h3; - const __m256i shift = _mm256_broadcastsi128_si256(_mm_set_epi32(0,1,2,3)); - const __m256i idx = _mm256_broadcastsi128_si256(_mm_set_epi8(15,14,11,10,7,6,3,2,13,12,9,8,5,4,1,0)); - const __m256i hqs = _mm256_set1_epi16((S2N_KYBER_512_R3_Q+1)/2); - -#define FROMMSG64(i) \ - g3 = 
_mm256_shuffle_epi32(f,0x55*i); \ - g3 = _mm256_sllv_epi32(g3,shift); \ - g3 = _mm256_shuffle_epi8(g3,idx); \ - g0 = _mm256_slli_epi16(g3,12); \ - g1 = _mm256_slli_epi16(g3,8); \ - g2 = _mm256_slli_epi16(g3,4); \ - g0 = _mm256_srai_epi16(g0,15); \ - g1 = _mm256_srai_epi16(g1,15); \ - g2 = _mm256_srai_epi16(g2,15); \ - g3 = _mm256_srai_epi16(g3,15); \ - g0 = _mm256_and_si256(g0,hqs); /* 19 18 17 16 3 2 1 0 */ \ - g1 = _mm256_and_si256(g1,hqs); /* 23 22 21 20 7 6 5 4 */ \ - g2 = _mm256_and_si256(g2,hqs); /* 27 26 25 24 11 10 9 8 */ \ - g3 = _mm256_and_si256(g3,hqs); /* 31 30 29 28 15 14 13 12 */ \ - h0 = _mm256_unpacklo_epi64(g0,g1); \ - h2 = _mm256_unpackhi_epi64(g0,g1); \ - h1 = _mm256_unpacklo_epi64(g2,g3); \ - h3 = _mm256_unpackhi_epi64(g2,g3); \ - g0 = _mm256_permute2x128_si256(h0,h1,0x20); \ - g2 = _mm256_permute2x128_si256(h0,h1,0x31); \ - g1 = _mm256_permute2x128_si256(h2,h3,0x20); \ - g3 = _mm256_permute2x128_si256(h2,h3,0x31); \ - _mm256_store_si256(&r->vec[0+2*i+0],g0); \ - _mm256_store_si256(&r->vec[0+2*i+1],g1); \ - _mm256_store_si256(&r->vec[8+2*i+0],g2); \ - _mm256_store_si256(&r->vec[8+2*i+1],g3) - - // correcting cast-align and cast-qual errors - // old version: f = _mm256_loadu_si256((__m256i *)msg); - f = _mm256_loadu_si256((const void *)msg); - FROMMSG64(0); - FROMMSG64(1); - FROMMSG64(2); - FROMMSG64(3); -} - -/************************************************* -* Name: poly_tomsg_avx2 -* -* Description: Convert polynomial to 32-byte message. -* The coefficients of the input polynomial are assumed to -* lie in the invertal [0,q], i.e. the polynomial must be reduced -* by poly_reduce_avx2(). 
-* -* Arguments: - uint8_t *msg: pointer to output message -* - poly *a: pointer to input polynomial -**************************************************/ -void poly_tomsg_avx2(uint8_t msg[S2N_KYBER_512_R3_INDCPA_MSGBYTES], const poly * restrict a) -{ - unsigned int i; - uint32_t small; - __m256i f0, f1, g0, g1; - const __m256i hq = _mm256_set1_epi16((S2N_KYBER_512_R3_Q - 1)/2); - const __m256i hhq = _mm256_set1_epi16((S2N_KYBER_512_R3_Q - 1)/4); - - for(i=0;ivec[2*i+0]); - f1 = _mm256_load_si256(&a->vec[2*i+1]); - f0 = _mm256_sub_epi16(hq, f0); - f1 = _mm256_sub_epi16(hq, f1); - g0 = _mm256_srai_epi16(f0, 15); - g1 = _mm256_srai_epi16(f1, 15); - f0 = _mm256_xor_si256(f0, g0); - f1 = _mm256_xor_si256(f1, g1); - f0 = _mm256_sub_epi16(f0, hhq); - f1 = _mm256_sub_epi16(f1, hhq); - f0 = _mm256_packs_epi16(f0, f1); - f0 = _mm256_permute4x64_epi64(f0, 0xD8); - small = _mm256_movemask_epi8(f0); - memcpy(&msg[4*i], &small, 4); - } -} - -/************************************************* -* Name: poly_getnoise_eta1_avx2 -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter S2N_KYBER_512_R3_ETA1 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *seed: pointer to input seed -* (of length S2N_KYBER_512_R3_SYMBYTES bytes) -* - uint8_t nonce: one-byte input nonce -**************************************************/ -void poly_getnoise_eta1_avx2(poly *r, const uint8_t seed[S2N_KYBER_512_R3_SYMBYTES], uint8_t nonce) -{ - ALIGNED_UINT8(S2N_KYBER_512_R3_ETA1*S2N_KYBER_512_R3_N/4+32) buf; // +32 bytes as required by poly_cbd_eta1_avx2 - shake256_prf(buf.coeffs, S2N_KYBER_512_R3_ETA1*S2N_KYBER_512_R3_N/4, seed, nonce); - poly_cbd_eta1_avx2(r, buf.vec); -} - -/************************************************* -* Name: poly_getnoise_eta2_avx2 -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial 
close to centered binomial distribution -* with parameter S2N_KYBER_512_R3_ETA2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *seed: pointer to input seed -* (of length S2N_KYBER_512_R3_SYMBYTES bytes) -* - uint8_t nonce: one-byte input nonce -**************************************************/ -void poly_getnoise_eta2_avx2(poly *r, const uint8_t seed[S2N_KYBER_512_R3_SYMBYTES], uint8_t nonce) -{ - ALIGNED_UINT8(S2N_KYBER_512_R3_ETA2*S2N_KYBER_512_R3_N/4) buf; - shake256_prf(buf.coeffs, S2N_KYBER_512_R3_ETA2*S2N_KYBER_512_R3_N/4, seed, nonce); - poly_cbd_eta2_avx2(r, buf.vec); -} - -#define NOISE_NBLOCKS ((S2N_KYBER_512_R3_ETA1*S2N_KYBER_512_R3_N/4+S2N_KYBER_512_R3_SHAKE256_RATE-1)/S2N_KYBER_512_R3_SHAKE256_RATE) -void poly_getnoise_eta1_4x(poly *r0, - poly *r1, - poly *r2, - poly *r3, - const uint8_t seed[32], - uint8_t nonce0, - uint8_t nonce1, - uint8_t nonce2, - uint8_t nonce3) -{ - ALIGNED_UINT8(NOISE_NBLOCKS*S2N_KYBER_512_R3_SHAKE256_RATE) buf[4]; - __m256i f; - keccakx4_state state; - - // correcting cast-align and cast-qual errors - // old version: f = _mm256_loadu_si256((__m256i *)seed); - f = _mm256_loadu_si256((const void *)seed); - _mm256_store_si256(buf[0].vec, f); - _mm256_store_si256(buf[1].vec, f); - _mm256_store_si256(buf[2].vec, f); - _mm256_store_si256(buf[3].vec, f); - - buf[0].coeffs[32] = nonce0; - buf[1].coeffs[32] = nonce1; - buf[2].coeffs[32] = nonce2; - buf[3].coeffs[32] = nonce3; - - shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 33); - shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, NOISE_NBLOCKS, &state); - - poly_cbd_eta1_avx2(r0, buf[0].vec); - poly_cbd_eta1_avx2(r1, buf[1].vec); - poly_cbd_eta1_avx2(r2, buf[2].vec); - poly_cbd_eta1_avx2(r3, buf[3].vec); -} - -void poly_getnoise_eta1122_4x(poly *r0, - poly *r1, - poly *r2, - poly *r3, - const uint8_t seed[32], - uint8_t nonce0, - uint8_t nonce1, - uint8_t nonce2, - uint8_t 
nonce3) -{ - ALIGNED_UINT8(NOISE_NBLOCKS*S2N_KYBER_512_R3_SHAKE256_RATE) buf[4]; - __m256i f; - keccakx4_state state; - - // correcting cast-align and cast-qual errors - // old version: f = _mm256_loadu_si256((__m256i *)seed); - f = _mm256_loadu_si256((const void *)seed); - _mm256_store_si256(buf[0].vec, f); - _mm256_store_si256(buf[1].vec, f); - _mm256_store_si256(buf[2].vec, f); - _mm256_store_si256(buf[3].vec, f); - - buf[0].coeffs[32] = nonce0; - buf[1].coeffs[32] = nonce1; - buf[2].coeffs[32] = nonce2; - buf[3].coeffs[32] = nonce3; - - shake256x4_absorb_once(&state, buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, 33); - shake256x4_squeezeblocks(buf[0].coeffs, buf[1].coeffs, buf[2].coeffs, buf[3].coeffs, NOISE_NBLOCKS, &state); - - poly_cbd_eta1_avx2(r0, buf[0].vec); - poly_cbd_eta1_avx2(r1, buf[1].vec); - poly_cbd_eta2_avx2(r2, buf[2].vec); - poly_cbd_eta2_avx2(r3, buf[3].vec); -} - -/************************************************* -* Name: poly_ntt_avx2 -* -* Description: Computes negacyclic number-theoretic transform (NTT) of -* a polynomial in place. -* Input coefficients assumed to be in normal order, -* output coefficients are in special order that is natural -* for the vectorization. Input coefficients are assumed to be -* bounded by q in absolute value, output coefficients are bounded -* by 16118 in absolute value. -* -* Arguments: - poly *r: pointer to in/output polynomial -**************************************************/ -void poly_ntt_avx2(poly *r) -{ - ntt_avx2_asm(r->vec, qdata.vec); -} - -/************************************************* -* Name: poly_invntt_tomont_avx2 -* -* Description: Computes inverse of negacyclic number-theoretic transform (NTT) -* of a polynomial in place; -* Input coefficients assumed to be in special order from vectorized -* forward ntt, output in normal order. Input coefficients can be -* arbitrary 16-bit integers, output coefficients are bounded by 14870 -* in absolute value. 
-* -* Arguments: - poly *a: pointer to in/output polynomial -**************************************************/ -void poly_invntt_tomont_avx2(poly *r) -{ - invntt_avx2_asm(r->vec, qdata.vec); -} - -void poly_nttunpack_avx2(poly *r) -{ - nttunpack_avx2_asm(r->vec, qdata.vec); -} - -/************************************************* -* Name: poly_basemul_montgomery_avx2 -* -* Description: Multiplication of two polynomials in NTT domain. -* One of the input polynomials needs to have coefficients -* bounded by q, the other polynomial can have arbitrary -* coefficients. Output coefficients are bounded by 6656. -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_basemul_montgomery_avx2(poly *r, const poly *a, const poly *b) -{ - basemul_avx2_asm(r->vec, a->vec, b->vec, qdata.vec); -} - -/************************************************* -* Name: poly_tomont_avx2 -* -* Description: Inplace conversion of all coefficients of a polynomial -* from normal domain to Montgomery domain -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_tomont_avx2(poly *r) -{ - tomont_avx2_asm(r->vec, qdata.vec); -} - -/************************************************* -* Name: poly_reduce_avx2 -* -* Description: Applies Barrett reduction to all coefficients of a polynomial -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void poly_reduce_avx2(poly *r) -{ - reduce_avx2_asm(r->vec, qdata.vec); -} - -/************************************************* -* Name: poly_add_avx2 -* -* Description: Add two polynomials. No modular reduction -* is performed. 
-* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_add_avx2(poly *r, const poly *a, const poly *b) -{ - unsigned int i; - __m256i f0, f1; - - for(i=0;ivec[i]); - f1 = _mm256_load_si256(&b->vec[i]); - f0 = _mm256_add_epi16(f0, f1); - _mm256_store_si256(&r->vec[i], f0); - } -} - -/************************************************* -* Name: poly_sub_avx2 -* -* Description: Subtract two polynomials. No modular reduction -* is performed. -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void poly_sub_avx2(poly *r, const poly *a, const poly *b) -{ - unsigned int i; - __m256i f0, f1; - - for(i=0;ivec[i]); - f1 = _mm256_load_si256(&b->vec[i]); - f0 = _mm256_sub_epi16(f0, f1); - _mm256_store_si256(&r->vec[i], f0); - } -} -#endif diff --git a/pq-crypto/kyber_r3/kyber512r3_poly_avx2.h b/pq-crypto/kyber_r3/kyber512r3_poly_avx2.h deleted file mode 100644 index bd6e857f79c..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_poly_avx2.h +++ /dev/null @@ -1,80 +0,0 @@ -#pragma once - -#include -#include "kyber512r3_align_avx2.h" -#include "kyber512r3_params.h" - -#if defined(S2N_KYBER512R3_AVX2_BMI2) -#define poly S2N_KYBER_512_R3_NAMESPACE(poly) -typedef ALIGNED_INT16(S2N_KYBER_512_R3_N) poly; - -#define poly_compress_avx2 S2N_KYBER_512_R3_NAMESPACE(poly_compress_avx2) -void poly_compress_avx2(uint8_t r[S2N_KYBER_512_R3_POLYCOMPRESSEDBYTES], const poly *a); - -#define poly_decompress_avx2 S2N_KYBER_512_R3_NAMESPACE(poly_decompress_avx2) -void poly_decompress_avx2(poly *r, const uint8_t a[S2N_KYBER_512_R3_POLYCOMPRESSEDBYTES]); - -#define poly_tobytes_avx2 S2N_KYBER_512_R3_NAMESPACE(poly_tobytes_avx2) -void poly_tobytes_avx2(uint8_t 
r[S2N_KYBER_512_R3_POLYBYTES], const poly *a); - -#define poly_frombytes_avx2 S2N_KYBER_512_R3_NAMESPACE(poly_frombytes_avx2) -void poly_frombytes_avx2(poly *r, const uint8_t a[S2N_KYBER_512_R3_POLYBYTES]); - -#define poly_frommsg_avx2 S2N_KYBER_512_R3_NAMESPACE(poly_frommsg_avx2) -void poly_frommsg_avx2(poly *r, const uint8_t msg[S2N_KYBER_512_R3_INDCPA_MSGBYTES]); - -#define poly_tomsg_avx2 S2N_KYBER_512_R3_NAMESPACE(poly_tomsg_avx2) -void poly_tomsg_avx2(uint8_t msg[S2N_KYBER_512_R3_INDCPA_MSGBYTES], const poly *r); - -#define poly_getnoise_eta1_avx2 S2N_KYBER_512_R3_NAMESPACE(poly_getnoise_eta1_avx2) -void poly_getnoise_eta1_avx2(poly *r, const uint8_t seed[S2N_KYBER_512_R3_SYMBYTES], uint8_t nonce); - -#define poly_getnoise_eta2_avx2 S2N_KYBER_512_R3_NAMESPACE(poly_getnoise_eta2_avx2) -void poly_getnoise_eta2_avx2(poly *r, const uint8_t seed[S2N_KYBER_512_R3_SYMBYTES], uint8_t nonce); - -#define poly_getnoise_eta1_4x S2N_KYBER_512_R3_NAMESPACE(poly_getnoise_eta2_4x) -void poly_getnoise_eta1_4x(poly *r0, - poly *r1, - poly *r2, - poly *r3, - const uint8_t seed[32], - uint8_t nonce0, - uint8_t nonce1, - uint8_t nonce2, - uint8_t nonce3); - -#define poly_getnoise_eta1122_4x S2N_KYBER_512_R3_NAMESPACE(poly_getnoise_eta1122_4x) -void poly_getnoise_eta1122_4x(poly *r0, - poly *r1, - poly *r2, - poly *r3, - const uint8_t seed[32], - uint8_t nonce0, - uint8_t nonce1, - uint8_t nonce2, - uint8_t nonce3); - -#define poly_ntt_avx2 S2N_KYBER_512_R3_NAMESPACE(poly_ntt_avx2) -void poly_ntt_avx2(poly *r); - -#define poly_invntt_tomont_avx2 S2N_KYBER_512_R3_NAMESPACE(poly_invntt_tomont_avx2) -void poly_invntt_tomont_avx2(poly *r); - -#define poly_nttunpack_avx2 S2N_KYBER_512_R3_NAMESPACE(poly_nttunpack_avx2) -void poly_nttunpack_avx2(poly *r); - -#define poly_basemul_montgomery_avx2 S2N_KYBER_512_R3_NAMESPACE(poly_basemul_montgomery_avx2) -void poly_basemul_montgomery_avx2(poly *r, const poly *a, const poly *b); - -#define poly_tomont_avx2 
S2N_KYBER_512_R3_NAMESPACE(poly_tomont_avx2) -void poly_tomont_avx2(poly *r); - -#define poly_reduce_avx2 S2N_KYBER_512_R3_NAMESPACE(poly_reduce_avx2) -void poly_reduce_avx2(poly *r); - -#define poly_add_avx2 S2N_KYBER_512_R3_NAMESPACE(poly_add_avx2) -void poly_add_avx2(poly *r, const poly *a, const poly *b); - -#define poly_sub_avx2 S2N_KYBER_512_R3_NAMESPACE(poly_sub_avx2) -void poly_sub_avx2(poly *r, const poly *a, const poly *b); -#endif diff --git a/pq-crypto/kyber_r3/kyber512r3_polyvec.c b/pq-crypto/kyber_r3/kyber512r3_polyvec.c deleted file mode 100644 index fb3b54f8ccd..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_polyvec.c +++ /dev/null @@ -1,188 +0,0 @@ -#include -#include "kyber512r3_params.h" -#include "kyber512r3_poly.h" -#include "kyber512r3_polyvec.h" - -S2N_ENSURE_PORTABLE_OPTIMIZATIONS - -/************************************************* -* Name: polyvec_compress -* -* Description: Compress and serialize vector of polynomials -* -* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for S2N_KYBER_512_R3_POLYVECCOMPRESSEDBYTES) -* - polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_compress(uint8_t r[S2N_KYBER_512_R3_POLYVECCOMPRESSEDBYTES], polyvec *a) { - polyvec_csubq(a); - - uint16_t t[4]; - for (unsigned int i = 0; i < S2N_KYBER_512_R3_K; i++) { - for (unsigned int j = 0; j < S2N_KYBER_512_R3_N / 4; j++) { - for (unsigned int k = 0; k < 4; k++) - t[k] = ((((uint32_t)a->vec[i].coeffs[4 * j + k] << 10) + S2N_KYBER_512_R3_Q / 2) - / S2N_KYBER_512_R3_Q) & 0x3ff; - - r[0] = (t[0] >> 0); - r[1] = (t[0] >> 8) | (t[1] << 2); - r[2] = (t[1] >> 6) | (t[2] << 4); - r[3] = (t[2] >> 4) | (t[3] << 6); - r[4] = (t[3] >> 2); - r += 5; - } - } -} - -/************************************************* -* Name: polyvec_decompress -* -* Description: De-serialize and decompress vector of polynomials; -* approximate inverse of polyvec_compress -* -* Arguments: - polyvec *r: 
pointer to output vector of polynomials -* - const uint8_t *a: pointer to input byte array -* (of length S2N_KYBER_512_R3_POLYVECCOMPRESSEDBYTES) -**************************************************/ -void polyvec_decompress(polyvec *r, const uint8_t a[S2N_KYBER_512_R3_POLYVECCOMPRESSEDBYTES]) { - uint16_t t[4]; - for (unsigned int i = 0; i < S2N_KYBER_512_R3_K; i++) { - for (unsigned int j = 0; j < S2N_KYBER_512_R3_N / 4; j++) { - t[0] = (a[0] >> 0) | ((uint16_t)a[1] << 8); - t[1] = (a[1] >> 2) | ((uint16_t)a[2] << 6); - t[2] = (a[2] >> 4) | ((uint16_t)a[3] << 4); - t[3] = (a[3] >> 6) | ((uint16_t)a[4] << 2); - a += 5; - - for (unsigned int k = 0; k < 4; k++) { - r->vec[i].coeffs[4 * j + k] = ((uint32_t)(t[k] & 0x3FF) * S2N_KYBER_512_R3_Q + 512) >> 10; - } - } - } -} - -/************************************************* -* Name: polyvec_tobytes -* -* Description: Serialize vector of polynomials -* -* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for S2N_KYBER_512_R3_POLYVECBYTES) -* - polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_tobytes(uint8_t r[S2N_KYBER_512_R3_POLYVECBYTES], polyvec *a) { - for (unsigned int i = 0; i < S2N_KYBER_512_R3_K; i++) { - poly_tobytes(r + i * S2N_KYBER_512_R3_POLYBYTES, &a->vec[i]); - } -} - -/************************************************* -* Name: polyvec_frombytes -* -* Description: De-serialize vector of polynomials; -* inverse of polyvec_tobytes -* -* Arguments: - uint8_t *r: pointer to output byte array -* - const polyvec *a: pointer to input vector of polynomials -* (of length S2N_KYBER_512_R3_POLYVECBYTES) -**************************************************/ -void polyvec_frombytes(polyvec *r, const uint8_t a[S2N_KYBER_512_R3_POLYVECBYTES]) { - for (unsigned int i = 0; i < S2N_KYBER_512_R3_K; i++) { - poly_frombytes(&r->vec[i], a + i * S2N_KYBER_512_R3_POLYBYTES); - } -} - -/************************************************* -* 
Name: polyvec_ntt -* -* Description: Apply forward NTT to all elements of a vector of polynomials -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_ntt(polyvec *r) { - for (unsigned int i = 0; i < S2N_KYBER_512_R3_K; i++) { - poly_ntt(&r->vec[i]); - } -} - -/************************************************* -* Name: polyvec_invntt_tomont -* -* Description: Apply inverse NTT to all elements of a vector of polynomials -* and multiply by Montgomery factor 2^16 -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_invntt_tomont(polyvec *r) { - for (unsigned int i = 0; i < S2N_KYBER_512_R3_K; i++) { - poly_invntt_tomont(&r->vec[i]); - } -} - -/************************************************* -* Name: polyvec_pointwise_acc_montgomery -* -* Description: Pointwise multiply elements of a and b, accumulate into r, -* and multiply by 2^-16. 
-* -* Arguments: - poly *r: pointer to output polynomial -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_pointwise_acc_montgomery(poly *r, const polyvec *a, const polyvec *b) { - poly t; - - poly_basemul_montgomery(r, &a->vec[0], &b->vec[0]); - for (unsigned int i = 1; i < S2N_KYBER_512_R3_K; i++) { - poly_basemul_montgomery(&t, &a->vec[i], &b->vec[i]); - poly_add(r, r, &t); - } - - poly_reduce(r); -} - -/************************************************* -* Name: polyvec_reduce -* -* Description: Applies Barrett reduction to each coefficient -* of each element of a vector of polynomials -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void polyvec_reduce(polyvec *r) { - for (unsigned int i = 0; i < S2N_KYBER_512_R3_K; i++) { - poly_reduce(&r->vec[i]); - } -} - -/************************************************* -* Name: polyvec_csubq -* -* Description: Applies conditional subtraction of q to each coefficient -* of each element of a vector of polynomials -* for details of conditional subtraction of q see comments in -* reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void polyvec_csubq(polyvec *r) { - for (unsigned int i = 0; i < S2N_KYBER_512_R3_K; i++) { - poly_csubq(&r->vec[i]); - } -} - -/************************************************* -* Name: polyvec_add -* -* Description: Add vectors of polynomials -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_add(polyvec *r, const polyvec 
*a, const polyvec *b) { - for (unsigned int i = 0; i < S2N_KYBER_512_R3_K; i++) { - poly_add(&r->vec[i], &a->vec[i], &b->vec[i]); - } -} diff --git a/pq-crypto/kyber_r3/kyber512r3_polyvec.h b/pq-crypto/kyber_r3/kyber512r3_polyvec.h deleted file mode 100644 index 797f3c0d31c..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_polyvec.h +++ /dev/null @@ -1,40 +0,0 @@ -#pragma once - -#include -#include "kyber512r3_params.h" -#include "kyber512r3_poly.h" - -#define polyvec S2N_KYBER_512_R3_NAMESPACE(polyvec) -typedef struct { - poly vec[S2N_KYBER_512_R3_K]; -} polyvec; - -#define polyvec_compress S2N_KYBER_512_R3_NAMESPACE(polyvec_compress) -void polyvec_compress(uint8_t r[S2N_KYBER_512_R3_POLYVECCOMPRESSEDBYTES], polyvec *a); - -#define polyvec_decompress S2N_KYBER_512_R3_NAMESPACE(polyvec_decompress) -void polyvec_decompress(polyvec *r, const uint8_t a[S2N_KYBER_512_R3_POLYVECCOMPRESSEDBYTES]); - -#define polyvec_tobytes S2N_KYBER_512_R3_NAMESPACE(polyvec_tobytes) -void polyvec_tobytes(uint8_t r[S2N_KYBER_512_R3_POLYVECBYTES], polyvec *a); - -#define polyvec_frombytes S2N_KYBER_512_R3_NAMESPACE(polyvec_frombytes) -void polyvec_frombytes(polyvec *r, const uint8_t a[S2N_KYBER_512_R3_POLYVECBYTES]); - -#define polyvec_ntt S2N_KYBER_512_R3_NAMESPACE(polyvec_ntt) -void polyvec_ntt(polyvec *r); - -#define polyvec_invntt_tomont S2N_KYBER_512_R3_NAMESPACE(polyvec_invntt_tomont) -void polyvec_invntt_tomont(polyvec *r); - -#define polyvec_pointwise_acc_montgomery S2N_KYBER_512_R3_NAMESPACE(polyvec_pointwise_acc_montgomery) -void polyvec_pointwise_acc_montgomery(poly *r, const polyvec *a, const polyvec *b); - -#define polyvec_reduce S2N_KYBER_512_R3_NAMESPACE(polyvec_reduce) -void polyvec_reduce(polyvec *r); - -#define polyvec_csubq S2N_KYBER_512_R3_NAMESPACE(polyvec_csubq) -void polyvec_csubq(polyvec *r); - -#define polyvec_add S2N_KYBER_512_R3_NAMESPACE(polyvec_add) -void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); diff --git 
a/pq-crypto/kyber_r3/kyber512r3_polyvec_avx2.c b/pq-crypto/kyber_r3/kyber512r3_polyvec_avx2.c deleted file mode 100644 index 8434b96d76d..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_polyvec_avx2.c +++ /dev/null @@ -1,227 +0,0 @@ -#include -#include -#include "kyber512r3_polyvec_avx2.h" -#include "kyber512r3_poly_avx2.h" -#include "kyber512r3_consts_avx2.h" - -#if defined(S2N_KYBER512R3_AVX2_BMI2) -#include - -static void poly_compress10(uint8_t r[320], const poly * restrict a) -{ - unsigned int i; - __m256i f0, f1, f2; - __m128i t0, t1; - const __m256i v = _mm256_load_si256(&qdata.vec[_16XV/16]); - const __m256i v8 = _mm256_slli_epi16(v,3); - const __m256i off = _mm256_set1_epi16(15); - const __m256i shift1 = _mm256_set1_epi16(1 << 12); - const __m256i mask = _mm256_set1_epi16(1023); - const __m256i shift2 = _mm256_set1_epi64x((1024LL << 48) + (1LL << 32) + (1024 << 16) + 1); - const __m256i sllvdidx = _mm256_set1_epi64x(12); - const __m256i shufbidx = _mm256_set_epi8( 8, 4, 3, 2, 1, 0,-1,-1,-1,-1,-1,-1,12,11,10, 9, - -1,-1,-1,-1,-1,-1,12,11,10, 9, 8, 4, 3, 2, 1, 0); - - for(i=0;ivec[i]); - f1 = _mm256_mullo_epi16(f0,v8); - f2 = _mm256_add_epi16(f0,off); - f0 = _mm256_slli_epi16(f0,3); - f0 = _mm256_mulhi_epi16(f0,v); - f2 = _mm256_sub_epi16(f1,f2); - f1 = _mm256_andnot_si256(f1,f2); - f1 = _mm256_srli_epi16(f1,15); - f0 = _mm256_sub_epi16(f0,f1); - f0 = _mm256_mulhrs_epi16(f0,shift1); - f0 = _mm256_and_si256(f0,mask); - f0 = _mm256_madd_epi16(f0,shift2); - f0 = _mm256_sllv_epi32(f0,sllvdidx); - f0 = _mm256_srli_epi64(f0,12); - f0 = _mm256_shuffle_epi8(f0,shufbidx); - t0 = _mm256_castsi256_si128(f0); - t1 = _mm256_extracti128_si256(f0,1); - t0 = _mm_blend_epi16(t0,t1,0xE0); - // correcting cast-align error - // old version: _mm_storeu_si128((__m128i *)&r[20*i+ 0],t0); - _mm_storeu_si128((void *)&r[20*i+ 0],t0); - memcpy(&r[20*i+16],&t1,4); - } -} - -static void poly_decompress10(poly * restrict r, const uint8_t a[320+12]) -{ - unsigned int i; - __m256i f; - 
const __m256i q = _mm256_set1_epi32((S2N_KYBER_512_R3_Q << 16) + 4*S2N_KYBER_512_R3_Q); - const __m256i shufbidx = _mm256_set_epi8(11,10,10, 9, 9, 8, 8, 7, - 6, 5, 5, 4, 4, 3, 3, 2, - 9, 8, 8, 7, 7, 6, 6, 5, - 4, 3, 3, 2, 2, 1, 1, 0); - const __m256i sllvdidx = _mm256_set1_epi64x(4); - const __m256i mask = _mm256_set1_epi32((32736 << 16) + 8184); - - for(i=0;ivec[i],f); - } -} - -/************************************************* -* Name: polyvec_compress_avx2 -* -* Description: Compress and serialize vector of polynomials -* -* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for S2N_KYBER_512_R3_POLYVECCOMPRESSEDBYTES) -* - polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_compress_avx2(uint8_t r[S2N_KYBER_512_R3_POLYVECCOMPRESSEDBYTES+2], const polyvec *a) -{ - unsigned int i; - - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_decompress_avx2 -* -* Description: De-serialize and decompress vector of polynomials; -* approximate inverse of polyvec_compress_avx2 -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - const uint8_t *a: pointer to input byte array -* (of length S2N_KYBER_512_R3_POLYVECCOMPRESSEDBYTES) -**************************************************/ -void polyvec_decompress_avx2(polyvec *r, const uint8_t a[S2N_KYBER_512_R3_POLYVECCOMPRESSEDBYTES+12]) -{ - unsigned int i; - - for(i=0;ivec[i],&a[320*i]); -} - -/************************************************* -* Name: polyvec_tobytes_avx2 -* -* Description: Serialize vector of polynomials -* -* Arguments: - uint8_t *r: pointer to output byte array -* (needs space for S2N_KYBER_512_R3_POLYVECBYTES) -* - polyvec *a: pointer to input vector of polynomials -**************************************************/ -void polyvec_tobytes_avx2(uint8_t r[S2N_KYBER_512_R3_POLYVECBYTES], const polyvec *a) -{ - unsigned int i; - for(i=0;ivec[i]); -} - 
-/************************************************* -* Name: polyvec_frombytes_avx2 -* -* Description: De-serialize vector of polynomials; -* inverse of polyvec_tobytes_avx2 -* -* Arguments: - uint8_t *r: pointer to output byte array -* - const polyvec *a: pointer to input vector of polynomials -* (of length S2N_KYBER_512_R3_POLYVECBYTES) -**************************************************/ -void polyvec_frombytes_avx2(polyvec *r, const uint8_t a[S2N_KYBER_512_R3_POLYVECBYTES]) -{ - unsigned int i; - for(i=0;ivec[i], a+i*S2N_KYBER_512_R3_POLYBYTES); -} - -/************************************************* -* Name: polyvec_ntt_avx2 -* -* Description: Apply forward NTT to all elements of a vector of polynomials -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_ntt_avx2(polyvec *r) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_invntt_tomont_avx2 -* -* Description: Apply inverse NTT to all elements of a vector of polynomials -* and multiply by Montgomery factor 2^16 -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void polyvec_invntt_tomont_avx2(polyvec *r) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_basemul_acc_montgomery_avx2 -* -* Description: Multiply elements in a and b in NTT domain, accumulate into r, -* and multiply by 2^-16. 
-* -* Arguments: - poly *r: pointer to output polynomial -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_basemul_acc_montgomery_avx2(poly *r, const polyvec *a, const polyvec *b) -{ - unsigned int i; - poly tmp; - - poly_basemul_montgomery_avx2(r,&a->vec[0],&b->vec[0]); - for(i=1;ivec[i],&b->vec[i]); - poly_add_avx2(r,r,&tmp); - } -} - -/************************************************* -* Name: polyvec_reduce_avx2 -* -* Description: Applies Barrett reduction to each coefficient -* of each element of a vector of polynomials; -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - polyvec *r: pointer to input/output polynomial -**************************************************/ -void polyvec_reduce_avx2(polyvec *r) -{ - unsigned int i; - for(i=0;ivec[i]); -} - -/************************************************* -* Name: polyvec_add_avx2 -* -* Description: Add vectors of polynomials -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void polyvec_add_avx2(polyvec *r, const polyvec *a, const polyvec *b) -{ - unsigned int i; - for(i=0;ivec[i], &a->vec[i], &b->vec[i]); -} -#endif diff --git a/pq-crypto/kyber_r3/kyber512r3_polyvec_avx2.h b/pq-crypto/kyber_r3/kyber512r3_polyvec_avx2.h deleted file mode 100644 index 536e1b23d0d..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_polyvec_avx2.h +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once - -#include -#include "kyber512r3_params.h" -#include "kyber512r3_poly_avx2.h" - -#if defined(S2N_KYBER512R3_AVX2_BMI2) -#define polyvec S2N_KYBER_512_R3_NAMESPACE(polyvec) -typedef struct{ - poly vec[S2N_KYBER_512_R3_K]; -} polyvec; - -#define 
polyvec_compress_avx2 S2N_KYBER_512_R3_NAMESPACE(polyvec_compress_avx2) -void polyvec_compress_avx2(uint8_t r[S2N_KYBER_512_R3_POLYVECCOMPRESSEDBYTES+2], const polyvec *a); - -#define polyvec_decompress_avx2 S2N_KYBER_512_R3_NAMESPACE(polyvec_decompress_avx2) -void polyvec_decompress_avx2(polyvec *r, const uint8_t a[S2N_KYBER_512_R3_POLYVECCOMPRESSEDBYTES+12]); - -#define polyvec_tobytes_avx2 S2N_KYBER_512_R3_NAMESPACE(polyvec_tobytes_avx2) -void polyvec_tobytes_avx2(uint8_t r[S2N_KYBER_512_R3_POLYVECBYTES], const polyvec *a); - -#define polyvec_frombytes_avx2 S2N_KYBER_512_R3_NAMESPACE(polyvec_frombytes_avx2) -void polyvec_frombytes_avx2(polyvec *r, const uint8_t a[S2N_KYBER_512_R3_POLYVECBYTES]); - -#define polyvec_ntt_avx2 S2N_KYBER_512_R3_NAMESPACE(polyvec_ntt_avx2) -void polyvec_ntt_avx2(polyvec *r); - -#define polyvec_invntt_tomont_avx2 S2N_KYBER_512_R3_NAMESPACE(polyvec_invntt_tomont_avx2) -void polyvec_invntt_tomont_avx2(polyvec *r); - -#define polyvec_basemul_acc_montgomery_avx2 S2N_KYBER_512_R3_NAMESPACE(polyvec_basemul_acc_montgomery_avx2) -void polyvec_basemul_acc_montgomery_avx2(poly *r, const polyvec *a, const polyvec *b); - -#define polyvec_reduce_avx2 S2N_KYBER_512_R3_NAMESPACE(polyvec_reduce_avx2) -void polyvec_reduce_avx2(polyvec *r); - -#define polyvec_add_avx2 S2N_KYBER_512_R3_NAMESPACE(polyvec_add_avx2) -void polyvec_add_avx2(polyvec *r, const polyvec *a, const polyvec *b); -#endif diff --git a/pq-crypto/kyber_r3/kyber512r3_reduce.c b/pq-crypto/kyber_r3/kyber512r3_reduce.c deleted file mode 100644 index ed7099f7ae0..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_reduce.c +++ /dev/null @@ -1,62 +0,0 @@ -#include -#include "kyber512r3_params.h" -#include "kyber512r3_reduce.h" - -S2N_ENSURE_PORTABLE_OPTIMIZATIONS - -/************************************************* -* Name: montgomery_reduce -* -* Description: Montgomery reduction; given a 32-bit integer a, computes -* 16-bit integer congruent to a * R^-1 mod q, -* where R=2^16 -* -* Arguments: 
- int32_t a: input integer to be reduced; -* has to be in {-q2^15,...,q2^15-1} -* -* Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q. -**************************************************/ -int16_t montgomery_reduce(int32_t a) { - int32_t t; - int16_t u; - - u = a * S2N_KYBER_512_R3_QINV; - t = (int32_t)u * S2N_KYBER_512_R3_Q; - t = a - t; - t >>= 16; - return t; -} - -/************************************************* -* Name: barrett_reduce -* -* Description: Barrett reduction; given a 16-bit integer a, computes -* 16-bit integer congruent to a mod q in {0,...,q} -* -* Arguments: - int16_t a: input integer to be reduced -* -* Returns: integer in {0,...,q} congruent to a modulo q. -**************************************************/ -int16_t barrett_reduce(int16_t a) { - int16_t t; - const int16_t v = ((1U << 26) + S2N_KYBER_512_R3_Q / 2) / S2N_KYBER_512_R3_Q; - - t = (int32_t)v * a >> 26; - t *= S2N_KYBER_512_R3_Q; - return a - t; -} - -/************************************************* -* Name: csubq -* -* Description: Conditionallly subtract q -* -* Arguments: - int16_t x: input integer -* -* Returns: a - q if a >= q, else a -**************************************************/ -int16_t csubq(int16_t a) { - a -= S2N_KYBER_512_R3_Q; - a += (a >> 15) & S2N_KYBER_512_R3_Q; - return a; -} diff --git a/pq-crypto/kyber_r3/kyber512r3_reduce.h b/pq-crypto/kyber_r3/kyber512r3_reduce.h deleted file mode 100644 index bab9fa54f91..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_reduce.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include -#include "kyber512r3_params.h" - -#define S2N_KYBER_512_R3_QINV 62209 /* q^-1 mod 2^16 */ - -#define montgomery_reduce S2N_KYBER_512_R3_NAMESPACE(montgomery_reduce) -int16_t montgomery_reduce(int32_t a); - -#define barrett_reduce S2N_KYBER_512_R3_NAMESPACE(barrett_reduce) -int16_t barrett_reduce(int16_t a); - -#define csubq S2N_KYBER_512_R3_NAMESPACE(csubq) -int16_t csubq(int16_t x); diff --git 
a/pq-crypto/kyber_r3/kyber512r3_reduce_avx2.h b/pq-crypto/kyber_r3/kyber512r3_reduce_avx2.h deleted file mode 100644 index 24f0ede4e02..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_reduce_avx2.h +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once - -#include "kyber512r3_params.h" - -#if defined(S2N_KYBER512R3_AVX2_BMI2) -#include - -#define reduce_avx2_asm S2N_KYBER_512_R3_NAMESPACE(reduce_avx2_asm) -void reduce_avx2_asm(__m256i *r, const __m256i *qdata); - -#define tomont_avx2_asm S2N_KYBER_512_R3_NAMESPACE(tomont_avx2_asm) -void tomont_avx2_asm(__m256i *r, const __m256i *qdata); -#endif diff --git a/pq-crypto/kyber_r3/kyber512r3_rejsample_avx2.c b/pq-crypto/kyber_r3/kyber512r3_rejsample_avx2.c deleted file mode 100644 index 1461e0b9b18..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_rejsample_avx2.c +++ /dev/null @@ -1,420 +0,0 @@ -#include -#include -#include "kyber512r3_params.h" -#include "kyber512r3_consts_avx2.h" -#include "kyber512r3_rejsample_avx2.h" - -#if defined(S2N_KYBER512R3_AVX2_BMI2) -#include - -//#define BMI - -#ifndef BMI -static const uint8_t idx[256][8] = { - {-1, -1, -1, -1, -1, -1, -1, -1}, - { 0, -1, -1, -1, -1, -1, -1, -1}, - { 2, -1, -1, -1, -1, -1, -1, -1}, - { 0, 2, -1, -1, -1, -1, -1, -1}, - { 4, -1, -1, -1, -1, -1, -1, -1}, - { 0, 4, -1, -1, -1, -1, -1, -1}, - { 2, 4, -1, -1, -1, -1, -1, -1}, - { 0, 2, 4, -1, -1, -1, -1, -1}, - { 6, -1, -1, -1, -1, -1, -1, -1}, - { 0, 6, -1, -1, -1, -1, -1, -1}, - { 2, 6, -1, -1, -1, -1, -1, -1}, - { 0, 2, 6, -1, -1, -1, -1, -1}, - { 4, 6, -1, -1, -1, -1, -1, -1}, - { 0, 4, 6, -1, -1, -1, -1, -1}, - { 2, 4, 6, -1, -1, -1, -1, -1}, - { 0, 2, 4, 6, -1, -1, -1, -1}, - { 8, -1, -1, -1, -1, -1, -1, -1}, - { 0, 8, -1, -1, -1, -1, -1, -1}, - { 2, 8, -1, -1, -1, -1, -1, -1}, - { 0, 2, 8, -1, -1, -1, -1, -1}, - { 4, 8, -1, -1, -1, -1, -1, -1}, - { 0, 4, 8, -1, -1, -1, -1, -1}, - { 2, 4, 8, -1, -1, -1, -1, -1}, - { 0, 2, 4, 8, -1, -1, -1, -1}, - { 6, 8, -1, -1, -1, -1, -1, -1}, - { 0, 6, 8, -1, -1, -1, -1, -1}, - { 
2, 6, 8, -1, -1, -1, -1, -1}, - { 0, 2, 6, 8, -1, -1, -1, -1}, - { 4, 6, 8, -1, -1, -1, -1, -1}, - { 0, 4, 6, 8, -1, -1, -1, -1}, - { 2, 4, 6, 8, -1, -1, -1, -1}, - { 0, 2, 4, 6, 8, -1, -1, -1}, - {10, -1, -1, -1, -1, -1, -1, -1}, - { 0, 10, -1, -1, -1, -1, -1, -1}, - { 2, 10, -1, -1, -1, -1, -1, -1}, - { 0, 2, 10, -1, -1, -1, -1, -1}, - { 4, 10, -1, -1, -1, -1, -1, -1}, - { 0, 4, 10, -1, -1, -1, -1, -1}, - { 2, 4, 10, -1, -1, -1, -1, -1}, - { 0, 2, 4, 10, -1, -1, -1, -1}, - { 6, 10, -1, -1, -1, -1, -1, -1}, - { 0, 6, 10, -1, -1, -1, -1, -1}, - { 2, 6, 10, -1, -1, -1, -1, -1}, - { 0, 2, 6, 10, -1, -1, -1, -1}, - { 4, 6, 10, -1, -1, -1, -1, -1}, - { 0, 4, 6, 10, -1, -1, -1, -1}, - { 2, 4, 6, 10, -1, -1, -1, -1}, - { 0, 2, 4, 6, 10, -1, -1, -1}, - { 8, 10, -1, -1, -1, -1, -1, -1}, - { 0, 8, 10, -1, -1, -1, -1, -1}, - { 2, 8, 10, -1, -1, -1, -1, -1}, - { 0, 2, 8, 10, -1, -1, -1, -1}, - { 4, 8, 10, -1, -1, -1, -1, -1}, - { 0, 4, 8, 10, -1, -1, -1, -1}, - { 2, 4, 8, 10, -1, -1, -1, -1}, - { 0, 2, 4, 8, 10, -1, -1, -1}, - { 6, 8, 10, -1, -1, -1, -1, -1}, - { 0, 6, 8, 10, -1, -1, -1, -1}, - { 2, 6, 8, 10, -1, -1, -1, -1}, - { 0, 2, 6, 8, 10, -1, -1, -1}, - { 4, 6, 8, 10, -1, -1, -1, -1}, - { 0, 4, 6, 8, 10, -1, -1, -1}, - { 2, 4, 6, 8, 10, -1, -1, -1}, - { 0, 2, 4, 6, 8, 10, -1, -1}, - {12, -1, -1, -1, -1, -1, -1, -1}, - { 0, 12, -1, -1, -1, -1, -1, -1}, - { 2, 12, -1, -1, -1, -1, -1, -1}, - { 0, 2, 12, -1, -1, -1, -1, -1}, - { 4, 12, -1, -1, -1, -1, -1, -1}, - { 0, 4, 12, -1, -1, -1, -1, -1}, - { 2, 4, 12, -1, -1, -1, -1, -1}, - { 0, 2, 4, 12, -1, -1, -1, -1}, - { 6, 12, -1, -1, -1, -1, -1, -1}, - { 0, 6, 12, -1, -1, -1, -1, -1}, - { 2, 6, 12, -1, -1, -1, -1, -1}, - { 0, 2, 6, 12, -1, -1, -1, -1}, - { 4, 6, 12, -1, -1, -1, -1, -1}, - { 0, 4, 6, 12, -1, -1, -1, -1}, - { 2, 4, 6, 12, -1, -1, -1, -1}, - { 0, 2, 4, 6, 12, -1, -1, -1}, - { 8, 12, -1, -1, -1, -1, -1, -1}, - { 0, 8, 12, -1, -1, -1, -1, -1}, - { 2, 8, 12, -1, -1, -1, -1, -1}, - { 0, 2, 8, 12, -1, -1, -1, -1}, - 
{ 4, 8, 12, -1, -1, -1, -1, -1}, - { 0, 4, 8, 12, -1, -1, -1, -1}, - { 2, 4, 8, 12, -1, -1, -1, -1}, - { 0, 2, 4, 8, 12, -1, -1, -1}, - { 6, 8, 12, -1, -1, -1, -1, -1}, - { 0, 6, 8, 12, -1, -1, -1, -1}, - { 2, 6, 8, 12, -1, -1, -1, -1}, - { 0, 2, 6, 8, 12, -1, -1, -1}, - { 4, 6, 8, 12, -1, -1, -1, -1}, - { 0, 4, 6, 8, 12, -1, -1, -1}, - { 2, 4, 6, 8, 12, -1, -1, -1}, - { 0, 2, 4, 6, 8, 12, -1, -1}, - {10, 12, -1, -1, -1, -1, -1, -1}, - { 0, 10, 12, -1, -1, -1, -1, -1}, - { 2, 10, 12, -1, -1, -1, -1, -1}, - { 0, 2, 10, 12, -1, -1, -1, -1}, - { 4, 10, 12, -1, -1, -1, -1, -1}, - { 0, 4, 10, 12, -1, -1, -1, -1}, - { 2, 4, 10, 12, -1, -1, -1, -1}, - { 0, 2, 4, 10, 12, -1, -1, -1}, - { 6, 10, 12, -1, -1, -1, -1, -1}, - { 0, 6, 10, 12, -1, -1, -1, -1}, - { 2, 6, 10, 12, -1, -1, -1, -1}, - { 0, 2, 6, 10, 12, -1, -1, -1}, - { 4, 6, 10, 12, -1, -1, -1, -1}, - { 0, 4, 6, 10, 12, -1, -1, -1}, - { 2, 4, 6, 10, 12, -1, -1, -1}, - { 0, 2, 4, 6, 10, 12, -1, -1}, - { 8, 10, 12, -1, -1, -1, -1, -1}, - { 0, 8, 10, 12, -1, -1, -1, -1}, - { 2, 8, 10, 12, -1, -1, -1, -1}, - { 0, 2, 8, 10, 12, -1, -1, -1}, - { 4, 8, 10, 12, -1, -1, -1, -1}, - { 0, 4, 8, 10, 12, -1, -1, -1}, - { 2, 4, 8, 10, 12, -1, -1, -1}, - { 0, 2, 4, 8, 10, 12, -1, -1}, - { 6, 8, 10, 12, -1, -1, -1, -1}, - { 0, 6, 8, 10, 12, -1, -1, -1}, - { 2, 6, 8, 10, 12, -1, -1, -1}, - { 0, 2, 6, 8, 10, 12, -1, -1}, - { 4, 6, 8, 10, 12, -1, -1, -1}, - { 0, 4, 6, 8, 10, 12, -1, -1}, - { 2, 4, 6, 8, 10, 12, -1, -1}, - { 0, 2, 4, 6, 8, 10, 12, -1}, - {14, -1, -1, -1, -1, -1, -1, -1}, - { 0, 14, -1, -1, -1, -1, -1, -1}, - { 2, 14, -1, -1, -1, -1, -1, -1}, - { 0, 2, 14, -1, -1, -1, -1, -1}, - { 4, 14, -1, -1, -1, -1, -1, -1}, - { 0, 4, 14, -1, -1, -1, -1, -1}, - { 2, 4, 14, -1, -1, -1, -1, -1}, - { 0, 2, 4, 14, -1, -1, -1, -1}, - { 6, 14, -1, -1, -1, -1, -1, -1}, - { 0, 6, 14, -1, -1, -1, -1, -1}, - { 2, 6, 14, -1, -1, -1, -1, -1}, - { 0, 2, 6, 14, -1, -1, -1, -1}, - { 4, 6, 14, -1, -1, -1, -1, -1}, - { 0, 4, 6, 14, -1, -1, -1, -1}, - 
{ 2, 4, 6, 14, -1, -1, -1, -1}, - { 0, 2, 4, 6, 14, -1, -1, -1}, - { 8, 14, -1, -1, -1, -1, -1, -1}, - { 0, 8, 14, -1, -1, -1, -1, -1}, - { 2, 8, 14, -1, -1, -1, -1, -1}, - { 0, 2, 8, 14, -1, -1, -1, -1}, - { 4, 8, 14, -1, -1, -1, -1, -1}, - { 0, 4, 8, 14, -1, -1, -1, -1}, - { 2, 4, 8, 14, -1, -1, -1, -1}, - { 0, 2, 4, 8, 14, -1, -1, -1}, - { 6, 8, 14, -1, -1, -1, -1, -1}, - { 0, 6, 8, 14, -1, -1, -1, -1}, - { 2, 6, 8, 14, -1, -1, -1, -1}, - { 0, 2, 6, 8, 14, -1, -1, -1}, - { 4, 6, 8, 14, -1, -1, -1, -1}, - { 0, 4, 6, 8, 14, -1, -1, -1}, - { 2, 4, 6, 8, 14, -1, -1, -1}, - { 0, 2, 4, 6, 8, 14, -1, -1}, - {10, 14, -1, -1, -1, -1, -1, -1}, - { 0, 10, 14, -1, -1, -1, -1, -1}, - { 2, 10, 14, -1, -1, -1, -1, -1}, - { 0, 2, 10, 14, -1, -1, -1, -1}, - { 4, 10, 14, -1, -1, -1, -1, -1}, - { 0, 4, 10, 14, -1, -1, -1, -1}, - { 2, 4, 10, 14, -1, -1, -1, -1}, - { 0, 2, 4, 10, 14, -1, -1, -1}, - { 6, 10, 14, -1, -1, -1, -1, -1}, - { 0, 6, 10, 14, -1, -1, -1, -1}, - { 2, 6, 10, 14, -1, -1, -1, -1}, - { 0, 2, 6, 10, 14, -1, -1, -1}, - { 4, 6, 10, 14, -1, -1, -1, -1}, - { 0, 4, 6, 10, 14, -1, -1, -1}, - { 2, 4, 6, 10, 14, -1, -1, -1}, - { 0, 2, 4, 6, 10, 14, -1, -1}, - { 8, 10, 14, -1, -1, -1, -1, -1}, - { 0, 8, 10, 14, -1, -1, -1, -1}, - { 2, 8, 10, 14, -1, -1, -1, -1}, - { 0, 2, 8, 10, 14, -1, -1, -1}, - { 4, 8, 10, 14, -1, -1, -1, -1}, - { 0, 4, 8, 10, 14, -1, -1, -1}, - { 2, 4, 8, 10, 14, -1, -1, -1}, - { 0, 2, 4, 8, 10, 14, -1, -1}, - { 6, 8, 10, 14, -1, -1, -1, -1}, - { 0, 6, 8, 10, 14, -1, -1, -1}, - { 2, 6, 8, 10, 14, -1, -1, -1}, - { 0, 2, 6, 8, 10, 14, -1, -1}, - { 4, 6, 8, 10, 14, -1, -1, -1}, - { 0, 4, 6, 8, 10, 14, -1, -1}, - { 2, 4, 6, 8, 10, 14, -1, -1}, - { 0, 2, 4, 6, 8, 10, 14, -1}, - {12, 14, -1, -1, -1, -1, -1, -1}, - { 0, 12, 14, -1, -1, -1, -1, -1}, - { 2, 12, 14, -1, -1, -1, -1, -1}, - { 0, 2, 12, 14, -1, -1, -1, -1}, - { 4, 12, 14, -1, -1, -1, -1, -1}, - { 0, 4, 12, 14, -1, -1, -1, -1}, - { 2, 4, 12, 14, -1, -1, -1, -1}, - { 0, 2, 4, 12, 14, -1, -1, -1}, - { 
6, 12, 14, -1, -1, -1, -1, -1}, - { 0, 6, 12, 14, -1, -1, -1, -1}, - { 2, 6, 12, 14, -1, -1, -1, -1}, - { 0, 2, 6, 12, 14, -1, -1, -1}, - { 4, 6, 12, 14, -1, -1, -1, -1}, - { 0, 4, 6, 12, 14, -1, -1, -1}, - { 2, 4, 6, 12, 14, -1, -1, -1}, - { 0, 2, 4, 6, 12, 14, -1, -1}, - { 8, 12, 14, -1, -1, -1, -1, -1}, - { 0, 8, 12, 14, -1, -1, -1, -1}, - { 2, 8, 12, 14, -1, -1, -1, -1}, - { 0, 2, 8, 12, 14, -1, -1, -1}, - { 4, 8, 12, 14, -1, -1, -1, -1}, - { 0, 4, 8, 12, 14, -1, -1, -1}, - { 2, 4, 8, 12, 14, -1, -1, -1}, - { 0, 2, 4, 8, 12, 14, -1, -1}, - { 6, 8, 12, 14, -1, -1, -1, -1}, - { 0, 6, 8, 12, 14, -1, -1, -1}, - { 2, 6, 8, 12, 14, -1, -1, -1}, - { 0, 2, 6, 8, 12, 14, -1, -1}, - { 4, 6, 8, 12, 14, -1, -1, -1}, - { 0, 4, 6, 8, 12, 14, -1, -1}, - { 2, 4, 6, 8, 12, 14, -1, -1}, - { 0, 2, 4, 6, 8, 12, 14, -1}, - {10, 12, 14, -1, -1, -1, -1, -1}, - { 0, 10, 12, 14, -1, -1, -1, -1}, - { 2, 10, 12, 14, -1, -1, -1, -1}, - { 0, 2, 10, 12, 14, -1, -1, -1}, - { 4, 10, 12, 14, -1, -1, -1, -1}, - { 0, 4, 10, 12, 14, -1, -1, -1}, - { 2, 4, 10, 12, 14, -1, -1, -1}, - { 0, 2, 4, 10, 12, 14, -1, -1}, - { 6, 10, 12, 14, -1, -1, -1, -1}, - { 0, 6, 10, 12, 14, -1, -1, -1}, - { 2, 6, 10, 12, 14, -1, -1, -1}, - { 0, 2, 6, 10, 12, 14, -1, -1}, - { 4, 6, 10, 12, 14, -1, -1, -1}, - { 0, 4, 6, 10, 12, 14, -1, -1}, - { 2, 4, 6, 10, 12, 14, -1, -1}, - { 0, 2, 4, 6, 10, 12, 14, -1}, - { 8, 10, 12, 14, -1, -1, -1, -1}, - { 0, 8, 10, 12, 14, -1, -1, -1}, - { 2, 8, 10, 12, 14, -1, -1, -1}, - { 0, 2, 8, 10, 12, 14, -1, -1}, - { 4, 8, 10, 12, 14, -1, -1, -1}, - { 0, 4, 8, 10, 12, 14, -1, -1}, - { 2, 4, 8, 10, 12, 14, -1, -1}, - { 0, 2, 4, 8, 10, 12, 14, -1}, - { 6, 8, 10, 12, 14, -1, -1, -1}, - { 0, 6, 8, 10, 12, 14, -1, -1}, - { 2, 6, 8, 10, 12, 14, -1, -1}, - { 0, 2, 6, 8, 10, 12, 14, -1}, - { 4, 6, 8, 10, 12, 14, -1, -1}, - { 0, 4, 6, 8, 10, 12, 14, -1}, - { 2, 4, 6, 8, 10, 12, 14, -1}, - { 0, 2, 4, 6, 8, 10, 12, 14} -}; -#endif - -#define _mm256_cmpge_epu16(a, b) 
_mm256_cmpeq_epi16(_mm256_max_epu16(a, b), a) -#define _mm_cmpge_epu16(a, b) _mm_cmpeq_epi16(_mm_max_epu16(a, b), a) - -unsigned int rej_uniform_avx2(int16_t * restrict r, const uint8_t *buf) -{ - unsigned int ctr, pos; - uint16_t val0, val1; - uint32_t good; -#ifdef BMI - uint64_t idx0, idx1, idx2, idx3; -#endif - const __m256i bound = _mm256_load_si256(&qdata.vec[_16XQ/16]); - const __m256i ones = _mm256_set1_epi8(1); - const __m256i mask = _mm256_set1_epi16(0xFFF); - const __m256i idx8 = _mm256_set_epi8(15,14,14,13,12,11,11,10, - 9, 8, 8, 7, 6, 5, 5, 4, - 11,10,10, 9, 8, 7, 7, 6, - 5, 4, 4, 3, 2, 1, 1, 0); - __m256i f0, f1, g0, g1, g2, g3; - __m128i f, t, pilo, pihi; - - ctr = pos = 0; - while(ctr <= S2N_KYBER_512_R3_N - 32 && pos <= S2N_KYBER_512_R3_REJ_UNIFORM_AVX_BUFLEN - 48) { - // correcting cast-align and cast-qual errors - // old version: f0 = _mm256_loadu_si256((__m256i *)&buf[pos]); - f0 = _mm256_loadu_si256((const void *)&buf[pos]); - // old version: f1 = _mm256_loadu_si256((__m256i *)&buf[pos+24]); - f1 = _mm256_loadu_si256((const void *)&buf[pos+24]); - f0 = _mm256_permute4x64_epi64(f0, 0x94); - f1 = _mm256_permute4x64_epi64(f1, 0x94); - f0 = _mm256_shuffle_epi8(f0, idx8); - f1 = _mm256_shuffle_epi8(f1, idx8); - g0 = _mm256_srli_epi16(f0, 4); - g1 = _mm256_srli_epi16(f1, 4); - f0 = _mm256_blend_epi16(f0, g0, 0xAA); - f1 = _mm256_blend_epi16(f1, g1, 0xAA); - f0 = _mm256_and_si256(f0, mask); - f1 = _mm256_and_si256(f1, mask); - pos += 48; - - g0 = _mm256_cmpgt_epi16(bound, f0); - g1 = _mm256_cmpgt_epi16(bound, f1); - - g0 = _mm256_packs_epi16(g0, g1); - good = _mm256_movemask_epi8(g0); - -#ifdef BMI - idx0 = _pdep_u64(good >> 0, 0x0101010101010101); - idx1 = _pdep_u64(good >> 8, 0x0101010101010101); - idx2 = _pdep_u64(good >> 16, 0x0101010101010101); - idx3 = _pdep_u64(good >> 24, 0x0101010101010101); - idx0 = (idx0 << 8) - idx0; - idx0 = _pext_u64(0x0E0C0A0806040200, idx0); - idx1 = (idx1 << 8) - idx1; - idx1 = _pext_u64(0x0E0C0A0806040200, idx1); - 
idx2 = (idx2 << 8) - idx2; - idx2 = _pext_u64(0x0E0C0A0806040200, idx2); - idx3 = (idx3 << 8) - idx3; - idx3 = _pext_u64(0x0E0C0A0806040200, idx3); - - g0 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx0)); - g1 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx1)); - g0 = _mm256_inserti128_si256(g0, _mm_cvtsi64_si128(idx2), 1); - g1 = _mm256_inserti128_si256(g1, _mm_cvtsi64_si128(idx3), 1); -#else - // correcting cast-align and cast-qual errors - // old version: g0 = _mm256_castsi128_si256(_mm_loadl_epi64((__m128i *)&idx[(good >> 0) & 0xFF])); - g0 = _mm256_castsi128_si256(_mm_loadl_epi64((const void *)&idx[(good >> 0) & 0xFF])); - // old version: g1 = _mm256_castsi128_si256(_mm_loadl_epi64((__m128i *)&idx[(good >> 8) & 0xFF])); - g1 = _mm256_castsi128_si256(_mm_loadl_epi64((const void *)&idx[(good >> 8) & 0xFF])); - // old version: g0 = _mm256_inserti128_si256(g0, _mm_loadl_epi64((__m128i *)&idx[(good >> 16) & 0xFF]), 1); - g0 = _mm256_inserti128_si256(g0, _mm_loadl_epi64((const void *)&idx[(good >> 16) & 0xFF]), 1); - // old version: g1 = _mm256_inserti128_si256(g1, _mm_loadl_epi64((__m128i *)&idx[(good >> 24) & 0xFF]), 1); - g1 = _mm256_inserti128_si256(g1, _mm_loadl_epi64((const void *)&idx[(good >> 24) & 0xFF]), 1); -#endif - - g2 = _mm256_add_epi8(g0, ones); - g3 = _mm256_add_epi8(g1, ones); - g0 = _mm256_unpacklo_epi8(g0, g2); - g1 = _mm256_unpacklo_epi8(g1, g3); - - f0 = _mm256_shuffle_epi8(f0, g0); - f1 = _mm256_shuffle_epi8(f1, g1); - - // correcting cast-align errors - // old version: _mm_storeu_si128((__m128i *)&r[ctr], _mm256_castsi256_si128(f0)); - _mm_storeu_si128((void *)&r[ctr], _mm256_castsi256_si128(f0)); - ctr += _mm_popcnt_u32((good >> 0) & 0xFF); - // old version: _mm_storeu_si128((__m128i *)&r[ctr], _mm256_extracti128_si256(f0, 1)); - _mm_storeu_si128((void *)&r[ctr], _mm256_extracti128_si256(f0, 1)); - ctr += _mm_popcnt_u32((good >> 16) & 0xFF); - // old version: _mm_storeu_si128((__m128i *)&r[ctr], _mm256_castsi256_si128(f1)); - 
_mm_storeu_si128((void *)&r[ctr], _mm256_castsi256_si128(f1)); - ctr += _mm_popcnt_u32((good >> 8) & 0xFF); - // old version: _mm_storeu_si128((__m128i *)&r[ctr], _mm256_extracti128_si256(f1, 1)); - _mm_storeu_si128((void *)&r[ctr], _mm256_extracti128_si256(f1, 1)); - ctr += _mm_popcnt_u32((good >> 24) & 0xFF); - } - - while(ctr <= S2N_KYBER_512_R3_N - 8 && pos <= S2N_KYBER_512_R3_REJ_UNIFORM_AVX_BUFLEN - 12) { - // correcting cast-align and cast-qual errors - // old version: f = _mm_loadu_si128((__m128i *)&buf[pos]); - f = _mm_loadu_si128((const void *)&buf[pos]); - f = _mm_shuffle_epi8(f, _mm256_castsi256_si128(idx8)); - t = _mm_srli_epi16(f, 4); - f = _mm_blend_epi16(f, t, 0xAA); - f = _mm_and_si128(f, _mm256_castsi256_si128(mask)); - pos += 12; - - t = _mm_cmpgt_epi16(_mm256_castsi256_si128(bound), f); - good = _mm_movemask_epi8(t); - -#ifdef BMI - good &= 0x5555; - idx0 = _pdep_u64(good, 0x1111111111111111); - idx0 = (idx0 << 8) - idx0; - idx0 = _pext_u64(0x0E0C0A0806040200, idx0); - pilo = _mm_cvtsi64_si128(idx0); -#else - good = _pext_u32(good, 0x5555); - // correcting cast-align and cast-qual errors - // old version: pilo = _mm_loadl_epi64((__m128i *)&idx[good]); - pilo = _mm_loadl_epi64((const void *)&idx[good]); -#endif - - pihi = _mm_add_epi8(pilo, _mm256_castsi256_si128(ones)); - pilo = _mm_unpacklo_epi8(pilo, pihi); - f = _mm_shuffle_epi8(f, pilo); - // correcting cast-align error - // old version: _mm_storeu_si128((__m128i *)&r[ctr], f); - _mm_storeu_si128((void *)&r[ctr], f); - ctr += _mm_popcnt_u32(good); - } - - while(ctr < S2N_KYBER_512_R3_N && pos <= S2N_KYBER_512_R3_REJ_UNIFORM_AVX_BUFLEN - 3) { - val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF; - val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)); - pos += 3; - - if(val0 < S2N_KYBER_512_R3_Q) - r[ctr++] = val0; - if(val1 < S2N_KYBER_512_R3_Q && ctr < S2N_KYBER_512_R3_N) - r[ctr++] = val1; - } - - return ctr; -} -#endif diff --git 
a/pq-crypto/kyber_r3/kyber512r3_rejsample_avx2.h b/pq-crypto/kyber_r3/kyber512r3_rejsample_avx2.h deleted file mode 100644 index bd8a9704645..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_rejsample_avx2.h +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -#include -#include "kyber512r3_params.h" -#include "kyber512r3_fips202.h" - -#if defined(S2N_KYBER512R3_AVX2_BMI2) -#define S2N_KYBER_512_R3_XOF_BLOCKBYTES S2N_KYBER_512_R3_SHAKE128_RATE -#define S2N_KYBER_512_R3_REJ_UNIFORM_AVX_NBLOCKS ((12*S2N_KYBER_512_R3_N/8*(1 << 12)/S2N_KYBER_512_R3_Q + S2N_KYBER_512_R3_XOF_BLOCKBYTES)/S2N_KYBER_512_R3_XOF_BLOCKBYTES) -#define S2N_KYBER_512_R3_REJ_UNIFORM_AVX_BUFLEN (S2N_KYBER_512_R3_REJ_UNIFORM_AVX_NBLOCKS*S2N_KYBER_512_R3_XOF_BLOCKBYTES) - -#define rej_uniform_avx2 S2N_KYBER_512_R3_NAMESPACE(rej_uniform_avx2) -unsigned int rej_uniform_avx2(int16_t *r, const uint8_t *buf); -#endif diff --git a/pq-crypto/kyber_r3/kyber512r3_shuffle_avx2.S b/pq-crypto/kyber_r3/kyber512r3_shuffle_avx2.S deleted file mode 100644 index ce7200e5ca5..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_shuffle_avx2.S +++ /dev/null @@ -1,272 +0,0 @@ -#include "kyber512r3_consts_avx2.h" - -// The small macros (.inc files) are combined with .S files directly -/*****.include "fq.inc"*****/ -/***************************/ -.macro red16 r,rs=0,x=12 -vpmulhw %ymm1,%ymm\r,%ymm\x -.if \rs -vpmulhrsw %ymm\rs,%ymm\x,%ymm\x -.else -vpsraw $10,%ymm\x,%ymm\x -.endif -vpmullw %ymm0,%ymm\x,%ymm\x -vpsubw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro csubq r,x=12 -vpsubw %ymm0,%ymm\r,%ymm\r -vpsraw $15,%ymm\r,%ymm\x -vpand %ymm0,%ymm\x,%ymm\x -vpaddw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro caddq r,x=12 -vpsraw $15,%ymm\r,%ymm\x -vpand %ymm0,%ymm\x,%ymm\x -vpaddw %ymm\x,%ymm\r,%ymm\r -.endm - -.macro fqmulprecomp al,ah,b,x=12 -vpmullw %ymm\al,%ymm\b,%ymm\x -vpmulhw %ymm\ah,%ymm\b,%ymm\b -vpmulhw %ymm0,%ymm\x,%ymm\x -vpsubw %ymm\x,%ymm\b,%ymm\b -.endm -/***************************/ - -/*****.include "shuffle.inc"*****/ 
-/********************************/ -.macro shuffle8 r0,r1,r2,r3 -vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2 -vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3 -.endm - -.macro shuffle4 r0,r1,r2,r3 -vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2 -vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3 -.endm - -.macro shuffle2 r0,r1,r2,r3 -#vpsllq $32,%ymm\r1,%ymm\r2 -vmovsldup %ymm\r1,%ymm\r2 -vpblendd $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 -vpsrlq $32,%ymm\r0,%ymm\r0 -#vmovshdup %ymm\r0,%ymm\r0 -vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 -.endm - -.macro shuffle1 r0,r1,r2,r3 -vpslld $16,%ymm\r1,%ymm\r2 -vpblendw $0xAA,%ymm\r2,%ymm\r0,%ymm\r2 -vpsrld $16,%ymm\r0,%ymm\r0 -vpblendw $0xAA,%ymm\r1,%ymm\r0,%ymm\r3 -.endm -/********************************/ - -.text -nttunpack128_avx: -#load -vmovdqa (%rdi),%ymm4 -vmovdqa 32(%rdi),%ymm5 -vmovdqa 64(%rdi),%ymm6 -vmovdqa 96(%rdi),%ymm7 -vmovdqa 128(%rdi),%ymm8 -vmovdqa 160(%rdi),%ymm9 -vmovdqa 192(%rdi),%ymm10 -vmovdqa 224(%rdi),%ymm11 - -shuffle8 4,8,3,8 -shuffle8 5,9,4,9 -shuffle8 6,10,5,10 -shuffle8 7,11,6,11 - -shuffle4 3,5,7,5 -shuffle4 8,10,3,10 -shuffle4 4,6,8,6 -shuffle4 9,11,4,11 - -shuffle2 7,8,9,8 -shuffle2 5,6,7,6 -shuffle2 3,4,5,4 -shuffle2 10,11,3,11 - -shuffle1 9,5,10,5 -shuffle1 8,4,9,4 -shuffle1 7,3,8,3 -shuffle1 6,11,7,11 - -#store -vmovdqa %ymm10,(%rdi) -vmovdqa %ymm5,32(%rdi) -vmovdqa %ymm9,64(%rdi) -vmovdqa %ymm4,96(%rdi) -vmovdqa %ymm8,128(%rdi) -vmovdqa %ymm3,160(%rdi) -vmovdqa %ymm7,192(%rdi) -vmovdqa %ymm11,224(%rdi) - -ret - -.global cdecl(nttunpack_avx2_asm) -cdecl(nttunpack_avx2_asm): -call nttunpack128_avx -add $256,%rdi -call nttunpack128_avx -ret - -ntttobytes128_avx: -#load -vmovdqa (%rsi),%ymm5 -vmovdqa 32(%rsi),%ymm6 -vmovdqa 64(%rsi),%ymm7 -vmovdqa 96(%rsi),%ymm8 -vmovdqa 128(%rsi),%ymm9 -vmovdqa 160(%rsi),%ymm10 -vmovdqa 192(%rsi),%ymm11 -vmovdqa 224(%rsi),%ymm12 - -#csubq -csubq 5,13 -csubq 6,13 -csubq 7,13 -csubq 8,13 -csubq 9,13 -csubq 10,13 -csubq 11,13 -csubq 12,13 - -#bitpack -vpsllw $12,%ymm6,%ymm4 -vpor %ymm4,%ymm5,%ymm4 - -vpsrlw 
$4,%ymm6,%ymm5 -vpsllw $8,%ymm7,%ymm6 -vpor %ymm5,%ymm6,%ymm5 - -vpsrlw $8,%ymm7,%ymm6 -vpsllw $4,%ymm8,%ymm7 -vpor %ymm6,%ymm7,%ymm6 - -vpsllw $12,%ymm10,%ymm7 -vpor %ymm7,%ymm9,%ymm7 - -vpsrlw $4,%ymm10,%ymm8 -vpsllw $8,%ymm11,%ymm9 -vpor %ymm8,%ymm9,%ymm8 - -vpsrlw $8,%ymm11,%ymm9 -vpsllw $4,%ymm12,%ymm10 -vpor %ymm9,%ymm10,%ymm9 - -shuffle1 4,5,3,5 -shuffle1 6,7,4,7 -shuffle1 8,9,6,9 - -shuffle2 3,4,8,4 -shuffle2 6,5,3,5 -shuffle2 7,9,6,9 - -shuffle4 8,3,7,3 -shuffle4 6,4,8,4 -shuffle4 5,9,6,9 - -shuffle8 7,8,5,8 -shuffle8 6,3,7,3 -shuffle8 4,9,6,9 - -#store -vmovdqu %ymm5,(%rdi) -vmovdqu %ymm7,32(%rdi) -vmovdqu %ymm6,64(%rdi) -vmovdqu %ymm8,96(%rdi) -vmovdqu %ymm3,128(%rdi) -vmovdqu %ymm9,160(%rdi) - -ret - -.global cdecl(ntttobytes_avx2_asm) -cdecl(ntttobytes_avx2_asm): -#consts -vmovdqa _16XQ*2(%rdx),%ymm0 -call ntttobytes128_avx -add $256,%rsi -add $192,%rdi -call ntttobytes128_avx -ret - -nttfrombytes128_avx: -#load -vmovdqu (%rsi),%ymm4 -vmovdqu 32(%rsi),%ymm5 -vmovdqu 64(%rsi),%ymm6 -vmovdqu 96(%rsi),%ymm7 -vmovdqu 128(%rsi),%ymm8 -vmovdqu 160(%rsi),%ymm9 - -shuffle8 4,7,3,7 -shuffle8 5,8,4,8 -shuffle8 6,9,5,9 - -shuffle4 3,8,6,8 -shuffle4 7,5,3,5 -shuffle4 4,9,7,9 - -shuffle2 6,5,4,5 -shuffle2 8,7,6,7 -shuffle2 3,9,8,9 - -shuffle1 4,7,10,7 -shuffle1 5,8,4,8 -shuffle1 6,9,5,9 - -#bitunpack -vpsrlw $12,%ymm10,%ymm11 -vpsllw $4,%ymm7,%ymm12 -vpor %ymm11,%ymm12,%ymm11 -vpand %ymm0,%ymm10,%ymm10 -vpand %ymm0,%ymm11,%ymm11 - -vpsrlw $8,%ymm7,%ymm12 -vpsllw $8,%ymm4,%ymm13 -vpor %ymm12,%ymm13,%ymm12 -vpand %ymm0,%ymm12,%ymm12 - -vpsrlw $4,%ymm4,%ymm13 -vpand %ymm0,%ymm13,%ymm13 - -vpsrlw $12,%ymm8,%ymm14 -vpsllw $4,%ymm5,%ymm15 -vpor %ymm14,%ymm15,%ymm14 -vpand %ymm0,%ymm8,%ymm8 -vpand %ymm0,%ymm14,%ymm14 - -vpsrlw $8,%ymm5,%ymm15 -vpsllw $8,%ymm9,%ymm1 -vpor %ymm15,%ymm1,%ymm15 -vpand %ymm0,%ymm15,%ymm15 - -vpsrlw $4,%ymm9,%ymm1 -vpand %ymm0,%ymm1,%ymm1 - -#store -vmovdqa %ymm10,(%rdi) -vmovdqa %ymm11,32(%rdi) -vmovdqa %ymm12,64(%rdi) -vmovdqa %ymm13,96(%rdi) 
-vmovdqa %ymm8,128(%rdi) -vmovdqa %ymm14,160(%rdi) -vmovdqa %ymm15,192(%rdi) -vmovdqa %ymm1,224(%rdi) - -ret - -.global cdecl(nttfrombytes_avx2_asm) -cdecl(nttfrombytes_avx2_asm): -#consts -vmovdqa _16XMASK*2(%rdx),%ymm0 -call nttfrombytes128_avx -add $256,%rdi -add $192,%rsi -call nttfrombytes128_avx -ret diff --git a/pq-crypto/kyber_r3/kyber512r3_symmetric-shake.c b/pq-crypto/kyber_r3/kyber512r3_symmetric-shake.c deleted file mode 100644 index 390a2a4e381..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_symmetric-shake.c +++ /dev/null @@ -1,49 +0,0 @@ -#include "kyber512r3_params.h" -#include "kyber512r3_fips202.h" -#include "kyber512r3_symmetric.h" -#include - -/************************************************* -* Name: kyber_shake128_absorb -* -* Description: Absorb step of the SHAKE128 specialized for the Kyber context. - -* Arguments: - keccak_state *s: pointer to (uninitialized) output Keccak state -* - const uint8_t *input: pointer to S2N_KYBER_512_R3_SYMBYTES input to be absorbed into s -* - uint8_t i additional byte of input -* - uint8_t j additional byte of input -**************************************************/ -void kyber_shake128_absorb(keccak_state *s, const uint8_t *input, uint8_t x, uint8_t y) { - size_t i; - uint8_t extseed[S2N_KYBER_512_R3_SYMBYTES + 2]; - - for (i = 0; i < S2N_KYBER_512_R3_SYMBYTES; i++) { - extseed[i] = input[i]; - } - extseed[i++] = x; - extseed[i] = y; - shake128_absorb(s, extseed, S2N_KYBER_512_R3_SYMBYTES + 2); -} - -/************************************************* -* Name: shake256_prf -* -* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input -* and then generates outlen bytes of SHAKE256 output -* -* Arguments: - uint8_t *output: pointer to output -* - size_t outlen: number of requested output bytes -* - const uint8_t * key: pointer to the key (of length S2N_KYBER_512_R3_SYMBYTES) -* - uint8_t nonce: single-byte nonce (public PRF input) -**************************************************/ 
-void shake256_prf(uint8_t *output, size_t outlen, const uint8_t *key, uint8_t nonce) { - uint8_t extkey[S2N_KYBER_512_R3_SYMBYTES + 1]; - size_t i; - - for (i = 0; i < S2N_KYBER_512_R3_SYMBYTES; i++) { - extkey[i] = key[i]; - } - extkey[i] = nonce; - - shake256(output, outlen, extkey, S2N_KYBER_512_R3_SYMBYTES + 1); -} diff --git a/pq-crypto/kyber_r3/kyber512r3_symmetric.h b/pq-crypto/kyber_r3/kyber512r3_symmetric.h deleted file mode 100644 index e898a294504..00000000000 --- a/pq-crypto/kyber_r3/kyber512r3_symmetric.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include "kyber512r3_params.h" -#include "kyber512r3_fips202.h" -#include - -#define keccak_state S2N_KYBER_512_R3_NAMESPACE(keccak_state) -typedef shake128ctx keccak_state; - -#define xof_state S2N_KYBER_512_R3_NAMESPACE(xof_state) -typedef keccak_state xof_state; - -#define kyber_shake128_absorb S2N_KYBER_512_R3_NAMESPACE(kyber_shake128_absorb) -void kyber_shake128_absorb(keccak_state *s, const uint8_t *input, uint8_t x, uint8_t y); - -#define shake256_prf S2N_KYBER_512_R3_NAMESPACE(shake256_prf) -void shake256_prf(uint8_t *output, size_t outlen, const uint8_t *key, uint8_t nonce); diff --git a/pq-crypto/s2n_pq.c b/pq-crypto/s2n_pq.c deleted file mode 100644 index 044994193af..00000000000 --- a/pq-crypto/s2n_pq.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. 
- */ - -#include "s2n_pq.h" - -#include "crypto/s2n_openssl.h" - -static bool kyber512r3_avx2_bmi2_enabled = false; - -#if defined(S2N_CPUID_AVAILABLE) - /* https://en.wikipedia.org/wiki/CPUID */ - #include - - #define PROCESSOR_INFO_AND_FEATURES 1 - #define EXTENDED_FEATURES_LEAF 7 - #define EXTENDED_FEATURES_SUBLEAF_ZERO 0 - - /* The cpuid.h header included with older versions of gcc and - * clang doesn't include definitions for bit_ADX, bit_BMI2, or - * __get_cpuid_count(). */ - #if !defined(bit_BMI2) - #define bit_BMI2 (1 << 8) - #endif - - #define EBX_BIT_AVX2 (1 << 5) - -bool s2n_get_cpuid_count(uint32_t leaf, uint32_t sub_leaf, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) -{ - /* 0x80000000 probes for extended cpuid info */ - uint32_t max_level = __get_cpuid_max(leaf & 0x80000000, 0); - - if (max_level == 0 || max_level < leaf) { - return false; - } - - __cpuid_count(leaf, sub_leaf, *eax, *ebx, *ecx, *edx); - return true; -} - -/* https://en.wikipedia.org/wiki/Bit_manipulation_instruction_set#BMI2_(Bit_Manipulation_Instruction_Set_2) */ -bool s2n_cpu_supports_bmi2() -{ - uint32_t eax, ebx, ecx, edx; - if (!s2n_get_cpuid_count(EXTENDED_FEATURES_LEAF, EXTENDED_FEATURES_SUBLEAF_ZERO, &eax, &ebx, &ecx, &edx)) { - return false; - } - - return (ebx & bit_BMI2); -} - -bool s2n_cpu_supports_avx2() -{ - uint32_t eax, ebx, ecx, edx; - if (!s2n_get_cpuid_count(EXTENDED_FEATURES_LEAF, EXTENDED_FEATURES_SUBLEAF_ZERO, &eax, &ebx, &ecx, &edx)) { - return false; - } - - return (ebx & EBX_BIT_AVX2); -} - -bool s2n_cpu_supports_kyber512r3_avx2_bmi2() -{ - #if defined(S2N_KYBER512R3_AVX2_BMI2) - return s2n_cpu_supports_bmi2() && s2n_cpu_supports_avx2(); - #else - return false; - #endif -} - -#else /* defined(S2N_CPUID_AVAILABLE) */ - -/* If CPUID is not available, we cannot perform necessary run-time checks. 
*/ -bool s2n_cpu_supports_kyber512r3_avx2_bmi2() -{ - return false; -} - -#endif /* defined(S2N_CPUID_AVAILABLE) */ - -bool s2n_kyber512r3_is_avx2_bmi2_enabled() -{ - return kyber512r3_avx2_bmi2_enabled; -} - -bool s2n_pq_is_enabled() -{ -#if defined(S2N_NO_PQ) - return false; -#else - /* TODO: Require s2n_libcrypto_is_awslc() when we remove s2n's Kyber512 implementation */ - return true; -#endif -} - -bool s2n_libcrypto_supports_kyber() -{ -#if defined(S2N_LIBCRYPTO_SUPPORTS_KYBER) - return true; -#else - return false; -#endif -} - -S2N_RESULT s2n_disable_kyber512r3_opt_avx2_bmi2() -{ - kyber512r3_avx2_bmi2_enabled = false; - return S2N_RESULT_OK; -} - -S2N_RESULT s2n_try_enable_kyber512r3_opt_avx2_bmi2() -{ - if (s2n_pq_is_enabled() && s2n_cpu_supports_kyber512r3_avx2_bmi2()) { - kyber512r3_avx2_bmi2_enabled = true; - } - return S2N_RESULT_OK; -} - -S2N_RESULT s2n_pq_init() -{ - RESULT_ENSURE_OK(s2n_try_enable_kyber512r3_opt_avx2_bmi2(), S2N_ERR_SAFETY); - - return S2N_RESULT_OK; -} diff --git a/pq-crypto/s2n_pq_asm.h b/pq-crypto/s2n_pq_asm.h deleted file mode 100644 index 9573242e8e5..00000000000 --- a/pq-crypto/s2n_pq_asm.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. 
- */ - -#pragma once - -#ifndef S2N_BLOCK_NONPORTABLE_OPTIMIZATIONS -#define S2N_BLOCK_NONPORTABLE_OPTIMIZATIONS 0 -#endif - -#define S2N_ANY_NONPORTABLE_OPTIMIZATIONS_ENABLED (__AVX__ || __AVX2__ || __BMI2__) - -#if S2N_BLOCK_NONPORTABLE_OPTIMIZATIONS && S2N_ANY_NONPORTABLE_OPTIMIZATIONS_ENABLED -#define S2N_ENSURE_PORTABLE_OPTIMIZATIONS \ - #error "Compiling portable code with non-portable assembly optimizations. This can result in runtime crashes if artifacts are deployed to older CPU's without these CPU instructions" -#else -#define S2N_ENSURE_PORTABLE_OPTIMIZATIONS -#endif diff --git a/pq-crypto/s2n_pq_asm.mk b/pq-crypto/s2n_pq_asm.mk deleted file mode 100644 index df4c00acc4e..00000000000 --- a/pq-crypto/s2n_pq_asm.mk +++ /dev/null @@ -1,37 +0,0 @@ -# -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://aws.amazon.com/apache2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. -# - -# To ensure CPU compatibility, try to compile all ASM code before including it in the build. -TRY_COMPILE_SIKEP434R3_ASM = -1 - -ifndef S2N_NO_PQ_ASM - # Kyber Round-3 code has several different optimizations - # which require specific compiler flags to be supported - # by the compiler. So for each needed instruction set - # extension we check if the compiler supports it and - # set proper flags to be added in the kyber_r3 Makefile. 
- - # Check if m256 intrinsics are supported by the compiler - m256_file := "$(S2N_ROOT)/tests/features/m256_intrinsics.c" - m256_file_out := "test_kyber512r3_m256_support.o" - KYBER512R3_M256_SUPPORTED := $(shell $(CC) -c -o $(m256_file_out) $(m256_file) > /dev/null 2>&1; echo $$?; rm $(m256_file_out) > /dev/null 2>&1) - - ifeq ($(KYBER512R3_M256_SUPPORTED), 0) - # Check if mavx2 and mbmi2 flags are supported by the compiler - dummy_file := "$(S2N_ROOT)/tests/features/noop_main.c" - dummy_file_out := "test_kyber512r3_avx2_bmi2_support.o" - KYBER512R3_AVX2_BMI2_SUPPORTED := $(shell $(CC) -mavx2 -mbmi2 -c -o $(dummy_file_out) $(dummy_file) > /dev/null 2>&1; echo $$?; rm $(dummy_file_out) > /dev/null 2>&1) - endif -endif diff --git a/pq-crypto/s2n_pq_random.c b/pq-crypto/s2n_pq_random.c deleted file mode 100644 index aa97630cf0f..00000000000 --- a/pq-crypto/s2n_pq_random.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. 
- */ - -#include "utils/s2n_random.h" -#include "utils/s2n_result.h" -#include "utils/s2n_safety.h" -#include "s2n_pq_random.h" - -static S2N_RESULT s2n_get_random_bytes_default(uint8_t *buffer, uint32_t num_bytes); - -static s2n_get_random_bytes_callback s2n_get_random_bytes_cb = s2n_get_random_bytes_default; - -S2N_RESULT s2n_get_random_bytes(uint8_t *buffer, uint32_t num_bytes) { - RESULT_ENSURE_REF(buffer); - RESULT_GUARD(s2n_get_random_bytes_cb(buffer, num_bytes)); - - return S2N_RESULT_OK; -} - -static S2N_RESULT s2n_get_random_bytes_default(uint8_t *buffer, uint32_t num_bytes) { - struct s2n_blob out = { 0 }; - RESULT_GUARD_POSIX(s2n_blob_init(&out, buffer, num_bytes)); - RESULT_GUARD(s2n_get_private_random_data(&out)); - - return S2N_RESULT_OK; -} - -S2N_RESULT s2n_set_rand_bytes_callback_for_testing(s2n_get_random_bytes_callback rand_bytes_callback) { - RESULT_ENSURE(s2n_in_unit_test(), S2N_ERR_NOT_IN_UNIT_TEST); - - s2n_get_random_bytes_cb = rand_bytes_callback; - - return S2N_RESULT_OK; -} diff --git a/s2n.mk b/s2n.mk index b384e795e75..70fdfa1b53e 100644 --- a/s2n.mk +++ b/s2n.mk @@ -89,11 +89,6 @@ ifdef S2N_TEST_IN_FIPS_MODE DEFAULT_CFLAGS += -DS2N_TEST_IN_FIPS_MODE endif -# Don't compile PQ related source code -ifdef S2N_NO_PQ - DEFAULT_CFLAGS += -DS2N_NO_PQ -endif - CFLAGS += ${DEFAULT_CFLAGS} ifdef GCC_VERSION diff --git a/tests/features/S2N_KYBER512R3_AVX2_BMI2_SUPPORTED.c b/tests/features/S2N_KYBER512R3_AVX2_BMI2_SUPPORTED.c deleted file mode 100644 index aba7ed42e87..00000000000 --- a/tests/features/S2N_KYBER512R3_AVX2_BMI2_SUPPORTED.c +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "license" file accompanying this file. 
This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - -int main() { - return 0; -} diff --git a/tests/features/S2N_KYBER512R3_AVX2_BMI2_SUPPORTED.flags b/tests/features/S2N_KYBER512R3_AVX2_BMI2_SUPPORTED.flags deleted file mode 100644 index c72920f1597..00000000000 --- a/tests/features/S2N_KYBER512R3_AVX2_BMI2_SUPPORTED.flags +++ /dev/null @@ -1 +0,0 @@ --mavx2 -mavx -mbmi2 diff --git a/tests/features/S2N_KYBER512R3_M256_INTRINSICS_SUPPORTED.c b/tests/features/S2N_KYBER512R3_M256_INTRINSICS_SUPPORTED.c deleted file mode 100644 index 8dd1c5a3b80..00000000000 --- a/tests/features/S2N_KYBER512R3_M256_INTRINSICS_SUPPORTED.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. 
- */ -#include -#include - -#define ALIGNED_UINT8(N) \ - union { \ - uint8_t coeffs[N]; \ - __m256i vec[(N+31)/32]; \ - } - -int main() { - ALIGNED_UINT8(256) buf; - return 0; -} diff --git a/tests/features/S2N_KYBER512R3_M256_INTRINSICS_SUPPORTED.flags b/tests/features/S2N_KYBER512R3_M256_INTRINSICS_SUPPORTED.flags deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/fuzz/allowed_coverage_failures.cfg b/tests/fuzz/allowed_coverage_failures.cfg new file mode 100644 index 00000000000..2b16c264d52 --- /dev/null +++ b/tests/fuzz/allowed_coverage_failures.cfg @@ -0,0 +1,5 @@ +# If s2n is compiled with a libcrypto that doesn't support Kyber, then Kyber fuzzing branch coverage will be near zero +# since the fuzz test will immediately return an error that Kyber isn't supported. This isn't a fuzz test failure, so +# allow the Kyber fuzz tests to have zero branch coverage. +s2n_kyber_r3_recv_ciphertext_fuzz_test +s2n_kyber_r3_recv_public_key_fuzz_test diff --git a/tests/fuzz/calcTotalCov.sh b/tests/fuzz/calcTotalCov.sh index 33ac4108042..f36255e9b47 100755 --- a/tests/fuzz/calcTotalCov.sh +++ b/tests/fuzz/calcTotalCov.sh @@ -28,7 +28,7 @@ if [[ -z "$S2N_ROOT" ]]; then S2N_ROOT=../.. 
fi -FUZZCOV_SOURCES="${S2N_ROOT}/api ${S2N_ROOT}/bin ${S2N_ROOT}/crypto ${S2N_ROOT}/error ${S2N_ROOT}/pq-crypto ${S2N_ROOT}/stuffer ${S2N_ROOT}/tls ${S2N_ROOT}/utils" +FUZZCOV_SOURCES="${S2N_ROOT}/api ${S2N_ROOT}/bin ${S2N_ROOT}/crypto ${S2N_ROOT}/error ${S2N_ROOT}/stuffer ${S2N_ROOT}/tls ${S2N_ROOT}/utils" # Outputs fuzz coverage results if the FUZZ_COVERAGE environment variable is set diff --git a/tests/fuzz/runFuzzTest.sh b/tests/fuzz/runFuzzTest.sh index f929f203a55..e6f11e12f0a 100755 --- a/tests/fuzz/runFuzzTest.sh +++ b/tests/fuzz/runFuzzTest.sh @@ -48,7 +48,7 @@ LIBFUZZER_ARGS+="-timeout=5 -max_len=4096 -print_final_stats=1 -jobs=${NUM_CPU_T TEST_SPECIFIC_OVERRIDES="${PWD}/LD_PRELOAD/${TEST_NAME}_overrides.so" GLOBAL_OVERRIDES="${PWD}/LD_PRELOAD/global_overrides.so" -FUZZCOV_SOURCES="${S2N_ROOT}/api ${S2N_ROOT}/bin ${S2N_ROOT}/crypto ${S2N_ROOT}/error ${S2N_ROOT}/pq-crypto ${S2N_ROOT}/stuffer ${S2N_ROOT}/tls ${S2N_ROOT}/utils" +FUZZCOV_SOURCES="${S2N_ROOT}/api ${S2N_ROOT}/bin ${S2N_ROOT}/crypto ${S2N_ROOT}/error ${S2N_ROOT}/stuffer ${S2N_ROOT}/tls ${S2N_ROOT}/utils" if [ -e $TEST_SPECIFIC_OVERRIDES ]; then @@ -197,7 +197,13 @@ then printf "\033[33;1mWARNING!\033[0m ${TEST_NAME} is only ${TESTS_PER_SEC} tests/sec, which is below ${MIN_TEST_PER_SEC}/sec! Fuzz tests are more effective at higher rates.\n\n" fi - if [ "$FEATURE_COVERAGE" -lt $MIN_FEATURES_COVERED ]; then + COVERAGE_FAILURE_ALLOWED=0 + if grep -Fxq ${TEST_NAME} ./allowed_coverage_failures.cfg + then + COVERAGE_FAILURE_ALLOWED=1 + fi + + if [ "$FEATURE_COVERAGE" -lt $MIN_FEATURES_COVERED ] && [ "$COVERAGE_FAILURE_ALLOWED" -eq 0 ]; then printf "\033[31;1mERROR!\033[0m ${TEST_NAME} only covers ${FEATURE_COVERAGE} features, which is below ${MIN_FEATURES_COVERED}!
This may be due to missing corpus files or a bug.\n" exit -1; fi diff --git a/tests/fuzz/s2n_hybrid_ecdhe_kyber_r3_fuzz_test.c b/tests/fuzz/s2n_hybrid_ecdhe_kyber_r3_fuzz_test.c index e426ca4f4ce..9f8f4e8a00f 100644 --- a/tests/fuzz/s2n_hybrid_ecdhe_kyber_r3_fuzz_test.c +++ b/tests/fuzz/s2n_hybrid_ecdhe_kyber_r3_fuzz_test.c @@ -21,8 +21,8 @@ #include "crypto/s2n_drbg.h" #include "crypto/s2n_hash.h" #include "crypto/s2n_openssl.h" +#include "crypto/s2n_pq.h" #include "error/s2n_errno.h" -#include "pq-crypto/s2n_pq.h" #include "stuffer/s2n_stuffer.h" #include "tests/s2n_test.h" #include "tests/testlib/s2n_testlib.h" diff --git a/tests/fuzz/s2n_kyber_r3_recv_ciphertext_fuzz_test.c b/tests/fuzz/s2n_kyber_r3_recv_ciphertext_fuzz_test.c index 04a2f9b8306..3e2189db571 100644 --- a/tests/fuzz/s2n_kyber_r3_recv_ciphertext_fuzz_test.c +++ b/tests/fuzz/s2n_kyber_r3_recv_ciphertext_fuzz_test.c @@ -50,4 +50,3 @@ static void s2n_fuzz_cleanup() } S2N_FUZZ_TARGET(s2n_fuzz_init, s2n_fuzz_test, s2n_fuzz_cleanup) - diff --git a/tests/fuzz/s2n_kyber_r3_recv_public_key_fuzz_test.c b/tests/fuzz/s2n_kyber_r3_recv_public_key_fuzz_test.c index 11a1847bf7d..5f1f3c5b1c0 100644 --- a/tests/fuzz/s2n_kyber_r3_recv_public_key_fuzz_test.c +++ b/tests/fuzz/s2n_kyber_r3_recv_public_key_fuzz_test.c @@ -15,7 +15,7 @@ /* Target Functions: s2n_kem_recv_public_key s2n_kem_encapsulate kyber_512_r3_crypto_kem_enc kyber_768_r3_crypto_kem_enc kyber_1024_r3_crypto_kem_enc */ -#include "pq-crypto/s2n_pq.h" +#include "crypto/s2n_pq.h" #include "tests/s2n_test.h" #include "tests/testlib/s2n_testlib.h" #include "tls/s2n_kem.h" @@ -31,12 +31,13 @@ static struct s2n_kem_params kyber1024_r3_draft5_params = { .kem = &s2n_kyber_10 int s2n_fuzz_test(const uint8_t *buf, size_t len) { - POSIX_GUARD(s2n_kem_recv_public_key_fuzz_test(buf, len, &kyber512_r3_draft0_params)); - POSIX_GUARD(s2n_kem_recv_public_key_fuzz_test(buf, len, &kyber512_r3_draft5_params)); if (s2n_libcrypto_supports_kyber()) { + 
POSIX_GUARD(s2n_kem_recv_public_key_fuzz_test(buf, len, &kyber512_r3_draft0_params)); + POSIX_GUARD(s2n_kem_recv_public_key_fuzz_test(buf, len, &kyber512_r3_draft5_params)); POSIX_GUARD(s2n_kem_recv_public_key_fuzz_test(buf, len, &kyber768_r3_draft5_params)); POSIX_GUARD(s2n_kem_recv_public_key_fuzz_test(buf, len, &kyber1024_r3_draft5_params)); } + return S2N_SUCCESS; } diff --git a/tests/integrationv2/common.py b/tests/integrationv2/common.py index bb3cf51c110..aff3c450c13 100644 --- a/tests/integrationv2/common.py +++ b/tests/integrationv2/common.py @@ -6,7 +6,7 @@ from constants import TEST_CERT_DIRECTORY -from global_flags import get_flag, S2N_NO_PQ, S2N_FIPS_MODE +from global_flags import get_flag, S2N_PROVIDER_VERSION def data_bytes(n_bytes): @@ -33,7 +33,7 @@ def pq_enabled(): """ Returns true or false to indicate whether PQ crypto is enabled in s2n """ - return not (get_flag(S2N_NO_PQ, False) or get_flag(S2N_FIPS_MODE, False)) + return "awslc" in get_flag(S2N_PROVIDER_VERSION) class AvailablePorts(object): diff --git a/tests/saw/Makefile b/tests/saw/Makefile index 07f965e881f..9633cbecd99 100644 --- a/tests/saw/Makefile +++ b/tests/saw/Makefile @@ -149,12 +149,6 @@ bitcode : CRYPTO_C = $(wildcard ../../crypto/*.c) $(wildcard ../../crypto/*.h) ../../crypto/Makefile CRYPTO_COPY = $(addprefix s2n/crypto/, $(notdir $(CRYPTO_C))) -PQ_CRYPTO_C = $(wildcard ../../pq-crypto/*.c) $(wildcard ../../pq-crypto/*.h) ../../pq-crypto/s2n_pq_asm.mk ../../pq-crypto/Makefile -PQ_CRYPTO_COPY = $(addprefix s2n/pq-crypto/, $(notdir $(PQ_CRYPTO_C))) - -KYBER_R3_C = $(wildcard ../../pq-crypto/kyber_r3/*.c) $(wildcard ../../pq-crypto/kyber_r3/*.h) ../../pq-crypto/kyber_r3/Makefile -KYBER_R3_COPY = $(addprefix s2n/pq-crypto/kyber_r3/, $(notdir $(KYBER_R3_C))) - UTILS_C = $(wildcard ../../utils/*.c) $(wildcard ../../utils/*.h) ../../utils/Makefile UTILS_COPY =$(addprefix s2n/utils/, $(notdir $(UTILS_C))) @@ -182,12 +176,6 @@ s2n/api : s2n/crypto : mkdir -p $@ -s2n/pq-crypto : - mkdir 
-p $@ - -s2n/pq-crypto/kyber_r3 : - mkdir -p $@ - s2n/utils : mkdir -p $@ @@ -205,12 +193,12 @@ export BITCODE_DIR := $(CURDIR)/bitcode/ tmp: mkdir -p tmp -bitcode/all_llvm.bc : s2n/crypto s2n/pq-crypto s2n/utils s2n/tls s2n/api s2n/error s2n/stuffer s2n/Makefile s2n/s2n.mk $(CRYPTO_COPY) $(PQ_CRYPTO_COPY) $(UTILS_COPY) $(TLS_COPY) $(API_COPY) $(ERROR_COPY) $(STUFFER_COPY) +bitcode/all_llvm.bc : s2n/crypto s2n/utils s2n/tls s2n/api s2n/error s2n/stuffer s2n/Makefile s2n/s2n.mk $(CRYPTO_COPY) $(UTILS_COPY) $(TLS_COPY) $(API_COPY) $(ERROR_COPY) $(STUFFER_COPY) ${MAKE} -C s2n bc ${MAKE} -C bitcode all_llvm.bc -s2n/lib/libs2n.so : s2n/crypto s2n/pq-crypto s2n/pq-crypto/kyber_r3 s2n/utils s2n/tls s2n/api s2n/error s2n/stuffer s2n/lib s2n/Makefile s2n/s2n.mk $(CRYPTO_COPY) $(PQ_CRYPTO_COPY) $(KYBER_R3_COPY) $(UTILS_COPY) $(TLS_COPY) $(API_COPY) $(ERROR_COPY) $(STUFFER_COPY) $(LIB_COPY) +s2n/lib/libs2n.so : s2n/crypto s2n/utils s2n/tls s2n/api s2n/error s2n/stuffer s2n/lib s2n/Makefile s2n/s2n.mk $(CRYPTO_COPY) $(UTILS_COPY) $(TLS_COPY) $(API_COPY) $(ERROR_COPY) $(STUFFER_COPY) $(LIB_COPY) ${MAKE} -C s2n libs NO_STACK_PROTECTOR=1 NO_INLINE=1 s2n/%.h : ../../%.h @@ -230,6 +218,3 @@ s2n/Makefile : ../../Makefile s2n/s2n.mk : ../../s2n.mk cp $< $@ - -s2n/pq-crypto/s2n_pq_asm.mk : ../../pq-crypto/s2n_pq_asm.mk - cp $< $@ diff --git a/tests/testlib/s2n_kem_fuzz_testlib.c b/tests/testlib/s2n_kem_fuzz_testlib.c index f8606c540bd..fb44c95480e 100644 --- a/tests/testlib/s2n_kem_fuzz_testlib.c +++ b/tests/testlib/s2n_kem_fuzz_testlib.c @@ -13,7 +13,7 @@ * permissions and limitations under the License.
*/ -#include "pq-crypto/s2n_pq.h" +#include "crypto/s2n_pq.h" #include "s2n_testlib.h" #include "tests/testlib/s2n_nist_kats.h" #include "tls/s2n_kem.h" diff --git a/tests/testlib/s2n_pq_kat_test_utils.c b/tests/testlib/s2n_pq_kat_test_utils.c index d9d41df2ace..e2161ad05eb 100644 --- a/tests/testlib/s2n_pq_kat_test_utils.c +++ b/tests/testlib/s2n_pq_kat_test_utils.c @@ -15,8 +15,7 @@ #include -#include "pq-crypto/s2n_pq.h" -#include "pq-crypto/s2n_pq_random.h" +#include "crypto/s2n_pq.h" #include "testlib/s2n_nist_kats.h" #include "testlib/s2n_testlib.h" #include "tls/s2n_kem.h" @@ -53,13 +52,6 @@ int s2n_pq_kat_seed_entropy(void *ptr, uint32_t size) return S2N_SUCCESS; } -/* Since the NIST KATs were generated without prediction resistance, the - * mix entropy callback should never be called. */ -static int s2n_pq_kat_mix_entropy(void *ptr, uint32_t size) -{ - return S2N_FAILURE; -} - /* Adapted from s2n_drbg.c::s2n_drbg_generate(); this allows us to side-step the DRBG * prediction resistance that is built in to s2n's DRBG modes. The PQ KATs were generated * using AES 256 CTR NO DF NO PR. 
*/ @@ -112,112 +104,6 @@ S2N_RESULT s2n_get_random_bytes_for_pq_kat_tests(uint8_t *buffer, uint32_t num_b return S2N_RESULT_OK; } -static int s2n_test_kem_with_kat(const struct s2n_kem *kem, const char *kat_file_name) -{ - POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_PQ_DISABLED); - POSIX_ENSURE(s2n_in_unit_test(), S2N_ERR_NOT_IN_UNIT_TEST); - - POSIX_ENSURE_REF(kem); - - FILE *kat_file = fopen(kat_file_name, "r"); - POSIX_ENSURE_REF(kat_file); - - uint8_t *ct, *client_shared_secret, *pk, *sk, *server_shared_secret, *pk_answer, *sk_answer, *ct_answer, *ss_answer; - /* Client side variables */ - POSIX_ENSURE_REF(ct = malloc(kem->ciphertext_length)); - POSIX_ENSURE_REF(client_shared_secret = malloc(kem->shared_secret_key_length)); - - /* Server side variables */ - POSIX_ENSURE_REF(pk = malloc(kem->public_key_length)); - POSIX_ENSURE_REF(sk = malloc(kem->private_key_length)); - POSIX_ENSURE_REF(server_shared_secret = malloc(kem->shared_secret_key_length)); - - /* Known answer variables */ - POSIX_ENSURE_REF(pk_answer = malloc(kem->public_key_length)); - POSIX_ENSURE_REF(sk_answer = malloc(kem->private_key_length)); - POSIX_ENSURE_REF(ct_answer = malloc(kem->ciphertext_length)); - POSIX_ENSURE_REF(ss_answer = malloc(kem->shared_secret_key_length)); - - s2n_stack_blob(personalization_string, SEED_LENGTH, SEED_LENGTH); - POSIX_GUARD(s2n_rand_set_callbacks(s2n_pq_kat_rand_init, s2n_pq_kat_rand_cleanup, s2n_pq_kat_seed_entropy, - s2n_pq_kat_mix_entropy)); - POSIX_GUARD_RESULT(s2n_set_rand_bytes_callback_for_testing(s2n_get_random_bytes_for_pq_kat_tests)); - - for (size_t i = 0; i < NUM_OF_KATS; i++) { - /* Verify test index */ - int32_t count = 0; - POSIX_GUARD(FindMarker(kat_file, "count = ")); - POSIX_ENSURE_GT(fscanf(kat_file, "%d", &count), 0); - POSIX_ENSURE_EQ(count, i); - - /* Set the DRBG to the state that was used to generate this test vector. 
We instantiate the DRBG - * as S2N_AES_256_CTR_NO_DF_PR; since the NIST KATs were generated without prediction resistance, - * we use the custom function s2n_drbg_generate_for_pq_kat_tests() defined above to turn off the - * prediction resistance. */ - POSIX_GUARD(ReadHex(kat_file, kat_entropy_blob.data, SEED_LENGTH, "seed = ")); - POSIX_GUARD_RESULT(s2n_drbg_instantiate(&drbg_for_pq_kats, &personalization_string, S2N_AES_256_CTR_NO_DF_PR)); - - /* Generate the public/private key pair */ - POSIX_GUARD(kem->generate_keypair(kem, pk, sk)); - - /* Create a shared secret and use the public key to encrypt it */ - POSIX_GUARD(kem->encapsulate(kem, ct, client_shared_secret, pk)); - - /* Use the private key to decrypt the ct to get the shared secret */ - POSIX_GUARD(kem->decapsulate(kem, server_shared_secret, ct, sk)); - - /* Read the KAT values */ - POSIX_GUARD(ReadHex(kat_file, pk_answer, kem->public_key_length, "pk = ")); - POSIX_GUARD(ReadHex(kat_file, sk_answer, kem->private_key_length, "sk = ")); - POSIX_GUARD(ReadHex(kat_file, ct_answer, kem->ciphertext_length, "ct = ")); - POSIX_GUARD(ReadHex(kat_file, ss_answer, kem->shared_secret_key_length, "ss = ")); - - /* Test the client and server got the same value */ - POSIX_ENSURE_EQ(memcmp(client_shared_secret, server_shared_secret, kem->shared_secret_key_length), 0); - - /* Compare the KAT values */ - POSIX_ENSURE_EQ(memcmp(pk_answer, pk, kem->public_key_length), 0); - POSIX_ENSURE_EQ(memcmp(sk_answer, sk, kem->private_key_length), 0); - POSIX_ENSURE_EQ(memcmp(ct_answer, ct, kem->ciphertext_length), 0); - POSIX_ENSURE_EQ(memcmp(ss_answer, server_shared_secret, kem->shared_secret_key_length), 0); - - /* Wipe the DRBG; it will reseed for each KAT test vector. 
*/ - POSIX_GUARD_RESULT(s2n_drbg_wipe(&drbg_for_pq_kats)); - } - fclose(kat_file); - free(ct); - free(client_shared_secret); - free(pk); - free(sk); - free(server_shared_secret); - free(pk_answer); - free(sk_answer); - free(ct_answer); - free(ss_answer); - - return 0; -} - -S2N_RESULT s2n_pq_kem_kat_test(const struct s2n_kem_kat_test_vector *test_vectors, size_t count) -{ - RESULT_ENSURE_GT(count, 0); - for (size_t i = 0; i < count; i++) { - const struct s2n_kem_kat_test_vector vector = test_vectors[i]; - const struct s2n_kem *kem = vector.kem; - - /* Test the C code */ - RESULT_GUARD(vector.disable_asm()); - RESULT_GUARD_POSIX(s2n_test_kem_with_kat(kem, vector.kat_file)); - - /* Test the assembly, if available */ - RESULT_GUARD(vector.enable_asm()); - if (vector.asm_is_enabled()) { - RESULT_GUARD_POSIX(s2n_test_kem_with_kat(kem, vector.kat_file)); - } - } - return S2N_RESULT_OK; -} - S2N_RESULT s2n_pq_noop_asm() { return S2N_RESULT_OK; diff --git a/tests/unit/s2n_cipher_suite_match_test.c b/tests/unit/s2n_cipher_suite_match_test.c index a2bfbef5e65..8353210a65b 100644 --- a/tests/unit/s2n_cipher_suite_match_test.c +++ b/tests/unit/s2n_cipher_suite_match_test.c @@ -16,7 +16,7 @@ #include #include "crypto/s2n_ecc_evp.h" -#include "pq-crypto/s2n_pq.h" +#include "crypto/s2n_pq.h" #include "s2n_test.h" #include "testlib/s2n_testlib.h" #include "tls/s2n_cipher_suites.h" diff --git a/tests/unit/s2n_client_extensions_test.c b/tests/unit/s2n_client_extensions_test.c index 0743391729b..456021f3f51 100644 --- a/tests/unit/s2n_client_extensions_test.c +++ b/tests/unit/s2n_client_extensions_test.c @@ -20,7 +20,7 @@ #include #include "api/s2n.h" -#include "pq-crypto/s2n_pq.h" +#include "crypto/s2n_pq.h" #include "s2n_test.h" #include "testlib/s2n_testlib.h" #include "tls/s2n_connection.h" diff --git a/tests/unit/s2n_client_hello_get_supported_groups_test.c b/tests/unit/s2n_client_hello_get_supported_groups_test.c index 8cb7de97f2b..6ca477e70e7 100644 --- 
a/tests/unit/s2n_client_hello_get_supported_groups_test.c +++ b/tests/unit/s2n_client_hello_get_supported_groups_test.c @@ -13,7 +13,7 @@ * permissions and limitations under the License. */ -#include "pq-crypto/s2n_pq.h" +#include "crypto/s2n_pq.h" #include "s2n_test.h" #include "testlib/s2n_testlib.h" #include "tls/extensions/s2n_client_supported_groups.h" diff --git a/tests/unit/s2n_client_hello_retry_test.c b/tests/unit/s2n_client_hello_retry_test.c index 555f6acfee0..077d6e36bcd 100644 --- a/tests/unit/s2n_client_hello_retry_test.c +++ b/tests/unit/s2n_client_hello_retry_test.c @@ -13,7 +13,7 @@ * permissions and limitations under the License. */ -#include "pq-crypto/s2n_pq.h" +#include "crypto/s2n_pq.h" #include "s2n.h" #include "s2n_test.h" #include "stuffer/s2n_stuffer.h" @@ -189,7 +189,7 @@ int main(int argc, char **argv) conn->kex_params.server_kem_group_params.kem_group = kem_pref->tls13_kem_groups[0]; EXPECT_NULL(conn->kex_params.server_ecc_evp_params.negotiated_curve); - EXPECT_FAILURE_WITH_ERRNO(s2n_server_hello_retry_recv(conn), S2N_ERR_PQ_DISABLED); + EXPECT_FAILURE_WITH_ERRNO(s2n_server_hello_retry_recv(conn), S2N_ERR_NO_SUPPORTED_LIBCRYPTO_API); EXPECT_SUCCESS(s2n_connection_free(conn)); } else { diff --git a/tests/unit/s2n_client_key_share_extension_pq_test.c b/tests/unit/s2n_client_key_share_extension_pq_test.c index e03acc3638a..bae35f96934 100644 --- a/tests/unit/s2n_client_key_share_extension_pq_test.c +++ b/tests/unit/s2n_client_key_share_extension_pq_test.c @@ -28,7 +28,7 @@ #include "testlib/s2n_testlib.h" #include "stuffer/s2n_stuffer.h" #include "utils/s2n_safety.h" -#include "pq-crypto/s2n_pq.h" +#include "crypto/s2n_pq.h" /* clang-format on */ #define HELLO_RETRY_MSG_NO 1 @@ -941,7 +941,7 @@ static int s2n_generate_pq_hybrid_key_share_for_test(struct s2n_stuffer *out, st POSIX_ENSURE_REF(kem_group_params); /* This function should never be called when PQ is disabled */ - POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_PQ_DISABLED); + 
POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_NO_SUPPORTED_LIBCRYPTO_API); const struct s2n_kem_group *kem_group = kem_group_params->kem_group; POSIX_ENSURE_REF(kem_group); diff --git a/tests/unit/s2n_client_pq_kem_extension_test.c b/tests/unit/s2n_client_pq_kem_extension_test.c index 599843f71f9..c4ff26d92a2 100644 --- a/tests/unit/s2n_client_pq_kem_extension_test.c +++ b/tests/unit/s2n_client_pq_kem_extension_test.c @@ -13,7 +13,7 @@ * permissions and limitations under the License. */ -#include "pq-crypto/s2n_pq.h" +#include "crypto/s2n_pq.h" #include "s2n_test.h" #include "tls/extensions/s2n_client_pq_kem.h" #include "tls/s2n_security_policies.h" diff --git a/tests/unit/s2n_client_supported_groups_extension_test.c b/tests/unit/s2n_client_supported_groups_extension_test.c index 0734e99722b..fd2a105ed62 100644 --- a/tests/unit/s2n_client_supported_groups_extension_test.c +++ b/tests/unit/s2n_client_supported_groups_extension_test.c @@ -15,7 +15,7 @@ #include -#include "pq-crypto/s2n_pq.h" +#include "crypto/s2n_pq.h" #include "s2n_test.h" #include "stuffer/s2n_stuffer.h" #include "tls/extensions/s2n_client_supported_groups.h" diff --git a/tests/unit/s2n_config_test.c b/tests/unit/s2n_config_test.c index 10c1a849dfa..423b0c94719 100644 --- a/tests/unit/s2n_config_test.c +++ b/tests/unit/s2n_config_test.c @@ -19,7 +19,7 @@ #include "api/s2n.h" #include "crypto/s2n_fips.h" -#include "pq-crypto/s2n_pq.h" +#include "crypto/s2n_pq.h" #include "s2n_test.h" #include "testlib/s2n_testlib.h" #include "tls/extensions/s2n_client_supported_groups.h" diff --git a/tests/unit/s2n_kem_preferences_test.c b/tests/unit/s2n_kem_preferences_test.c index 505afcc5489..d4f938b9b40 100644 --- a/tests/unit/s2n_kem_preferences_test.c +++ b/tests/unit/s2n_kem_preferences_test.c @@ -16,7 +16,7 @@ #include "tls/s2n_kem_preferences.h" #include "crypto/s2n_fips.h" -#include "pq-crypto/s2n_pq.h" +#include "crypto/s2n_pq.h" #include "s2n_test.h" #include "tls/s2n_tls_parameters.h" diff --git 
a/tests/unit/s2n_kem_test.c b/tests/unit/s2n_kem_test.c index 57f8325a7b8..c9423bc6606 100644 --- a/tests/unit/s2n_kem_test.c +++ b/tests/unit/s2n_kem_test.c @@ -15,7 +15,7 @@ #include "tls/s2n_kem.h" #include "crypto/s2n_ecc_evp.h" -#include "pq-crypto/s2n_pq.h" +#include "crypto/s2n_pq.h" #include "tests/s2n_test.h" #include "tls/extensions/s2n_key_share.h" #include "tls/s2n_cipher_preferences.h" diff --git a/tests/unit/s2n_kex_with_kem_test.c b/tests/unit/s2n_kex_with_kem_test.c index 504135379bd..865c2495f73 100644 --- a/tests/unit/s2n_kex_with_kem_test.c +++ b/tests/unit/s2n_kex_with_kem_test.c @@ -13,7 +13,7 @@ * permissions and limitations under the License. */ -#include "pq-crypto/s2n_pq.h" +#include "crypto/s2n_pq.h" #include "tests/s2n_test.h" #include "tls/s2n_cipher_suites.h" #include "tls/s2n_client_key_exchange.h" @@ -128,14 +128,14 @@ static int assert_pq_disabled_checks(struct s2n_cipher_suite *cipher_suite, cons /* If PQ is disabled: * s2n_check_kem() (s2n_hybrid_ecdhe_kem.connection_supported) should indicate that the connection is not supported * s2n_configure_kem() (s2n_hybrid_ecdhe_kem.configure_connection) should return S2N_RESULT_ERROR - * set s2n_errno to S2N_ERR_PQ_DISABLED */ + * set s2n_errno to S2N_ERR_NO_SUPPORTED_LIBCRYPTO_API */ bool connection_supported = true; POSIX_GUARD_RESULT(s2n_hybrid_ecdhe_kem.connection_supported(cipher_suite, server_conn, &connection_supported)); POSIX_ENSURE_EQ(connection_supported, false); POSIX_ENSURE_EQ(s2n_result_is_error(s2n_hybrid_ecdhe_kem.configure_connection(cipher_suite, server_conn)), true); - POSIX_ENSURE_EQ(s2n_errno, S2N_ERR_PQ_DISABLED); + POSIX_ENSURE_EQ(s2n_errno, S2N_ERR_NO_SUPPORTED_LIBCRYPTO_API); POSIX_GUARD(s2n_connection_free(server_conn)); s2n_errno = 0; diff --git a/tests/unit/s2n_pq_kem_kat_kyber_r3_test.c b/tests/unit/s2n_pq_kem_kat_kyber_r3_test.c deleted file mode 100644 index 66e72027608..00000000000 --- a/tests/unit/s2n_pq_kem_kat_kyber_r3_test.c +++ /dev/null @@ -1,52 +0,0 @@ 
-/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - -#include "pq-crypto/s2n_pq.h" -#include "s2n_test.h" -#include "tests/testlib/s2n_testlib.h" - -static const struct s2n_kem_kat_test_vector test_vectors[] = { - { - .kem = &s2n_kyber_512_r3, - .kat_file = "kats/kyber_r3.kat", - .asm_is_enabled = s2n_pq_no_asm_available, - .enable_asm = s2n_pq_noop_asm, - .disable_asm = s2n_pq_noop_asm, - }, - { - .kem = &s2n_kyber_512_r3, - .kat_file = "kats/kyber_r3.kat", - .asm_is_enabled = s2n_kyber512r3_is_avx2_bmi2_enabled, - .enable_asm = s2n_try_enable_kyber512r3_opt_avx2_bmi2, - .disable_asm = s2n_disable_kyber512r3_opt_avx2_bmi2, - }, -}; - -int main() -{ - BEGIN_TEST(); - if (!s2n_pq_is_enabled() || s2n_libcrypto_supports_kyber()) { - /* The KAT tests rely on the low-level PQ crypto functions; - * there is nothing to test if PQ is disabled. - * - * In the case where we are using AWS-LC backed PQ, we rely on the - * KAT tests implemented in the AWS-LC repository. Implementing these - * tests within S2N is impossible due to the lack of AWS-LC interfaces - * for initializing the RNG. 
*/ - END_TEST(); - } - EXPECT_OK(s2n_pq_kem_kat_test(test_vectors, s2n_array_len(test_vectors))); - END_TEST(); -} diff --git a/tests/unit/s2n_pq_kem_test.c b/tests/unit/s2n_pq_kem_test.c index 06959bbbc90..257766eb3c6 100644 --- a/tests/unit/s2n_pq_kem_test.c +++ b/tests/unit/s2n_pq_kem_test.c @@ -17,32 +17,16 @@ #include "crypto/s2n_fips.h" #include "crypto/s2n_openssl.h" -#include "pq-crypto/s2n_pq.h" +#include "crypto/s2n_pq.h" #include "s2n_test.h" #include "tests/testlib/s2n_testlib.h" #include "tls/s2n_kem.h" #include "utils/s2n_safety.h" -struct s2n_kem_test_vector { - const struct s2n_kem *kem; - bool (*asm_is_enabled)(); - S2N_RESULT (*enable_asm)(); - S2N_RESULT (*disable_asm)(); -}; - -static const struct s2n_kem_test_vector test_vectors[] = { - { - .kem = &s2n_kyber_512_r3, - .asm_is_enabled = s2n_pq_no_asm_available, - .enable_asm = s2n_pq_noop_asm, - .disable_asm = s2n_pq_noop_asm, - }, - { - .kem = &s2n_kyber_512_r3, - .asm_is_enabled = s2n_kyber512r3_is_avx2_bmi2_enabled, - .enable_asm = s2n_try_enable_kyber512r3_opt_avx2_bmi2, - .disable_asm = s2n_disable_kyber512r3_opt_avx2_bmi2, - }, +static const struct s2n_kem *test_vectors[] = { + &s2n_kyber_512_r3, + &s2n_kyber_768_r3, + &s2n_kyber_1024_r3, }; /* EXPECT_SUCCESS checks explicitly function_call != -1; the PQ KEM functions may return @@ -62,8 +46,7 @@ int main() #endif for (size_t i = 0; i < s2n_array_len(test_vectors); i++) { - const struct s2n_kem_test_vector vector = test_vectors[i]; - const struct s2n_kem *kem = vector.kem; + const struct s2n_kem *kem = test_vectors[i]; DEFER_CLEANUP(struct s2n_blob public_key = { 0 }, s2n_free); EXPECT_SUCCESS(s2n_alloc(&public_key, kem->public_key_length)); @@ -81,36 +64,23 @@ int main() EXPECT_SUCCESS(s2n_alloc(&ciphertext, kem->ciphertext_length)); if (s2n_pq_is_enabled()) { - /* Run the tests for C and assembly implementations (where available) */ - s2n_result (*asm_toggle_funcs[])(void) = { vector.disable_asm, vector.enable_asm }; - - for (size_t j = 
0; j < s2n_array_len(asm_toggle_funcs); j++) { - EXPECT_OK(asm_toggle_funcs[j]()); - - /* Test a successful round-trip: keygen->enc->dec */ - EXPECT_PQ_KEM_SUCCESS(kem->generate_keypair(kem, public_key.data, private_key.data)); - EXPECT_PQ_KEM_SUCCESS(kem->encapsulate(kem, ciphertext.data, client_shared_secret.data, public_key.data)); - EXPECT_PQ_KEM_SUCCESS(kem->decapsulate(kem, server_shared_secret.data, ciphertext.data, private_key.data)); - EXPECT_BYTEARRAY_EQUAL(server_shared_secret.data, client_shared_secret.data, kem->shared_secret_key_length); - - /* By design, if an invalid private key + ciphertext pair is provided to decapsulate(), - * the function should still succeed (return S2N_SUCCESS); however, the shared secret - * that was "decapsulated" will be a garbage random value. */ - ciphertext.data[0] ^= 1; /* Flip a bit to invalidate the ciphertext */ - - EXPECT_PQ_KEM_SUCCESS(kem->decapsulate(kem, server_shared_secret.data, ciphertext.data, private_key.data)); - EXPECT_BYTEARRAY_NOT_EQUAL(server_shared_secret.data, client_shared_secret.data, kem->shared_secret_key_length); - } + /* Test a successful round-trip: keygen->enc->dec */ + EXPECT_PQ_KEM_SUCCESS(kem->generate_keypair(kem, public_key.data, private_key.data)); + EXPECT_PQ_KEM_SUCCESS(kem->encapsulate(kem, ciphertext.data, client_shared_secret.data, public_key.data)); + EXPECT_PQ_KEM_SUCCESS(kem->decapsulate(kem, server_shared_secret.data, ciphertext.data, private_key.data)); + EXPECT_BYTEARRAY_EQUAL(server_shared_secret.data, client_shared_secret.data, kem->shared_secret_key_length); + + /* By design, if an invalid private key + ciphertext pair is provided to decapsulate(), + * the function should still succeed (return S2N_SUCCESS); however, the shared secret + * that was "decapsulated" will be a garbage random value. 
*/ + ciphertext.data[0] ^= 1; /* Flip a bit to invalidate the ciphertext */ + + EXPECT_PQ_KEM_SUCCESS(kem->decapsulate(kem, server_shared_secret.data, ciphertext.data, private_key.data)); + EXPECT_BYTEARRAY_NOT_EQUAL(server_shared_secret.data, client_shared_secret.data, kem->shared_secret_key_length); } else { -#if defined(S2N_NO_PQ) - EXPECT_FAILURE_WITH_ERRNO(kem->generate_keypair(kem, public_key.data, private_key.data), S2N_ERR_UNIMPLEMENTED); - EXPECT_FAILURE_WITH_ERRNO(kem->encapsulate(kem, ciphertext.data, client_shared_secret.data, public_key.data), S2N_ERR_UNIMPLEMENTED); - EXPECT_FAILURE_WITH_ERRNO(kem->decapsulate(kem, server_shared_secret.data, ciphertext.data, private_key.data), S2N_ERR_UNIMPLEMENTED); -#else - EXPECT_FAILURE_WITH_ERRNO(kem->generate_keypair(kem, public_key.data, private_key.data), S2N_ERR_PQ_DISABLED); - EXPECT_FAILURE_WITH_ERRNO(kem->encapsulate(kem, ciphertext.data, client_shared_secret.data, public_key.data), S2N_ERR_PQ_DISABLED); - EXPECT_FAILURE_WITH_ERRNO(kem->decapsulate(kem, server_shared_secret.data, ciphertext.data, private_key.data), S2N_ERR_PQ_DISABLED); -#endif + EXPECT_FAILURE_WITH_ERRNO(kem->generate_keypair(kem, public_key.data, private_key.data), S2N_ERR_NO_SUPPORTED_LIBCRYPTO_API); + EXPECT_FAILURE_WITH_ERRNO(kem->encapsulate(kem, ciphertext.data, client_shared_secret.data, public_key.data), S2N_ERR_NO_SUPPORTED_LIBCRYPTO_API); + EXPECT_FAILURE_WITH_ERRNO(kem->decapsulate(kem, server_shared_secret.data, ciphertext.data, private_key.data), S2N_ERR_NO_SUPPORTED_LIBCRYPTO_API); } } diff --git a/tests/unit/s2n_security_policies_test.c b/tests/unit/s2n_security_policies_test.c index d8f4e90d010..7ef0abb827f 100644 --- a/tests/unit/s2n_security_policies_test.c +++ b/tests/unit/s2n_security_policies_test.c @@ -15,9 +15,9 @@ #include "tls/s2n_security_policies.h" +#include "crypto/s2n_pq.h" #include "crypto/s2n_rsa_pss.h" #include "crypto/s2n_rsa_signing.h" -#include "pq-crypto/s2n_pq.h" #include "s2n_test.h" #include 
"testlib/s2n_testlib.h" #include "tls/s2n_kem.h" diff --git a/tests/unit/s2n_server_key_share_extension_test.c b/tests/unit/s2n_server_key_share_extension_test.c index cf204373cf7..44569f0f81e 100644 --- a/tests/unit/s2n_server_key_share_extension_test.c +++ b/tests/unit/s2n_server_key_share_extension_test.c @@ -16,7 +16,7 @@ #include #include "api/s2n.h" -#include "pq-crypto/s2n_pq.h" +#include "crypto/s2n_pq.h" #include "s2n_test.h" #include "stuffer/s2n_stuffer.h" #include "testlib/s2n_nist_kats.h" @@ -567,7 +567,7 @@ int main(int argc, char **argv) EXPECT_SUCCESS(s2n_stuffer_init(&iana_stuffer, &iana_blob)); EXPECT_SUCCESS(s2n_stuffer_write_uint16(&iana_stuffer, test_security_policy.kem_preferences->tls13_kem_groups[0]->iana_id)); - EXPECT_FAILURE_WITH_ERRNO(s2n_server_key_share_extension.recv(client_conn, &iana_stuffer), S2N_ERR_PQ_DISABLED); + EXPECT_FAILURE_WITH_ERRNO(s2n_server_key_share_extension.recv(client_conn, &iana_stuffer), S2N_ERR_NO_SUPPORTED_LIBCRYPTO_API); EXPECT_SUCCESS(s2n_connection_free(client_conn)); } @@ -782,7 +782,7 @@ int main(int argc, char **argv) EXPECT_NOT_NULL(conn = s2n_connection_new(S2N_SERVER)); if (!s2n_pq_is_enabled()) { - EXPECT_FAILURE_WITH_ERRNO(s2n_server_key_share_send_check_pq_hybrid(conn), S2N_ERR_PQ_DISABLED); + EXPECT_FAILURE_WITH_ERRNO(s2n_server_key_share_send_check_pq_hybrid(conn), S2N_ERR_NO_SUPPORTED_LIBCRYPTO_API); } if (s2n_pq_is_enabled()) { diff --git a/tests/unit/s2n_tls13_hybrid_shared_secret_test.c b/tests/unit/s2n_tls13_hybrid_shared_secret_test.c index ebd1a3e152f..0d99e96a2b5 100644 --- a/tests/unit/s2n_tls13_hybrid_shared_secret_test.c +++ b/tests/unit/s2n_tls13_hybrid_shared_secret_test.c @@ -18,7 +18,7 @@ #include "api/s2n.h" #include "crypto/s2n_ecc_evp.h" #include "crypto/s2n_hash.h" -#include "pq-crypto/s2n_pq.h" +#include "crypto/s2n_pq.h" #include "tests/s2n_test.h" #include "tests/testlib/s2n_testlib.h" #include "tls/s2n_cipher_suites.h" diff --git a/tests/unit/s2n_tls13_pq_handshake_test.c 
b/tests/unit/s2n_tls13_pq_handshake_test.c index 0d843a25fd4..74f26f0c288 100644 --- a/tests/unit/s2n_tls13_pq_handshake_test.c +++ b/tests/unit/s2n_tls13_pq_handshake_test.c @@ -14,8 +14,8 @@ */ #include "api/s2n.h" +#include "crypto/s2n_pq.h" #include "crypto/s2n_rsa_signing.h" -#include "pq-crypto/s2n_pq.h" #include "s2n_test.h" #include "testlib/s2n_testlib.h" #include "tls/s2n_ecc_preferences.h" diff --git a/tls/extensions/s2n_client_key_share.c b/tls/extensions/s2n_client_key_share.c index 946da930d61..1226ee15d4f 100644 --- a/tls/extensions/s2n_client_key_share.c +++ b/tls/extensions/s2n_client_key_share.c @@ -15,8 +15,8 @@ #include "tls/extensions/s2n_client_key_share.h" +#include "crypto/s2n_pq.h" #include "error/s2n_errno.h" -#include "pq-crypto/s2n_pq.h" #include "stuffer/s2n_stuffer.h" #include "tls/extensions/s2n_key_share.h" #include "tls/s2n_kem_preferences.h" @@ -106,7 +106,7 @@ static int s2n_generate_pq_hybrid_key_share(struct s2n_stuffer *out, struct s2n_ POSIX_ENSURE_REF(kem_group_params); /* This function should never be called when PQ is disabled */ - POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_PQ_DISABLED); + POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_NO_SUPPORTED_LIBCRYPTO_API); const struct s2n_kem_group *kem_group = kem_group_params->kem_group; POSIX_ENSURE_REF(kem_group); diff --git a/tls/extensions/s2n_client_pq_kem.c b/tls/extensions/s2n_client_pq_kem.c index 5ad72e642cb..83d3d9150a7 100644 --- a/tls/extensions/s2n_client_pq_kem.c +++ b/tls/extensions/s2n_client_pq_kem.c @@ -18,7 +18,7 @@ #include #include -#include "pq-crypto/s2n_pq.h" +#include "crypto/s2n_pq.h" #include "tls/s2n_kem.h" #include "tls/s2n_security_policies.h" #include "tls/s2n_tls.h" diff --git a/tls/extensions/s2n_client_supported_groups.c b/tls/extensions/s2n_client_supported_groups.c index 3e88d41e61e..60a2a90ce7e 100644 --- a/tls/extensions/s2n_client_supported_groups.c +++ b/tls/extensions/s2n_client_supported_groups.c @@ -18,7 +18,7 @@ #include #include -#include 
"pq-crypto/s2n_pq.h" +#include "crypto/s2n_pq.h" #include "tls/extensions/s2n_ec_point_format.h" #include "tls/s2n_security_policies.h" #include "tls/s2n_tls.h" diff --git a/tls/extensions/s2n_server_key_share.c b/tls/extensions/s2n_server_key_share.c index 5b41b32c90b..819b852116f 100644 --- a/tls/extensions/s2n_server_key_share.c +++ b/tls/extensions/s2n_server_key_share.c @@ -15,7 +15,7 @@ #include "tls/extensions/s2n_server_key_share.h" -#include "pq-crypto/s2n_pq.h" +#include "crypto/s2n_pq.h" #include "tls/s2n_security_policies.h" #include "tls/s2n_tls.h" #include "tls/s2n_tls13.h" @@ -39,7 +39,7 @@ static int s2n_server_key_share_generate_pq_hybrid(struct s2n_connection *conn, POSIX_ENSURE_REF(out); POSIX_ENSURE_REF(conn); - POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_PQ_DISABLED); + POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_NO_SUPPORTED_LIBCRYPTO_API); struct s2n_kem_group_params *server_kem_group_params = &conn->kex_params.server_kem_group_params; struct s2n_kem_params *client_kem_params = &conn->kex_params.client_kem_group_params.kem_params; @@ -74,7 +74,7 @@ int s2n_server_key_share_send_check_pq_hybrid(struct s2n_connection *conn) { POSIX_ENSURE_REF(conn); - POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_PQ_DISABLED); + POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_NO_SUPPORTED_LIBCRYPTO_API); POSIX_ENSURE_REF(conn->kex_params.server_kem_group_params.kem_group); POSIX_ENSURE_REF(conn->kex_params.server_kem_group_params.kem_params.kem); @@ -165,7 +165,7 @@ static int s2n_server_key_share_recv_pq_hybrid(struct s2n_connection *conn, uint /* If PQ is disabled, the client should not have sent any PQ IDs * in the supported_groups list of the initial ClientHello */ - POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_PQ_DISABLED); + POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_NO_SUPPORTED_LIBCRYPTO_API); const struct s2n_kem_preferences *kem_pref = NULL; POSIX_GUARD(s2n_connection_get_kem_preferences(conn, &kem_pref)); diff --git a/tls/s2n_cipher_suites.c b/tls/s2n_cipher_suites.c 
index c00e7852217..bbf8dbcd20e 100644 --- a/tls/s2n_cipher_suites.c +++ b/tls/s2n_cipher_suites.c @@ -18,8 +18,8 @@ #include "crypto/s2n_cipher.h" #include "crypto/s2n_openssl.h" +#include "crypto/s2n_pq.h" #include "error/s2n_errno.h" -#include "pq-crypto/s2n_pq.h" #include "tls/s2n_auth_selection.h" #include "tls/s2n_kex.h" #include "tls/s2n_psk.h" diff --git a/tls/s2n_config.c b/tls/s2n_config.c index b5e2c27b2cc..45bb6790995 100644 --- a/tls/s2n_config.c +++ b/tls/s2n_config.c @@ -20,8 +20,8 @@ #include "crypto/s2n_certificate.h" #include "crypto/s2n_fips.h" #include "crypto/s2n_hkdf.h" +#include "crypto/s2n_pq.h" #include "error/s2n_errno.h" -#include "pq-crypto/s2n_pq.h" #include "tls/s2n_cipher_preferences.h" #include "tls/s2n_internal.h" #include "tls/s2n_ktls.h" diff --git a/tls/s2n_kem.c b/tls/s2n_kem.c index e044e34e734..59d26d48b06 100644 --- a/tls/s2n_kem.c +++ b/tls/s2n_kem.c @@ -15,35 +15,14 @@ #include "tls/s2n_kem.h" -#include "pq-crypto/s2n_kyber_evp.h" -#include "pq-crypto/s2n_pq.h" +#include "crypto/s2n_kyber_evp.h" +#include "crypto/s2n_pq.h" #include "stuffer/s2n_stuffer.h" #include "tls/extensions/s2n_key_share.h" #include "tls/s2n_tls_parameters.h" #include "utils/s2n_mem.h" #include "utils/s2n_safety.h" -#if defined(S2N_NO_PQ) /* If S2N_NO_PQ, pq-crypto won't be compiled so define relevant stubs here. 
*/ - -int s2n_kyber_evp_generate_keypair(IN const struct s2n_kem *kem, OUT uint8_t *public_key, OUT uint8_t *secret_key) -{ - POSIX_BAIL(S2N_ERR_UNIMPLEMENTED); -} - -int s2n_kyber_evp_encapsulate(IN const struct s2n_kem *kem, OUT uint8_t *ciphertext, OUT uint8_t *shared_secret, - IN const uint8_t *public_key) -{ - POSIX_BAIL(S2N_ERR_UNIMPLEMENTED); -} - -int s2n_kyber_evp_decapsulate(IN const struct s2n_kem *kem, OUT uint8_t *shared_secret, IN const uint8_t *ciphertext, - IN const uint8_t *secret_key) -{ - POSIX_BAIL(S2N_ERR_UNIMPLEMENTED); -} - -#endif - /* The KEM IDs and names come from https://tools.ietf.org/html/draft-campagna-tls-bike-sike-hybrid */ const struct s2n_kem s2n_kyber_512_r3 = { diff --git a/tls/s2n_kex.c b/tls/s2n_kex.c index ac7529c5371..d4fb59fb00f 100644 --- a/tls/s2n_kex.c +++ b/tls/s2n_kex.c @@ -15,7 +15,7 @@ #include "tls/s2n_kex.h" -#include "pq-crypto/s2n_pq.h" +#include "crypto/s2n_pq.h" #include "tls/s2n_cipher_preferences.h" #include "tls/s2n_cipher_suites.h" #include "tls/s2n_client_key_exchange.h" @@ -121,7 +121,7 @@ static S2N_RESULT s2n_configure_kem(const struct s2n_cipher_suite *cipher_suite, RESULT_ENSURE_REF(cipher_suite); RESULT_ENSURE_REF(conn); - RESULT_ENSURE(s2n_pq_is_enabled(), S2N_ERR_PQ_DISABLED); + RESULT_ENSURE(s2n_pq_is_enabled(), S2N_ERR_NO_SUPPORTED_LIBCRYPTO_API); const struct s2n_kem_preferences *kem_preferences = NULL; RESULT_GUARD_POSIX(s2n_connection_get_kem_preferences(conn, &kem_preferences)); diff --git a/tls/s2n_server_hello_retry.c b/tls/s2n_server_hello_retry.c index 7ba57f8423e..54d8dcb022d 100644 --- a/tls/s2n_server_hello_retry.c +++ b/tls/s2n_server_hello_retry.c @@ -14,8 +14,8 @@ */ #include +#include "crypto/s2n_pq.h" #include "error/s2n_errno.h" -#include "pq-crypto/s2n_pq.h" #include "tls/s2n_cipher_suites.h" #include "tls/s2n_server_extensions.h" #include "tls/s2n_tls.h" @@ -102,7 +102,7 @@ int s2n_server_hello_retry_recv(struct s2n_connection *conn) if (kem_group != NULL) { /* If PQ is 
disabled, the client should not have sent any PQ IDs * in the supported_groups list of the initial ClientHello */ - POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_PQ_DISABLED); + POSIX_ENSURE(s2n_pq_is_enabled(), S2N_ERR_NO_SUPPORTED_LIBCRYPTO_API); new_key_share_requested = (kem_group != conn->kex_params.client_kem_group_params.kem_group); } diff --git a/utils/s2n_init.c b/utils/s2n_init.c index 0347ae12ecb..3c561106ea8 100644 --- a/utils/s2n_init.c +++ b/utils/s2n_init.c @@ -19,7 +19,6 @@ #include "crypto/s2n_locking.h" #include "error/s2n_errno.h" #include "openssl/opensslv.h" -#include "pq-crypto/s2n_pq.h" #include "tls/extensions/s2n_client_key_share.h" #include "tls/extensions/s2n_extension_type.h" #include "tls/s2n_cipher_suites.h" @@ -77,7 +76,6 @@ int s2n_init(void) POSIX_GUARD(s2n_security_policies_init()); POSIX_GUARD(s2n_config_defaults_init()); POSIX_GUARD(s2n_extension_type_init()); - POSIX_GUARD_RESULT(s2n_pq_init()); POSIX_GUARD_RESULT(s2n_tls13_empty_transcripts_init()); POSIX_GUARD_RESULT(s2n_atomic_init());