| // SPDX-License-Identifier: MIT |
| |
| #include "sha3.h" |
| #include "sha3x4.h" |
| |
| #include "xkcp_dispatch.h" |
| |
| #include <oqs/common.h> |
| #include <oqs/oqsconfig.h> |
| |
| #if OQS_USE_PTHREADS |
| #include <pthread.h> |
| #endif |
| #include <stddef.h> |
| #include <stdint.h> |
| #include <stdlib.h> |
| #include <string.h> |
| |
/* Alignment, in bytes, requested for every four-way context allocation. */
#define KECCAK_X4_CTX_ALIGNMENT 32
/*
 * Unrounded context size: 800 bytes followed by one uint64_t. The helpers
 * in this file keep their byte counter in that trailing word (index 100
 * when the buffer is viewed as uint64_t[]).
 * Note: the name deliberately avoids a leading underscore + capital letter,
 * which is reserved for the implementation.
 */
#define KECCAK_X4_CTX_RAW_BYTES (800 + sizeof(uint64_t))
/* Context size rounded up to the next multiple of KECCAK_X4_CTX_ALIGNMENT. */
#define KECCAK_X4_CTX_BYTES (KECCAK_X4_CTX_ALIGNMENT * \
    ((KECCAK_X4_CTX_RAW_BYTES + KECCAK_X4_CTX_ALIGNMENT - 1) / KECCAK_X4_CTX_ALIGNMENT))
| |
| #if OQS_USE_PTHREADS |
| static pthread_once_t dispatch_once_control = PTHREAD_ONCE_INIT; |
| #endif |
| |
| static KeccakX4InitFn *Keccak_X4_Initialize_ptr = NULL; |
| static KeccakX4AddByteFn *Keccak_X4_AddByte_ptr = NULL; |
| static KeccakX4AddBytesFn *Keccak_X4_AddBytes_ptr = NULL; |
| static KeccakX4PermuteFn *Keccak_X4_Permute_ptr = NULL; |
| static KeccakX4ExtractBytesFn *Keccak_X4_ExtractBytes_ptr = NULL; |
| |
| static void Keccak_X4_Dispatch(void) { |
| // TODO: Simplify this when we have a Windows-compatible AVX2 implementation of SHA3 |
| #if defined(OQS_DIST_X86_64_BUILD) |
| #if defined(OQS_ENABLE_SHA3_xkcp_low_avx2) |
| if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2)) { |
| Keccak_X4_Initialize_ptr = &KeccakP1600times4_InitializeAll_avx2; |
| Keccak_X4_AddByte_ptr = &KeccakP1600times4_AddByte_avx2; |
| Keccak_X4_AddBytes_ptr = &KeccakP1600times4_AddBytes_avx2; |
| Keccak_X4_Permute_ptr = &KeccakP1600times4_PermuteAll_24rounds_avx2; |
| Keccak_X4_ExtractBytes_ptr = &KeccakP1600times4_ExtractBytes_avx2; |
| } else { |
| Keccak_X4_Initialize_ptr = &KeccakP1600times4_InitializeAll_serial; |
| Keccak_X4_AddByte_ptr = &KeccakP1600times4_AddByte_serial; |
| Keccak_X4_AddBytes_ptr = &KeccakP1600times4_AddBytes_serial; |
| Keccak_X4_Permute_ptr = &KeccakP1600times4_PermuteAll_24rounds_serial; |
| Keccak_X4_ExtractBytes_ptr = &KeccakP1600times4_ExtractBytes_serial; |
| } |
| #else // Windows |
| Keccak_X4_Initialize_ptr = &KeccakP1600times4_InitializeAll_serial; |
| Keccak_X4_AddByte_ptr = &KeccakP1600times4_AddByte_serial; |
| Keccak_X4_AddBytes_ptr = &KeccakP1600times4_AddBytes_serial; |
| Keccak_X4_Permute_ptr = &KeccakP1600times4_PermuteAll_24rounds_serial; |
| Keccak_X4_ExtractBytes_ptr = &KeccakP1600times4_ExtractBytes_serial; |
| #endif |
| #else |
| Keccak_X4_Initialize_ptr = &KeccakP1600times4_InitializeAll; |
| Keccak_X4_AddByte_ptr = &KeccakP1600times4_AddByte; |
| Keccak_X4_AddBytes_ptr = &KeccakP1600times4_AddBytes; |
| Keccak_X4_Permute_ptr = &KeccakP1600times4_PermuteAll_24rounds; |
| Keccak_X4_ExtractBytes_ptr = &KeccakP1600times4_ExtractBytes; |
| #endif |
| } |
| |
| static void keccak_x4_inc_reset(uint64_t *s) { |
| #if OQS_USE_PTHREADS |
| pthread_once(&dispatch_once_control, Keccak_X4_Dispatch); |
| #else |
| if (Keccak_X4_Initialize_ptr == NULL) { |
| Keccak_X4_Dispatch(); |
| } |
| #endif |
| (*Keccak_X4_Initialize_ptr)(s); |
| s[100] = 0; |
| } |
| |
/*
 * Feeds inlen bytes from each of four input streams into the four lanes.
 *
 * s:       context buffer; s[100] holds the number of bytes already added
 *          to the current, not yet permuted, block.
 * r:       rate in bytes.
 * in0..3:  one input pointer per lane, each providing inlen bytes.
 * inlen:   number of bytes taken from every input.
 *
 * Steps: top up a partially filled block if the input can complete it,
 * then process whole r-byte blocks, then add the remaining tail without
 * permuting and record its length in s[100].
 */
static void keccak_x4_inc_absorb(uint64_t *s, uint32_t r,
                                 const uint8_t *in0, const uint8_t *in1, const uint8_t *in2, const uint8_t *in3, size_t inlen) {
    const uint8_t *src[4] = { in0, in1, in2, in3 };
    uint64_t *const filled = &s[100];
    const uint64_t room = r - *filled;
    unsigned int lane;

    /* Complete the pending block first, but only if there is one. */
    if (*filled != 0 && inlen >= room) {
        for (lane = 0; lane < 4; lane++) {
            Keccak_X4_AddBytes_ptr(s, lane, src[lane], (unsigned int)*filled, (unsigned int)room);
            src[lane] += room;
        }
        Keccak_X4_Permute_ptr(s);
        inlen -= room;
        *filled = 0;
    }

    /* Whole blocks. */
    for (; inlen >= r; inlen -= r) {
        for (lane = 0; lane < 4; lane++) {
            Keccak_X4_AddBytes_ptr(s, lane, src[lane], 0, (unsigned int)r);
            src[lane] += r;
        }
        Keccak_X4_Permute_ptr(s);
    }

    /* Tail: added at the current fill position, no permutation. */
    for (lane = 0; lane < 4; lane++) {
        Keccak_X4_AddBytes_ptr(s, lane, src[lane], (unsigned int)*filled, (unsigned int)inlen);
    }
    *filled += inlen;
}
| |
/*
 * Applies the closing padding to all four lanes.
 *
 * s: context buffer; s[100] is the current fill position and is reset to 0.
 * r: rate in bytes.
 * p: byte added at the current fill position of every lane.
 *
 * After p has been added to each lane, 0x80 is added at byte r - 1 of
 * each lane. No permutation is performed here.
 */
static void keccak_x4_inc_finalize(uint64_t *s, uint32_t r, uint8_t p) {
    unsigned int lane;

    for (lane = 0; lane < 4; lane++) {
        Keccak_X4_AddByte_ptr(s, lane, p, (unsigned int)s[100]);
    }

    for (lane = 0; lane < 4; lane++) {
        Keccak_X4_AddByte_ptr(s, lane, 0x80, (unsigned int)(r - 1));
    }

    s[100] = 0;
}
| |
/*
 * Writes outlen bytes of output per lane to out0..out3.
 *
 * s: context buffer; on entry s[100] is the number of bytes of the current
 *    block that have not been handed out yet, on exit it is updated to
 *    reflect what this call consumed.
 * r: rate in bytes.
 *
 * While more output is requested than is left in the current block, the
 * remainder of the block is extracted, the state is permuted and a full
 * r bytes become available. The final extraction serves the rest.
 */
static void keccak_x4_inc_squeeze(uint8_t *out0, uint8_t *out1, uint8_t *out2, uint8_t *out3,
                                  size_t outlen, uint64_t *s, uint32_t r) {
    uint8_t *dst[4] = { out0, out1, out2, out3 };
    uint64_t *const avail = &s[100];
    unsigned int lane;

    while (outlen > *avail) {
        /* Drain what is left of the current block, starting at offset r - avail. */
        for (lane = 0; lane < 4; lane++) {
            Keccak_X4_ExtractBytes_ptr(s, lane, dst[lane], (unsigned int)(r - *avail), (unsigned int)*avail);
        }
        Keccak_X4_Permute_ptr(s);
        for (lane = 0; lane < 4; lane++) {
            dst[lane] += *avail;
        }
        outlen -= *avail;
        *avail = r;
    }

    for (lane = 0; lane < 4; lane++) {
        Keccak_X4_ExtractBytes_ptr(s, lane, dst[lane], (unsigned int)(r - *avail), (unsigned int)outlen);
    }

    *avail -= outlen;
}
| |
| /********** SHAKE128 ***********/ |
| |
| void OQS_SHA3_shake128_x4(uint8_t *out0, uint8_t *out1, uint8_t *out2, uint8_t *out3, size_t outlen, const uint8_t *in0, const uint8_t *in1, const uint8_t *in2, const uint8_t *in3, size_t inlen) { |
| OQS_SHA3_shake128_x4_inc_ctx s; |
| OQS_SHA3_shake128_x4_inc_init(&s); |
| OQS_SHA3_shake128_x4_inc_absorb(&s, in0, in1, in2, in3, inlen); |
| OQS_SHA3_shake128_x4_inc_finalize(&s); |
| OQS_SHA3_shake128_x4_inc_squeeze(out0, out1, out2, out3, outlen, &s); |
| OQS_SHA3_shake128_x4_inc_ctx_release(&s); |
| } |
| |
| /* SHAKE128 incremental */ |
| |
| void OQS_SHA3_shake128_x4_inc_init(OQS_SHA3_shake128_x4_inc_ctx *state) { |
| state->ctx = OQS_MEM_aligned_alloc(KECCAK_X4_CTX_ALIGNMENT, KECCAK_X4_CTX_BYTES); |
| if (state->ctx == NULL) { |
| exit(111); |
| } |
| keccak_x4_inc_reset((uint64_t *)state->ctx); |
| } |
| |
| void OQS_SHA3_shake128_x4_inc_absorb(OQS_SHA3_shake128_x4_inc_ctx *state, const uint8_t *in0, const uint8_t *in1, const uint8_t *in2, const uint8_t *in3, size_t inlen) { |
| keccak_x4_inc_absorb((uint64_t *)state->ctx, OQS_SHA3_SHAKE128_RATE, in0, in1, in2, in3, inlen); |
| } |
| |
| void OQS_SHA3_shake128_x4_inc_finalize(OQS_SHA3_shake128_x4_inc_ctx *state) { |
| keccak_x4_inc_finalize((uint64_t *)state->ctx, OQS_SHA3_SHAKE128_RATE, 0x1F); |
| } |
| |
| void OQS_SHA3_shake128_x4_inc_squeeze(uint8_t *out0, uint8_t *out1, uint8_t *out2, uint8_t *out3, size_t outlen, OQS_SHA3_shake128_x4_inc_ctx *state) { |
| keccak_x4_inc_squeeze(out0, out1, out2, out3, outlen, (uint64_t *)state->ctx, OQS_SHA3_SHAKE128_RATE); |
| } |
| |
| void OQS_SHA3_shake128_x4_inc_ctx_clone(OQS_SHA3_shake128_x4_inc_ctx *dest, const OQS_SHA3_shake128_x4_inc_ctx *src) { |
| memcpy(dest->ctx, src->ctx, KECCAK_X4_CTX_BYTES); |
| } |
| |
| void OQS_SHA3_shake128_x4_inc_ctx_release(OQS_SHA3_shake128_x4_inc_ctx *state) { |
| OQS_MEM_aligned_free(state->ctx); |
| } |
| |
| void OQS_SHA3_shake128_x4_inc_ctx_reset(OQS_SHA3_shake128_x4_inc_ctx *state) { |
| keccak_x4_inc_reset((uint64_t *)state->ctx); |
| } |
| |
| /********** SHAKE256 ***********/ |
| |
| void OQS_SHA3_shake256_x4(uint8_t *out0, uint8_t *out1, uint8_t *out2, uint8_t *out3, size_t outlen, const uint8_t *in0, const uint8_t *in1, const uint8_t *in2, const uint8_t *in3, size_t inlen) { |
| OQS_SHA3_shake256_x4_inc_ctx s; |
| OQS_SHA3_shake256_x4_inc_init(&s); |
| OQS_SHA3_shake256_x4_inc_absorb(&s, in0, in1, in2, in3, inlen); |
| OQS_SHA3_shake256_x4_inc_finalize(&s); |
| OQS_SHA3_shake256_x4_inc_squeeze(out0, out1, out2, out3, outlen, &s); |
| OQS_SHA3_shake256_x4_inc_ctx_release(&s); |
| } |
| |
| /* SHAKE256 incremental */ |
| |
| void OQS_SHA3_shake256_x4_inc_init(OQS_SHA3_shake256_x4_inc_ctx *state) { |
| state->ctx = OQS_MEM_aligned_alloc(KECCAK_X4_CTX_ALIGNMENT, KECCAK_X4_CTX_BYTES); |
| if (state->ctx == NULL) { |
| exit(111); |
| } |
| keccak_x4_inc_reset((uint64_t *)state->ctx); |
| } |
| |
| void OQS_SHA3_shake256_x4_inc_absorb(OQS_SHA3_shake256_x4_inc_ctx *state, const uint8_t *in0, const uint8_t *in1, const uint8_t *in2, const uint8_t *in3, size_t inlen) { |
| keccak_x4_inc_absorb((uint64_t *)state->ctx, OQS_SHA3_SHAKE256_RATE, in0, in1, in2, in3, inlen); |
| } |
| |
| void OQS_SHA3_shake256_x4_inc_finalize(OQS_SHA3_shake256_x4_inc_ctx *state) { |
| keccak_x4_inc_finalize((uint64_t *)state->ctx, OQS_SHA3_SHAKE256_RATE, 0x1F); |
| } |
| |
| void OQS_SHA3_shake256_x4_inc_squeeze(uint8_t *out0, uint8_t *out1, uint8_t *out2, uint8_t *out3, size_t outlen, OQS_SHA3_shake256_x4_inc_ctx *state) { |
| keccak_x4_inc_squeeze(out0, out1, out2, out3, outlen, (uint64_t *)state->ctx, OQS_SHA3_SHAKE256_RATE); |
| } |
| |
| void OQS_SHA3_shake256_x4_inc_ctx_clone(OQS_SHA3_shake256_x4_inc_ctx *dest, const OQS_SHA3_shake256_x4_inc_ctx *src) { |
| memcpy(dest->ctx, src->ctx, KECCAK_X4_CTX_BYTES); |
| } |
| |
| void OQS_SHA3_shake256_x4_inc_ctx_release(OQS_SHA3_shake256_x4_inc_ctx *state) { |
| OQS_MEM_aligned_free(state->ctx); |
| } |
| |
| void OQS_SHA3_shake256_x4_inc_ctx_reset(OQS_SHA3_shake256_x4_inc_ctx *state) { |
| keccak_x4_inc_reset((uint64_t *)state->ctx); |
| } |