| //! Fixsliced implementations of AES-128, AES-192 and AES-256 (32-bit) |
| //! adapted from the C implementation |
| //! |
| //! All implementations are fully bitsliced and do not rely on any |
| //! Look-Up Table (LUT). |
| //! |
| //! See the paper at <https://eprint.iacr.org/2020/1123.pdf> for more details. |
| //! |
| //! # Author (original C code) |
| //! |
| //! Alexandre Adomnicai, Nanyang Technological University, Singapore |
| //! <[email protected]> |
| //! |
| //! Originally licensed MIT. Relicensed as Apache 2.0+MIT with permission. |
| |
| #![allow(clippy::unreadable_literal)] |
| |
| use crate::Block; |
| use cipher::{consts::U2, generic_array::GenericArray}; |
| |
/// AES block batch size for this implementation: two 128-bit blocks fill the 256-bit
/// bitsliced state.
pub(crate) type FixsliceBlocks = U2;
| |
| pub(crate) type BatchBlocks = GenericArray<Block, FixsliceBlocks>; |
| |
/// AES-128 round keys (11 round keys of 8 bitsliced words each)
pub(crate) type FixsliceKeys128 = [u32; 88];

/// AES-192 round keys (13 round keys of 8 bitsliced words each)
pub(crate) type FixsliceKeys192 = [u32; 104];

/// AES-256 round keys (15 round keys of 8 bitsliced words each)
pub(crate) type FixsliceKeys256 = [u32; 120];
| |
| /// 256-bit internal state |
| pub(crate) type State = [u32; 8]; |
| |
| /// Fully bitsliced AES-128 key schedule to match the fully-fixsliced representation. |
| pub(crate) fn aes128_key_schedule(key: &[u8; 16]) -> FixsliceKeys128 { |
| let mut rkeys = [0u32; 88]; |
| |
| bitslice(&mut rkeys[..8], key, key); |
| |
| let mut rk_off = 0; |
| for rcon in 0..10 { |
| memshift32(&mut rkeys, rk_off); |
| rk_off += 8; |
| |
| sub_bytes(&mut rkeys[rk_off..(rk_off + 8)]); |
| sub_bytes_nots(&mut rkeys[rk_off..(rk_off + 8)]); |
| |
| if rcon < 8 { |
| add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon); |
| } else { |
| add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon - 8); |
| add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon - 7); |
| add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon - 5); |
| add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon - 4); |
| } |
| |
| xor_columns(&mut rkeys, rk_off, 8, ror_distance(1, 3)); |
| } |
| |
| // Adjust to match fixslicing format |
| #[cfg(aes_compact)] |
| { |
| for i in (8..88).step_by(16) { |
| inv_shift_rows_1(&mut rkeys[i..(i + 8)]); |
| } |
| } |
| #[cfg(not(aes_compact))] |
| { |
| for i in (8..72).step_by(32) { |
| inv_shift_rows_1(&mut rkeys[i..(i + 8)]); |
| inv_shift_rows_2(&mut rkeys[(i + 8)..(i + 16)]); |
| inv_shift_rows_3(&mut rkeys[(i + 16)..(i + 24)]); |
| } |
| inv_shift_rows_1(&mut rkeys[72..80]); |
| } |
| |
| // Account for NOTs removed from sub_bytes |
| for i in 1..11 { |
| sub_bytes_nots(&mut rkeys[(i * 8)..(i * 8 + 8)]); |
| } |
| |
| rkeys |
| } |
| |
| /// Fully bitsliced AES-192 key schedule to match the fully-fixsliced representation. |
| pub(crate) fn aes192_key_schedule(key: &[u8; 24]) -> FixsliceKeys192 { |
| let mut rkeys = [0u32; 104]; |
| let mut tmp = [0u32; 8]; |
| |
| bitslice(&mut rkeys[..8], &key[..16], &key[..16]); |
| bitslice(&mut tmp, &key[8..], &key[8..]); |
| |
| let mut rcon = 0; |
| let mut rk_off = 8; |
| |
| loop { |
| for i in 0..8 { |
| rkeys[rk_off + i] = |
| (0x0f0f0f0f & (tmp[i] >> 4)) | (0xf0f0f0f0 & (rkeys[(rk_off - 8) + i] << 4)); |
| } |
| |
| sub_bytes(&mut tmp); |
| sub_bytes_nots(&mut tmp); |
| |
| add_round_constant_bit(&mut tmp, rcon); |
| rcon += 1; |
| |
| for i in 0..8 { |
| let mut ti = rkeys[rk_off + i]; |
| ti ^= 0x30303030 & ror(tmp[i], ror_distance(1, 1)); |
| ti ^= 0xc0c0c0c0 & (ti << 2); |
| tmp[i] = ti; |
| } |
| rkeys[rk_off..(rk_off + 8)].copy_from_slice(&tmp); |
| rk_off += 8; |
| |
| for i in 0..8 { |
| let ui = tmp[i]; |
| let mut ti = (0x0f0f0f0f & (rkeys[(rk_off - 16) + i] >> 4)) | (0xf0f0f0f0 & (ui << 4)); |
| ti ^= 0x03030303 & (ui >> 6); |
| tmp[i] = |
| ti ^ (0xfcfcfcfc & (ti << 2)) ^ (0xf0f0f0f0 & (ti << 4)) ^ (0xc0c0c0c0 & (ti << 6)); |
| } |
| rkeys[rk_off..(rk_off + 8)].copy_from_slice(&tmp); |
| rk_off += 8; |
| |
| sub_bytes(&mut tmp); |
| sub_bytes_nots(&mut tmp); |
| |
| add_round_constant_bit(&mut tmp, rcon); |
| rcon += 1; |
| |
| for i in 0..8 { |
| let mut ti = (0x0f0f0f0f & (rkeys[(rk_off - 16) + i] >> 4)) |
| | (0xf0f0f0f0 & (rkeys[(rk_off - 8) + i] << 4)); |
| ti ^= 0x03030303 & ror(tmp[i], ror_distance(1, 3)); |
| rkeys[rk_off + i] = |
| ti ^ (0xfcfcfcfc & (ti << 2)) ^ (0xf0f0f0f0 & (ti << 4)) ^ (0xc0c0c0c0 & (ti << 6)); |
| } |
| rk_off += 8; |
| |
| if rcon >= 8 { |
| break; |
| } |
| |
| for i in 0..8 { |
| let ui = rkeys[(rk_off - 8) + i]; |
| let mut ti = rkeys[(rk_off - 16) + i]; |
| ti ^= 0x30303030 & (ui >> 2); |
| ti ^= 0xc0c0c0c0 & (ti << 2); |
| tmp[i] = ti; |
| } |
| } |
| |
| // Adjust to match fixslicing format |
| #[cfg(aes_compact)] |
| { |
| for i in (8..104).step_by(16) { |
| inv_shift_rows_1(&mut rkeys[i..(i + 8)]); |
| } |
| } |
| #[cfg(not(aes_compact))] |
| { |
| for i in (0..96).step_by(32) { |
| inv_shift_rows_1(&mut rkeys[(i + 8)..(i + 16)]); |
| inv_shift_rows_2(&mut rkeys[(i + 16)..(i + 24)]); |
| inv_shift_rows_3(&mut rkeys[(i + 24)..(i + 32)]); |
| } |
| } |
| |
| // Account for NOTs removed from sub_bytes |
| for i in 1..13 { |
| sub_bytes_nots(&mut rkeys[(i * 8)..(i * 8 + 8)]); |
| } |
| |
| rkeys |
| } |
| |
| /// Fully bitsliced AES-256 key schedule to match the fully-fixsliced representation. |
| pub(crate) fn aes256_key_schedule(key: &[u8; 32]) -> FixsliceKeys256 { |
| let mut rkeys = [0u32; 120]; |
| |
| bitslice(&mut rkeys[..8], &key[..16], &key[..16]); |
| bitslice(&mut rkeys[8..16], &key[16..], &key[16..]); |
| |
| let mut rk_off = 8; |
| |
| let mut rcon = 0; |
| loop { |
| memshift32(&mut rkeys, rk_off); |
| rk_off += 8; |
| |
| sub_bytes(&mut rkeys[rk_off..(rk_off + 8)]); |
| sub_bytes_nots(&mut rkeys[rk_off..(rk_off + 8)]); |
| |
| add_round_constant_bit(&mut rkeys[rk_off..(rk_off + 8)], rcon); |
| xor_columns(&mut rkeys, rk_off, 16, ror_distance(1, 3)); |
| rcon += 1; |
| |
| if rcon == 7 { |
| break; |
| } |
| |
| memshift32(&mut rkeys, rk_off); |
| rk_off += 8; |
| |
| sub_bytes(&mut rkeys[rk_off..(rk_off + 8)]); |
| sub_bytes_nots(&mut rkeys[rk_off..(rk_off + 8)]); |
| |
| xor_columns(&mut rkeys, rk_off, 16, ror_distance(0, 3)); |
| } |
| |
| // Adjust to match fixslicing format |
| #[cfg(aes_compact)] |
| { |
| for i in (8..120).step_by(16) { |
| inv_shift_rows_1(&mut rkeys[i..(i + 8)]); |
| } |
| } |
| #[cfg(not(aes_compact))] |
| { |
| for i in (8..104).step_by(32) { |
| inv_shift_rows_1(&mut rkeys[i..(i + 8)]); |
| inv_shift_rows_2(&mut rkeys[(i + 8)..(i + 16)]); |
| inv_shift_rows_3(&mut rkeys[(i + 16)..(i + 24)]); |
| } |
| inv_shift_rows_1(&mut rkeys[104..112]); |
| } |
| |
| // Account for NOTs removed from sub_bytes |
| for i in 1..15 { |
| sub_bytes_nots(&mut rkeys[(i * 8)..(i * 8 + 8)]); |
| } |
| |
| rkeys |
| } |
| |
| /// Fully-fixsliced AES-128 decryption (the InvShiftRows is completely omitted). |
| /// |
/// Decrypts two blocks in parallel.
| pub(crate) fn aes128_decrypt(rkeys: &FixsliceKeys128, blocks: &BatchBlocks) -> BatchBlocks { |
| let mut state = State::default(); |
| |
| bitslice(&mut state, &blocks[0], &blocks[1]); |
| |
| add_round_key(&mut state, &rkeys[80..]); |
| inv_sub_bytes(&mut state); |
| |
| #[cfg(not(aes_compact))] |
| { |
| inv_shift_rows_2(&mut state); |
| } |
| |
| let mut rk_off = 72; |
| loop { |
| #[cfg(aes_compact)] |
| { |
| inv_shift_rows_2(&mut state); |
| } |
| |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| inv_mix_columns_1(&mut state); |
| inv_sub_bytes(&mut state); |
| rk_off -= 8; |
| |
| if rk_off == 0 { |
| break; |
| } |
| |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| inv_mix_columns_0(&mut state); |
| inv_sub_bytes(&mut state); |
| rk_off -= 8; |
| |
| #[cfg(not(aes_compact))] |
| { |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| inv_mix_columns_3(&mut state); |
| inv_sub_bytes(&mut state); |
| rk_off -= 8; |
| |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| inv_mix_columns_2(&mut state); |
| inv_sub_bytes(&mut state); |
| rk_off -= 8; |
| } |
| } |
| |
| add_round_key(&mut state, &rkeys[..8]); |
| |
| inv_bitslice(&state) |
| } |
| |
| /// Fully-fixsliced AES-128 encryption (the ShiftRows is completely omitted). |
| /// |
/// Encrypts two blocks in parallel.
| pub(crate) fn aes128_encrypt(rkeys: &FixsliceKeys128, blocks: &BatchBlocks) -> BatchBlocks { |
| let mut state = State::default(); |
| |
| bitslice(&mut state, &blocks[0], &blocks[1]); |
| |
| add_round_key(&mut state, &rkeys[..8]); |
| |
| let mut rk_off = 8; |
| loop { |
| sub_bytes(&mut state); |
| mix_columns_1(&mut state); |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| rk_off += 8; |
| |
| #[cfg(aes_compact)] |
| { |
| shift_rows_2(&mut state); |
| } |
| |
| if rk_off == 80 { |
| break; |
| } |
| |
| #[cfg(not(aes_compact))] |
| { |
| sub_bytes(&mut state); |
| mix_columns_2(&mut state); |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| rk_off += 8; |
| |
| sub_bytes(&mut state); |
| mix_columns_3(&mut state); |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| rk_off += 8; |
| } |
| |
| sub_bytes(&mut state); |
| mix_columns_0(&mut state); |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| rk_off += 8; |
| } |
| |
| #[cfg(not(aes_compact))] |
| { |
| shift_rows_2(&mut state); |
| } |
| |
| sub_bytes(&mut state); |
| add_round_key(&mut state, &rkeys[80..]); |
| |
| inv_bitslice(&state) |
| } |
| |
| /// Fully-fixsliced AES-192 decryption (the InvShiftRows is completely omitted). |
| /// |
/// Decrypts two blocks in parallel.
| pub(crate) fn aes192_decrypt(rkeys: &FixsliceKeys192, blocks: &BatchBlocks) -> BatchBlocks { |
| let mut state = State::default(); |
| |
| bitslice(&mut state, &blocks[0], &blocks[1]); |
| |
| add_round_key(&mut state, &rkeys[96..]); |
| inv_sub_bytes(&mut state); |
| |
| let mut rk_off = 88; |
| loop { |
| #[cfg(aes_compact)] |
| { |
| inv_shift_rows_2(&mut state); |
| } |
| #[cfg(not(aes_compact))] |
| { |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| inv_mix_columns_3(&mut state); |
| inv_sub_bytes(&mut state); |
| rk_off -= 8; |
| |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| inv_mix_columns_2(&mut state); |
| inv_sub_bytes(&mut state); |
| rk_off -= 8; |
| } |
| |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| inv_mix_columns_1(&mut state); |
| inv_sub_bytes(&mut state); |
| rk_off -= 8; |
| |
| if rk_off == 0 { |
| break; |
| } |
| |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| inv_mix_columns_0(&mut state); |
| inv_sub_bytes(&mut state); |
| rk_off -= 8; |
| } |
| |
| add_round_key(&mut state, &rkeys[..8]); |
| |
| inv_bitslice(&state) |
| } |
| |
| /// Fully-fixsliced AES-192 encryption (the ShiftRows is completely omitted). |
| /// |
/// Encrypts two blocks in parallel.
| pub(crate) fn aes192_encrypt(rkeys: &FixsliceKeys192, blocks: &BatchBlocks) -> BatchBlocks { |
| let mut state = State::default(); |
| |
| bitslice(&mut state, &blocks[0], &blocks[1]); |
| |
| add_round_key(&mut state, &rkeys[..8]); |
| |
| let mut rk_off = 8; |
| loop { |
| sub_bytes(&mut state); |
| mix_columns_1(&mut state); |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| rk_off += 8; |
| |
| #[cfg(aes_compact)] |
| { |
| shift_rows_2(&mut state); |
| } |
| #[cfg(not(aes_compact))] |
| { |
| sub_bytes(&mut state); |
| mix_columns_2(&mut state); |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| rk_off += 8; |
| |
| sub_bytes(&mut state); |
| mix_columns_3(&mut state); |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| rk_off += 8; |
| } |
| |
| if rk_off == 96 { |
| break; |
| } |
| |
| sub_bytes(&mut state); |
| mix_columns_0(&mut state); |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| rk_off += 8; |
| } |
| |
| sub_bytes(&mut state); |
| add_round_key(&mut state, &rkeys[96..]); |
| |
| inv_bitslice(&state) |
| } |
| |
| /// Fully-fixsliced AES-256 decryption (the InvShiftRows is completely omitted). |
| /// |
/// Decrypts two blocks in parallel.
| pub(crate) fn aes256_decrypt(rkeys: &FixsliceKeys256, blocks: &BatchBlocks) -> BatchBlocks { |
| let mut state = State::default(); |
| |
| bitslice(&mut state, &blocks[0], &blocks[1]); |
| |
| add_round_key(&mut state, &rkeys[112..]); |
| inv_sub_bytes(&mut state); |
| |
| #[cfg(not(aes_compact))] |
| { |
| inv_shift_rows_2(&mut state); |
| } |
| |
| let mut rk_off = 104; |
| loop { |
| #[cfg(aes_compact)] |
| { |
| inv_shift_rows_2(&mut state); |
| } |
| |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| inv_mix_columns_1(&mut state); |
| inv_sub_bytes(&mut state); |
| rk_off -= 8; |
| |
| if rk_off == 0 { |
| break; |
| } |
| |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| inv_mix_columns_0(&mut state); |
| inv_sub_bytes(&mut state); |
| rk_off -= 8; |
| |
| #[cfg(not(aes_compact))] |
| { |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| inv_mix_columns_3(&mut state); |
| inv_sub_bytes(&mut state); |
| rk_off -= 8; |
| |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| inv_mix_columns_2(&mut state); |
| inv_sub_bytes(&mut state); |
| rk_off -= 8; |
| } |
| } |
| |
| add_round_key(&mut state, &rkeys[..8]); |
| |
| inv_bitslice(&state) |
| } |
| |
| /// Fully-fixsliced AES-256 encryption (the ShiftRows is completely omitted). |
| /// |
/// Encrypts two blocks in parallel.
| pub(crate) fn aes256_encrypt(rkeys: &FixsliceKeys256, blocks: &BatchBlocks) -> BatchBlocks { |
| let mut state = State::default(); |
| |
| bitslice(&mut state, &blocks[0], &blocks[1]); |
| |
| add_round_key(&mut state, &rkeys[..8]); |
| |
| let mut rk_off = 8; |
| loop { |
| sub_bytes(&mut state); |
| mix_columns_1(&mut state); |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| rk_off += 8; |
| |
| #[cfg(aes_compact)] |
| { |
| shift_rows_2(&mut state); |
| } |
| |
| if rk_off == 112 { |
| break; |
| } |
| |
| #[cfg(not(aes_compact))] |
| { |
| sub_bytes(&mut state); |
| mix_columns_2(&mut state); |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| rk_off += 8; |
| |
| sub_bytes(&mut state); |
| mix_columns_3(&mut state); |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| rk_off += 8; |
| } |
| |
| sub_bytes(&mut state); |
| mix_columns_0(&mut state); |
| add_round_key(&mut state, &rkeys[rk_off..(rk_off + 8)]); |
| rk_off += 8; |
| } |
| |
| #[cfg(not(aes_compact))] |
| { |
| shift_rows_2(&mut state); |
| } |
| |
| sub_bytes(&mut state); |
| add_round_key(&mut state, &rkeys[112..]); |
| |
| inv_bitslice(&state) |
| } |
| |
/// Bitsliced implementation of the inverse AES Sbox.
///
/// Note that the 4 bitwise NOT (^= 0xffffffff) are accounted for here so that it is a true
/// inverse of 'sub_bytes'.
| fn inv_sub_bytes(state: &mut [u32]) { |
| debug_assert_eq!(state.len(), 8); |
| |
| // Scheduled using https://github.com/Ko-/aes-armcortexm/tree/public/scheduler |
| // Inline "stack" comments reflect suggested stores and loads (ARM Cortex-M3 and M4) |
| |
| let u7 = state[0]; |
| let u6 = state[1]; |
| let u5 = state[2]; |
| let u4 = state[3]; |
| let u3 = state[4]; |
| let u2 = state[5]; |
| let u1 = state[6]; |
| let u0 = state[7]; |
| |
| let t23 = u0 ^ u3; |
| let t8 = u1 ^ t23; |
| let m2 = t23 & t8; |
| let t4 = u4 ^ t8; |
| let t22 = u1 ^ u3; |
| let t2 = u0 ^ u1; |
| let t1 = u3 ^ u4; |
| // t23 -> stack |
| let t9 = u7 ^ t1; |
| // t8 -> stack |
| let m7 = t22 & t9; |
| // t9 -> stack |
| let t24 = u4 ^ u7; |
| // m7 -> stack |
| let t10 = t2 ^ t24; |
| // u4 -> stack |
| let m14 = t2 & t10; |
| let r5 = u6 ^ u7; |
| // m2 -> stack |
| let t3 = t1 ^ r5; |
| // t2 -> stack |
| let t13 = t2 ^ r5; |
| let t19 = t22 ^ r5; |
| // t3 -> stack |
| let t17 = u2 ^ t19; |
| // t4 -> stack |
| let t25 = u2 ^ t1; |
| let r13 = u1 ^ u6; |
| // t25 -> stack |
| let t20 = t24 ^ r13; |
| // t17 -> stack |
| let m9 = t20 & t17; |
| // t20 -> stack |
| let r17 = u2 ^ u5; |
| // t22 -> stack |
| let t6 = t22 ^ r17; |
| // t13 -> stack |
| let m1 = t13 & t6; |
| let y5 = u0 ^ r17; |
| let m4 = t19 & y5; |
| let m5 = m4 ^ m1; |
| let m17 = m5 ^ t24; |
| let r18 = u5 ^ u6; |
| let t27 = t1 ^ r18; |
| let t15 = t10 ^ t27; |
| // t6 -> stack |
| let m11 = t1 & t15; |
| let m15 = m14 ^ m11; |
| let m21 = m17 ^ m15; |
| // t1 -> stack |
| // t4 <- stack |
| let m12 = t4 & t27; |
| let m13 = m12 ^ m11; |
| let t14 = t10 ^ r18; |
| let m3 = t14 ^ m1; |
| // m2 <- stack |
| let m16 = m3 ^ m2; |
| let m20 = m16 ^ m13; |
| // u4 <- stack |
| let r19 = u2 ^ u4; |
| let t16 = r13 ^ r19; |
| // t3 <- stack |
| let t26 = t3 ^ t16; |
| let m6 = t3 & t16; |
| let m8 = t26 ^ m6; |
| // t10 -> stack |
| // m7 <- stack |
| let m18 = m8 ^ m7; |
| let m22 = m18 ^ m13; |
| let m25 = m22 & m20; |
| let m26 = m21 ^ m25; |
| let m10 = m9 ^ m6; |
| let m19 = m10 ^ m15; |
| // t25 <- stack |
| let m23 = m19 ^ t25; |
| let m28 = m23 ^ m25; |
| let m24 = m22 ^ m23; |
| let m30 = m26 & m24; |
| let m39 = m23 ^ m30; |
| let m48 = m39 & y5; |
| let m57 = m39 & t19; |
| // m48 -> stack |
| let m36 = m24 ^ m25; |
| let m31 = m20 & m23; |
| let m27 = m20 ^ m21; |
| let m32 = m27 & m31; |
| let m29 = m28 & m27; |
| let m37 = m21 ^ m29; |
| // m39 -> stack |
| let m42 = m37 ^ m39; |
| let m52 = m42 & t15; |
| // t27 -> stack |
| // t1 <- stack |
| let m61 = m42 & t1; |
| let p0 = m52 ^ m61; |
| let p16 = m57 ^ m61; |
| // m57 -> stack |
| // t20 <- stack |
| let m60 = m37 & t20; |
| // p16 -> stack |
| // t17 <- stack |
| let m51 = m37 & t17; |
| let m33 = m27 ^ m25; |
| let m38 = m32 ^ m33; |
| let m43 = m37 ^ m38; |
| let m49 = m43 & t16; |
| let p6 = m49 ^ m60; |
| let p13 = m49 ^ m51; |
| let m58 = m43 & t3; |
| // t9 <- stack |
| let m50 = m38 & t9; |
| // t22 <- stack |
| let m59 = m38 & t22; |
| // p6 -> stack |
| let p1 = m58 ^ m59; |
| let p7 = p0 ^ p1; |
| let m34 = m21 & m22; |
| let m35 = m24 & m34; |
| let m40 = m35 ^ m36; |
| let m41 = m38 ^ m40; |
| let m45 = m42 ^ m41; |
| // t27 <- stack |
| let m53 = m45 & t27; |
| let p8 = m50 ^ m53; |
| let p23 = p7 ^ p8; |
| // t4 <- stack |
| let m62 = m45 & t4; |
| let p14 = m49 ^ m62; |
| let s6 = p14 ^ p23; |
| // t10 <- stack |
| let m54 = m41 & t10; |
| let p2 = m54 ^ m62; |
| let p22 = p2 ^ p7; |
| let s0 = p13 ^ p22; |
| let p17 = m58 ^ p2; |
| let p15 = m54 ^ m59; |
| // t2 <- stack |
| let m63 = m41 & t2; |
| // m39 <- stack |
| let m44 = m39 ^ m40; |
| // p17 -> stack |
| // t6 <- stack |
| let m46 = m44 & t6; |
| let p5 = m46 ^ m51; |
| // p23 -> stack |
| let p18 = m63 ^ p5; |
| let p24 = p5 ^ p7; |
| // m48 <- stack |
| let p12 = m46 ^ m48; |
| let s3 = p12 ^ p22; |
| // t13 <- stack |
| let m55 = m44 & t13; |
| let p9 = m55 ^ m63; |
| // p16 <- stack |
| let s7 = p9 ^ p16; |
| // t8 <- stack |
| let m47 = m40 & t8; |
| let p3 = m47 ^ m50; |
| let p19 = p2 ^ p3; |
| let s5 = p19 ^ p24; |
| let p11 = p0 ^ p3; |
| let p26 = p9 ^ p11; |
| // t23 <- stack |
| let m56 = m40 & t23; |
| let p4 = m48 ^ m56; |
| // p6 <- stack |
| let p20 = p4 ^ p6; |
| let p29 = p15 ^ p20; |
| let s1 = p26 ^ p29; |
| // m57 <- stack |
| let p10 = m57 ^ p4; |
| let p27 = p10 ^ p18; |
| // p23 <- stack |
| let s4 = p23 ^ p27; |
| let p25 = p6 ^ p10; |
| let p28 = p11 ^ p25; |
| // p17 <- stack |
| let s2 = p17 ^ p28; |
| |
| state[0] = s7; |
| state[1] = s6; |
| state[2] = s5; |
| state[3] = s4; |
| state[4] = s3; |
| state[5] = s2; |
| state[6] = s1; |
| state[7] = s0; |
| } |
| |
| /// Bitsliced implementation of the AES Sbox based on Boyar, Peralta and Calik. |
| /// |
| /// See: <http://www.cs.yale.edu/homes/peralta/CircuitStuff/SLP_AES_113.txt> |
| /// |
| /// Note that the 4 bitwise NOT (^= 0xffffffff) are moved to the key schedule. |
| fn sub_bytes(state: &mut [u32]) { |
| debug_assert_eq!(state.len(), 8); |
| |
| // Scheduled using https://github.com/Ko-/aes-armcortexm/tree/public/scheduler |
| // Inline "stack" comments reflect suggested stores and loads (ARM Cortex-M3 and M4) |
| |
| let u7 = state[0]; |
| let u6 = state[1]; |
| let u5 = state[2]; |
| let u4 = state[3]; |
| let u3 = state[4]; |
| let u2 = state[5]; |
| let u1 = state[6]; |
| let u0 = state[7]; |
| |
| let y14 = u3 ^ u5; |
| let y13 = u0 ^ u6; |
| let y12 = y13 ^ y14; |
| let t1 = u4 ^ y12; |
| let y15 = t1 ^ u5; |
| let t2 = y12 & y15; |
| let y6 = y15 ^ u7; |
| let y20 = t1 ^ u1; |
| // y12 -> stack |
| let y9 = u0 ^ u3; |
| // y20 -> stack |
| let y11 = y20 ^ y9; |
| // y9 -> stack |
| let t12 = y9 & y11; |
| // y6 -> stack |
| let y7 = u7 ^ y11; |
| let y8 = u0 ^ u5; |
| let t0 = u1 ^ u2; |
| let y10 = y15 ^ t0; |
| // y15 -> stack |
| let y17 = y10 ^ y11; |
| // y14 -> stack |
| let t13 = y14 & y17; |
| let t14 = t13 ^ t12; |
| // y17 -> stack |
| let y19 = y10 ^ y8; |
| // y10 -> stack |
| let t15 = y8 & y10; |
| let t16 = t15 ^ t12; |
| let y16 = t0 ^ y11; |
| // y11 -> stack |
| let y21 = y13 ^ y16; |
| // y13 -> stack |
| let t7 = y13 & y16; |
| // y16 -> stack |
| let y18 = u0 ^ y16; |
| let y1 = t0 ^ u7; |
| let y4 = y1 ^ u3; |
| // u7 -> stack |
| let t5 = y4 & u7; |
| let t6 = t5 ^ t2; |
| let t18 = t6 ^ t16; |
| let t22 = t18 ^ y19; |
| let y2 = y1 ^ u0; |
| let t10 = y2 & y7; |
| let t11 = t10 ^ t7; |
| let t20 = t11 ^ t16; |
| let t24 = t20 ^ y18; |
| let y5 = y1 ^ u6; |
| let t8 = y5 & y1; |
| let t9 = t8 ^ t7; |
| let t19 = t9 ^ t14; |
| let t23 = t19 ^ y21; |
| let y3 = y5 ^ y8; |
| // y6 <- stack |
| let t3 = y3 & y6; |
| let t4 = t3 ^ t2; |
| // y20 <- stack |
| let t17 = t4 ^ y20; |
| let t21 = t17 ^ t14; |
| let t26 = t21 & t23; |
| let t27 = t24 ^ t26; |
| let t31 = t22 ^ t26; |
| let t25 = t21 ^ t22; |
| // y4 -> stack |
| let t28 = t25 & t27; |
| let t29 = t28 ^ t22; |
| let z14 = t29 & y2; |
| let z5 = t29 & y7; |
| let t30 = t23 ^ t24; |
| let t32 = t31 & t30; |
| let t33 = t32 ^ t24; |
| let t35 = t27 ^ t33; |
| let t36 = t24 & t35; |
| let t38 = t27 ^ t36; |
| let t39 = t29 & t38; |
| let t40 = t25 ^ t39; |
| let t43 = t29 ^ t40; |
| // y16 <- stack |
| let z3 = t43 & y16; |
| let tc12 = z3 ^ z5; |
| // tc12 -> stack |
| // y13 <- stack |
| let z12 = t43 & y13; |
| let z13 = t40 & y5; |
| let z4 = t40 & y1; |
| let tc6 = z3 ^ z4; |
| let t34 = t23 ^ t33; |
| let t37 = t36 ^ t34; |
| let t41 = t40 ^ t37; |
| // y10 <- stack |
| let z8 = t41 & y10; |
| let z17 = t41 & y8; |
| let t44 = t33 ^ t37; |
| // y15 <- stack |
| let z0 = t44 & y15; |
| // z17 -> stack |
| // y12 <- stack |
| let z9 = t44 & y12; |
| let z10 = t37 & y3; |
| let z1 = t37 & y6; |
| let tc5 = z1 ^ z0; |
| let tc11 = tc6 ^ tc5; |
| // y4 <- stack |
| let z11 = t33 & y4; |
| let t42 = t29 ^ t33; |
| let t45 = t42 ^ t41; |
| // y17 <- stack |
| let z7 = t45 & y17; |
| let tc8 = z7 ^ tc6; |
| // y14 <- stack |
| let z16 = t45 & y14; |
| // y11 <- stack |
| let z6 = t42 & y11; |
| let tc16 = z6 ^ tc8; |
| // z14 -> stack |
| // y9 <- stack |
| let z15 = t42 & y9; |
| let tc20 = z15 ^ tc16; |
| let tc1 = z15 ^ z16; |
| let tc2 = z10 ^ tc1; |
| let tc21 = tc2 ^ z11; |
| let tc3 = z9 ^ tc2; |
| let s0 = tc3 ^ tc16; |
| let s3 = tc3 ^ tc11; |
| let s1 = s3 ^ tc16; |
| let tc13 = z13 ^ tc1; |
| // u7 <- stack |
| let z2 = t33 & u7; |
| let tc4 = z0 ^ z2; |
| let tc7 = z12 ^ tc4; |
| let tc9 = z8 ^ tc7; |
| let tc10 = tc8 ^ tc9; |
| // z14 <- stack |
| let tc17 = z14 ^ tc10; |
| let s5 = tc21 ^ tc17; |
| let tc26 = tc17 ^ tc20; |
| // z17 <- stack |
| let s2 = tc26 ^ z17; |
| // tc12 <- stack |
| let tc14 = tc4 ^ tc12; |
| let tc18 = tc13 ^ tc14; |
| let s6 = tc10 ^ tc18; |
| let s7 = z12 ^ tc18; |
| let s4 = tc14 ^ s3; |
| |
| state[0] = s7; |
| state[1] = s6; |
| state[2] = s5; |
| state[3] = s4; |
| state[4] = s3; |
| state[5] = s2; |
| state[6] = s1; |
| state[7] = s0; |
| } |
| |
/// Applies the bitwise NOT operations that are omitted from the bitsliced S-box.
| #[inline] |
| fn sub_bytes_nots(state: &mut [u32]) { |
| debug_assert_eq!(state.len(), 8); |
| state[0] ^= 0xffffffff; |
| state[1] ^= 0xffffffff; |
| state[5] ^= 0xffffffff; |
| state[6] ^= 0xffffffff; |
| } |
| |
| /// Computation of the MixColumns transformation in the fixsliced representation, with different |
| /// rotations used according to the round number mod 4. |
| /// |
/// Based on Käsper-Schwabe, similar to <https://github.com/Ko-/aes-armcortexm>.
| macro_rules! define_mix_columns { |
| ( |
| $name:ident, |
| $name_inv:ident, |
| $first_rotate:path, |
| $second_rotate:path |
| ) => { |
| #[rustfmt::skip] |
| fn $name(state: &mut State) { |
| let (a0, a1, a2, a3, a4, a5, a6, a7) = ( |
| state[0], state[1], state[2], state[3], state[4], state[5], state[6], state[7] |
| ); |
| let (b0, b1, b2, b3, b4, b5, b6, b7) = ( |
| $first_rotate(a0), |
| $first_rotate(a1), |
| $first_rotate(a2), |
| $first_rotate(a3), |
| $first_rotate(a4), |
| $first_rotate(a5), |
| $first_rotate(a6), |
| $first_rotate(a7), |
| ); |
| let (c0, c1, c2, c3, c4, c5, c6, c7) = ( |
| a0 ^ b0, |
| a1 ^ b1, |
| a2 ^ b2, |
| a3 ^ b3, |
| a4 ^ b4, |
| a5 ^ b5, |
| a6 ^ b6, |
| a7 ^ b7, |
| ); |
| state[0] = b0 ^ c7 ^ $second_rotate(c0); |
| state[1] = b1 ^ c0 ^ c7 ^ $second_rotate(c1); |
| state[2] = b2 ^ c1 ^ $second_rotate(c2); |
| state[3] = b3 ^ c2 ^ c7 ^ $second_rotate(c3); |
| state[4] = b4 ^ c3 ^ c7 ^ $second_rotate(c4); |
| state[5] = b5 ^ c4 ^ $second_rotate(c5); |
| state[6] = b6 ^ c5 ^ $second_rotate(c6); |
| state[7] = b7 ^ c6 ^ $second_rotate(c7); |
| } |
| |
| #[rustfmt::skip] |
| fn $name_inv(state: &mut State) { |
| let (a0, a1, a2, a3, a4, a5, a6, a7) = ( |
| state[0], state[1], state[2], state[3], state[4], state[5], state[6], state[7] |
| ); |
| let (b0, b1, b2, b3, b4, b5, b6, b7) = ( |
| $first_rotate(a0), |
| $first_rotate(a1), |
| $first_rotate(a2), |
| $first_rotate(a3), |
| $first_rotate(a4), |
| $first_rotate(a5), |
| $first_rotate(a6), |
| $first_rotate(a7), |
| ); |
| let (c0, c1, c2, c3, c4, c5, c6, c7) = ( |
| a0 ^ b0, |
| a1 ^ b1, |
| a2 ^ b2, |
| a3 ^ b3, |
| a4 ^ b4, |
| a5 ^ b5, |
| a6 ^ b6, |
| a7 ^ b7, |
| ); |
| let (d0, d1, d2, d3, d4, d5, d6, d7) = ( |
| a0 ^ c7, |
| a1 ^ c0 ^ c7, |
| a2 ^ c1, |
| a3 ^ c2 ^ c7, |
| a4 ^ c3 ^ c7, |
| a5 ^ c4, |
| a6 ^ c5, |
| a7 ^ c6, |
| ); |
| let (e0, e1, e2, e3, e4, e5, e6, e7) = ( |
| c0 ^ d6, |
| c1 ^ d6 ^ d7, |
| c2 ^ d0 ^ d7, |
| c3 ^ d1 ^ d6, |
| c4 ^ d2 ^ d6 ^ d7, |
| c5 ^ d3 ^ d7, |
| c6 ^ d4, |
| c7 ^ d5, |
| ); |
| state[0] = d0 ^ e0 ^ $second_rotate(e0); |
| state[1] = d1 ^ e1 ^ $second_rotate(e1); |
| state[2] = d2 ^ e2 ^ $second_rotate(e2); |
| state[3] = d3 ^ e3 ^ $second_rotate(e3); |
| state[4] = d4 ^ e4 ^ $second_rotate(e4); |
| state[5] = d5 ^ e5 ^ $second_rotate(e5); |
| state[6] = d6 ^ e6 ^ $second_rotate(e6); |
| state[7] = d7 ^ e7 ^ $second_rotate(e7); |
| } |
| } |
| } |
| |
| define_mix_columns!( |
| mix_columns_0, |
| inv_mix_columns_0, |
| rotate_rows_1, |
| rotate_rows_2 |
| ); |
| |
| define_mix_columns!( |
| mix_columns_1, |
| inv_mix_columns_1, |
| rotate_rows_and_columns_1_1, |
| rotate_rows_and_columns_2_2 |
| ); |
| |
| #[cfg(not(aes_compact))] |
| define_mix_columns!( |
| mix_columns_2, |
| inv_mix_columns_2, |
| rotate_rows_and_columns_1_2, |
| rotate_rows_2 |
| ); |
| |
| #[cfg(not(aes_compact))] |
| define_mix_columns!( |
| mix_columns_3, |
| inv_mix_columns_3, |
| rotate_rows_and_columns_1_3, |
| rotate_rows_and_columns_2_2 |
| ); |
| |
/// Swaps the bits of `a` selected by `mask` with the bits `shift` positions above them
/// (a self-inverse "delta swap").
#[inline]
fn delta_swap_1(a: &mut u32, shift: u32, mask: u32) {
| let t = (*a ^ ((*a) >> shift)) & mask; |
| *a ^= t ^ (t << shift); |
| } |
| |
/// Swaps the bits of `a` selected by `mask` with the bits of `b` located `shift` positions
/// above them.
#[inline]
fn delta_swap_2(a: &mut u32, b: &mut u32, shift: u32, mask: u32) {
| let t = (*a ^ ((*b) >> shift)) & mask; |
| *a ^= t; |
| *b ^= t << shift; |
| } |
| |
| /// Applies ShiftRows once on an AES state (or key). |
| #[cfg(any(not(aes_compact), feature = "hazmat"))] |
| #[inline] |
| fn shift_rows_1(state: &mut [u32]) { |
| debug_assert_eq!(state.len(), 8); |
| for x in state.iter_mut() { |
| delta_swap_1(x, 4, 0x0c0f0300); |
| delta_swap_1(x, 2, 0x33003300); |
| } |
| } |
| |
| /// Applies ShiftRows twice on an AES state (or key). |
| #[inline] |
| fn shift_rows_2(state: &mut [u32]) { |
| debug_assert_eq!(state.len(), 8); |
| for x in state.iter_mut() { |
| delta_swap_1(x, 4, 0x0f000f00); |
| } |
| } |
| |
| /// Applies ShiftRows three times on an AES state (or key). |
| #[inline] |
| fn shift_rows_3(state: &mut [u32]) { |
| debug_assert_eq!(state.len(), 8); |
| for x in state.iter_mut() { |
| delta_swap_1(x, 4, 0x030f0c00); |
| delta_swap_1(x, 2, 0x33003300); |
| } |
| } |
| |
| #[inline(always)] |
| fn inv_shift_rows_1(state: &mut [u32]) { |
| shift_rows_3(state); |
| } |
| |
| #[inline(always)] |
| fn inv_shift_rows_2(state: &mut [u32]) { |
| shift_rows_2(state); |
| } |
| |
| #[cfg(not(aes_compact))] |
| #[inline(always)] |
| fn inv_shift_rows_3(state: &mut [u32]) { |
| shift_rows_1(state); |
| } |
| |
| /// XOR the columns after the S-box during the key schedule round function. |
| /// |
| /// The `idx_xor` parameter refers to the index of the previous round key that is |
| /// involved in the XOR computation (should be 8 and 16 for AES-128 and AES-256, |
| /// respectively). |
| /// |
| /// The `idx_ror` parameter refers to the rotation value, which varies between the |
| /// different key schedules. |
| fn xor_columns(rkeys: &mut [u32], offset: usize, idx_xor: usize, idx_ror: u32) { |
| for i in 0..8 { |
| let off_i = offset + i; |
| let rk = rkeys[off_i - idx_xor] ^ (0x03030303 & ror(rkeys[off_i], idx_ror)); |
| rkeys[off_i] = |
| rk ^ (0xfcfcfcfc & (rk << 2)) ^ (0xf0f0f0f0 & (rk << 4)) ^ (0xc0c0c0c0 & (rk << 6)); |
| } |
| } |
| |
/// Bitslice two 128-bit input blocks `input0` and `input1` into a 256-bit internal state.
| fn bitslice(output: &mut [u32], input0: &[u8], input1: &[u8]) { |
| debug_assert_eq!(output.len(), 8); |
| debug_assert_eq!(input0.len(), 16); |
| debug_assert_eq!(input1.len(), 16); |
| |
| // Bitslicing is a bit index manipulation. 256 bits of data means each bit is positioned at an |
| // 8-bit index. AES data is 2 blocks, each one a 4x4 column-major matrix of bytes, so the |
| // index is initially ([b]lock, [c]olumn, [r]ow, [p]osition): |
| // b0 c1 c0 r1 r0 p2 p1 p0 |
| // |
| // The desired bitsliced data groups first by bit position, then row, column, block: |
| // p2 p1 p0 r1 r0 c1 c0 b0 |
| |
| // Interleave the columns on input (note the order of input) |
| // b0 c1 c0 __ __ __ __ __ => c1 c0 b0 __ __ __ __ __ |
| let mut t0 = u32::from_le_bytes(input0[0x00..0x04].try_into().unwrap()); |
| let mut t2 = u32::from_le_bytes(input0[0x04..0x08].try_into().unwrap()); |
| let mut t4 = u32::from_le_bytes(input0[0x08..0x0c].try_into().unwrap()); |
| let mut t6 = u32::from_le_bytes(input0[0x0c..0x10].try_into().unwrap()); |
| let mut t1 = u32::from_le_bytes(input1[0x00..0x04].try_into().unwrap()); |
| let mut t3 = u32::from_le_bytes(input1[0x04..0x08].try_into().unwrap()); |
| let mut t5 = u32::from_le_bytes(input1[0x08..0x0c].try_into().unwrap()); |
| let mut t7 = u32::from_le_bytes(input1[0x0c..0x10].try_into().unwrap()); |
| |
| // Bit Index Swap 5 <-> 0: |
| // __ __ b0 __ __ __ __ p0 => __ __ p0 __ __ __ __ b0 |
| let m0 = 0x55555555; |
| delta_swap_2(&mut t1, &mut t0, 1, m0); |
| delta_swap_2(&mut t3, &mut t2, 1, m0); |
| delta_swap_2(&mut t5, &mut t4, 1, m0); |
| delta_swap_2(&mut t7, &mut t6, 1, m0); |
| |
| // Bit Index Swap 6 <-> 1: |
| // __ c0 __ __ __ __ p1 __ => __ p1 __ __ __ __ c0 __ |
| let m1 = 0x33333333; |
| delta_swap_2(&mut t2, &mut t0, 2, m1); |
| delta_swap_2(&mut t3, &mut t1, 2, m1); |
| delta_swap_2(&mut t6, &mut t4, 2, m1); |
| delta_swap_2(&mut t7, &mut t5, 2, m1); |
| |
| // Bit Index Swap 7 <-> 2: |
| // c1 __ __ __ __ p2 __ __ => p2 __ __ __ __ c1 __ __ |
| let m2 = 0x0f0f0f0f; |
| delta_swap_2(&mut t4, &mut t0, 4, m2); |
| delta_swap_2(&mut t5, &mut t1, 4, m2); |
| delta_swap_2(&mut t6, &mut t2, 4, m2); |
| delta_swap_2(&mut t7, &mut t3, 4, m2); |
| |
| // Final bitsliced bit index, as desired: |
| // p2 p1 p0 r1 r0 c1 c0 b0 |
| output[0] = t0; |
| output[1] = t1; |
| output[2] = t2; |
| output[3] = t3; |
| output[4] = t4; |
| output[5] = t5; |
| output[6] = t6; |
| output[7] = t7; |
| } |
| |
| /// Un-bitslice a 256-bit internal state into two 128-bit blocks of output. |
| fn inv_bitslice(input: &[u32]) -> BatchBlocks { |
| debug_assert_eq!(input.len(), 8); |
| |
| // Unbitslicing is a bit index manipulation. 256 bits of data means each bit is positioned at |
| // an 8-bit index. AES data is 2 blocks, each one a 4x4 column-major matrix of bytes, so the |
| // desired index for the output is ([b]lock, [c]olumn, [r]ow, [p]osition): |
| // b0 c1 c0 r1 r0 p2 p1 p0 |
| // |
| // The initially bitsliced data groups first by bit position, then row, column, block: |
| // p2 p1 p0 r1 r0 c1 c0 b0 |
| |
| let mut t0 = input[0]; |
| let mut t1 = input[1]; |
| let mut t2 = input[2]; |
| let mut t3 = input[3]; |
| let mut t4 = input[4]; |
| let mut t5 = input[5]; |
| let mut t6 = input[6]; |
| let mut t7 = input[7]; |
| |
    // TODO: these bit index swaps are identical to those in `bitslice`
| |
| // Bit Index Swap 5 <-> 0: |
| // __ __ p0 __ __ __ __ b0 => __ __ b0 __ __ __ __ p0 |
| let m0 = 0x55555555; |
| delta_swap_2(&mut t1, &mut t0, 1, m0); |
| delta_swap_2(&mut t3, &mut t2, 1, m0); |
| delta_swap_2(&mut t5, &mut t4, 1, m0); |
| delta_swap_2(&mut t7, &mut t6, 1, m0); |
| |
| // Bit Index Swap 6 <-> 1: |
| // __ p1 __ __ __ __ c0 __ => __ c0 __ __ __ __ p1 __ |
| let m1 = 0x33333333; |
| delta_swap_2(&mut t2, &mut t0, 2, m1); |
| delta_swap_2(&mut t3, &mut t1, 2, m1); |
| delta_swap_2(&mut t6, &mut t4, 2, m1); |
| delta_swap_2(&mut t7, &mut t5, 2, m1); |
| |
| // Bit Index Swap 7 <-> 2: |
| // p2 __ __ __ __ c1 __ __ => c1 __ __ __ __ p2 __ __ |
| let m2 = 0x0f0f0f0f; |
| delta_swap_2(&mut t4, &mut t0, 4, m2); |
| delta_swap_2(&mut t5, &mut t1, 4, m2); |
| delta_swap_2(&mut t6, &mut t2, 4, m2); |
| delta_swap_2(&mut t7, &mut t3, 4, m2); |
| |
| let mut output = BatchBlocks::default(); |
| // De-interleave the columns on output (note the order of output) |
| // c1 c0 b0 __ __ __ __ __ => b0 c1 c0 __ __ __ __ __ |
| output[0][0x00..0x04].copy_from_slice(&t0.to_le_bytes()); |
| output[0][0x04..0x08].copy_from_slice(&t2.to_le_bytes()); |
| output[0][0x08..0x0c].copy_from_slice(&t4.to_le_bytes()); |
| output[0][0x0c..0x10].copy_from_slice(&t6.to_le_bytes()); |
| output[1][0x00..0x04].copy_from_slice(&t1.to_le_bytes()); |
| output[1][0x04..0x08].copy_from_slice(&t3.to_le_bytes()); |
| output[1][0x08..0x0c].copy_from_slice(&t5.to_le_bytes()); |
| output[1][0x0c..0x10].copy_from_slice(&t7.to_le_bytes()); |
| |
| // Final AES bit index, as desired: |
| // b0 c1 c0 r1 r0 p2 p1 p0 |
| output |
| } |
| |
/// Copy eight 32-bit words (32 bytes) within the provided slice to a position eight words later
| fn memshift32(buffer: &mut [u32], src_offset: usize) { |
| debug_assert_eq!(src_offset % 8, 0); |
| |
| let dst_offset = src_offset + 8; |
| debug_assert!(dst_offset + 8 <= buffer.len()); |
| |
| for i in (0..8).rev() { |
| buffer[dst_offset + i] = buffer[src_offset + i]; |
| } |
| } |
| |
| /// XOR the round key to the internal state. The round keys are expected to be |
| /// pre-computed and to be packed in the fixsliced representation. |
| #[inline] |
| fn add_round_key(state: &mut State, rkey: &[u32]) { |
| debug_assert_eq!(rkey.len(), 8); |
| for (a, b) in state.iter_mut().zip(rkey) { |
| *a ^= b; |
| } |
| } |
| |
| #[inline(always)] |
| fn add_round_constant_bit(state: &mut [u32], bit: usize) { |
| state[bit] ^= 0x0000c000; |
| } |
| |
| #[inline(always)] |
| fn ror(x: u32, y: u32) -> u32 { |
| x.rotate_right(y) |
| } |
| |
/// Rotation distance (in bits) for a rotation by `rows` rows and `cols` columns of the
/// bitsliced state: a row step spans 8 bit positions and a column step 2, since two
/// blocks are interleaved.
#[inline(always)]
fn ror_distance(rows: u32, cols: u32) -> u32 {
| (rows << 3) + (cols << 1) |
| } |
| |
| #[inline(always)] |
| fn rotate_rows_1(x: u32) -> u32 { |
| ror(x, ror_distance(1, 0)) |
| } |
| |
| #[inline(always)] |
| fn rotate_rows_2(x: u32) -> u32 { |
| ror(x, ror_distance(2, 0)) |
| } |
| |
| #[inline(always)] |
| #[rustfmt::skip] |
| fn rotate_rows_and_columns_1_1(x: u32) -> u32 { |
| (ror(x, ror_distance(1, 1)) & 0x3f3f3f3f) | |
| (ror(x, ror_distance(0, 1)) & 0xc0c0c0c0) |
| } |
| |
| #[cfg(not(aes_compact))] |
| #[inline(always)] |
| #[rustfmt::skip] |
| fn rotate_rows_and_columns_1_2(x: u32) -> u32 { |
| (ror(x, ror_distance(1, 2)) & 0x0f0f0f0f) | |
| (ror(x, ror_distance(0, 2)) & 0xf0f0f0f0) |
| } |
| |
| #[cfg(not(aes_compact))] |
| #[inline(always)] |
| #[rustfmt::skip] |
| fn rotate_rows_and_columns_1_3(x: u32) -> u32 { |
| (ror(x, ror_distance(1, 3)) & 0x03030303) | |
| (ror(x, ror_distance(0, 3)) & 0xfcfcfcfc) |
| } |
| |
| #[inline(always)] |
| #[rustfmt::skip] |
| fn rotate_rows_and_columns_2_2(x: u32) -> u32 { |
| (ror(x, ror_distance(2, 2)) & 0x0f0f0f0f) | |
| (ror(x, ror_distance(1, 2)) & 0xf0f0f0f0) |
| } |
| |
| /// Low-level "hazmat" AES functions. |
| /// |
| /// Note: this isn't actually used in the `Aes128`/`Aes192`/`Aes256` |
| /// implementations in this crate, but instead provides raw access to |
| /// the AES round function gated under the `hazmat` crate feature. |
| #[cfg(feature = "hazmat")] |
| pub(crate) mod hazmat { |
| use super::{ |
| bitslice, inv_bitslice, inv_mix_columns_0, inv_shift_rows_1, inv_sub_bytes, mix_columns_0, |
| shift_rows_1, sub_bytes, sub_bytes_nots, State, |
| }; |
| use crate::{Block, Block8}; |
| |
| /// XOR the `src` block into the `dst` block in-place. |
| fn xor_in_place(dst: &mut Block, src: &Block) { |
| for (a, b) in dst.iter_mut().zip(src.as_slice()) { |
| *a ^= *b; |
| } |
| } |
| |
| /// Perform a bitslice operation, loading a single block. |
| fn bitslice_block(block: &Block) -> State { |
| let mut state = State::default(); |
| bitslice(&mut state, block, block); |
| state |
| } |
| |
| /// Perform an inverse bitslice operation, extracting a single block. |
| fn inv_bitslice_block(block: &mut Block, state: &State) { |
| let out = inv_bitslice(state); |
| block.copy_from_slice(&out[0]); |
| } |
| |
| /// AES cipher (encrypt) round function. |
| #[inline] |
| pub(crate) fn cipher_round(block: &mut Block, round_key: &Block) { |
| let mut state = bitslice_block(block); |
| sub_bytes(&mut state); |
| sub_bytes_nots(&mut state); |
| shift_rows_1(&mut state); |
| mix_columns_0(&mut state); |
| inv_bitslice_block(block, &state); |
| xor_in_place(block, round_key); |
| } |
| |
| /// AES cipher (encrypt) round function: parallel version. |
| #[inline] |
| pub(crate) fn cipher_round_par(blocks: &mut Block8, round_keys: &Block8) { |
| for (chunk, keys) in blocks.chunks_exact_mut(2).zip(round_keys.chunks_exact(2)) { |
| let mut state = State::default(); |
| bitslice(&mut state, &chunk[0], &chunk[1]); |
| sub_bytes(&mut state); |
| sub_bytes_nots(&mut state); |
| shift_rows_1(&mut state); |
| mix_columns_0(&mut state); |
| let res = inv_bitslice(&state); |
| |
| for i in 0..2 { |
| chunk[i] = res[i]; |
| xor_in_place(&mut chunk[i], &keys[i]); |
| } |
| } |
| } |
| |
    /// AES equivalent inverse cipher (decrypt) round function.
| #[inline] |
| pub(crate) fn equiv_inv_cipher_round(block: &mut Block, round_key: &Block) { |
| let mut state = bitslice_block(block); |
| sub_bytes_nots(&mut state); |
| inv_sub_bytes(&mut state); |
| inv_shift_rows_1(&mut state); |
| inv_mix_columns_0(&mut state); |
| inv_bitslice_block(block, &state); |
| xor_in_place(block, round_key); |
| } |
| |
    /// AES equivalent inverse cipher (decrypt) round function: parallel version.
| #[inline] |
| pub(crate) fn equiv_inv_cipher_round_par(blocks: &mut Block8, round_keys: &Block8) { |
| for (chunk, keys) in blocks.chunks_exact_mut(2).zip(round_keys.chunks_exact(2)) { |
| let mut state = State::default(); |
| bitslice(&mut state, &chunk[0], &chunk[1]); |
| sub_bytes_nots(&mut state); |
| inv_sub_bytes(&mut state); |
| inv_shift_rows_1(&mut state); |
| inv_mix_columns_0(&mut state); |
| let res = inv_bitslice(&state); |
| |
| for i in 0..2 { |
| chunk[i] = res[i]; |
| xor_in_place(&mut chunk[i], &keys[i]); |
| } |
| } |
| } |
| |
| /// AES mix columns function. |
| #[inline] |
| pub(crate) fn mix_columns(block: &mut Block) { |
| let mut state = bitslice_block(block); |
| mix_columns_0(&mut state); |
| inv_bitslice_block(block, &state); |
| } |
| |
| /// AES inverse mix columns function. |
| #[inline] |
| pub(crate) fn inv_mix_columns(block: &mut Block) { |
| let mut state = bitslice_block(block); |
| inv_mix_columns_0(&mut state); |
| inv_bitslice_block(block, &state); |
| } |
| } |