blob: 25686b2bbc89aafce47471137aa1340a6b120c1f [file] [log] [blame]
#[cfg(feature = "simd")]
use std::mem::transmute;
use std::ops::{Add, BitXor, Mul};
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "simd", repr(simd))]
#[allow(non_camel_case_types)]
pub struct u64x2(pub u64, pub u64);
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "simd", repr(simd))]
#[allow(non_camel_case_types)]
struct u32x4(u32, u32, u32, u32);
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "simd", repr(simd))]
#[allow(non_camel_case_types)]
struct u8x16(u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8);
#[cfg(feature = "simd")]
extern "platform-intrinsic" {
fn x86_mm_mul_epu32(x: u32x4, y: u32x4) -> u64x2;
fn simd_add<T>(x: T, y: T) -> T;
fn simd_mul<T>(x: T, y: T) -> T;
fn simd_xor<T>(x: T, y: T) -> T;
fn simd_shr<T>(x: T, y: T) -> T;
fn simd_shl<T>(x: T, y: T) -> T;
fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U;
}
impl Add for u64x2 {
type Output = Self;
#[cfg(feature = "simd")]
#[inline(always)]
fn add(self, r: Self) -> Self { unsafe { simd_add(self, r) } }
#[cfg(not(feature = "simd"))]
#[inline(always)]
fn add(self, r: Self) -> Self {
u64x2(self.0.wrapping_add(r.0), self.1.wrapping_add(r.1))
}
}
impl Mul for u64x2 {
type Output = Self;
#[cfg(feature = "simd")]
#[inline(always)]
fn mul(self, r: Self) -> Self { unsafe { simd_mul(self, r) } }
#[cfg(not(feature = "simd"))]
fn mul(self, r: Self) -> Self {
u64x2(self.0.wrapping_mul(r.0), self.1.wrapping_mul(r.1))
}
}
impl BitXor for u64x2 {
type Output = Self;
#[cfg(feature = "simd")]
#[inline(always)]
fn bitxor(self, r: Self) -> u64x2 { unsafe { simd_xor(self, r) } }
#[cfg(not(feature = "simd"))]
#[inline(always)]
fn bitxor(self, r: Self) -> u64x2 { u64x2(self.0 ^ r.0, self.1 ^ r.1) }
}
#[cfg_attr(rustfmt, rustfmt_skip)]
#[cfg(feature = "simd")]
impl u8x16 {
#[inline(always)]
fn rotr_32_u64x2(self) -> u64x2 {
unsafe {
let rv: Self = simd_shuffle16(self, self, [4, 5, 6, 7, 0, 1, 2, 3,
12, 13, 14, 15, 8, 9, 10, 11]);
transmute(rv)
}
}
#[inline(always)]
fn rotr_24_u64x2(self) -> u64x2 {
// recall that rotating right = rotating towards lsb
unsafe {
let rv: Self = simd_shuffle16(self, self, [3, 4, 5, 6, 7, 0, 1, 2,
11, 12, 13, 14, 15, 8, 9, 10]);
transmute(rv)
}
}
#[inline(always)]
fn rotr_16_u64x2(self) -> u64x2 {
unsafe {
let rv: Self = simd_shuffle16(self, self, [2, 3, 4, 5, 6, 7, 0, 1,
10, 11, 12, 13, 14, 15, 8, 9]);
transmute(rv)
}
}
#[inline(always)]
fn rotr_8_u64x2(self) -> u64x2 {
unsafe {
let rv: Self = simd_shuffle16(self, self, [1, 2, 3, 4, 5, 6, 7, 0,
9, 10, 11, 12, 13, 14, 15, 8]);
transmute(rv)
}
}
}
fn lo(n: u64) -> u64 { n & 0xffffffff }
impl u64x2 {
#[cfg(feature = "simd")]
#[inline(always)]
fn as_u8x16(self) -> u8x16 { unsafe { transmute(self) } }
#[cfg(feature = "simd")]
#[inline(always)]
pub fn lower_mult(self, r: Self) -> Self {
unsafe {
let (lhs, rhs): (u32x4, u32x4) = (transmute(self), transmute(r));
x86_mm_mul_epu32(lhs, rhs)
}
}
#[cfg(not(feature = "simd"))]
#[inline(always)]
pub fn lower_mult(self, r: Self) -> Self {
u64x2(lo(self.0) * lo(r.0), lo(self.1) * lo(r.1))
}
#[cfg(feature = "simd")]
#[inline(always)]
pub fn rotate_right(self, n: u32) -> Self {
match n {
32 => self.as_u8x16().rotr_32_u64x2(),
24 => self.as_u8x16().rotr_24_u64x2(),
16 => self.as_u8x16().rotr_16_u64x2(),
8 => self.as_u8x16().rotr_8_u64x2(),
_ => unsafe {
let k = (64 - n) as u64;
simd_shl(self, u64x2(k, k)) ^
simd_shr(self, u64x2(n as u64, n as u64))
},
}
}
#[cfg(not(feature = "simd"))]
#[inline(always)]
pub fn rotate_right(self, n: u32) -> Self {
u64x2(self.0.rotate_right(n), self.1.rotate_right(n))
}
#[inline(always)]
pub fn cross_swap(self, r: Self) -> (Self, Self) {
let u64x2(v4, v5) = self; // so the rule is:
let u64x2(v6, v7) = r; // +--+
(u64x2(v7, v4), u64x2(v5, v6)) // \/
// /\
// 1 0
}
}
#[cfg(test)]
mod test {
use super::u64x2;
// XXX Use function call to workaround rust-lang/rust#33764.
fn t0() -> u64x2 {
u64x2(0xdeadbeef_01234567, 0xcafe3210_babe9932)
}
fn t1() -> u64x2 {
u64x2(0x09990578_01234567, 0x1128f9a9_88e89448)
}
#[test]
fn test_rotate_right() {
for &s in [8 as u32, 16, 24, 32].iter() {
assert_eq!(t0().rotate_right(s).0, t0().0.rotate_right(s));
assert_eq!(t0().rotate_right(s).1, t0().1.rotate_right(s));
}
}
#[test]
fn test_lower_mult() {
use super::lo;
assert_eq!(t0().lower_mult(t1()), t1().lower_mult(t0()));
assert_eq!(t0().lower_mult(t1()).0, lo(t0().0) * lo(t1().0));
assert_eq!(t0().lower_mult(t1()).1, lo(t0().1) * lo(t1().1));
}
}