#![allow(dead_code)]

use std::arch::x86_64::*;
use std::fmt;

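/// A witness that the AVX2 extension is available on the current CPU.
///
/// Values of this type can only be built with `AVX2VectorBuilder::new`,
/// which performs runtime feature detection, so holding one is proof that
/// the `unsafe` AVX2 intrinsics below are safe to call. Since the type is
/// zero sized, it is free to copy and pass around.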
#[derive(Clone, Copy, Debug)]
pub struct AVX2VectorBuilder(());

impl AVX2VectorBuilder {
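    /// Create a new builder if and only if the current CPU supports AVX2,
    /// as determined by runtime feature detection.
    ///
    /// A minimal usage sketch (the constant chosen here is arbitrary):
    ///
    /// ```ignore
    /// if let Some(builder) = AVX2VectorBuilder::new() {
    ///     let v = builder.u8x32_splat(0x0F);
    ///     assert_eq!(v.extract(0), 0x0F);
    /// }
    /// ```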
    pub fn new() -> Option<AVX2VectorBuilder> {
        if is_x86_feature_detected!("avx2") {
            Some(AVX2VectorBuilder(()))
        } else {
            None
        }
    }

    /// Create a new u8x32 AVX2 vector where all of the bytes are set to
    /// the given value.
    #[inline]
    pub fn u8x32_splat(self, n: u8) -> u8x32 {
        // Safe because we know AVX2 is enabled.
        unsafe { u8x32::splat(n) }
    }

    /// Load 32 bytes from the given slice, with a bounds check.
    ///
    /// The slice must have a length of at least 32; only its first 32
    /// bytes are loaded.
    #[inline]
    pub fn u8x32_load_unaligned(self, slice: &[u8]) -> u8x32 {
        // Safe because we know AVX2 is enabled.
        unsafe { u8x32::load_unaligned(slice) }
    }

    /// Load 32 bytes from the given slice, without a bounds check.
    #[inline]
    pub unsafe fn u8x32_load_unchecked_unaligned(self, slice: &[u8]) -> u8x32 {
        // Safe because we know AVX2 is enabled, but still unsafe
        // because we aren't doing a bounds check.
        u8x32::load_unchecked_unaligned(slice)
    }

    /// Load 32 bytes from the given slice, with bounds and alignment
    /// checks.
    ///
    /// The slice must have a length of at least 32 and its pointer must
    /// be 32-byte aligned.
    #[inline]
    pub fn u8x32_load(self, slice: &[u8]) -> u8x32 {
        // Safe because we know AVX2 is enabled.
        unsafe { u8x32::load(slice) }
    }

    /// Load 32 bytes from the given slice, without bounds or alignment
    /// checks.
    #[inline]
    pub unsafe fn u8x32_load_unchecked(self, slice: &[u8]) -> u8x32 {
        // Safe because we know AVX2 is enabled, but still unsafe
        // because we aren't doing bounds or alignment checks.
        u8x32::load_unchecked(slice)
    }
}

/// A 256-bit SIMD vector viewed either as a single AVX2 register or as
/// 32 individual bytes.
///
/// Every bit pattern is a valid `[u8; 32]`, so the `bytes` view is always
/// safe to read and write; the `vector` view is only useful with the AVX2
/// intrinsics below.
#[derive(Clone, Copy)]
#[allow(non_camel_case_types)]
pub union u8x32 {
    vector: __m256i,
    bytes: [u8; 32],
}

impl u8x32 {
    #[inline]
    unsafe fn splat(n: u8) -> u8x32 {
        u8x32 { vector: _mm256_set1_epi8(n as i8) }
    }

    #[inline]
    unsafe fn load_unaligned(slice: &[u8]) -> u8x32 {
        assert!(slice.len() >= 32);
        u8x32::load_unchecked_unaligned(slice)
    }

    #[inline]
    unsafe fn load_unchecked_unaligned(slice: &[u8]) -> u8x32 {
        let p = slice.as_ptr() as *const __m256i;
        u8x32 { vector: _mm256_loadu_si256(p) }
    }

    #[inline]
    unsafe fn load(slice: &[u8]) -> u8x32 {
        assert!(slice.len() >= 32);
        assert!(slice.as_ptr() as usize % 32 == 0);
        u8x32::load_unchecked(slice)
    }

    #[inline]
    unsafe fn load_unchecked(slice: &[u8]) -> u8x32 {
        let p = slice.as_ptr() as *const __m256i;
        u8x32 { vector: _mm256_load_si256(p) }
    }

    /// Extract the byte at the given index.
    ///
    /// This panics if `i >= 32`.
    #[inline]
    pub fn extract(self, i: usize) -> u8 {
        // Safe because `bytes` is always accessible.
        unsafe { self.bytes[i] }
    }

    /// Replace the byte at the given index with the given value.
    ///
    /// This panics if `i >= 32`.
    #[inline]
    pub fn replace(&mut self, i: usize, byte: u8) {
        // Safe because `bytes` is always accessible.
        unsafe { self.bytes[i] = byte; }
    }

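    /// Shuffle the bytes of `self`, using the low 4 bits of each byte in
    /// `indices` as a table index. Note that `vpshufb` shuffles within
    /// each 128-bit half independently, and any index with its high bit
    /// set produces a `0` output byte.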
    #[inline]
    pub fn shuffle(self, indices: u8x32) -> u8x32 {
        // Safe because we know AVX2 is enabled.
        unsafe {
            u8x32 { vector: _mm256_shuffle_epi8(self.vector, indices.vector) }
        }
    }

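    /// Compare bytes for inequality: byte `i` of the result is `0xFF`
    /// when the corresponding bytes of `self` and `other` differ, and
    /// `0` when they are equal.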
    #[inline]
    pub fn ne(self, other: u8x32) -> u8x32 {
        // Safe because we know AVX2 is enabled.
        unsafe {
            // AVX2 has no "not equal" byte comparison, so compare for
            // equality and invert: ANDNOT computes `!boolv & ones`.
            let boolv = _mm256_cmpeq_epi8(self.vector, other.vector);
            let ones = _mm256_set1_epi8(0xFF as u8 as i8);
            u8x32 { vector: _mm256_andnot_si256(boolv, ones) }
        }
    }

    /// Compute the bitwise AND of `self` and `other`.
    #[inline]
    pub fn and(self, other: u8x32) -> u8x32 {
        // Safe because we know AVX2 is enabled.
        unsafe {
            u8x32 { vector: _mm256_and_si256(self.vector, other.vector) }
        }
    }

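    /// Collect the most significant bit of each byte into a single
    /// 32-bit mask, where bit `i` of the result is the high bit of
    /// byte `i`.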
    #[inline]
    pub fn movemask(self) -> u32 {
        // Safe because we know AVX2 is enabled.
        unsafe {
            _mm256_movemask_epi8(self.vector) as u32
        }
    }

    /// Treat `other ++ self` as a contiguous 64-byte buffer and extract
    /// the 32-byte window that starts 2 bytes before the end of `other`;
    /// i.e., the result is the last 2 bytes of `other` followed by the
    /// first 30 bytes of `self`.
    #[inline]
    pub fn alignr_14(self, other: u8x32) -> u8x32 {
        // Safe because we know AVX2 is enabled.
        unsafe {
            // Credit goes to jneem for figuring this out:
            // https://github.com/jneem/teddy/blob/9ab5e899ad6ef6911aecd3cf1033f1abe6e1f66c/src/x86/teddy_simd.rs#L145-L184
            //
            // TL;DR avx2's PALIGNR instruction is actually just two 128-bit
            // PALIGNR instructions, which is not what we want, so we need to
            // do some extra shuffling.
            let v = _mm256_permute2x128_si256(other.vector, self.vector, 0x21);
            let v = _mm256_alignr_epi8(self.vector, v, 14);
            u8x32 { vector: v }
        }
    }

    /// Like `alignr_14`, but the window starts 1 byte before the end of
    /// `other`: the result is the last byte of `other` followed by the
    /// first 31 bytes of `self`.
    #[inline]
    pub fn alignr_15(self, other: u8x32) -> u8x32 {
        // Safe because we know AVX2 is enabled.
        unsafe {
            // See `alignr_14` for why the extra permute is needed.
            // Credit goes to jneem for figuring this out:
            // https://github.com/jneem/teddy/blob/9ab5e899ad6ef6911aecd3cf1033f1abe6e1f66c/src/x86/teddy_simd.rs#L145-L184
            let v = _mm256_permute2x128_si256(other.vector, self.vector, 0x21);
            let v = _mm256_alignr_epi8(self.vector, v, 15);
            u8x32 { vector: v }
        }
    }

    /// Shift each byte right by 4 bits, moving each byte's high nibble
    /// into its low nibble position.
    ///
    /// AVX2 has no byte-wise shift, so this uses a 16-bit shift, which
    /// leaks the low 4 bits of each odd-indexed byte into the high 4 bits
    /// of the byte below it. Callers must mask the result if those leaked
    /// bits matter.
    #[inline]
    pub fn bit_shift_right_4(self) -> u8x32 {
        // Safe because we know AVX2 is enabled.
        unsafe {
            u8x32 { vector: _mm256_srli_epi16(self.vector, 4) }
        }
    }
}

impl fmt::Debug for u8x32 {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Safe because `bytes` is always accessible.
        unsafe { self.bytes.fmt(f) }
    }
}
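
#[cfg(test)]
mod tests {
    use super::*;

    // A few sanity checks sketching how `AVX2VectorBuilder` and `u8x32`
    // fit together. These are illustrative rather than exhaustive, and
    // they pass trivially on CPUs without AVX2.

    #[test]
    fn splat_and_extract() {
        let builder = match AVX2VectorBuilder::new() {
            None => return,
            Some(builder) => builder,
        };
        let v = builder.u8x32_splat(0xA5);
        for i in 0..32 {
            assert_eq!(v.extract(i), 0xA5);
        }
    }

    #[test]
    fn load_unaligned_roundtrip() {
        let builder = match AVX2VectorBuilder::new() {
            None => return,
            Some(builder) => builder,
        };
        let bytes: Vec<u8> = (0..32).collect();
        let v = builder.u8x32_load_unaligned(&bytes);
        for i in 0..32 {
            assert_eq!(v.extract(i), i as u8);
        }
    }

    #[test]
    fn ne_and_movemask() {
        let builder = match AVX2VectorBuilder::new() {
            None => return,
            Some(builder) => builder,
        };
        let mut a = builder.u8x32_splat(1);
        let b = builder.u8x32_splat(1);
        // Make exactly one byte differ; `ne` sets that byte to 0xFF and
        // `movemask` reports its high bit.
        a.replace(3, 2);
        assert_eq!(a.ne(b).movemask(), 1u32 << 3);
    }
}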