Blame - src/state_id.rs - platform/external/rust/crates/regex-automata

blob: c9bac1941bc3fdee592d77395ded277143041742 [file] [log] [blame]

Jakub Kotur	3bceaeb	2020-12-21 17:28:16 +0100	[diff] [blame]	1	use core::fmt::Debug;
				2	use core::hash::Hash;
				3	use core::mem::size_of;
				4
				5	use byteorder::{ByteOrder, NativeEndian};
				6
				7	#[cfg(feature = "std")]
				8	pub use self::std::*;
				9
				10	#[cfg(feature = "std")]
				11	mod std {
				12	use byteorder::ByteOrder;
				13	use core::mem::size_of;
				14	use error::{Error, Result};
				15
				16	use super::StateID;
				17
				18	/// Check that the premultiplication of the given state identifier can
				19	/// fit into the representation indicated by `S`. If it cannot, or if it
				20	/// overflows `usize` itself, then an error is returned.
				21	pub fn premultiply_overflow_error<S: StateID>(
				22	last_state: S,
				23	alphabet_len: usize,
				24	) -> Result<()> {
				25	let requested = match last_state.to_usize().checked_mul(alphabet_len) {
				26	Some(requested) => requested,
				27	None => return Err(Error::premultiply_overflow(0, 0)),
				28	};
				29	if requested > S::max_id() {
				30	return Err(Error::premultiply_overflow(S::max_id(), requested));
				31	}
				32	Ok(())
				33	}
				34
				35	/// Allocate the next sequential identifier for a fresh state given
				36	/// the previously constructed state identified by `current`. If the
				37	/// next sequential identifier would overflow `usize` or the chosen
				38	/// representation indicated by `S`, then an error is returned.
				39	pub fn next_state_id<S: StateID>(current: S) -> Result<S> {
				40	let next = match current.to_usize().checked_add(1) {
				41	Some(next) => next,
				42	None => return Err(Error::state_id_overflow(::std::usize::MAX)),
				43	};
				44	if next > S::max_id() {
				45	return Err(Error::state_id_overflow(S::max_id()));
				46	}
				47	Ok(S::from_usize(next))
				48	}
				49
				50	/// Convert the given `usize` to the chosen state identifier
				51	/// representation. If the given value cannot fit in the chosen
				52	/// representation, then an error is returned.
				53	pub fn usize_to_state_id<S: StateID>(value: usize) -> Result<S> {
				54	if value > S::max_id() {
				55	Err(Error::state_id_overflow(S::max_id()))
				56	} else {
				57	Ok(S::from_usize(value))
				58	}
				59	}
				60
				61	/// Write the given identifier to the given slice of bytes using the
				62	/// specified endianness. The given slice must have length at least
				63	/// `size_of::<S>()`.
				64	///
				65	/// The given state identifier representation must have size 1, 2, 4 or 8.
				66	pub fn write_state_id_bytes<E: ByteOrder, S: StateID>(
				67	slice: &mut [u8],
				68	id: S,
				69	) {
				70	assert!(
				71	1 == size_of::<S>()
				72	\|\| 2 == size_of::<S>()
				73	\|\| 4 == size_of::<S>()
				74	\|\| 8 == size_of::<S>()
				75	);
				76
				77	match size_of::<S>() {
				78	1 => slice[0] = id.to_usize() as u8,
				79	2 => E::write_u16(slice, id.to_usize() as u16),
				80	4 => E::write_u32(slice, id.to_usize() as u32),
				81	8 => E::write_u64(slice, id.to_usize() as u64),
				82	_ => unreachable!(),
				83	}
				84	}
				85	}
				86
				87	/// Return the unique identifier for a DFA's dead state in the chosen
				88	/// representation indicated by `S`.
				89	pub fn dead_id<S: StateID>() -> S {
				90	S::from_usize(0)
				91	}
				92
				93	/// A trait describing the representation of a DFA's state identifier.
				94	///
				95	/// The purpose of this trait is to safely express both the possible state
				96	/// identifier representations that can be used in a DFA and to convert between
				97	/// state identifier representations and types that can be used to efficiently
				98	/// index memory (such as `usize`).
				99	///
				100	/// In general, one should not need to implement this trait explicitly. In
				101	/// particular, this crate provides implementations for `u8`, `u16`, `u32`,
				102	/// `u64` and `usize`. (`u32` and `u64` are only provided for targets that can
				103	/// represent all corresponding values in a `usize`.)
				104	///
				105	/// # Safety
				106	///
				107	/// This trait is unsafe because the correctness of its implementations may be
				108	/// relied upon by other unsafe code. For example, one possible way to
				109	/// implement this trait incorrectly would be to return a maximum identifier
				110	/// in `max_id` that is greater than the real maximum identifier. This will
				111	/// likely result in wrap-on-overflow semantics in release mode, which can in
				112	/// turn produce incorrect state identifiers. Those state identifiers may then
				113	/// in turn access out-of-bounds memory in a DFA's search routine, where bounds
				114	/// checks are explicitly elided for performance reasons.
				115	pub unsafe trait StateID:
				116	Clone + Copy + Debug + Eq + Hash + PartialEq + PartialOrd + Ord
				117	{
				118	/// Convert from a `usize` to this implementation's representation.
				119	///
				120	/// Implementors may assume that `n <= Self::max_id`. That is, implementors
				121	/// do not need to check whether `n` can fit inside this implementation's
				122	/// representation.
				123	fn from_usize(n: usize) -> Self;
				124
				125	/// Convert this implementation's representation to a `usize`.
				126	///
				127	/// Implementors must not return a `usize` value greater than
				128	/// `Self::max_id` and must not permit overflow when converting between the
				129	/// implementor's representation and `usize`. In general, the preferred
				130	/// way for implementors to achieve this is to simply not provide
				131	/// implementations of `StateID` that cannot fit into the target platform's
				132	/// `usize`.
				133	fn to_usize(self) -> usize;
				134
				135	/// Return the maximum state identifier supported by this representation.
				136	///
				137	/// Implementors must return a correct bound. Doing otherwise may result
				138	/// in memory unsafety.
				139	fn max_id() -> usize;
				140
				141	/// Read a single state identifier from the given slice of bytes in native
				142	/// endian format.
				143	///
				144	/// Implementors may assume that the given slice has length at least
				145	/// `size_of::<Self>()`.
				146	fn read_bytes(slice: &[u8]) -> Self;
				147
				148	/// Write this state identifier to the given slice of bytes in native
				149	/// endian format.
				150	///
				151	/// Implementors may assume that the given slice has length at least
				152	/// `size_of::<Self>()`.
				153	fn write_bytes(self, slice: &mut [u8]);
				154	}
				155
				156	unsafe impl StateID for usize {
				157	#[inline]
				158	fn from_usize(n: usize) -> usize {
				159	n
				160	}
				161
				162	#[inline]
				163	fn to_usize(self) -> usize {
				164	self
				165	}
				166
				167	#[inline]
				168	fn max_id() -> usize {
				169	::core::usize::MAX
				170	}
				171
				172	#[inline]
				173	fn read_bytes(slice: &[u8]) -> Self {
				174	NativeEndian::read_uint(slice, size_of::<usize>()) as usize
				175	}
				176
				177	#[inline]
				178	fn write_bytes(self, slice: &mut [u8]) {
				179	NativeEndian::write_uint(slice, self as u64, size_of::<usize>())
				180	}
				181	}
				182
				183	unsafe impl StateID for u8 {
				184	#[inline]
				185	fn from_usize(n: usize) -> u8 {
				186	n as u8
				187	}
				188
				189	#[inline]
				190	fn to_usize(self) -> usize {
				191	self as usize
				192	}
				193
				194	#[inline]
				195	fn max_id() -> usize {
				196	::core::u8::MAX as usize
				197	}
				198
				199	#[inline]
				200	fn read_bytes(slice: &[u8]) -> Self {
				201	slice[0]
				202	}
				203
				204	#[inline]
				205	fn write_bytes(self, slice: &mut [u8]) {
				206	slice[0] = self;
				207	}
				208	}
				209
				210	unsafe impl StateID for u16 {
				211	#[inline]
				212	fn from_usize(n: usize) -> u16 {
				213	n as u16
				214	}
				215
				216	#[inline]
				217	fn to_usize(self) -> usize {
				218	self as usize
				219	}
				220
				221	#[inline]
				222	fn max_id() -> usize {
				223	::core::u16::MAX as usize
				224	}
				225
				226	#[inline]
				227	fn read_bytes(slice: &[u8]) -> Self {
				228	NativeEndian::read_u16(slice)
				229	}
				230
				231	#[inline]
				232	fn write_bytes(self, slice: &mut [u8]) {
				233	NativeEndian::write_u16(slice, self)
				234	}
				235	}
				236
				237	#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
				238	unsafe impl StateID for u32 {
				239	#[inline]
				240	fn from_usize(n: usize) -> u32 {
				241	n as u32
				242	}
				243
				244	#[inline]
				245	fn to_usize(self) -> usize {
				246	self as usize
				247	}
				248
				249	#[inline]
				250	fn max_id() -> usize {
				251	::core::u32::MAX as usize
				252	}
				253
				254	#[inline]
				255	fn read_bytes(slice: &[u8]) -> Self {
				256	NativeEndian::read_u32(slice)
				257	}
				258
				259	#[inline]
				260	fn write_bytes(self, slice: &mut [u8]) {
				261	NativeEndian::write_u32(slice, self)
				262	}
				263	}
				264
				265	#[cfg(target_pointer_width = "64")]
				266	unsafe impl StateID for u64 {
				267	#[inline]
				268	fn from_usize(n: usize) -> u64 {
				269	n as u64
				270	}
				271
				272	#[inline]
				273	fn to_usize(self) -> usize {
				274	self as usize
				275	}
				276
				277	#[inline]
				278	fn max_id() -> usize {
				279	::core::u64::MAX as usize
				280	}
				281
				282	#[inline]
				283	fn read_bytes(slice: &[u8]) -> Self {
				284	NativeEndian::read_u64(slice)
				285	}
				286
				287	#[inline]
				288	fn write_bytes(self, slice: &mut [u8]) {
				289	NativeEndian::write_u64(slice, self)
				290	}
				291	}