blob: b2fbf4a17ee3f4906de86e3cbfc22434cec5b4d7 [file] [log] [blame] [edit]
//! Encoded Buffers Implementation
//!
//! This module contains a trait used for converting byte buffers or
//! Rust strings into oniguruma char buffers to search and compile
//! with.
/// Encoded String Buffer
///
/// Represents a buffer of characters with encoding information
/// attached.
pub trait EncodedChars {
    /// Pointer to the start of the pattern
    ///
    /// This should point to the first character in the buffer,
    /// encoded as an `onig_sys` character.
    fn start_ptr(&self) -> *const onig_sys::OnigUChar;

    /// Pointer to the limit of the pattern buffer
    ///
    /// This should point just past the final character in the buffer,
    /// encoded as an `onig_sys` character.
    fn limit_ptr(&self) -> *const onig_sys::OnigUChar;

    /// The encoding of the contents of the buffer
    ///
    /// The default implementation returns the address of
    /// `onig_sys::OnigEncodingUTF8`. Implementors whose contents are
    /// in a different encoding should override this method.
    // Newer toolchains treat taking the address of a static as a safe
    // operation and would otherwise warn about the `unsafe` block;
    // older ones still require it.
    #[allow(unused_unsafe)]
    fn encoding(&self) -> onig_sys::OnigEncoding {
        // SAFETY: `addr_of_mut!` only computes the address of the
        // static. No reference to it is created and the value is
        // neither read nor written here, so repeated or concurrent
        // calls never produce aliasing `&mut` borrows of the global.
        unsafe { std::ptr::addr_of_mut!(onig_sys::OnigEncodingUTF8) }
    }

    /// The length of this buffer
    fn len(&self) -> usize;

    /// Is the buffer empty?
    ///
    /// The default implementation reports `true` exactly when
    /// `len()` is zero.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
/// Encoded Characters from a `str` Reference
///
/// Any type that can be viewed as a `str` is exposed as a character
/// buffer. The `encoding` method is not overridden here, so the
/// trait's default applies.
impl<T> EncodedChars for T
where
    T: AsRef<str>,
{
    fn start_ptr(&self) -> *const onig_sys::OnigUChar {
        let text: &str = self.as_ref();
        text.as_ptr()
    }

    fn limit_ptr(&self) -> *const onig_sys::OnigUChar {
        // The end of the pointer range is one past the last byte.
        let text: &str = self.as_ref();
        text.as_bytes().as_ptr_range().end
    }

    fn len(&self) -> usize {
        // Length in bytes of the underlying `str`.
        let text: &str = self.as_ref();
        text.len()
    }
}
/// Byte Buffer
///
/// Represents a buffer of bytes, with an encoding.
///
/// The value only holds a borrowed slice and an encoding pointer, so
/// it can be copied freely and printed with `{:?}` for diagnostics.
#[derive(Debug, Clone, Copy)]
pub struct EncodedBytes<'a> {
    /// The borrowed contents of the buffer
    bytes: &'a [u8],
    /// The encoding the contents are in
    enc: onig_sys::OnigEncoding,
}
impl<'a> EncodedBytes<'a> {
    /// New Buffer from Parts
    ///
    /// # Arguments
    ///
    ///  * `bytes` - The contents of the buffer
    ///  * `enc` - The encoding this buffer is in
    ///
    /// # Returns
    ///
    /// A new buffer instance
    pub fn from_parts(bytes: &'a [u8], enc: onig_sys::OnigEncoding) -> EncodedBytes<'a> {
        EncodedBytes { bytes, enc }
    }

    /// New ASCII Buffer
    ///
    /// The buffer's encoding is set to the address of
    /// `onig_sys::OnigEncodingASCII`.
    ///
    /// # Arguments
    ///
    ///  * `bytes` - The ASCII encoded string
    ///
    /// # Returns
    ///
    /// A new buffer instance
    // Newer toolchains treat taking the address of a static as a safe
    // operation and would otherwise warn about the `unsafe` block;
    // older ones still require it.
    #[allow(unused_unsafe)]
    pub fn ascii(bytes: &'a [u8]) -> EncodedBytes<'a> {
        // SAFETY: `addr_of_mut!` only computes the address of the
        // static. No reference to it is created and the value is
        // neither read nor written here, so no aliasing `&mut` borrow
        // of the global is ever produced.
        let enc = unsafe { std::ptr::addr_of_mut!(onig_sys::OnigEncodingASCII) };
        EncodedBytes { bytes, enc }
    }
}
impl<'a> EncodedChars for EncodedBytes<'a> {
fn start_ptr(&self) -> *const onig_sys::OnigUChar {
self.bytes.as_ptr()
}
fn limit_ptr(&self) -> *const onig_sys::OnigUChar {
self.bytes[self.bytes.len()..].as_ptr()
}
fn encoding(&self) -> onig_sys::OnigEncoding {
self.enc
}
fn len(&self) -> usize {
self.bytes.len()
}
}
#[cfg(test)]
pub mod tests {
    use super::*;
    use std::ptr::addr_of_mut;

    /// Address of the UTF-8 encoding static, obtained without ever
    /// forming a reference to the `static mut`.
    // `unsafe` is redundant on newer toolchains but required on older ones.
    #[allow(unused_unsafe)]
    fn utf8_encoding() -> onig_sys::OnigEncoding {
        // SAFETY: only the address of the static is computed; the
        // value is neither read nor written.
        unsafe { addr_of_mut!(onig_sys::OnigEncodingUTF8) }
    }

    /// Address of the ASCII encoding static, obtained without ever
    /// forming a reference to the `static mut`.
    // `unsafe` is redundant on newer toolchains but required on older ones.
    #[allow(unused_unsafe)]
    fn ascii_encoding() -> onig_sys::OnigEncoding {
        // SAFETY: only the address of the static is computed; the
        // value is neither read nor written.
        unsafe { addr_of_mut!(onig_sys::OnigEncodingASCII) }
    }

    /// Both `&str` and `String` fall back to the UTF-8 default.
    #[test]
    pub fn rust_string_encoding_is_utf8() {
        let foo = "foo";
        assert_eq!(utf8_encoding(), foo.encoding());

        let bar = String::from(".*");
        assert_eq!(utf8_encoding(), bar.encoding());
    }

    /// `EncodedBytes::ascii` tags the buffer with the ASCII encoding.
    #[test]
    pub fn rust_bytes_encoding_is_ascii() {
        let fizz = b"fizz";
        let buff = EncodedBytes::ascii(fizz);
        assert_eq!(ascii_encoding(), buff.encoding());
    }

    /// The start/limit pointers of a string are `len` bytes apart.
    #[test]
    pub fn rust_string_ptr_offsets_are_valid() {
        let test_string = "hello world";
        assert_eq!(
            test_string.limit_ptr() as usize - test_string.start_ptr() as usize,
            test_string.len()
        );
    }

    /// The start/limit pointers of a byte buffer are `len` bytes apart.
    #[test]
    pub fn rust_bytes_ptr_offsets_are_valid() {
        let fozz = b"foo.*bar";
        let buff = EncodedBytes::ascii(fozz);
        assert_eq!(
            buff.limit_ptr() as usize - buff.start_ptr() as usize,
            fozz.len()
        );
    }

    /// `from_parts` keeps both the encoding and the byte span it was given.
    #[test]
    pub fn byte_buffer_create() {
        let buff = b"hello world";
        let enc_buffer = EncodedBytes::from_parts(buff, ascii_encoding());
        assert_eq!(ascii_encoding(), enc_buffer.encoding());
        assert_eq!(
            enc_buffer.limit_ptr() as usize - enc_buffer.start_ptr() as usize,
            buff.len()
        );
    }
}