// vendor/imara-diff/src/sources.rs - toolchain/rustc - Git at Google

 use std::mem::take;
 use std::str::from_utf8_unchecked;

 use crate::TokenSource;

 /// Creates a line-based [`TokenSource`](crate::intern::TokenSource) over `data`.
 ///
 /// Every emitted token is one line of `data` with its line separator
 /// (`\n` or `\r\n`) stripped. As a consequence, switching a line ending
 /// between `\r\n` and `\n`, or dropping the final line ending entirely,
 /// is not detected by [`diff`](crate::diff).
 pub fn lines(data: &str) -> Lines<'_, false> {
     let bytes = data.as_bytes();
     Lines(ByteLines(bytes))
 }

 /// Creates a line-based [`TokenSource`](crate::intern::TokenSource) over `data`
 /// that keeps line separators.
 ///
 /// Every emitted token is one line of `data` together with its trailing
 /// separator (`\n` or `\r\n`), if it has one. As a consequence, switching a
 /// line ending between `\r\n` and `\n`, or dropping the final line ending
 /// entirely, is detected by [`diff`](crate::diff).
 pub fn lines_with_terminator(data: &str) -> Lines<'_, true> {
     let bytes = data.as_bytes();
     Lines(ByteLines(bytes))
 }

 /// Returns a [`TokenSource`](crate::intern::TokenSource) that uses
 /// the lines in `data` as Tokens. A line is a continuous subslice of
 /// `data` that ends with `\n` (or `\r\n`), or the unterminated remainder
 /// at the end of `data`.
 /// The newline separator (`\r\n` or `\n`) is included in the emitted tokens.
 /// This means that changing the newline separator from `\r\n` to `\n`
 /// (or omitting it fully on the last line) is detected by [`diff`](crate::diff).
 pub fn byte_lines_with_terminator(data: &[u8]) -> ByteLines<'_, true> {
     ByteLines(data)
 }

 /// Returns a [`TokenSource`](crate::intern::TokenSource) that uses
 /// the lines in `data` as Tokens. A line is a continuous subslice of
 /// `data` which does not contain `\n` (or `\r\n`).
 /// The newline separator (`\r\n` or `\n`) is not included in the emitted tokens.
 /// This means that changing the newline separator from `\r\n` to `\n`
 /// (or omitting it fully on the last line) is not detected by [`diff`](crate::diff).
 pub fn byte_lines(data: &[u8]) -> ByteLines<'_, false> {
     ByteLines(data)
 }

 /// A plain `&str` is a token source whose tokens are its lines
 /// (line separators stripped), so diffing two strings yields a line diff.
 impl<'a> TokenSource for &'a str {
     type Token = &'a str;

     type Tokenizer = Lines<'a, false>;

     /// Tokenizes the string with [`lines`], i.e. without line separators.
     fn tokenize(&self) -> Self::Tokenizer {
         lines(*self)
     }

     /// Delegates to the estimate of the separator-including line source.
     fn estimate_tokens(&self) -> u32 {
         let terminated = lines_with_terminator(*self);
         terminated.estimate_tokens()
     }
 }

 /// A plain byte slice is a token source whose tokens are its lines
 /// (line separators stripped), so diffing two byte slices yields a line diff.
 impl<'a> TokenSource for &'a [u8] {
     type Token = &'a [u8];
     type Tokenizer = ByteLines<'a, false>;

     /// Tokenizes the bytes with [`byte_lines`], i.e. without line separators.
     fn tokenize(&self) -> Self::Tokenizer {
         byte_lines(*self)
     }

     /// Delegates to the estimate of the separator-stripping line source.
     fn estimate_tokens(&self) -> u32 {
         let stripped = byte_lines(*self);
         stripped.estimate_tokens()
     }
 }

 /// A [`TokenSource`](crate::intern::TokenSource) that returns the lines of a `str` as tokens.
 /// See [`lines`](crate::sources::lines) and [`lines_with_terminator`](crate::sources::lines_with_terminator) for details.
 ///
 /// This is a thin wrapper around [`ByteLines`]; the `INCLUDE_LINE_TERMINATOR`
 /// parameter is forwarded to it unchanged. The `Iterator` impl of this type
 /// converts the yielded bytes back to `str` without re-validating them, so
 /// the wrapped bytes must always be valid UTF-8.
 #[derive(Clone, Copy, PartialEq, Eq)]
 pub struct Lines<'a, const INCLUDE_LINE_TERMINATOR: bool>(ByteLines<'a, INCLUDE_LINE_TERMINATOR>);

 impl<'a, const INCLUDE_LINE_TERMINATOR: bool> Iterator for Lines<'a, INCLUDE_LINE_TERMINATOR> {
     type Item = &'a str;

     /// Yields the next line as a `str`, or `None` once the input is exhausted.
     fn next(&mut self) -> Option<Self::Item> {
         let bytes = self.0.next()?;
         // SAFETY: by invariant this struct only ever wraps valid UTF-8, and
         // the inner iterator only cuts at the ASCII bytes `\n` / `\r`.
         // Splitting valid UTF-8 at ASCII characters always yields valid UTF-8.
         Some(unsafe { from_utf8_unchecked(bytes) })
     }
 }

 /// By default a line diff is produced for a string
 impl<'a, const INCLUDE_LINE_TERMINATOR: bool> TokenSource for Lines<'a, INCLUDE_LINE_TERMINATOR> {
     type Token = &'a str;

     type Tokenizer = Self;

     fn tokenize(&self) -> Self::Tokenizer {
         *self
     }

     fn estimate_tokens(&self) -> u32 {
         self.0.estimate_tokens()
     }
 }

 /// A [`TokenSource`](crate::intern::TokenSource) that returns the lines of a byte slice as tokens.
 /// See [`byte_lines`](crate::sources::byte_lines) and [`byte_lines_with_terminator`](crate::sources::byte_lines_with_terminator) for details.
 ///
 /// The wrapped slice is the not-yet-consumed remainder of the input;
 /// `INCLUDE_LINE_TERMINATOR` selects whether yielded lines keep their
 /// trailing `\n` / `\r\n`.
 #[derive(Clone, Copy, PartialEq, Eq)]
 pub struct ByteLines<'a, const INCLUDE_LINE_TERMINATOR: bool>(&'a [u8]);

 impl<'a, const INCLUDE_LINE_TERMINATOR: bool> Iterator for ByteLines<'a, INCLUDE_LINE_TERMINATOR> {
     type Item = &'a [u8];

     /// Splits the next line off the front of the remaining input.
     ///
     /// A line ends at the first `\n`. Input left over after the last `\n`
     /// is yielded as a final line if it is non-empty.
     fn next(&mut self) -> Option<Self::Item> {
         let newline_idx = match self.0.iter().position(|&byte| byte == b'\n') {
             Some(idx) => idx,
             // No newline left and nothing to yield: the iterator is done.
             None if self.0.is_empty() => return None,
             // No newline left: the whole remainder is the last line.
             None => return Some(take(&mut self.0)),
         };
         let (terminated, rest) = self.0.split_at(newline_idx + 1);
         self.0 = rest;
         if INCLUDE_LINE_TERMINATOR {
             return Some(terminated);
         }
         // Drop the `\n`, and a `\r` directly in front of it if present.
         let content = &terminated[..newline_idx];
         Some(match content {
             [head @ .., b'\r'] => head,
             _ => content,
         })
     }
 }

 /// [`ByteLines`] is its own tokenizer: tokenizing hands out a copy of the
 /// iterator, and the token count is extrapolated from a sample of its lines.
 impl<'a, const INCLUDE_LINE_TERMINATOR: bool> TokenSource
     for ByteLines<'a, INCLUDE_LINE_TERMINATOR>
 {
     type Token = &'a [u8];

     type Tokenizer = Self;

     /// Returns a copy of this iterator in its current state.
     fn tokenize(&self) -> Self::Tokenizer {
         *self
     }

     /// Estimates the number of lines by sampling the first few lines and
     /// extrapolating their average length to the full input.
     ///
     /// Returns a fixed fallback when the sample holds no bytes (empty input,
     /// or only empty lines when terminators are stripped).
     fn estimate_tokens(&self) -> u32 {
         const SAMPLE_LINES: usize = 20;
         const FALLBACK_ESTIMATE: u32 = 100;

         // `take` runs on a copy of `self`, so sampling does not consume input.
         let mut sampled_lines: u64 = 0;
         let mut sampled_bytes: u64 = 0;
         for line in self.take(SAMPLE_LINES) {
             sampled_lines += 1;
             sampled_bytes += line.len() as u64;
         }
         if sampled_bytes == 0 {
             return FALLBACK_ESTIMATE;
         }
         // Scale by the number of lines actually sampled (inputs shorter than
         // `SAMPLE_LINES` would otherwise be over-estimated), and do the math
         // in `u64` with a saturating conversion instead of a wrapping cast.
         let estimate = (self.0.len() as u64).saturating_mul(sampled_lines) / sampled_bytes;
         estimate.min(u64::from(u32::MAX)) as u32
     }
 }
	use std::mem::take;
	use std::str::from_utf8_unchecked;

	use crate::TokenSource;

	/// Creates a line-based [`TokenSource`](crate::intern::TokenSource) over `data`.
	///
	/// Every emitted token is one line of `data` with its line separator
	/// (`\n` or `\r\n`) stripped. As a consequence, switching a line ending
	/// between `\r\n` and `\n`, or dropping the final line ending entirely,
	/// is not detected by [`diff`](crate::diff).
	pub fn lines(data: &str) -> Lines<'_, false> {
	    let bytes = data.as_bytes();
	    Lines(ByteLines(bytes))
	}

	/// Creates a line-based [`TokenSource`](crate::intern::TokenSource) over `data`
	/// that keeps line separators.
	///
	/// Every emitted token is one line of `data` together with its trailing
	/// separator (`\n` or `\r\n`), if it has one. As a consequence, switching a
	/// line ending between `\r\n` and `\n`, or dropping the final line ending
	/// entirely, is detected by [`diff`](crate::diff).
	pub fn lines_with_terminator(data: &str) -> Lines<'_, true> {
	    let bytes = data.as_bytes();
	    Lines(ByteLines(bytes))
	}

	/// Returns a [`TokenSource`](crate::intern::TokenSource) that uses
	/// the lines in `data` as Tokens. A line is a continuous subslice of
	/// `data` that ends with `\n` (or `\r\n`), or the unterminated remainder
	/// at the end of `data`.
	/// The newline separator (`\r\n` or `\n`) is included in the emitted tokens.
	/// This means that changing the newline separator from `\r\n` to `\n`
	/// (or omitting it fully on the last line) is detected by [`diff`](crate::diff).
	pub fn byte_lines_with_terminator(data: &[u8]) -> ByteLines<'_, true> {
	    ByteLines(data)
	}

	/// Returns a [`TokenSource`](crate::intern::TokenSource) that uses
	/// the lines in `data` as Tokens. A line is a continuous subslice of
	/// `data` which does not contain `\n` (or `\r\n`).
	/// The newline separator (`\r\n` or `\n`) is not included in the emitted tokens.
	/// This means that changing the newline separator from `\r\n` to `\n`
	/// (or omitting it fully on the last line) is not detected by [`diff`](crate::diff).
	pub fn byte_lines(data: &[u8]) -> ByteLines<'_, false> {
	    ByteLines(data)
	}

	/// A plain `&str` is a token source whose tokens are its lines
	/// (line separators stripped), so diffing two strings yields a line diff.
	impl<'a> TokenSource for &'a str {
	    type Token = &'a str;

	    type Tokenizer = Lines<'a, false>;

	    /// Tokenizes the string with [`lines`], i.e. without line separators.
	    fn tokenize(&self) -> Self::Tokenizer {
	        lines(*self)
	    }

	    /// Delegates to the estimate of the separator-including line source.
	    fn estimate_tokens(&self) -> u32 {
	        let terminated = lines_with_terminator(*self);
	        terminated.estimate_tokens()
	    }
	}

	/// A plain byte slice is a token source whose tokens are its lines
	/// (line separators stripped), so diffing two byte slices yields a line diff.
	impl<'a> TokenSource for &'a [u8] {
	    type Token = &'a [u8];
	    type Tokenizer = ByteLines<'a, false>;

	    /// Tokenizes the bytes with [`byte_lines`], i.e. without line separators.
	    fn tokenize(&self) -> Self::Tokenizer {
	        byte_lines(*self)
	    }

	    /// Delegates to the estimate of the separator-stripping line source.
	    fn estimate_tokens(&self) -> u32 {
	        let stripped = byte_lines(*self);
	        stripped.estimate_tokens()
	    }
	}

	/// A [`TokenSource`](crate::intern::TokenSource) that returns the lines of a `str` as tokens.
	/// See [`lines`](crate::sources::lines) and [`lines_with_terminator`](crate::sources::lines_with_terminator) for details.
	///
	/// This is a thin wrapper around [`ByteLines`]; the `INCLUDE_LINE_TERMINATOR`
	/// parameter is forwarded to it unchanged. The `Iterator` impl of this type
	/// converts the yielded bytes back to `str` without re-validating them, so
	/// the wrapped bytes must always be valid UTF-8.
	#[derive(Clone, Copy, PartialEq, Eq)]
	pub struct Lines<'a, const INCLUDE_LINE_TERMINATOR: bool>(ByteLines<'a, INCLUDE_LINE_TERMINATOR>);

	impl<'a, const INCLUDE_LINE_TERMINATOR: bool> Iterator for Lines<'a, INCLUDE_LINE_TERMINATOR> {
	type Item = &'a str;

	fn next(&mut self) -> Option<Self::Item> {
	// safety invariant: this struct may only contain valid utf8
	// dividing valid utf8 bytes by ascii characters always produces valid utf-8
	self.0.next().map(\|it\| unsafe { from_utf8_unchecked(it) })
	}
	}

	/// By default a line diff is produced for a string
	impl<'a, const INCLUDE_LINE_TERMINATOR: bool> TokenSource for Lines<'a, INCLUDE_LINE_TERMINATOR> {
	type Token = &'a str;

	type Tokenizer = Self;

	fn tokenize(&self) -> Self::Tokenizer {
	*self
	}

	fn estimate_tokens(&self) -> u32 {
	self.0.estimate_tokens()
	}
	}

	/// A [`TokenSource`](crate::intern::TokenSource) that returns the lines of a byte slice as tokens.
	/// See [`byte_lines`](crate::sources::byte_lines) and [`byte_lines_with_terminator`](crate::sources::byte_lines_with_terminator) for details.
	///
	/// The wrapped slice is the not-yet-consumed remainder of the input;
	/// `INCLUDE_LINE_TERMINATOR` selects whether yielded lines keep their
	/// trailing `\n` / `\r\n`.
	#[derive(Clone, Copy, PartialEq, Eq)]
	pub struct ByteLines<'a, const INCLUDE_LINE_TERMINATOR: bool>(&'a [u8]);

	impl<'a, const INCLUDE_LINE_TERMINATOR: bool> Iterator for ByteLines<'a, INCLUDE_LINE_TERMINATOR> {
	type Item = &'a [u8];

	fn next(&mut self) -> Option<Self::Item> {
	let mut saw_carriage_return = false;
	let mut iter = self.0.iter().enumerate();
	let line_len = loop {
	match iter.next() {
	Some((i, b'\n')) => break i + 1,
	None => {
	return (!self.0.is_empty()).then(\|\| take(&mut self.0));
	}
	Some((_, &it)) => saw_carriage_return = it == b'\r',
	}
	};
	let (mut line, rem) = self.0.split_at(line_len);
	self.0 = rem;
	if !INCLUDE_LINE_TERMINATOR {
	line = &line[..line_len - 1 - saw_carriage_return as usize];
	}
	Some(line)
	}
	}

	/// [`ByteLines`] is its own tokenizer: tokenizing hands out a copy of the
	/// iterator, and the token count is extrapolated from a sample of its lines.
	impl<'a, const INCLUDE_LINE_TERMINATOR: bool> TokenSource
	    for ByteLines<'a, INCLUDE_LINE_TERMINATOR>
	{
	    type Token = &'a [u8];

	    type Tokenizer = Self;

	    /// Returns a copy of this iterator in its current state.
	    fn tokenize(&self) -> Self::Tokenizer {
	        *self
	    }

	    /// Estimates the number of lines by sampling the first few lines and
	    /// extrapolating their average length to the full input.
	    ///
	    /// Returns a fixed fallback when the sample holds no bytes (empty input,
	    /// or only empty lines when terminators are stripped).
	    fn estimate_tokens(&self) -> u32 {
	        const SAMPLE_LINES: usize = 20;
	        const FALLBACK_ESTIMATE: u32 = 100;

	        // `take` runs on a copy of `self`, so sampling does not consume input.
	        let mut sampled_lines: u64 = 0;
	        let mut sampled_bytes: u64 = 0;
	        for line in self.take(SAMPLE_LINES) {
	            sampled_lines += 1;
	            sampled_bytes += line.len() as u64;
	        }
	        if sampled_bytes == 0 {
	            return FALLBACK_ESTIMATE;
	        }
	        // Scale by the number of lines actually sampled (inputs shorter than
	        // `SAMPLE_LINES` would otherwise be over-estimated), and do the math
	        // in `u64` with a saturating conversion instead of a wrapping cast.
	        let estimate = (self.0.len() as u64).saturating_mul(sampled_lines) / sampled_bytes;
	        estimate.min(u64::from(u32::MAX)) as u32
	    }
	}