| // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| // SPDX-License-Identifier: Apache-2.0 |
| |
| use std::{collections::HashMap, fmt}; |
| |
| use serde::{Deserialize, Serialize}; |
| |
| use crate::{ |
| ngram::NgramSet, |
| preproc::{apply_aggressive, apply_normalizers}, |
| }; |
| |
| /// The type of a license entry (typically in a `Store`). |
| #[derive(Clone, Copy, PartialEq, Debug, Serialize, Deserialize)] |
| #[serde(rename_all = "lowercase")] |
| pub enum LicenseType { |
| /// The canonical text of the license. |
| Original, |
| /// A license header. There may be more than one in a `Store`. |
| Header, |
| /// An alternate form of a license. This is intended to be used for |
| /// alternate _formats_ of a license, not for variants where the text has |
| /// different meaning. Not currently used in askalono's SPDX dataset. |
| Alternate, |
| } |
| |
| impl fmt::Display for LicenseType { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| write!( |
| f, |
| "{}", |
| match *self { |
| LicenseType::Original => "original text", |
| LicenseType::Header => "license header", |
| LicenseType::Alternate => "alternate text", |
| } |
| ) |
| } |
| } |
| |
| /// A structure representing compiled text/matching data. |
| /// |
| /// This is the key structure used to compare two texts against one another. It |
| /// handles pre-processing the text to n-grams, scoring, and optimizing the |
| /// result to try to identify specific details about a match. |
| /// |
| /// # Examples |
| /// |
| /// Basic scoring of two texts: |
| /// |
| /// ``` |
| /// use askalono::TextData; |
| /// |
| /// let license = TextData::from("My First License"); |
| /// let sample = TextData::from("copyright 20xx me irl\n\n // my first license"); |
| /// assert_eq!(sample.match_score(&license), 1.0); |
| /// ``` |
| /// |
| /// The above example is a perfect match, as identifiable copyright statements |
| /// are stripped out during pre-processing. |
| /// |
| /// Building on that, TextData is able to tell you _where_ in the text a |
| /// license is located: |
| /// |
| /// ``` |
| /// # use std::error::Error; |
| /// # use askalono::TextData; |
/// # fn main() -> Result<(), Box<dyn Error>> {
| /// # let license = TextData::from("My First License"); |
| /// let sample = TextData::from("copyright 20xx me irl\n// My First License\nfn hello() {\n ..."); |
| /// let (optimized, score) = sample.optimize_bounds(&license); |
| /// assert_eq!((1, 2), optimized.lines_view()); |
| /// assert!(score > 0.99f32, "license within text matches"); |
| /// # Ok(()) |
| /// # } |
| /// ``` |
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct TextData {
    // Bigram set built from `text_processed` (see `NgramSet::from_str(_, 2)`
    // in `new`); compared with `dice` in `match_score`.
    match_data: NgramSet,
    // Active line range into `lines_normalized`: 0-indexed, inclusive start,
    // exclusive end. Reset/updated by `with_view`, `white_out`, etc.
    lines_view: (usize, usize),
    // Normalized lines of the original text; `None` after `without_text`,
    // in which case text-requiring methods panic with TEXTDATA_TEXT_ERROR.
    lines_normalized: Option<Vec<String>>,
    // Aggressively-processed text the n-grams were generated from; `None`
    // after `without_text`.
    text_processed: Option<String>,
}
| |
// Panic message used by methods that require the stored normalized text
// (which is absent after `without_text`).
const TEXTDATA_TEXT_ERROR: &str = "TextData does not have original text";
| |
| impl TextData { |
| /// Create a new TextData structure from a string. |
| /// |
| /// The given text will be normalized, then smashed down into n-grams for |
| /// matching. By default, the normalized text is stored inside the |
| /// structure for future diagnostics. This is necessary for optimizing a |
| /// match and for diffing against other texts. If you don't want this extra |
| /// data, you can call `without_text` throw it out. Generally, as a user of |
| /// this library you want to keep the text data, but askalono will throw it |
| /// away in its own `Store` as it's not needed. |
| pub fn new(text: &str) -> TextData { |
| let normalized = apply_normalizers(text); |
| let normalized_joined = normalized.join("\n"); |
| let processed = apply_aggressive(&normalized_joined); |
| let match_data = NgramSet::from_str(&processed, 2); |
| |
| TextData { |
| match_data, |
| lines_view: (0, normalized.len()), |
| lines_normalized: Some(normalized), |
| text_processed: Some(processed), |
| } |
| } |
| |
| /// Consume this `TextData`, returning one without normalized/processed |
| /// text stored. |
| /// |
| /// Unless you know you don't want the text, you probably don't want to use |
| /// this. Other methods on `TextData` require that text is present. |
| pub fn without_text(self) -> Self { |
| TextData { |
| match_data: self.match_data, |
| lines_view: (0, 0), |
| lines_normalized: None, |
| text_processed: None, |
| } |
| } |
| |
    /// Get the bounds of the active line view.
    ///
    /// This represents the "active" region of lines that matches are generated
    /// from. The bounds are a 0-indexed `(start, end)` tuple, with inclusive
    /// start and exclusive end indices. See `optimize_bounds`.
    ///
    /// This is largely for informational purposes; other methods in
    /// `TextData`, such as `lines` and `match_score`, will already account for
    /// the line range. However, it's useful to call it after running
    /// `optimize_bounds` to discover where in the input text the match was
    /// located.
    pub fn lines_view(&self) -> (usize, usize) {
        self.lines_view
    }
| |
| /// Clone this `TextView`, creating a copy with the given view. |
| /// |
| /// This will re-generate match data for the given view. It's used in |
| /// `optimize_bounds` to shrink/expand the view of the text to discover |
| /// bounds. |
| /// |
| /// Other methods on `TextView` respect this boundary, so it's not needed |
| /// outside this struct. |
| pub fn with_view(&self, start: usize, end: usize) -> Self { |
| let view = &self.lines_normalized.as_ref().expect(TEXTDATA_TEXT_ERROR)[start..end]; |
| let view_joined = view.join("\n"); |
| let processed = apply_aggressive(&view_joined); |
| TextData { |
| match_data: NgramSet::from_str(&processed, 2), |
| lines_view: (start, end), |
| lines_normalized: self.lines_normalized.clone(), |
| text_processed: Some(processed), |
| } |
| } |
| |
| /// "Erase" the current lines in view and restore the view to its original |
| /// bounds. |
| /// |
| /// For example, consider a file with two licenses in it. One was identified |
| /// (and located) with `optimize_bounds`. Now you want to find the other: |
| /// white-out the matched lines, and re-run the overall search to find a |
| /// new high score. |
| pub fn white_out(&self) -> Self { |
| // note that we're not using the view here... |
| let lines = self.lines_normalized.as_ref().expect(TEXTDATA_TEXT_ERROR); |
| |
| // ...because it's used here to exclude lines |
| let new_normalized: Vec<String> = lines |
| .iter() |
| .enumerate() |
| .map(|(i, line)| { |
| if i >= self.lines_view.0 && i < self.lines_view.1 { |
| "".to_string() |
| } else { |
| line.clone() |
| } |
| }) |
| .collect(); |
| |
| let processed = apply_aggressive(&new_normalized.join("\n")); |
| TextData { |
| match_data: NgramSet::from_str(&processed, 2), |
| lines_view: (0, new_normalized.len()), |
| lines_normalized: Some(new_normalized), |
| text_processed: Some(processed), |
| } |
| } |
| |
| /// Get a slice of the normalized lines in this `TextData`. |
| pub fn lines(&self) -> &[String] { |
| &self.lines_normalized.as_ref().expect(TEXTDATA_TEXT_ERROR) |
| [self.lines_view.0..self.lines_view.1] |
| } |
| |
| #[doc(hidden)] |
| pub fn text_processed(&self) -> Option<&str> { |
| self.text_processed.as_ref().map(String::as_ref) |
| } |
| |
    /// Compare this `TextData` with another, returning a similarity score.
    ///
    /// The score is the Sørensen–Dice coefficient of the two bigram sets,
    /// so identical processed texts score 1.0 and disjoint texts score 0.0.
    /// This is what's used during analysis to rank licenses.
    pub fn match_score(&self, other: &TextData) -> f32 {
        self.match_data.dice(&other.match_data)
    }
| |
| #[cfg(feature = "spdx")] |
| pub(crate) fn eq_data(&self, other: &Self) -> bool { |
| self.match_data.eq(&other.match_data) |
| } |
| |
| /// Attempt to optimize a known match to locate possible line ranges. |
| /// |
| /// Returns a new `TextData` struct and a score. The returned struct is a |
| /// clone of `self`, with its view set to the best match against `other`. |
| /// |
| /// This will respect any views set on the TextData (an optimized result |
| /// won't go outside the original view). |
| /// |
| /// Note that this won't be 100% optimal if there are blank lines |
| /// surrounding the actual match, since successive blank lines in a range |
| /// will likely have the same score. |
| /// |
| /// You should check the value of `lines_view` on the returned struct to |
| /// find the line ranges. |
| pub fn optimize_bounds(&self, other: &TextData) -> (Self, f32) { |
| assert!(self.lines_normalized.is_some(), "{}", TEXTDATA_TEXT_ERROR); |
| |
| let view = self.lines_view; |
| |
| // optimize the ending bounds of the text match |
| let (end_optimized, _) = self.search_optimize( |
| &|end| self.with_view(view.0, end).match_score(other), |
| &|end| self.with_view(view.0, end), |
| ); |
| let new_end = end_optimized.lines_view.1; |
| |
| // then optimize the starting bounds |
| let (optimized, score) = end_optimized.search_optimize( |
| &|start| end_optimized.with_view(start, new_end).match_score(other), |
| &|start| end_optimized.with_view(start, new_end), |
| ); |
| (optimized, score) |
| } |
| |
    /// Ternary-search helper for `optimize_bounds`.
    ///
    /// `score` maps a candidate boundary line index to a match score, and
    /// `value` materializes the `TextData` for a boundary. Returns the
    /// best-found `TextData` along with its score. The search covers
    /// `lines_view.0..=lines_view.1`, so it never escapes the current view.
    ///
    /// NOTE(review): ternary search assumes the score is unimodal over the
    /// boundary index; plateaus (e.g. runs of blank lines scoring equally)
    /// can make the result slightly sub-optimal — see the caveat on
    /// `optimize_bounds`.
    fn search_optimize(
        &self,
        score: &dyn Fn(usize) -> f32,
        value: &dyn Fn(usize) -> Self,
    ) -> (Self, f32) {
        // cache score checks, since they're kinda expensive
        let mut memo: HashMap<usize, f32> = HashMap::new();
        let mut check_score =
            |index: usize| -> f32 { *memo.entry(index).or_insert_with(|| score(index)) };

        // recursive ternary search over the inclusive range [left, right]
        fn search(score: &mut dyn FnMut(usize) -> f32, left: usize, right: usize) -> (usize, f32) {
            if right - left <= 3 {
                // find the index of the highest score in the remaining items;
                // ties prefer the later index (the `>=` replaces on equality)
                return (left..=right)
                    .map(|x| (x, score(x)))
                    .fold((0usize, 0f32), |acc, x| if x.1 >= acc.1 { x } else { acc });
            }

            // probe at the one-third and two-thirds points of the range
            let low = (left * 2 + right) / 3;
            let high = (left + right * 2) / 3;
            let score_low = score(low);
            let score_high = score(high);

            // discard the third of the range on the losing side and recurse
            if score_low > score_high {
                search(score, left, high - 1)
            } else {
                search(score, low + 1, right)
            }
        }

        let optimal = search(&mut check_score, self.lines_view.0, self.lines_view.1);
        (value(optimal.0), optimal.1)
    }
| } |
| |
| impl<'a> From<&'a str> for TextData { |
| fn from(text: &'a str) -> Self { |
| Self::new(text) |
| } |
| } |
| |
| impl<'a> From<String> for TextData { |
| fn from(text: String) -> Self { |
| Self::new(&text) |
| } |
| } |
| |
#[cfg(test)]
mod tests {
    use super::*;

    // psst:
    // cargo test -- --nocapture

    // the optimizer should locate the license's line range in a larger text
    #[test]
    fn optimize_bounds() {
        let license_text = "this is a license text\nor it pretends to be one\nit's just a test";
        let sample_text = "this is a license text\nor it pretends to be one\nit's just a test\nwords\n\nhere is some\ncode\nhello();\n\n//a comment too";
        let license = TextData::from(license_text).without_text();
        let sample = TextData::from(sample_text);

        // license sits at the top of the sample: expect view (0, 3)
        let (optimized, _) = sample.optimize_bounds(&license);
        println!("{:?}", optimized.lines_view);
        println!("{:?}", optimized.lines_normalized);
        assert_eq!((0, 3), optimized.lines_view);

        // add more to the string, try again (avoid int trunc screwups)
        let sample_text = format!("{}\none more line", sample_text);
        let sample = TextData::from(sample_text.as_str());
        let (optimized, _) = sample.optimize_bounds(&license);
        println!("{:?}", optimized.lines_view);
        println!("{:?}", optimized.lines_normalized);
        assert_eq!((0, 3), optimized.lines_view);

        // add to the beginning too
        let sample_text = format!("some content\nat\n\nthe beginning\n{}", sample_text);
        let sample = TextData::from(sample_text.as_str());
        let (optimized, _) = sample.optimize_bounds(&license);
        println!("{:?}", optimized.lines_view);
        println!("{:?}", optimized.lines_normalized);
        // end bounds at 7 and 8 have the same score, since they're empty lines (not
        // counted). askalono is not smart enough to trim this as close as it
        // can.
        assert!(
            (4, 7) == optimized.lines_view || (4, 8) == optimized.lines_view,
            "bounds are (4, 7) or (4, 8)"
        );
    }

    // if a view is set on the text data, optimize_bounds must not find text
    // outside of that range
    #[test]
    fn optimize_doesnt_grow_view() {
        let sample_text = "0\n1\n2\naaa aaa\naaa\naaa\naaa\n7\n8";
        let license_text = "aaa aaa aaa aaa aaa";
        let sample = TextData::from(sample_text);
        let license = TextData::from(license_text).without_text();

        // sanity: the optimized bounds should be at (3, 7)
        let (optimized, _) = sample.optimize_bounds(&license);
        assert_eq!((3, 7), optimized.lines_view);

        // this should still work
        let sample = sample.with_view(3, 7);
        let (optimized, _) = sample.optimize_bounds(&license);
        assert_eq!((3, 7), optimized.lines_view);

        // but if we shrink the view further, it shouldn't be outside that range
        let sample = sample.with_view(4, 6);
        let (optimized, _) = sample.optimize_bounds(&license);
        assert_eq!((4, 6), optimized.lines_view);

        // restoring the view should still be OK too
        let sample = sample.with_view(0, 9);
        let (optimized, _) = sample.optimize_bounds(&license);
        assert_eq!((3, 7), optimized.lines_view);
    }

    // ensure we don't choke on small TextData matches; scoring must also be
    // symmetric regardless of argument order
    #[test]
    fn match_small() {
        let a = TextData::from("a b");
        let b = TextData::from("a\nlong\nlicense\nfile\n\n\n\n\nabcdefg");

        let x = a.match_score(&b);
        let y = b.match_score(&a);

        assert_eq!(x, y);
    }

    // don't choke on empty TextData either
    #[test]
    fn match_empty() {
        let a = TextData::from("");
        let b = TextData::from("a\nlong\nlicense\nfile\n\n\n\n\nabcdefg");

        let x = a.match_score(&b);
        let y = b.match_score(&a);

        assert_eq!(x, y);
    }

    // with_view restricts the processed text; white_out removes the viewed
    // lines and re-processes the remainder
    #[test]
    fn view_and_white_out() {
        let a = TextData::from("aaa\nbbb\nccc\nddd");
        assert_eq!(Some("aaa bbb ccc ddd"), a.text_processed());

        let b = a.with_view(1, 3);
        assert_eq!(2, b.lines().len());
        assert_eq!(Some("bbb ccc"), b.text_processed());

        let c = b.white_out();
        assert_eq!(Some("aaa ddd"), c.text_processed());
    }
}