// vendor/regex-syntax-0.5.6/src/unicode.rs - toolchain/rustc - Git at Google

 use std::cmp::Ordering;
 use std::result;

 use ucd_util::{self, PropertyValues};

 use hir;
 use unicode_tables::age;
 use unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE;
 use unicode_tables::general_category;
 use unicode_tables::property_bool;
 use unicode_tables::property_names::PROPERTY_NAMES;
 use unicode_tables::property_values::PROPERTY_VALUES;
 use unicode_tables::script;
 use unicode_tables::script_extension;

 /// Shorthand for a `Result` whose error type is this module's `Error`.
 type Result<T> = result::Result<T, Error>;

 /// An error that occurs when dealing with Unicode.
 ///
 /// We don't impl the Error trait here because these always get converted
 /// into other public errors. (This error type isn't exported.)
 #[derive(Debug)]
 pub enum Error {
     /// The requested property name could not be resolved, or `class` has no
     /// table for it.
     PropertyNotFound,
     /// The property was recognized, but the requested value could not be
     /// resolved for it.
     PropertyValueNotFound,
 }

 /// Writes the UTF-8 encoding of `character` into the front of `dst`.
 ///
 /// Returns the number of bytes written. If `dst` is too short to hold the
 /// whole sequence, nothing is written and `None` is returned.
 pub fn encode_utf8(character: char, dst: &mut [u8]) -> Option<usize> {
     // TODO: Remove this function once we move to at least Rust 1.15, which
     // provides char::encode_utf8 for us.
     const TAG_CONT: u8 = 0b1000_0000;
     const TAG_TWO: u8 = 0b1100_0000;
     const TAG_THREE: u8 = 0b1110_0000;
     const TAG_FOUR: u8 = 0b1111_0000;

     let scalar = character as u32;
     let width = character.len_utf8();
     if dst.len() < width {
         return None;
     }

     // Builds a continuation byte from the six bits that start at `shift`.
     let trail = |shift: u32| ((scalar >> shift) & 0x3F) as u8 | TAG_CONT;
     match width {
         1 => {
             dst[0] = scalar as u8;
         }
         2 => {
             dst[0] = ((scalar >> 6) & 0x1F) as u8 | TAG_TWO;
             dst[1] = trail(0);
         }
         3 => {
             dst[0] = ((scalar >> 12) & 0x0F) as u8 | TAG_THREE;
             dst[1] = trail(6);
             dst[2] = trail(0);
         }
         _ => {
             dst[0] = ((scalar >> 18) & 0x07) as u8 | TAG_FOUR;
             dst[1] = trail(12);
             dst[2] = trail(6);
             dst[3] = trail(0);
         }
     }
     Some(width)
 }

 /// Iterator over the members of one codepoint's simple case equivalence
 /// class, yielding each member by value.
 #[derive(Debug)]
 pub struct SimpleFoldIter(::std::slice::Iter<'static, char>);

 impl Iterator for SimpleFoldIter {
     type Item = char;

     fn next(&mut self) -> Option<char> {
         // The wrapped slice iterator hands out references; copy the char out.
         match self.0.next() {
             Some(&folded) => Some(folded),
             None => None,
         }
     }
 }

 /// Looks up the simple case folding equivalence class of `c`.
 ///
 /// On a hit, returns `Ok` with an iterator over the class as stored in the
 /// table (the class does not include `c` itself).
 ///
 /// On a miss, returns `Err` carrying the key of the table entry at the
 /// position where `c` would be inserted, i.e. the next scalar value that has
 /// a non-empty class, or `None` when `c` sorts after every entry. Callers can
 /// use that hint to skip ahead instead of probing every codepoint.
 pub fn simple_fold(c: char) -> result::Result<SimpleFoldIter, Option<char>> {
     match CASE_FOLDING_SIMPLE.binary_search_by_key(&c, |&(key, _)| key) {
         Ok(idx) => Ok(SimpleFoldIter(CASE_FOLDING_SIMPLE[idx].1.iter())),
         // `idx` is the insertion point; it may be one past the last entry.
         Err(idx) => Err(CASE_FOLDING_SIMPLE.get(idx).map(|&(next, _)| next)),
     }
 }

 /// Reports whether any scalar value in the inclusive range `start..=end` has
 /// an entry in the simple case folding table.
 ///
 /// This function panics if `end < start`.
 pub fn contains_simple_case_mapping(start: char, end: char) -> bool {
     assert!(start <= end);
     // Treat every key inside the range as "equal" so the binary search
     // succeeds exactly when at least one table key falls in the range.
     let probe = CASE_FOLDING_SIMPLE.binary_search_by(|&(key, _)| {
         if key < start {
             Ordering::Less
         } else if key > end {
             Ordering::Greater
         } else {
             Ordering::Equal
         }
     });
     probe.is_ok()
 }

 /// A query for finding a character class defined by Unicode. This supports
 /// either use of a property name directly, or lookup by property value. The
 /// former generally refers to Binary properties (see UTS#44, Table 8), but
 /// as a special exception (see UTS#18, Section 1.2) both general categories
 /// (an enumeration) and scripts (a catalog) are supported as if each of their
 /// possible values were a binary property.
 ///
 /// In all circumstances, property names and values are normalized and
 /// canonicalized. That is, `GC == gc == GeneralCategory == general_category`.
 ///
 /// The lifetime `'a` refers to the shorter of the lifetimes of property name
 /// and property value.
 #[derive(Debug)]
 pub enum ClassQuery<'a> {
     /// Return a class named by a single letter.
     ///
     /// The letter is resolved exactly like a `Binary` name, so it may name a
     /// binary property, a general category or a script.
     OneLetter(char),
     /// Return a class corresponding to a Unicode binary property.
     ///
     /// Note that, by special exception (see UTS#18, Section 1.2), both
     /// general category values and script values are permitted here as if
     /// they were a binary property.
     Binary(&'a str),
     /// Return a class corresponding to all codepoints whose property
     /// (identified by `property_name`) corresponds to the given value
     /// (identified by `property_value`).
     ByValue {
         /// A property name (normalized before lookup).
         property_name: &'a str,
         /// A property value (normalized before lookup).
         property_value: &'a str,
     },
 }

 impl<'a> ClassQuery<'a> {
     fn canonicalize(&self) -> Result<CanonicalClassQuery> {
         match *self {
             ClassQuery::OneLetter(c) => self.canonical_binary(&c.to_string()),
             ClassQuery::Binary(name) => self.canonical_binary(name),
             ClassQuery::ByValue { property_name, property_value } => {
                 let property_name = normalize(property_name);
                 let property_value = normalize(property_value);

                 let canon_name = match canonical_prop(&property_name) {
                     None => return Err(Error::PropertyNotFound),
                     Some(canon_name) => canon_name,
                 };
                 Ok(match canon_name {
                     "General_Category" => {
                         let canon = match canonical_gencat(&property_value) {
                             None => return Err(Error::PropertyValueNotFound),
                             Some(canon) => canon,
                         };
                         CanonicalClassQuery::GeneralCategory(canon)
                     }
                     "Script" => {
                         let canon = match canonical_script(&property_value) {
                             None => return Err(Error::PropertyValueNotFound),
                             Some(canon) => canon,
                         };
                         CanonicalClassQuery::Script(canon)
                     }
                     _ => {
                         let vals = match property_values(canon_name) {
                             None => return Err(Error::PropertyValueNotFound),
                             Some(vals) => vals,
                         };
                         let canon_val = match canonical_value(
                             vals,
                             &property_value,
                         ) {
                             None => return Err(Error::PropertyValueNotFound),
                             Some(canon_val) => canon_val,
                         };
                         CanonicalClassQuery::ByValue {
                             property_name: canon_name,
                             property_value: canon_val,
                         }
                     }
                 })
             }
         }
     }

     fn canonical_binary(&self, name: &str) -> Result<CanonicalClassQuery> {
         let norm = normalize(name);

         if let Some(canon) = canonical_prop(&norm) {
             return Ok(CanonicalClassQuery::Binary(canon));
         }
         if let Some(canon) = canonical_gencat(&norm) {
             return Ok(CanonicalClassQuery::GeneralCategory(canon));
         }
         if let Some(canon) = canonical_script(&norm) {
             return Ok(CanonicalClassQuery::Script(canon));
         }
         Err(Error::PropertyNotFound)
     }
 }

 /// Like ClassQuery, but its parameters have been canonicalized. This also
 /// differentiates binary properties from flattened general categories and
 /// scripts.
 ///
 /// Every string held here is `'static`, i.e. it no longer borrows from the
 /// query text.
 #[derive(Debug, Eq, PartialEq)]
 enum CanonicalClassQuery {
     /// The canonical binary property name.
     Binary(&'static str),
     /// The canonical general category name.
     GeneralCategory(&'static str),
     /// The canonical script name.
     Script(&'static str),
     /// An arbitrary association between property and value, both of which
     /// have been canonicalized.
     ///
     /// Note that by construction, the property name of ByValue will never
     /// be General_Category or Script. Those two cases are subsumed by the
     /// eponymous variants.
     ByValue {
         /// The canonical property name.
         property_name: &'static str,
         /// The canonical property value.
         property_value: &'static str,
     },
 }

 /// Resolves `query` to the Unicode class it denotes.
 ///
 /// The query is canonicalized first and any canonicalization error is
 /// returned as is. A lookup that misses its table, or a canonical query this
 /// function has no table for, is reported as an `Err`.
 pub fn class<'a>(query: ClassQuery<'a>) -> Result<hir::ClassUnicode> {
     use self::CanonicalClassQuery::*;

     let canonical = try!(query.canonicalize());
     match canonical {
         Binary(name) => {
             match property_set(property_bool::BY_NAME, name) {
                 Some(ranges) => Ok(hir_class(ranges)),
                 None => Err(Error::PropertyNotFound),
             }
         }
         // `Any`, `Assigned` and `ASCII` are synthesized here rather than
         // looked up under their own names.
         GeneralCategory("Any") => {
             let everything = [('\0', '\u{10FFFF}')];
             Ok(hir_class(&everything))
         }
         GeneralCategory("Assigned") => {
             // Assigned is the complement of Unassigned.
             match property_set(general_category::BY_NAME, "Unassigned") {
                 Some(unassigned) => {
                     let mut cls = hir_class(unassigned);
                     cls.negate();
                     Ok(cls)
                 }
                 None => Err(Error::PropertyNotFound),
             }
         }
         GeneralCategory("ASCII") => {
             let ascii = [('\0', '\x7F')];
             Ok(hir_class(&ascii))
         }
         GeneralCategory(name) => {
             match property_set(general_category::BY_NAME, name) {
                 Some(ranges) => Ok(hir_class(ranges)),
                 None => Err(Error::PropertyValueNotFound),
             }
         }
         Script(name) => {
             match property_set(script::BY_NAME, name) {
                 Some(ranges) => Ok(hir_class(ranges)),
                 None => Err(Error::PropertyValueNotFound),
             }
         }
         ByValue { property_name: "Age", property_value } => {
             // Union every age set yielded for the requested age.
             let mut merged = hir::ClassUnicode::empty();
             for ranges in try!(ages(property_value)) {
                 merged.union(&hir_class(ranges));
             }
             Ok(merged)
         }
         ByValue { property_name: "Script_Extensions", property_value } => {
             match property_set(script_extension::BY_NAME, property_value) {
                 Some(ranges) => Ok(hir_class(ranges)),
                 None => Err(Error::PropertyValueNotFound),
             }
         }
         _ => {
             // What else should we support?
             Err(Error::PropertyNotFound)
         }
     }
 }

 /// Converts a slice of inclusive `(start, end)` scalar value pairs into a
 /// Unicode HIR class containing one range per pair.
 pub fn hir_class(ranges: &[(char, char)]) -> hir::ClassUnicode {
     let mut hir_ranges: Vec<hir::ClassUnicodeRange> =
         Vec::with_capacity(ranges.len());
     for &(start, end) in ranges {
         hir_ranges.push(hir::ClassUnicodeRange::new(start, end));
     }
     hir::ClassUnicode::new(hir_ranges)
 }

 /// Looks up `normalized_name` in the `PROPERTY_NAMES` table and returns the
 /// canonical property name, or `None` when the lookup finds nothing.
 ///
 /// The argument is expected to have been passed through `normalize` already.
 fn canonical_prop(normalized_name: &str) -> Option<&'static str> {
     ucd_util::canonical_property_name(PROPERTY_NAMES, normalized_name)
 }

 /// Maps a normalized general category value to its canonical name.
 ///
 /// `any`, `assigned` and `ascii` are recognized directly; every other value
 /// is looked up among the `General_Category` property values.
 ///
 /// Panics if the value tables have no `General_Category` entry.
 fn canonical_gencat(normalized_value: &str) -> Option<&'static str> {
     let special = match normalized_value {
         "any" => Some("Any"),
         "assigned" => Some("Assigned"),
         "ascii" => Some("ASCII"),
         _ => None,
     };
     // Only consult the table when the value is not one of the specials.
     special.or_else(|| {
         let gencats = property_values("General_Category").unwrap();
         canonical_value(gencats, normalized_value)
     })
 }

 /// Maps a normalized `Script` property value to its canonical name, or
 /// `None` when the value is not found.
 ///
 /// Panics if the value tables have no `Script` entry (see the `unwrap`).
 fn canonical_script(normalized_value: &str) -> Option<&'static str> {
     let scripts = property_values("Script").unwrap();
     canonical_value(scripts, normalized_value)
 }

 /// Looks up `normalized_value` in the given property value table and
 /// returns its canonical name, or `None` when the lookup finds nothing.
 fn canonical_value(
     vals: PropertyValues,
     normalized_value: &str,
 ) -> Option<&'static str> {
     ucd_util::canonical_property_value(vals, normalized_value)
 }

 /// Returns an owned copy of `x` after running it through
 /// `ucd_util::symbolic_name_normalize`; the input itself is left untouched.
 fn normalize(x: &str) -> String {
     let mut normalized = String::from(x);
     ucd_util::symbolic_name_normalize(&mut normalized);
     normalized
 }

 /// Fetches the value table for the property with the given canonical name
 /// from `PROPERTY_VALUES`, or `None` when the lookup finds nothing.
 fn property_values(
     canonical_property_name: &'static str,
 ) -> Option<PropertyValues>
 {
     ucd_util::property_values(PROPERTY_VALUES, canonical_property_name)
 }

 /// Finds the codepoint ranges stored under `canonical` in `name_map`.
 ///
 /// `name_map` is binary searched on its name column, so it must be sorted by
 /// name. Returns `None` when no entry carries that exact name.
 fn property_set(
     name_map: &'static [(&'static str, &'static [(char, char)])],
     canonical: &'static str,
 ) -> Option<&'static [(char, char)]> {
     match name_map.binary_search_by_key(&canonical, |entry| entry.0) {
         Ok(idx) => Some(name_map[idx].1),
         Err(_) => None,
     }
 }

 /// An iterator over Unicode Age sets. Each item corresponds to a set of
 /// codepoints that were added in a particular revision of Unicode. The
 /// iterator yields items in chronological order.
 #[derive(Debug)]
 struct AgeIter {
     /// The `(age name, codepoint ranges)` entries not yet yielded; the
     /// iterator consumes this slice from the front.
     ages: &'static [(&'static str, &'static [(char, char)])],
 }

 fn ages(canonical_age: &str) -> Result<AgeIter> {
     const AGES: &'static [(&'static str, &'static [(char, char)])] = &[
         ("V1_1", age::V1_1),
         ("V2_0", age::V2_0),
         ("V2_1", age::V2_1),
         ("V3_0", age::V3_0),
         ("V3_1", age::V3_1),
         ("V3_2", age::V3_2),
         ("V4_0", age::V4_0),
         ("V4_1", age::V4_1),
         ("V5_0", age::V5_0),
         ("V5_1", age::V5_1),
         ("V5_2", age::V5_2),
         ("V6_0", age::V6_0),
         ("V6_1", age::V6_1),
         ("V6_2", age::V6_2),
         ("V6_3", age::V6_3),
         ("V7_0", age::V7_0),
         ("V8_0", age::V8_0),
         ("V9_0", age::V9_0),
         ("V10_0", age::V10_0),
     ];
     assert_eq!(AGES.len(), age::BY_NAME.len(), "ages are out of sync");

     let pos = AGES.iter().position(|&(age, _)| canonical_age == age);
     match pos {
         None => Err(Error::PropertyValueNotFound),
         Some(i) => Ok(AgeIter { ages: &AGES[..i+1] }),
     }
 }

 impl Iterator for AgeIter {
     type Item = &'static [(char, char)];

     /// Pops the front entry and yields its codepoint ranges, or `None` once
     /// every entry has been consumed.
     fn next(&mut self) -> Option<&'static [(char, char)]> {
         let remaining = self.ages;
         match remaining.split_first() {
             Some((head, rest)) => {
                 self.ages = rest;
                 Some(head.1)
             }
             None => None,
         }
     }
 }

 #[cfg(test)]
 mod tests {
     use super::{contains_simple_case_mapping, simple_fold};

     // U+212A KELVIN SIGN renders exactly like ASCII 'K'. It is spelled as an
     // escape here so the assertions below cannot be misread (or silently
     // normalized) into contradictory ASCII-only expectations.
     const KELVIN: char = '\u{212A}';

     /// 'k', 'K' and the Kelvin sign fold together; each lookup yields the
     /// other two members and never the queried codepoint itself.
     #[test]
     fn simple_fold_k() {
         let xs: Vec<char> = simple_fold('k').unwrap().collect();
         assert_eq!(xs, vec!['K', KELVIN]);

         let xs: Vec<char> = simple_fold('K').unwrap().collect();
         assert_eq!(xs, vec!['k', KELVIN]);

         let xs: Vec<char> = simple_fold(KELVIN).unwrap().collect();
         assert_eq!(xs, vec!['K', 'k']);
     }

     /// A plain two-member class: each letter maps to the other.
     #[test]
     fn simple_fold_a() {
         let xs: Vec<char> = simple_fold('a').unwrap().collect();
         assert_eq!(xs, vec!['A']);

         let xs: Vec<char> = simple_fold('A').unwrap().collect();
         assert_eq!(xs, vec!['a']);
     }

     /// Codepoints without a class report the next codepoint that has one.
     #[test]
     fn simple_fold_empty() {
         assert_eq!(Some('A'), simple_fold('?').unwrap_err());
         assert_eq!(Some('A'), simple_fold('@').unwrap_err());
         assert_eq!(Some('a'), simple_fold('[').unwrap_err());
         assert_eq!(Some('Ⰰ'), simple_fold('☃').unwrap_err());
     }

     /// Past the last table entry there is no next codepoint to report.
     #[test]
     fn simple_fold_max() {
         assert_eq!(None, simple_fold('\u{10FFFE}').unwrap_err());
         assert_eq!(None, simple_fold('\u{10FFFF}').unwrap_err());
     }

     /// Ranges are inclusive on both ends.
     #[test]
     fn range_contains() {
         assert!(contains_simple_case_mapping('A', 'A'));
         assert!(contains_simple_case_mapping('Z', 'Z'));
         assert!(contains_simple_case_mapping('A', 'Z'));
         assert!(contains_simple_case_mapping('@', 'A'));
         assert!(contains_simple_case_mapping('Z', '['));
         assert!(contains_simple_case_mapping('☃', 'Ⰰ'));

         assert!(!contains_simple_case_mapping('[', '['));
         assert!(!contains_simple_case_mapping('[', '`'));

         assert!(!contains_simple_case_mapping('☃', '☃'));
     }

     /// A one-letter `C` query must resolve to the general category `Other`.
     #[test]
     fn regression_466() {
         use super::{CanonicalClassQuery, ClassQuery};

         let q = ClassQuery::OneLetter('C');
         assert_eq!(
             q.canonicalize().unwrap(),
             CanonicalClassQuery::GeneralCategory("Other"));
     }
 }
	use std::cmp::Ordering;
	use std::result;

	use ucd_util::{self, PropertyValues};

	use hir;
	use unicode_tables::age;
	use unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE;
	use unicode_tables::general_category;
	use unicode_tables::property_bool;
	use unicode_tables::property_names::PROPERTY_NAMES;
	use unicode_tables::property_values::PROPERTY_VALUES;
	use unicode_tables::script;
	use unicode_tables::script_extension;

	/// Shorthand for a `Result` whose error type is this module's `Error`.
	type Result<T> = result::Result<T, Error>;

	/// An error that occurs when dealing with Unicode.
	///
	/// We don't impl the Error trait here because these always get converted
	/// into other public errors. (This error type isn't exported.)
	#[derive(Debug)]
	pub enum Error {
	/// The requested property name could not be resolved, or `class` has no
	/// table for it.
	PropertyNotFound,
	/// The property was recognized, but the requested value could not be
	/// resolved for it.
	PropertyValueNotFound,
	}

	/// Writes the UTF-8 encoding of `character` into the front of `dst`.
	///
	/// Returns the number of bytes written. If `dst` is too short to hold the
	/// whole sequence, nothing is written and `None` is returned.
	pub fn encode_utf8(character: char, dst: &mut [u8]) -> Option<usize> {
	    // TODO: Remove this function once we move to at least Rust 1.15, which
	    // provides char::encode_utf8 for us.
	    const TAG_CONT: u8 = 0b1000_0000;
	    const TAG_TWO: u8 = 0b1100_0000;
	    const TAG_THREE: u8 = 0b1110_0000;
	    const TAG_FOUR: u8 = 0b1111_0000;

	    let scalar = character as u32;
	    let width = character.len_utf8();
	    if dst.len() < width {
	        return None;
	    }

	    // Builds a continuation byte from the six bits that start at `shift`.
	    let trail = |shift: u32| ((scalar >> shift) & 0x3F) as u8 | TAG_CONT;
	    match width {
	        1 => {
	            dst[0] = scalar as u8;
	        }
	        2 => {
	            dst[0] = ((scalar >> 6) & 0x1F) as u8 | TAG_TWO;
	            dst[1] = trail(0);
	        }
	        3 => {
	            dst[0] = ((scalar >> 12) & 0x0F) as u8 | TAG_THREE;
	            dst[1] = trail(6);
	            dst[2] = trail(0);
	        }
	        _ => {
	            dst[0] = ((scalar >> 18) & 0x07) as u8 | TAG_FOUR;
	            dst[1] = trail(12);
	            dst[2] = trail(6);
	            dst[3] = trail(0);
	        }
	    }
	    Some(width)
	}

	/// Iterator over the members of one codepoint's simple case equivalence
	/// class, yielding each member by value.
	#[derive(Debug)]
	pub struct SimpleFoldIter(::std::slice::Iter<'static, char>);

	impl Iterator for SimpleFoldIter {
	    type Item = char;

	    fn next(&mut self) -> Option<char> {
	        // The wrapped slice iterator hands out references; copy the char out.
	        match self.0.next() {
	            Some(&folded) => Some(folded),
	            None => None,
	        }
	    }
	}

	/// Looks up the simple case folding equivalence class of `c`.
	///
	/// On a hit, returns `Ok` with an iterator over the class as stored in the
	/// table (the class does not include `c` itself).
	///
	/// On a miss, returns `Err` carrying the key of the table entry at the
	/// position where `c` would be inserted, i.e. the next scalar value that has
	/// a non-empty class, or `None` when `c` sorts after every entry. Callers can
	/// use that hint to skip ahead instead of probing every codepoint.
	pub fn simple_fold(c: char) -> result::Result<SimpleFoldIter, Option<char>> {
	    match CASE_FOLDING_SIMPLE.binary_search_by_key(&c, |&(key, _)| key) {
	        Ok(idx) => Ok(SimpleFoldIter(CASE_FOLDING_SIMPLE[idx].1.iter())),
	        // `idx` is the insertion point; it may be one past the last entry.
	        Err(idx) => Err(CASE_FOLDING_SIMPLE.get(idx).map(|&(next, _)| next)),
	    }
	}

	/// Returns true if and only if the given (inclusive) range contains at least
	/// one Unicode scalar value that has a non-empty non-trivial simple case
	/// mapping.
	///
	/// This function panics if `end < start`.
	pub fn contains_simple_case_mapping(start: char, end: char) -> bool {
	assert!(start <= end);
	CASE_FOLDING_SIMPLE
	.binary_search_by(\|&(c, _)\| {
	if start <= c && c <= end {
	Ordering::Equal
	} else if c > end {
	Ordering::Greater
	} else {
	Ordering::Less
	}
	}).is_ok()
	}

	/// A query for finding a character class defined by Unicode. This supports
	/// either use of a property name directly, or lookup by property value. The
	/// former generally refers to Binary properties (see UTS#44, Table 8), but
	/// as a special exception (see UTS#18, Section 1.2) both general categories
	/// (an enumeration) and scripts (a catalog) are supported as if each of their
	/// possible values were a binary property.
	///
	/// In all circumstances, property names and values are normalized and
	/// canonicalized. That is, `GC == gc == GeneralCategory == general_category`.
	///
	/// The lifetime `'a` refers to the shorter of the lifetimes of property name
	/// and property value.
	#[derive(Debug)]
	pub enum ClassQuery<'a> {
	/// Return a class named by a single letter.
	///
	/// The letter is resolved exactly like a `Binary` name, so it may name a
	/// binary property, a general category or a script.
	OneLetter(char),
	/// Return a class corresponding to a Unicode binary property.
	///
	/// Note that, by special exception (see UTS#18, Section 1.2), both
	/// general category values and script values are permitted here as if
	/// they were a binary property.
	Binary(&'a str),
	/// Return a class corresponding to all codepoints whose property
	/// (identified by `property_name`) corresponds to the given value
	/// (identified by `property_value`).
	ByValue {
	/// A property name (normalized before lookup).
	property_name: &'a str,
	/// A property value (normalized before lookup).
	property_value: &'a str,
	},
	}

	impl<'a> ClassQuery<'a> {
	fn canonicalize(&self) -> Result<CanonicalClassQuery> {
	match *self {
	ClassQuery::OneLetter(c) => self.canonical_binary(&c.to_string()),
	ClassQuery::Binary(name) => self.canonical_binary(name),
	ClassQuery::ByValue { property_name, property_value } => {
	let property_name = normalize(property_name);
	let property_value = normalize(property_value);

	let canon_name = match canonical_prop(&property_name) {
	None => return Err(Error::PropertyNotFound),
	Some(canon_name) => canon_name,
	};
	Ok(match canon_name {
	"General_Category" => {
	let canon = match canonical_gencat(&property_value) {
	None => return Err(Error::PropertyValueNotFound),
	Some(canon) => canon,
	};
	CanonicalClassQuery::GeneralCategory(canon)
	}
	"Script" => {
	let canon = match canonical_script(&property_value) {
	None => return Err(Error::PropertyValueNotFound),
	Some(canon) => canon,
	};
	CanonicalClassQuery::Script(canon)
	}
	_ => {
	let vals = match property_values(canon_name) {
	None => return Err(Error::PropertyValueNotFound),
	Some(vals) => vals,
	};
	let canon_val = match canonical_value(
	vals,
	&property_value,
	) {
	None => return Err(Error::PropertyValueNotFound),
	Some(canon_val) => canon_val,
	};
	CanonicalClassQuery::ByValue {
	property_name: canon_name,
	property_value: canon_val,
	}
	}
	})
	}
	}
	}

	fn canonical_binary(&self, name: &str) -> Result<CanonicalClassQuery> {
	let norm = normalize(name);

	if let Some(canon) = canonical_prop(&norm) {
	return Ok(CanonicalClassQuery::Binary(canon));
	}
	if let Some(canon) = canonical_gencat(&norm) {
	return Ok(CanonicalClassQuery::GeneralCategory(canon));
	}
	if let Some(canon) = canonical_script(&norm) {
	return Ok(CanonicalClassQuery::Script(canon));
	}
	Err(Error::PropertyNotFound)
	}
	}

	/// Like ClassQuery, but its parameters have been canonicalized. This also
	/// differentiates binary properties from flattened general categories and
	/// scripts.
	///
	/// Every string held here is `'static`, i.e. it no longer borrows from the
	/// query text.
	#[derive(Debug, Eq, PartialEq)]
	enum CanonicalClassQuery {
	/// The canonical binary property name.
	Binary(&'static str),
	/// The canonical general category name.
	GeneralCategory(&'static str),
	/// The canonical script name.
	Script(&'static str),
	/// An arbitrary association between property and value, both of which
	/// have been canonicalized.
	///
	/// Note that by construction, the property name of ByValue will never
	/// be General_Category or Script. Those two cases are subsumed by the
	/// eponymous variants.
	ByValue {
	/// The canonical property name.
	property_name: &'static str,
	/// The canonical property value.
	property_value: &'static str,
	},
	}

	/// Resolves `query` to the Unicode class it denotes.
	///
	/// The query is canonicalized first and any canonicalization error is
	/// returned as is. A lookup that misses its table, or a canonical query this
	/// function has no table for, is reported as an `Err`.
	pub fn class<'a>(query: ClassQuery<'a>) -> Result<hir::ClassUnicode> {
	    use self::CanonicalClassQuery::*;

	    let canonical = try!(query.canonicalize());
	    match canonical {
	        Binary(name) => {
	            match property_set(property_bool::BY_NAME, name) {
	                Some(ranges) => Ok(hir_class(ranges)),
	                None => Err(Error::PropertyNotFound),
	            }
	        }
	        // `Any`, `Assigned` and `ASCII` are synthesized here rather than
	        // looked up under their own names.
	        GeneralCategory("Any") => {
	            let everything = [('\0', '\u{10FFFF}')];
	            Ok(hir_class(&everything))
	        }
	        GeneralCategory("Assigned") => {
	            // Assigned is the complement of Unassigned.
	            match property_set(general_category::BY_NAME, "Unassigned") {
	                Some(unassigned) => {
	                    let mut cls = hir_class(unassigned);
	                    cls.negate();
	                    Ok(cls)
	                }
	                None => Err(Error::PropertyNotFound),
	            }
	        }
	        GeneralCategory("ASCII") => {
	            let ascii = [('\0', '\x7F')];
	            Ok(hir_class(&ascii))
	        }
	        GeneralCategory(name) => {
	            match property_set(general_category::BY_NAME, name) {
	                Some(ranges) => Ok(hir_class(ranges)),
	                None => Err(Error::PropertyValueNotFound),
	            }
	        }
	        Script(name) => {
	            match property_set(script::BY_NAME, name) {
	                Some(ranges) => Ok(hir_class(ranges)),
	                None => Err(Error::PropertyValueNotFound),
	            }
	        }
	        ByValue { property_name: "Age", property_value } => {
	            // Union every age set yielded for the requested age.
	            let mut merged = hir::ClassUnicode::empty();
	            for ranges in try!(ages(property_value)) {
	                merged.union(&hir_class(ranges));
	            }
	            Ok(merged)
	        }
	        ByValue { property_name: "Script_Extensions", property_value } => {
	            match property_set(script_extension::BY_NAME, property_value) {
	                Some(ranges) => Ok(hir_class(ranges)),
	                None => Err(Error::PropertyValueNotFound),
	            }
	        }
	        _ => {
	            // What else should we support?
	            Err(Error::PropertyNotFound)
	        }
	    }
	}

	/// Converts a slice of inclusive `(start, end)` scalar value pairs into a
	/// Unicode HIR class containing one range per pair.
	pub fn hir_class(ranges: &[(char, char)]) -> hir::ClassUnicode {
	    let mut hir_ranges: Vec<hir::ClassUnicodeRange> =
	        Vec::with_capacity(ranges.len());
	    for &(start, end) in ranges {
	        hir_ranges.push(hir::ClassUnicodeRange::new(start, end));
	    }
	    hir::ClassUnicode::new(hir_ranges)
	}

	/// Looks up `normalized_name` in the `PROPERTY_NAMES` table and returns the
	/// canonical property name, or `None` when the lookup finds nothing.
	///
	/// The argument is expected to have been passed through `normalize` already.
	fn canonical_prop(normalized_name: &str) -> Option<&'static str> {
	ucd_util::canonical_property_name(PROPERTY_NAMES, normalized_name)
	}

	/// Maps a normalized general category value to its canonical name.
	///
	/// `any`, `assigned` and `ascii` are recognized directly; every other value
	/// is looked up among the `General_Category` property values.
	///
	/// Panics if the value tables have no `General_Category` entry.
	fn canonical_gencat(normalized_value: &str) -> Option<&'static str> {
	    let special = match normalized_value {
	        "any" => Some("Any"),
	        "assigned" => Some("Assigned"),
	        "ascii" => Some("ASCII"),
	        _ => None,
	    };
	    // Only consult the table when the value is not one of the specials.
	    special.or_else(|| {
	        let gencats = property_values("General_Category").unwrap();
	        canonical_value(gencats, normalized_value)
	    })
	}

	/// Maps a normalized `Script` property value to its canonical name, or
	/// `None` when the value is not found.
	///
	/// Panics if the value tables have no `Script` entry (see the `unwrap`).
	fn canonical_script(normalized_value: &str) -> Option<&'static str> {
	let scripts = property_values("Script").unwrap();
	canonical_value(scripts, normalized_value)
	}

	/// Looks up `normalized_value` in the given property value table and
	/// returns its canonical name, or `None` when the lookup finds nothing.
	fn canonical_value(
	vals: PropertyValues,
	normalized_value: &str,
	) -> Option<&'static str> {
	ucd_util::canonical_property_value(vals, normalized_value)
	}

	/// Returns an owned copy of `x` after running it through
	/// `ucd_util::symbolic_name_normalize`; the input itself is left untouched.
	fn normalize(x: &str) -> String {
	    let mut normalized = String::from(x);
	    ucd_util::symbolic_name_normalize(&mut normalized);
	    normalized
	}

	/// Fetches the value table for the property with the given canonical name
	/// from `PROPERTY_VALUES`, or `None` when the lookup finds nothing.
	fn property_values(
	canonical_property_name: &'static str,
	) -> Option<PropertyValues>
	{
	ucd_util::property_values(PROPERTY_VALUES, canonical_property_name)
	}

	/// Finds the codepoint ranges stored under `canonical` in `name_map`.
	///
	/// `name_map` is binary searched on its name column, so it must be sorted by
	/// name. Returns `None` when no entry carries that exact name.
	fn property_set(
	    name_map: &'static [(&'static str, &'static [(char, char)])],
	    canonical: &'static str,
	) -> Option<&'static [(char, char)]> {
	    match name_map.binary_search_by_key(&canonical, |entry| entry.0) {
	        Ok(idx) => Some(name_map[idx].1),
	        Err(_) => None,
	    }
	}

	/// An iterator over Unicode Age sets. Each item corresponds to a set of
	/// codepoints that were added in a particular revision of Unicode. The
	/// iterator yields items in chronological order.
	#[derive(Debug)]
	struct AgeIter {
	/// The `(age name, codepoint ranges)` entries not yet yielded; the
	/// iterator consumes this slice from the front.
	ages: &'static [(&'static str, &'static [(char, char)])],
	}

	fn ages(canonical_age: &str) -> Result<AgeIter> {
	const AGES: &'static [(&'static str, &'static [(char, char)])] = &[
	("V1_1", age::V1_1),
	("V2_0", age::V2_0),
	("V2_1", age::V2_1),
	("V3_0", age::V3_0),
	("V3_1", age::V3_1),
	("V3_2", age::V3_2),
	("V4_0", age::V4_0),
	("V4_1", age::V4_1),
	("V5_0", age::V5_0),
	("V5_1", age::V5_1),
	("V5_2", age::V5_2),
	("V6_0", age::V6_0),
	("V6_1", age::V6_1),
	("V6_2", age::V6_2),
	("V6_3", age::V6_3),
	("V7_0", age::V7_0),
	("V8_0", age::V8_0),
	("V9_0", age::V9_0),
	("V10_0", age::V10_0),
	];
	assert_eq!(AGES.len(), age::BY_NAME.len(), "ages are out of sync");

	let pos = AGES.iter().position(\|&(age, _)\| canonical_age == age);
	match pos {
	None => Err(Error::PropertyValueNotFound),
	Some(i) => Ok(AgeIter { ages: &AGES[..i+1] }),
	}
	}

	impl Iterator for AgeIter {
	    type Item = &'static [(char, char)];

	    /// Pops the front entry and yields its codepoint ranges, or `None` once
	    /// every entry has been consumed.
	    fn next(&mut self) -> Option<&'static [(char, char)]> {
	        let remaining = self.ages;
	        match remaining.split_first() {
	            Some((head, rest)) => {
	                self.ages = rest;
	                Some(head.1)
	            }
	            None => None,
	        }
	    }
	}

	#[cfg(test)]
	mod tests {
	    use super::{contains_simple_case_mapping, simple_fold};

	    // U+212A KELVIN SIGN renders exactly like ASCII 'K'. It is spelled as an
	    // escape here so the assertions below cannot be misread (or silently
	    // normalized) into contradictory ASCII-only expectations.
	    const KELVIN: char = '\u{212A}';

	    /// 'k', 'K' and the Kelvin sign fold together; each lookup yields the
	    /// other two members and never the queried codepoint itself.
	    #[test]
	    fn simple_fold_k() {
	        let xs: Vec<char> = simple_fold('k').unwrap().collect();
	        assert_eq!(xs, vec!['K', KELVIN]);

	        let xs: Vec<char> = simple_fold('K').unwrap().collect();
	        assert_eq!(xs, vec!['k', KELVIN]);

	        let xs: Vec<char> = simple_fold(KELVIN).unwrap().collect();
	        assert_eq!(xs, vec!['K', 'k']);
	    }

	    /// A plain two-member class: each letter maps to the other.
	    #[test]
	    fn simple_fold_a() {
	        let xs: Vec<char> = simple_fold('a').unwrap().collect();
	        assert_eq!(xs, vec!['A']);

	        let xs: Vec<char> = simple_fold('A').unwrap().collect();
	        assert_eq!(xs, vec!['a']);
	    }

	    /// Codepoints without a class report the next codepoint that has one.
	    #[test]
	    fn simple_fold_empty() {
	        assert_eq!(Some('A'), simple_fold('?').unwrap_err());
	        assert_eq!(Some('A'), simple_fold('@').unwrap_err());
	        assert_eq!(Some('a'), simple_fold('[').unwrap_err());
	        assert_eq!(Some('Ⰰ'), simple_fold('☃').unwrap_err());
	    }

	    /// Past the last table entry there is no next codepoint to report.
	    #[test]
	    fn simple_fold_max() {
	        assert_eq!(None, simple_fold('\u{10FFFE}').unwrap_err());
	        assert_eq!(None, simple_fold('\u{10FFFF}').unwrap_err());
	    }

	    /// Ranges are inclusive on both ends.
	    #[test]
	    fn range_contains() {
	        assert!(contains_simple_case_mapping('A', 'A'));
	        assert!(contains_simple_case_mapping('Z', 'Z'));
	        assert!(contains_simple_case_mapping('A', 'Z'));
	        assert!(contains_simple_case_mapping('@', 'A'));
	        assert!(contains_simple_case_mapping('Z', '['));
	        assert!(contains_simple_case_mapping('☃', 'Ⰰ'));

	        assert!(!contains_simple_case_mapping('[', '['));
	        assert!(!contains_simple_case_mapping('[', '`'));

	        assert!(!contains_simple_case_mapping('☃', '☃'));
	    }

	    /// A one-letter `C` query must resolve to the general category `Other`.
	    #[test]
	    fn regression_466() {
	        use super::{CanonicalClassQuery, ClassQuery};

	        let q = ClassQuery::OneLetter('C');
	        assert_eq!(
	            q.canonicalize().unwrap(),
	            CanonicalClassQuery::GeneralCategory("Other"));
	    }
	}