// src/osstringext.rs - platform/external/rust/crates/clap - Git at Google

 use std::ffi::OsStr;
 #[cfg(not(any(target_os = "windows", target_arch = "wasm32")))]
 use std::os::unix::ffi::OsStrExt;
 #[cfg(any(target_os = "windows", target_arch = "wasm32"))]
 use INVALID_UTF8;

 /// Byte-level view of an `OsStr` for targets where
 /// `std::os::unix::ffi::OsStrExt` is not imported (Windows and wasm32).
 ///
 /// The method names mirror the Unix extension trait so the rest of this
 /// module can call `from_bytes` / `as_bytes` on every platform.
 #[cfg(any(target_os = "windows", target_arch = "wasm32"))]
 pub trait OsStrExt3 {
     /// Reinterprets a byte slice as a borrowed `Self`.
     fn from_bytes(b: &[u8]) -> &Self;
     /// Returns the bytes of the string. The `OsStr` implementation in this
     /// file panics with `INVALID_UTF8` when the value is not valid Unicode.
     fn as_bytes(&self) -> &[u8];
 }

 /// Byte-oriented helpers for `OsStr` used by the argument parser.
 ///
 /// Every separator or prefix is given as raw bytes (`u8` / `&[u8]`).
 #[doc(hidden)]
 pub trait OsStrExt2 {
     /// Returns `true` when the string begins with the bytes in `s`.
     fn starts_with(&self, s: &[u8]) -> bool;
     /// Splits around the first occurrence of byte `b`; the byte itself is
     /// dropped. When `b` is absent the result is `(self, "")`.
     fn split_at_byte(&self, b: u8) -> (&OsStr, &OsStr);
     /// Splits at byte offset `i`, giving `[..i]` and `[i..]`.
     fn split_at(&self, i: usize) -> (&OsStr, &OsStr);
     /// Removes every leading occurrence of byte `b`.
     fn trim_left_matches(&self, b: u8) -> &OsStr;
     /// Returns `true` when byte `b` occurs anywhere in the string.
     fn contains_byte(&self, b: u8) -> bool;
     /// Returns an iterator over the pieces separated by byte `b`.
     fn split(&self, b: u8) -> OsSplit;
 }

 // Prefix test for Windows that never panics on ill-formed Unicode.
 //
 // `prefix` is decoded as UTF-8 and re-encoded as UTF-16 so that it can be
 // compared, code unit by code unit, with the wide encoding of `osstr`.
 // A `prefix` that is not valid UTF-8 cannot be compared this way, so the
 // answer in that case is simply `false`.
 #[cfg(target_os = "windows")]
 fn windows_osstr_starts_with(osstr: &OsStr, prefix: &[u8]) -> bool {
     use std::os::windows::ffi::OsStrExt;
     match std::str::from_utf8(prefix) {
         Ok(text) => {
             let mut wide = osstr.encode_wide();
             // Every prefix unit must equal the next unit of `osstr`; running
             // out of `osstr` units first yields `None` and fails the check.
             text.encode_utf16().all(|unit| wide.next() == Some(unit))
         }
         Err(_) => false,
     }
 }

 // Exercises `windows_osstr_starts_with` with plain, non-UTF-8 and
 // ill-formed UTF-16 inputs.
 #[test]
 #[cfg(target_os = "windows")]
 fn test_windows_osstr_starts_with() {
     use std::ffi::OsString;
     use std::os::windows::ffi::OsStringExt;

     // Widens each input byte to one UTF-16 code unit.
     fn from_ascii(ascii: &[u8]) -> OsString {
         let u16_vec: Vec<u16> = ascii.iter().map(|&c| u16::from(c)).collect();
         OsString::from_wide(&u16_vec)
     }

     // Test all the basic cases.
     assert!(windows_osstr_starts_with(&from_ascii(b"abcdef"), b"abc"));
     assert!(windows_osstr_starts_with(&from_ascii(b"abcdef"), b"abcdef"));
     assert!(!windows_osstr_starts_with(&from_ascii(b"abcdef"), b"def"));
     assert!(!windows_osstr_starts_with(&from_ascii(b"abc"), b"abcd"));

     // Test the case where the candidate prefix is not valid UTF-8. A
     // standalone \xff byte widens to the valid code unit U+00FF, but on its
     // own it is not valid UTF-8. Thus although these strings look identical,
     // they do not match.
     assert!(!windows_osstr_starts_with(&from_ascii(b"\xff"), b"\xff"));

     // Test the case where the OsString is not valid UTF-16. It should still be
     // possible to match the valid characters at the front.
     //
     // UTF-16 surrogate characters are only valid in pairs. Including one on
     // the end by itself makes this invalid UTF-16.
     let surrogate_char: u16 = 0xDC00;
     let invalid_unicode =
         OsString::from_wide(&['a' as u16, 'b' as u16, 'c' as u16, surrogate_char]);
     assert!(
         invalid_unicode.to_str().is_none(),
         "This string is invalid Unicode, and conversion to &str should fail.",
     );
     assert!(windows_osstr_starts_with(&invalid_unicode, b"abc"));
     assert!(!windows_osstr_starts_with(&invalid_unicode, b"abcd"));
 }

 // Byte access for `OsStr` on Windows and wasm32, where the Unix extension
 // trait is not imported by this file.
 #[cfg(any(target_os = "windows", target_arch = "wasm32"))]
 impl OsStrExt3 for OsStr {
     // Reinterprets `b` as an `OsStr` without copying or validating it.
     fn from_bytes(b: &[u8]) -> &Self {
         use std::mem;
         // NOTE(review): this relies on `&OsStr` and `&[u8]` being
         // layout-compatible on these targets; nothing in this file checks
         // that. Callers in this module only pass slices of `as_bytes()`
         // output; confirm no other caller passes arbitrary bytes.
         unsafe { mem::transmute(b) }
     }
     // Returns the UTF-8 bytes of the string; panics with `INVALID_UTF8` when
     // `to_str()` fails, i.e. when the value is not valid Unicode.
     fn as_bytes(&self) -> &[u8] {
         self.to_str().map(|s| s.as_bytes()).expect(INVALID_UTF8)
     }
 }

 // Byte-oriented helpers on `OsStr`. Everything except the Windows branch of
 // `starts_with` goes through `as_bytes()`.
 impl OsStrExt2 for OsStr {
     // Returns `true` when the string begins with the bytes in `s`.
     //
     // Exactly one of the two cfg-gated blocks below survives compilation and
     // becomes the function's value, so neither platform is left with
     // unreachable code.
     fn starts_with(&self, s: &[u8]) -> bool {
         #[cfg(target_os = "windows")]
         {
             // On Windows, the as_bytes() method will panic if the OsStr
             // contains invalid Unicode. To avoid this, we use a
             // Windows-specific starts-with function that doesn't rely on
             // as_bytes(). This is necessary for Windows command line
             // applications to handle non-Unicode arguments successfully. This
             // allows common cases like `clap.exe [invalid]` to succeed, though
             // cases that require string splitting will still fail, like
             // `clap.exe --arg=[invalid]`. Note that this entire module is
             // replaced in Clap 3.x, so this workaround is specific to the 2.x
             // branch.
             windows_osstr_starts_with(self, s)
         }
         #[cfg(not(target_os = "windows"))]
         {
             self.as_bytes().starts_with(s)
         }
     }

     // Returns `true` when `byte` occurs anywhere in the string.
     fn contains_byte(&self, byte: u8) -> bool {
         self.as_bytes().contains(&byte)
     }

     // Splits around the first occurrence of `byte`, dropping the byte itself.
     // When `byte` is absent the result is `(self, "")`.
     fn split_at_byte(&self, byte: u8) -> (&OsStr, &OsStr) {
         let bytes = self.as_bytes();
         match bytes.iter().position(|b| *b == byte) {
             Some(i) => (
                 OsStr::from_bytes(&bytes[..i]),
                 OsStr::from_bytes(&bytes[i + 1..]),
             ),
             None => (self, OsStr::from_bytes(&bytes[bytes.len()..])),
         }
     }

     // Strips every leading occurrence of `byte`. A string made up only of
     // `byte` (or an empty string) yields an empty `OsStr`.
     fn trim_left_matches(&self, byte: u8) -> &OsStr {
         let bytes = self.as_bytes();
         let first_kept = bytes
             .iter()
             .position(|b| *b != byte)
             .unwrap_or(bytes.len());
         OsStr::from_bytes(&bytes[first_kept..])
     }

     // Splits at byte offset `i` into `[..i]` and `[i..]`.
     //
     // Panics if `i` is greater than the length of the byte representation.
     fn split_at(&self, i: usize) -> (&OsStr, &OsStr) {
         let bytes = self.as_bytes();
         (OsStr::from_bytes(&bytes[..i]), OsStr::from_bytes(&bytes[i..]))
     }

     // Returns an iterator over the pieces of the string separated by `b`,
     // starting at the first byte.
     fn split(&self, b: u8) -> OsSplit {
         OsSplit {
             sep: b,
             val: self.as_bytes(),
             pos: 0,
         }
     }
 }

 /// Iterator state for splitting the bytes of an `OsStr` on a single
 /// separator byte; built by `OsStrExt2::split`.
 #[doc(hidden)]
 #[derive(Clone, Debug)]
 pub struct OsSplit<'a> {
     // Separator byte that ends each piece.
     sep: u8,
     // The full byte string being split.
     val: &'a [u8],
     // Offset into `val` at which the next piece starts.
     pos: usize,
 }

 impl<'a> Iterator for OsSplit<'a> {
     type Item = &'a OsStr;

     // Yields the bytes from the current position up to (not including) the
     // next separator, then moves past that separator. Once the position has
     // reached the end of the input the iterator is exhausted, so a trailing
     // separator does not produce a final empty piece.
     fn next(&mut self) -> Option<&'a OsStr> {
         debugln!("OsSplit::next: self={:?}", self);
         let bytes = self.val;
         let start = self.pos;
         if start == bytes.len() {
             return None;
         }
         let rest = &bytes[start..];
         let sep = self.sep;
         match rest.iter().position(|b| *b == sep) {
             Some(offset) => {
                 // Resume just after the separator on the next call.
                 self.pos = start + offset + 1;
                 Some(OsStr::from_bytes(&rest[..offset]))
             }
             None => {
                 // No separator left: hand out the remainder and finish.
                 self.pos = bytes.len();
                 Some(OsStr::from_bytes(rest))
             }
         }
     }
 }
	use std::ffi::OsStr;
	#[cfg(not(any(target_os = "windows", target_arch = "wasm32")))]
	use std::os::unix::ffi::OsStrExt;
	#[cfg(any(target_os = "windows", target_arch = "wasm32"))]
	use INVALID_UTF8;

	/// Byte-level view of an `OsStr` for targets where
	/// `std::os::unix::ffi::OsStrExt` is not imported (Windows and wasm32).
	#[cfg(any(target_os = "windows", target_arch = "wasm32"))]
	pub trait OsStrExt3 {
	/// Reinterprets a byte slice as a borrowed `Self`.
	fn from_bytes(b: &[u8]) -> &Self;
	/// Returns the bytes of the string. The `OsStr` implementation in this
	/// file panics with `INVALID_UTF8` when the value is not valid Unicode.
	fn as_bytes(&self) -> &[u8];
	}

	/// Byte-oriented helpers for `OsStr` used by the argument parser.
	#[doc(hidden)]
	pub trait OsStrExt2 {
	/// Returns `true` when the string begins with the bytes in `s`.
	fn starts_with(&self, s: &[u8]) -> bool;
	/// Splits around the first occurrence of byte `b`; the byte itself is
	/// dropped. When `b` is absent the result is `(self, "")`.
	fn split_at_byte(&self, b: u8) -> (&OsStr, &OsStr);
	/// Splits at byte offset `i`, giving `[..i]` and `[i..]`.
	fn split_at(&self, i: usize) -> (&OsStr, &OsStr);
	/// Removes every leading occurrence of byte `b`.
	fn trim_left_matches(&self, b: u8) -> &OsStr;
	/// Returns `true` when byte `b` occurs anywhere in the string.
	fn contains_byte(&self, b: u8) -> bool;
	/// Returns an iterator over the pieces separated by byte `b`.
	fn split(&self, b: u8) -> OsSplit;
	}

	// Prefix test for Windows that never panics on ill-formed Unicode.
	//
	// `prefix` is decoded as UTF-8 and re-encoded as UTF-16 so that it can be
	// compared, code unit by code unit, with the wide encoding of `osstr`.
	// A `prefix` that is not valid UTF-8 cannot be compared this way, so the
	// answer in that case is simply `false`.
	#[cfg(target_os = "windows")]
	fn windows_osstr_starts_with(osstr: &OsStr, prefix: &[u8]) -> bool {
	    use std::os::windows::ffi::OsStrExt;
	    match std::str::from_utf8(prefix) {
	        Ok(text) => {
	            let mut wide = osstr.encode_wide();
	            // Every prefix unit must equal the next unit of `osstr`; running
	            // out of `osstr` units first yields `None` and fails the check.
	            text.encode_utf16().all(|unit| wide.next() == Some(unit))
	        }
	        Err(_) => false,
	    }
	}

	// Exercises `windows_osstr_starts_with` with plain, non-UTF-8 and
	// ill-formed UTF-16 inputs.
	#[test]
	#[cfg(target_os = "windows")]
	fn test_windows_osstr_starts_with() {
	    use std::ffi::OsString;
	    use std::os::windows::ffi::OsStringExt;

	    // Widens each input byte to one UTF-16 code unit.
	    fn from_ascii(ascii: &[u8]) -> OsString {
	        let u16_vec: Vec<u16> = ascii.iter().map(|&c| u16::from(c)).collect();
	        OsString::from_wide(&u16_vec)
	    }

	    // Test all the basic cases.
	    assert!(windows_osstr_starts_with(&from_ascii(b"abcdef"), b"abc"));
	    assert!(windows_osstr_starts_with(&from_ascii(b"abcdef"), b"abcdef"));
	    assert!(!windows_osstr_starts_with(&from_ascii(b"abcdef"), b"def"));
	    assert!(!windows_osstr_starts_with(&from_ascii(b"abc"), b"abcd"));

	    // Test the case where the candidate prefix is not valid UTF-8. A
	    // standalone \xff byte widens to the valid code unit U+00FF, but on its
	    // own it is not valid UTF-8. Thus although these strings look identical,
	    // they do not match.
	    assert!(!windows_osstr_starts_with(&from_ascii(b"\xff"), b"\xff"));

	    // Test the case where the OsString is not valid UTF-16. It should still be
	    // possible to match the valid characters at the front.
	    //
	    // UTF-16 surrogate characters are only valid in pairs. Including one on
	    // the end by itself makes this invalid UTF-16.
	    let surrogate_char: u16 = 0xDC00;
	    let invalid_unicode =
	        OsString::from_wide(&['a' as u16, 'b' as u16, 'c' as u16, surrogate_char]);
	    assert!(
	        invalid_unicode.to_str().is_none(),
	        "This string is invalid Unicode, and conversion to &str should fail.",
	    );
	    assert!(windows_osstr_starts_with(&invalid_unicode, b"abc"));
	    assert!(!windows_osstr_starts_with(&invalid_unicode, b"abcd"));
	}

	// Byte access for `OsStr` on Windows and wasm32, where the Unix extension
	// trait is not imported by this file.
	#[cfg(any(target_os = "windows", target_arch = "wasm32"))]
	impl OsStrExt3 for OsStr {
	    // Reinterprets `b` as an `OsStr` without copying or validating it.
	    fn from_bytes(b: &[u8]) -> &Self {
	        use std::mem;
	        // NOTE(review): this relies on `&OsStr` and `&[u8]` being
	        // layout-compatible on these targets; nothing in this file checks
	        // that. Confirm callers only pass slices of `as_bytes()` output.
	        unsafe { mem::transmute(b) }
	    }
	    // Returns the UTF-8 bytes of the string; panics with `INVALID_UTF8` when
	    // `to_str()` fails, i.e. when the value is not valid Unicode.
	    fn as_bytes(&self) -> &[u8] {
	        self.to_str().map(|s| s.as_bytes()).expect(INVALID_UTF8)
	    }
	}

	// Byte-oriented helpers on `OsStr`. Everything except the Windows branch of
	// `starts_with` goes through `as_bytes()`.
	impl OsStrExt2 for OsStr {
	    // Returns `true` when the string begins with the bytes in `s`.
	    //
	    // Exactly one of the two cfg-gated blocks below survives compilation and
	    // becomes the function's value, so neither platform is left with
	    // unreachable code.
	    fn starts_with(&self, s: &[u8]) -> bool {
	        #[cfg(target_os = "windows")]
	        {
	            // On Windows, the as_bytes() method will panic if the OsStr
	            // contains invalid Unicode. To avoid this, we use a
	            // Windows-specific starts-with function that doesn't rely on
	            // as_bytes(). This is necessary for Windows command line
	            // applications to handle non-Unicode arguments successfully. This
	            // allows common cases like `clap.exe [invalid]` to succeed, though
	            // cases that require string splitting will still fail, like
	            // `clap.exe --arg=[invalid]`. Note that this entire module is
	            // replaced in Clap 3.x, so this workaround is specific to the 2.x
	            // branch.
	            windows_osstr_starts_with(self, s)
	        }
	        #[cfg(not(target_os = "windows"))]
	        {
	            self.as_bytes().starts_with(s)
	        }
	    }

	    // Returns `true` when `byte` occurs anywhere in the string.
	    fn contains_byte(&self, byte: u8) -> bool {
	        self.as_bytes().contains(&byte)
	    }

	    // Splits around the first occurrence of `byte`, dropping the byte itself.
	    // When `byte` is absent the result is `(self, "")`.
	    fn split_at_byte(&self, byte: u8) -> (&OsStr, &OsStr) {
	        let bytes = self.as_bytes();
	        match bytes.iter().position(|b| *b == byte) {
	            Some(i) => (
	                OsStr::from_bytes(&bytes[..i]),
	                OsStr::from_bytes(&bytes[i + 1..]),
	            ),
	            None => (self, OsStr::from_bytes(&bytes[bytes.len()..])),
	        }
	    }

	    // Strips every leading occurrence of `byte`. A string made up only of
	    // `byte` (or an empty string) yields an empty `OsStr`.
	    fn trim_left_matches(&self, byte: u8) -> &OsStr {
	        let bytes = self.as_bytes();
	        let first_kept = bytes
	            .iter()
	            .position(|b| *b != byte)
	            .unwrap_or(bytes.len());
	        OsStr::from_bytes(&bytes[first_kept..])
	    }

	    // Splits at byte offset `i` into `[..i]` and `[i..]`.
	    //
	    // Panics if `i` is greater than the length of the byte representation.
	    fn split_at(&self, i: usize) -> (&OsStr, &OsStr) {
	        let bytes = self.as_bytes();
	        (OsStr::from_bytes(&bytes[..i]), OsStr::from_bytes(&bytes[i..]))
	    }

	    // Returns an iterator over the pieces of the string separated by `b`,
	    // starting at the first byte.
	    fn split(&self, b: u8) -> OsSplit {
	        OsSplit {
	            sep: b,
	            val: self.as_bytes(),
	            pos: 0,
	        }
	    }
	}

	/// Iterator state for splitting the bytes of an `OsStr` on a single
	/// separator byte; built by `OsStrExt2::split`.
	#[doc(hidden)]
	#[derive(Clone, Debug)]
	pub struct OsSplit<'a> {
	// Separator byte that ends each piece.
	sep: u8,
	// The full byte string being split.
	val: &'a [u8],
	// Offset into `val` at which the next piece starts.
	pos: usize,
	}

	impl<'a> Iterator for OsSplit<'a> {
	    type Item = &'a OsStr;

	    // Yields the bytes from the current position up to (not including) the
	    // next separator, then moves past that separator. Once the position has
	    // reached the end of the input the iterator is exhausted, so a trailing
	    // separator does not produce a final empty piece.
	    fn next(&mut self) -> Option<&'a OsStr> {
	        debugln!("OsSplit::next: self={:?}", self);
	        let bytes = self.val;
	        let start = self.pos;
	        if start == bytes.len() {
	            return None;
	        }
	        let rest = &bytes[start..];
	        let sep = self.sep;
	        match rest.iter().position(|b| *b == sep) {
	            Some(offset) => {
	                // Resume just after the separator on the next call.
	                self.pos = start + offset + 1;
	                Some(OsStr::from_bytes(&rest[..offset]))
	            }
	            None => {
	                // No separator left: hand out the remainder and finish.
	                self.pos = bytes.len();
	                Some(OsStr::from_bytes(rest))
	            }
	        }
	    }
	}