| use std::ffi::OsStr; |
| #[cfg(not(any(target_os = "windows", target_arch = "wasm32")))] |
| use std::os::unix::ffi::OsStrExt; |
| #[cfg(any(target_os = "windows", target_arch = "wasm32"))] |
| use INVALID_UTF8; |
| |
/// Byte-level view of an `OsStr`-like type, declared only for Windows and
/// wasm32 builds, where this file does not import
/// `std::os::unix::ffi::OsStrExt`.
#[cfg(any(target_os = "windows", target_arch = "wasm32"))]
pub trait OsStrExt3 {
    /// Produces a borrowed `Self` from the given bytes.
    fn from_bytes(b: &[u8]) -> &Self;

    /// Borrows the contents of `self` as a byte slice.
    fn as_bytes(&self) -> &[u8];
}
| |
| #[doc(hidden)] |
| pub trait OsStrExt2 { |
| fn starts_with(&self, s: &[u8]) -> bool; |
| fn split_at_byte(&self, b: u8) -> (&OsStr, &OsStr); |
| fn split_at(&self, i: usize) -> (&OsStr, &OsStr); |
| fn trim_left_matches(&self, b: u8) -> &OsStr; |
| fn contains_byte(&self, b: u8) -> bool; |
| fn split(&self, b: u8) -> OsSplit; |
| } |
| |
// Prefix check for an `OsStr` that does not panic when the `OsStr` holds
// invalid Unicode.
//
// `encode_wide()` exposes the `OsStr` as a sequence of u16 code units. When
// `prefix` is valid UTF-8 it is re-encoded as UTF-16 and compared, unit by
// unit, against the front of `osstr`. When `prefix` is not valid UTF-8 the
// comparison is not meaningful, so the answer is simply `false`.
#[cfg(target_os = "windows")]
fn windows_osstr_starts_with(osstr: &OsStr, prefix: &[u8]) -> bool {
    use std::os::windows::ffi::OsStrExt;

    let prefix_str = match std::str::from_utf8(prefix) {
        Ok(valid) => valid,
        Err(_) => return false,
    };

    let mut haystack_units = osstr.encode_wide();
    // Every prefix unit must equal the next unit of `osstr`. `all` stops at
    // the first mismatch (including `osstr` running out first) and yields
    // `true` once the prefix is exhausted, which covers the empty prefix too.
    prefix_str
        .encode_utf16()
        .all(|wanted| haystack_units.next() == Some(wanted))
}
| |
// Exercises `windows_osstr_starts_with` with matching and non-matching
// prefixes, a prefix that is not valid UTF-8, and an `OsString` that is not
// valid UTF-16.
#[test]
#[cfg(target_os = "windows")]
fn test_windows_osstr_starts_with() {
    use std::ffi::OsString;
    use std::os::windows::ffi::OsStringExt;

    // Builds an OsString by widening every input byte to one u16 code unit.
    fn from_ascii(ascii: &[u8]) -> OsString {
        let u16_vec: Vec<u16> = ascii.iter().map(|&c| u16::from(c)).collect();
        OsString::from_wide(&u16_vec)
    }

    // Test all the basic cases.
    assert!(windows_osstr_starts_with(&from_ascii(b"abcdef"), b"abc"));
    assert!(windows_osstr_starts_with(&from_ascii(b"abcdef"), b"abcdef"));
    assert!(!windows_osstr_starts_with(&from_ascii(b"abcdef"), b"def"));
    assert!(!windows_osstr_starts_with(&from_ascii(b"abc"), b"abcd"));

    // Test the case where the candidate prefix is not valid UTF-8. A
    // standalone \xff byte is not valid UTF-8 (and is outside ASCII), while
    // the helper above widens it to the single code unit 0x00FF. Thus
    // although these byte strings look identical, they do not match.
    assert!(!windows_osstr_starts_with(&from_ascii(b"\xff"), b"\xff"));

    // Test the case where the OsString is not valid UTF-16. It should still be
    // possible to match the valid characters at the front.
    //
    // UTF-16 surrogate characters are only valid in pairs. Including one on
    // the end by itself makes this invalid UTF-16.
    let surrogate_char: u16 = 0xDC00;
    // No `mut`: the value is only read below.
    let invalid_unicode =
        OsString::from_wide(&['a' as u16, 'b' as u16, 'c' as u16, surrogate_char]);
    assert!(
        invalid_unicode.to_str().is_none(),
        "This string is invalid Unicode, and conversion to &str should fail.",
    );
    assert!(windows_osstr_starts_with(&invalid_unicode, b"abc"));
    assert!(!windows_osstr_starts_with(&invalid_unicode, b"abcd"));
}
| |
#[cfg(any(target_os = "windows", target_arch = "wasm32"))]
impl OsStrExt3 for OsStr {
    /// Reinterprets `b` as an `OsStr` without copying or validating it.
    fn from_bytes(b: &[u8]) -> &Self {
        use std::mem;
        // NOTE(review): this assumes `&OsStr` and `&[u8]` share a
        // representation on these targets; confirm against the std
        // documentation for the supported toolchains.
        unsafe { mem::transmute::<&[u8], &OsStr>(b) }
    }

    /// Returns the UTF-8 bytes of `self`.
    ///
    /// # Panics
    ///
    /// Panics with the `INVALID_UTF8` message when `to_str()` fails, i.e.
    /// when `self` is not valid Unicode.
    fn as_bytes(&self) -> &[u8] {
        self.to_str().expect(INVALID_UTF8).as_bytes()
    }
}
| |
| impl OsStrExt2 for OsStr { |
| fn starts_with(&self, s: &[u8]) -> bool { |
| #[cfg(target_os = "windows")] |
| { |
| // On Windows, the as_bytes() method will panic if the OsStr |
| // contains invalid Unicode. To avoid this, we use a |
| // Windows-specific starts-with function that doesn't rely on |
| // as_bytes(). This is necessary for Windows command line |
| // applications to handle non-Unicode arguments successfully. This |
| // allows common cases like `clap.exe [invalid]` to succeed, though |
| // cases that require string splitting will still fail, like |
| // `clap.exe --arg=[invalid]`. Note that this entire module is |
| // replaced in Clap 3.x, so this workaround is specific to the 2.x |
| // branch. |
| return windows_osstr_starts_with(self, s); |
| } |
| self.as_bytes().starts_with(s) |
| } |
| |
| fn contains_byte(&self, byte: u8) -> bool { |
| for b in self.as_bytes() { |
| if b == &byte { |
| return true; |
| } |
| } |
| false |
| } |
| |
| fn split_at_byte(&self, byte: u8) -> (&OsStr, &OsStr) { |
| for (i, b) in self.as_bytes().iter().enumerate() { |
| if b == &byte { |
| return ( |
| OsStr::from_bytes(&self.as_bytes()[..i]), |
| OsStr::from_bytes(&self.as_bytes()[i + 1..]), |
| ); |
| } |
| } |
| ( |
| &*self, |
| OsStr::from_bytes(&self.as_bytes()[self.len()..self.len()]), |
| ) |
| } |
| |
| fn trim_left_matches(&self, byte: u8) -> &OsStr { |
| let mut found = false; |
| for (i, b) in self.as_bytes().iter().enumerate() { |
| if b != &byte { |
| return OsStr::from_bytes(&self.as_bytes()[i..]); |
| } else { |
| found = true; |
| } |
| } |
| if found { |
| return OsStr::from_bytes(&self.as_bytes()[self.len()..]); |
| } |
| &*self |
| } |
| |
| fn split_at(&self, i: usize) -> (&OsStr, &OsStr) { |
| ( |
| OsStr::from_bytes(&self.as_bytes()[..i]), |
| OsStr::from_bytes(&self.as_bytes()[i..]), |
| ) |
| } |
| |
| fn split(&self, b: u8) -> OsSplit { |
| OsSplit { |
| sep: b, |
| val: self.as_bytes(), |
| pos: 0, |
| } |
| } |
| } |
| |
/// State of an iteration over the separator-delimited pieces of a byte slice.
#[doc(hidden)]
#[derive(Clone, Debug)]
pub struct OsSplit<'a> {
    /// Byte that separates one piece from the next.
    sep: u8,
    /// The bytes being split.
    val: &'a [u8],
    /// Index into `val` at which the next piece starts.
    pos: usize,
}
| |
| impl<'a> Iterator for OsSplit<'a> { |
| type Item = &'a OsStr; |
| |
| fn next(&mut self) -> Option<&'a OsStr> { |
| debugln!("OsSplit::next: self={:?}", self); |
| if self.pos == self.val.len() { |
| return None; |
| } |
| let start = self.pos; |
| for b in &self.val[start..] { |
| self.pos += 1; |
| if *b == self.sep { |
| return Some(OsStr::from_bytes(&self.val[start..self.pos - 1])); |
| } |
| } |
| Some(OsStr::from_bytes(&self.val[start..])) |
| } |
| } |