| // Copyright 2015 Nicholas Allegra (comex). |
| // Licensed under the Apache License, Version 2.0 <https://www.apache.org/licenses/LICENSE-2.0> or |
| // the MIT license <https://opensource.org/licenses/MIT>, at your option. This file may not be |
| // copied, modified, or distributed except according to those terms. |
| |
| //! Parse strings like, and escape strings for, POSIX shells. |
| //! |
| //! Same idea as (but implementation not directly based on) the Python shlex module. |
| //! |
| //! Disabling the `std` feature (which is enabled by default) will allow the crate to work in |
| //! `no_std` environments, where the `alloc` crate, and a global allocator, are available. |
| //! |
| //! ## <span style="color:red">Warning</span> |
| //! |
| //! The [`try_quote`]/[`try_join`] family of APIs does not quote control characters (because they |
| //! cannot be quoted portably). |
| //! |
| //! This is fully safe in noninteractive contexts, like shell scripts and `sh -c` arguments (or |
| //! even scripts `source`d from interactive shells). |
| //! |
| //! But if you are quoting for human consumption, you should keep in mind that ugly inputs produce |
| //! ugly outputs (which may not be copy-pastable). |
| //! |
| //! And if by chance you are piping the output of [`try_quote`]/[`try_join`] directly to the stdin |
| //! of an interactive shell, you should stop, because control characters can lead to arbitrary |
| //! command injection. |
| //! |
| //! For more information, and for information about more minor issues, please see [quoting_warning]. |
| //! |
| //! ## Compatibility |
| //! |
| //! This crate's quoting functionality tries to be compatible with **any POSIX-compatible shell**; |
| //! it's tested against `bash`, `zsh`, `dash`, Busybox `ash`, and `mksh`, plus `fish` (which is not |
| //! POSIX-compatible but close enough). |
| //! |
| //! It also aims to be compatible with Python `shlex` and C `wordexp`. |
| |
| #![cfg_attr(not(feature = "std"), no_std)] |
| |
| extern crate alloc; |
| use alloc::vec::Vec; |
| use alloc::borrow::Cow; |
| use alloc::string::String; |
| #[cfg(test)] |
| use alloc::vec; |
| #[cfg(test)] |
| use alloc::borrow::ToOwned; |
| |
| pub mod bytes; |
| #[cfg(all(doc, not(doctest)))] |
| #[path = "quoting_warning.md"] |
| pub mod quoting_warning; |
| |
| /// An iterator that takes an input string and splits it into the words using the same syntax as |
| /// the POSIX shell. |
| /// |
| /// See [`bytes::Shlex`]. |
| pub struct Shlex<'a>(bytes::Shlex<'a>); |
| |
| impl<'a> Shlex<'a> { |
| pub fn new(in_str: &'a str) -> Self { |
| Self(bytes::Shlex::new(in_str.as_bytes())) |
| } |
| } |
| |
| impl<'a> Iterator for Shlex<'a> { |
| type Item = String; |
| fn next(&mut self) -> Option<String> { |
| self.0.next().map(|byte_word| { |
| // Safety: given valid UTF-8, bytes::Shlex will always return valid UTF-8. |
| unsafe { String::from_utf8_unchecked(byte_word) } |
| }) |
| } |
| } |
| |
| impl<'a> core::ops::Deref for Shlex<'a> { |
| type Target = bytes::Shlex<'a>; |
| |
| fn deref(&self) -> &Self::Target { |
| &self.0 |
| } |
| } |
| |
| impl<'a> core::ops::DerefMut for Shlex<'a> { |
| fn deref_mut(&mut self) -> &mut Self::Target { |
| &mut self.0 |
| } |
| } |
| |
| /// Convenience function that consumes the whole string at once. Returns None if the input was |
| /// erroneous. |
| pub fn split(in_str: &str) -> Option<Vec<String>> { |
| let mut shl = Shlex::new(in_str); |
| let res = shl.by_ref().collect(); |
| if shl.had_error { None } else { Some(res) } |
| } |
| |
/// The error type of [`Quoter::quote`], [`Quoter::join`], etc. (and of their [`bytes`]
/// counterparts).
///
/// With default settings, [`QuoteError::Nul`] is the only error that can be returned, and after
/// calling `allow_nul(true)` no error can be returned at all. Variants added in the future will
/// not be enabled by default; each will be switched on through a corresponding non-default
/// [`Quoter`] option.
///
/// ...In theory. In the unlikely event that more classes of input are discovered which, like nul
/// bytes, are fundamentally unsafe to quote even for non-interactive shells, the risk will be
/// mitigated by adding [`QuoteError`] variants that *are* enabled by default.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum QuoteError {
    /// A nul byte was found in the input. In most cases, shells fundamentally [cannot handle
    /// strings containing nul bytes](quoting_warning#nul-bytes), however they are quoted. If you
    /// are sure you can handle nul bytes, call `allow_nul(true)` on the `Quoter` to let them pass
    /// through.
    Nul,
}

impl core::fmt::Display for QuoteError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Pick the message first so every variant funnels into a single write.
        let message = match *self {
            QuoteError::Nul => "cannot shell-quote string containing nul byte",
        };
        f.write_str(message)
    }
}

#[cfg(feature = "std")]
impl std::error::Error for QuoteError {}
| |
| /// A more configurable interface to quote strings. If you only want the default settings you can |
| /// use the convenience functions [`try_quote`] and [`try_join`]. |
| /// |
| /// The bytes equivalent is [`bytes::Quoter`]. |
| #[derive(Default, Debug, Clone)] |
| pub struct Quoter { |
| inner: bytes::Quoter, |
| } |
| |
| impl Quoter { |
| /// Create a new [`Quoter`] with default settings. |
| #[inline] |
| pub fn new() -> Self { |
| Self::default() |
| } |
| |
| /// Set whether to allow [nul bytes](quoting_warning#nul-bytes). By default they are not |
| /// allowed and will result in an error of [`QuoteError::Nul`]. |
| #[inline] |
| pub fn allow_nul(mut self, allow: bool) -> Self { |
| self.inner = self.inner.allow_nul(allow); |
| self |
| } |
| |
| /// Convenience function that consumes an iterable of words and turns it into a single string, |
| /// quoting words when necessary. Consecutive words will be separated by a single space. |
| pub fn join<'a, I: IntoIterator<Item = &'a str>>(&self, words: I) -> Result<String, QuoteError> { |
| // Safety: given valid UTF-8, bytes::join() will always return valid UTF-8. |
| self.inner.join(words.into_iter().map(|s| s.as_bytes())) |
| .map(|bytes| unsafe { String::from_utf8_unchecked(bytes) }) |
| } |
| |
| /// Given a single word, return a string suitable to encode it as a shell argument. |
| pub fn quote<'a>(&self, in_str: &'a str) -> Result<Cow<'a, str>, QuoteError> { |
| Ok(match self.inner.quote(in_str.as_bytes())? { |
| Cow::Borrowed(out) => { |
| // Safety: given valid UTF-8, bytes::quote() will always return valid UTF-8. |
| unsafe { core::str::from_utf8_unchecked(out) }.into() |
| } |
| Cow::Owned(out) => { |
| // Safety: given valid UTF-8, bytes::quote() will always return valid UTF-8. |
| unsafe { String::from_utf8_unchecked(out) }.into() |
| } |
| }) |
| } |
| } |
| |
| impl From<bytes::Quoter> for Quoter { |
| fn from(inner: bytes::Quoter) -> Quoter { |
| Quoter { inner } |
| } |
| } |
| |
| impl From<Quoter> for bytes::Quoter { |
| fn from(quoter: Quoter) -> bytes::Quoter { |
| quoter.inner |
| } |
| } |
| |
| /// Convenience function that consumes an iterable of words and turns it into a single string, |
| /// quoting words when necessary. Consecutive words will be separated by a single space. |
| /// |
| /// Uses default settings except that nul bytes are passed through, which [may be |
| /// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated. |
| /// |
| /// Equivalent to [`Quoter::new().allow_nul(true).join(words).unwrap()`](Quoter). |
| /// |
| /// (That configuration never returns `Err`, so this function does not panic.) |
| /// |
| /// The bytes equivalent is [bytes::join]. |
| #[deprecated(since = "1.3.0", note = "replace with `try_join(words)?` to avoid nul byte danger")] |
| pub fn join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> String { |
| Quoter::new().allow_nul(true).join(words).unwrap() |
| } |
| |
| /// Convenience function that consumes an iterable of words and turns it into a single string, |
| /// quoting words when necessary. Consecutive words will be separated by a single space. |
| /// |
| /// Uses default settings. The only error that can be returned is [`QuoteError::Nul`]. |
| /// |
| /// Equivalent to [`Quoter::new().join(words)`](Quoter). |
| /// |
| /// The bytes equivalent is [bytes::try_join]. |
| pub fn try_join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> Result<String, QuoteError> { |
| Quoter::new().join(words) |
| } |
| |
| /// Given a single word, return a string suitable to encode it as a shell argument. |
| /// |
| /// Uses default settings except that nul bytes are passed through, which [may be |
| /// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated. |
| /// |
| /// Equivalent to [`Quoter::new().allow_nul(true).quote(in_str).unwrap()`](Quoter). |
| /// |
| /// (That configuration never returns `Err`, so this function does not panic.) |
| /// |
| /// The bytes equivalent is [bytes::quote]. |
| #[deprecated(since = "1.3.0", note = "replace with `try_quote(str)?` to avoid nul byte danger")] |
| pub fn quote(in_str: &str) -> Cow<str> { |
| Quoter::new().allow_nul(true).quote(in_str).unwrap() |
| } |
| |
| /// Given a single word, return a string suitable to encode it as a shell argument. |
| /// |
| /// Uses default settings. The only error that can be returned is [`QuoteError::Nul`]. |
| /// |
| /// Equivalent to [`Quoter::new().quote(in_str)`](Quoter). |
| /// |
| /// (That configuration never returns `Err`, so this function does not panic.) |
| /// |
| /// The bytes equivalent is [bytes::try_quote]. |
| pub fn try_quote(in_str: &str) -> Result<Cow<str>, QuoteError> { |
| Quoter::new().quote(in_str) |
| } |
| |
// Table of `(input, expected)` pairs for `test_split`: `Some(words)` lists the words the input
// must split into, `None` marks an input that must be rejected as erroneous.
#[cfg(test)]
static SPLIT_TEST_ITEMS: &[(&str, Option<&[&str]>)] = &[
    ("foo$baz", Some(&["foo$baz"])),
    ("foo baz", Some(&["foo", "baz"])),
    ("foo\"bar\"baz", Some(&["foobarbaz"])),
    ("foo \"bar\"baz", Some(&["foo", "barbaz"])),
    (" foo \nbar", Some(&["foo", "bar"])),
    ("foo\\\nbar", Some(&["foobar"])),
    ("\"foo\\\nbar\"", Some(&["foobar"])),
    ("'baz\\$b'", Some(&["baz\\$b"])),
    ("'baz\\\''", None),
    ("\\", None),
    ("\"\\", None),
    ("'\\", None),
    ("\"", None),
    ("'", None),
    ("foo #bar\nbaz", Some(&["foo", "baz"])),
    ("foo #bar", Some(&["foo"])),
    ("foo#bar", Some(&["foo#bar"])),
    ("foo\"#bar", None),
    ("'\\n'", Some(&["\\n"])),
    ("'\\\\n'", Some(&["\\\\n"])),
];
| |
// Runs every row of `SPLIT_TEST_ITEMS` through `split` and compares against the expected words.
#[test]
fn test_split() {
    for &(input, expected) in SPLIT_TEST_ITEMS {
        let actual = split(input);
        // The table stores borrowed words; `split` hands back owned `String`s.
        let expected: Option<Vec<String>> =
            expected.map(|words| words.iter().map(|&word| word.to_owned()).collect());
        assert_eq!(actual, expected);
    }
}
| |
// Checks the line number the lexer reports at the moment it yields the word `bar`.
#[test]
fn test_lineno() {
    let mut sh = Shlex::new("\nfoo\nbar");
    let mut saw_bar = false;
    // `while let` rather than `for`: `sh` has to stay accessible inside the loop so that
    // `line_no` can be read right after each word is produced.
    while let Some(word) = sh.next() {
        if word == "bar" {
            saw_bar = true;
            assert_eq!(sh.line_no, 3);
        }
    }
    // Without this the test would pass vacuously if `bar` were never yielded.
    assert!(saw_bar, "lexer never produced the word `bar`");
}
| |
#[test]
#[cfg_attr(not(feature = "std"), allow(unreachable_code, unused_mut))]
fn test_quote() {
    // This is a list of (unquoted, quoted) pairs.
    // But it's using a single long (raw) string literal with an ad-hoc format, just because it's
    // hard to read if we have to put the test strings through Rust escaping on top of the escaping
    // being tested. (Even raw string literals are noisy for short strings).
    // Ad-hoc: "NL" is replaced with a literal newline; no other escape sequences.
    // Each side is wrapped in `<...>`; whitespace around the delimiters is not significant.
    let tests = r#"
        <>            => <''>
        <foobar>      => <foobar>
        <foo bar>     => <'foo bar'>
        <"foo bar'">  => <"\"foo bar'\"">
        <'foo bar'>   => <"'foo bar'">
        <">           => <'"'>
        <"'>          => <"\"'">
        <hello!world> => <'hello!world'>
        <'hello!world> => <"'hello"'!world'>
        <'hello!>     => <"'hello"'!'>
        <hello ^ world> => <'hello ''^ world'>
        <hello^>      => <hello'^'>
        <!world'>     => <'!world'"'">
        <{a, b}>      => <'{a, b}'>
        <NL>          => <'NL'>
        <^>           => <'^'>
        <foo^bar>     => <foo'^bar'>
        <NLx^>        => <'NLx''^'>
        <NL^x>        => <'NL''^x'>
        <NL ^x>       => <'NL ''^x'>
        <{a,b}>       => <'{a,b}'>
        <a,b>         => <'a,b'>
        <a..b>        => <a..b>
        <'$>          => <"'"'$'>
        <"^>          => <'"''^'>
    "#;
    let mut ok = true;
    for test in tests.trim().split('\n') {
        let parts: Vec<String> = test
            .replace("NL", "\n")
            .split("=>")
            .map(|part| part.trim().trim_start_matches('<').trim_end_matches('>').to_owned())
            .collect();
        assert_eq!(parts.len(), 2, "malformed test row: {:?}", test);
        let unquoted = &*parts[0];
        let quoted_expected = &*parts[1];
        let quoted_actual = try_quote(unquoted).unwrap();
        if quoted_expected != quoted_actual {
            // Without `std` there is no `println!`, so fail on the first mismatch instead of
            // collecting them all.
            #[cfg(not(feature = "std"))]
            panic!("FAIL: for input <{}>, expected <{}>, got <{}>",
                   unquoted, quoted_expected, quoted_actual);
            #[cfg(feature = "std")]
            println!("FAIL: for input <{}>, expected <{}>, got <{}>",
                     unquoted, quoted_expected, quoted_actual);
            ok = false;
        }
    }
    assert!(ok);
}
| |
// Exercises the deprecated infallible `join` on a few word lists.
#[test]
#[allow(deprecated)]
fn test_join() {
    let cases = vec![
        (vec![], ""),
        (vec![""], "''"),
        (vec!["a", "b"], "a b"),
        (vec!["foo bar", "baz"], "'foo bar' baz"),
    ];
    for (words, expected) in cases {
        assert_eq!(join(words), expected);
    }
}
| |
// With default settings, a nul byte must be rejected by both fallible entry points.
#[test]
fn test_fallible() {
    let nul = "\0";
    let expected = Err(QuoteError::Nul);
    assert_eq!(try_join(vec![nul]), expected);
    assert_eq!(try_quote(nul), Err(QuoteError::Nul));
}