blob: 6c07418032d6cf3909ed3c0cddfcdc85988e3811 [file] [log] [blame] [edit]
//! A pathologically simple command line argument parser.
//!
//! Most argument parsers are declarative: you tell them what to parse,
//! and they do it.
//!
//! This one provides you with a stream of options and values and lets you
//! figure out the rest.
//!
//! ## Example
//! ```no_run
//! struct Args {
//! thing: String,
//! number: u32,
//! shout: bool,
//! }
//!
//! fn parse_args() -> Result<Args, lexopt::Error> {
//! use lexopt::prelude::*;
//!
//! let mut thing = None;
//! let mut number = 1;
//! let mut shout = false;
//! let mut parser = lexopt::Parser::from_env();
//! while let Some(arg) = parser.next()? {
//! match arg {
//! Short('n') | Long("number") => {
//! number = parser.value()?.parse()?;
//! }
//! Long("shout") => {
//! shout = true;
//! }
//! Value(val) if thing.is_none() => {
//! thing = Some(val.string()?);
//! }
//! Long("help") => {
//! println!("Usage: hello [-n|--number=NUM] [--shout] THING");
//! std::process::exit(0);
//! }
//! _ => return Err(arg.unexpected()),
//! }
//! }
//!
//! Ok(Args {
//! thing: thing.ok_or("missing argument THING")?,
//! number,
//! shout,
//! })
//! }
//!
//! fn main() -> Result<(), lexopt::Error> {
//! let args = parse_args()?;
//! let mut message = format!("Hello {}", args.thing);
//! if args.shout {
//! message = message.to_uppercase();
//! }
//! for _ in 0..args.number {
//! println!("{}", message);
//! }
//! Ok(())
//! }
//! ```
//! Let's walk through this:
//! - We start parsing with [`Parser::from_env`].
//! - We call [`parser.next()`][Parser::next] in a loop to get all the arguments until they run out.
//! - We match on arguments. [`Short`][Arg::Short] and [`Long`][Arg::Long] indicate an option.
//! - To get the value that belongs to an option (like `10` in `-n 10`) we call [`parser.value()`][Parser::value].
//! - This returns a standard [`OsString`][std::ffi::OsString].
//! - For convenience, [`use lexopt::prelude::*`][prelude] adds a [`.parse()`][ValueExt::parse] method, analogous to [`str::parse`].
//! - Calling `parser.value()` is how we tell `Parser` that `-n` takes a value at all.
//! - `Value` indicates a free-standing argument.
//! - `if thing.is_none()` is a useful pattern for positional arguments. If we already found `thing` we pass it on to another case.
//! - It also contains an `OsString`.
//! - The [`.string()`][ValueExt::string] method decodes it into a plain `String`.
//! - If we don't know what to do with an argument we use [`return Err(arg.unexpected())`][Arg::unexpected] to turn it into an error message.
//! - Strings can be promoted to errors for custom error messages.
#![forbid(unsafe_code)]
#![warn(missing_docs, missing_debug_implementations, elided_lifetimes_in_paths)]
#![allow(clippy::should_implement_trait)]
use std::{
ffi::{OsStr, OsString},
fmt::Display,
mem::replace,
str::FromStr,
};
#[cfg(unix)]
use std::os::unix::ffi::{OsStrExt, OsStringExt};
#[cfg(target_os = "wasi")]
use std::os::wasi::ffi::{OsStrExt, OsStringExt};
#[cfg(windows)]
use std::os::windows::ffi::{OsStrExt, OsStringExt};
/// The concrete iterator the parser pulls arguments from: an owning iterator
/// over a `Vec<OsString>`, which also allows peeking through `as_slice()`.
type InnerIter = std::vec::IntoIter<OsString>;

/// Drain `iter` completely into a vector and return that vector's owning iterator.
fn make_iter(iter: impl Iterator<Item = OsString>) -> InnerIter {
    let collected: Vec<OsString> = iter.collect();
    collected.into_iter()
}
/// A parser for command line arguments.
#[derive(Debug, Clone)]
pub struct Parser {
/// The arguments that have not been taken from the command line yet.
source: InnerIter,
/// What is left over from the argument currently being processed, if anything.
state: State,
/// The last option we emitted.
last_option: LastOption,
/// The name of the command (argv\[0\]).
bin_name: Option<String>,
}
/// What the parser is in the middle of between two calls.
#[derive(Debug, Clone)]
enum State {
/// Nothing interesting is going on.
None,
/// We have a value left over from --option=value.
PendingValue(OsString),
/// We're in the middle of -abc.
///
/// The `usize` is the byte offset of the next unprocessed character.
///
/// On Windows and other non-UTF8-OsString platforms this Vec should
/// only ever contain valid UTF-8 (and could instead be a String).
Shorts(Vec<u8>, usize),
#[cfg(windows)]
/// We're in the middle of -ab� on Windows (invalid UTF-16).
///
/// The `usize` is the index of the next unprocessed UTF-16 code unit.
ShortsU16(Vec<u16>, usize),
/// We saw -- and know no more options are coming.
FinishedOpts,
}
/// We use this to keep track of the last emitted option, for error messages when
/// an expected value is not found.
///
/// We also use this as storage for long options so we can hand out &str
/// (because String doesn't support pattern matching).
#[derive(Debug, Clone)]
enum LastOption {
/// No option has been emitted yet.
None,
/// A short option, stored without its leading dash.
Short(char),
/// A long option, stored including its leading `--`.
Long(String),
}
/// A command line argument found by [`Parser`], either an option or a positional argument.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Arg<'a> {
/// A short option, e.g. `Short('q')` for `-q`.
Short(char),
/// A long option, e.g. `Long("verbose")` for `--verbose`. (The dashes are not included.)
///
/// The string is borrowed from the [`Parser`] that produced it.
Long(&'a str),
/// A positional argument, e.g. `/dev/null`.
///
/// Everything that follows a `--` argument is also reported this way.
Value(OsString),
}
impl Parser {
/// Get the next option or positional argument.
///
/// A return value of `Ok(None)` means the command line has been exhausted.
///
/// Options that are not valid unicode are transformed with replacement
/// characters as by [`String::from_utf8_lossy`].
///
/// A lone `-` is returned as a value. Once a `--` argument has been seen
/// it is skipped and every remaining argument is returned as a value.
///
/// # Errors
///
/// [`Error::UnexpectedValue`] is returned if the last option had a
/// value that hasn't been consumed, as in `--option=value` or `-o=value`.
///
/// It's possible to continue parsing after an error (but this is rarely useful).
pub fn next(&mut self) -> Result<Option<Arg<'_>>, Error> {
// First deal with whatever is left over from the argument we were
// already processing.
match self.state {
State::PendingValue(ref mut value) => {
// Last time we got `--long=value`, and `value` hasn't been used.
let value = replace(value, OsString::new());
self.state = State::None;
return Err(Error::UnexpectedValue {
option: self
.format_last_option()
.expect("Should only have pending value after long option"),
value,
});
}
State::Shorts(ref arg, ref mut pos) => {
// We're somewhere inside a -abc chain. Because we're in .next(),
// not .value(), we can assume that the next character is another option.
match first_codepoint(&arg[*pos..]) {
Ok(None) => {
// The chain is used up, fall through to fetch a new argument.
self.state = State::None;
}
// If we find "-=[...]" we interpret it as an option.
// If we find "-o=..." then there's an unexpected value.
// ('-=' as an option exists, see https://linux.die.net/man/1/a2ps.)
// clap always interprets it as a short flag in this case, but
// that feels sloppy.
Ok(Some('=')) if *pos > 1 => {
return Err(Error::UnexpectedValue {
option: self.format_last_option().unwrap(),
value: self.optional_value().unwrap(),
});
}
Ok(Some(ch)) => {
*pos += ch.len_utf8();
self.last_option = LastOption::Short(ch);
return Ok(Some(Arg::Short(ch)));
}
Err(_) => {
// Advancing may allow recovery.
// This is a little iffy, there might be more bad unicode next.
// The standard library may turn multiple bytes into a single
// replacement character, but we don't imitate that.
*pos += 1;
self.last_option = LastOption::Short('�');
return Ok(Some(Arg::Short('�')));
}
}
}
// Same logic as the `Shorts` arm, but the position counts UTF-16 code units.
#[cfg(windows)]
State::ShortsU16(ref arg, ref mut pos) => match first_utf16_codepoint(&arg[*pos..]) {
Ok(None) => {
self.state = State::None;
}
Ok(Some('=')) if *pos > 1 => {
return Err(Error::UnexpectedValue {
option: self.format_last_option().unwrap(),
value: self.optional_value().unwrap(),
});
}
Ok(Some(ch)) => {
*pos += ch.len_utf16();
self.last_option = LastOption::Short(ch);
return Ok(Some(Arg::Short(ch)));
}
Err(_) => {
*pos += 1;
self.last_option = LastOption::Short('�');
return Ok(Some(Arg::Short('�')));
}
},
State::FinishedOpts => {
// After `--` everything is a value, whatever it looks like.
return Ok(self.source.next().map(Arg::Value));
}
State::None => (),
}
// Every other state either returned above or was reset to `State::None`.
match self.state {
State::None => (),
ref state => panic!("unexpected state {:?}", state),
}
// Nothing was pending, so start on a fresh argument.
let arg = match self.source.next() {
Some(arg) => arg,
None => return Ok(None),
};
if arg == "--" {
// The `--` itself is not reported, the recursive call returns what follows it.
self.state = State::FinishedOpts;
return self.next();
}
#[cfg(any(unix, target_os = "wasi"))]
{
// Fast solution for platforms where OsStrings are just UTF-8-ish bytes
let mut arg = arg.into_vec();
if arg.starts_with(b"--") {
// Long options have two forms: --option and --option=value.
if let Some(ind) = arg.iter().position(|&b| b == b'=') {
// The value can be an OsString...
self.state = State::PendingValue(OsString::from_vec(arg[ind + 1..].into()));
arg.truncate(ind);
}
// ...but the option has to be a string.
// String::from_utf8_lossy().into_owned() would work, but its
// return type is Cow: if the original was valid a borrowed
// version is returned, and then into_owned() does an
// unnecessary copy.
// By trying String::from_utf8 first we avoid that copy if arg
// is already UTF-8 (which is most of the time).
// reqwest does a similar maneuver more efficiently with unsafe:
// https://github.com/seanmonstar/reqwest/blob/e6a1a09f0904e06de4ff1317278798c4ed28af66/src/async_impl/response.rs#L194
let option = match String::from_utf8(arg) {
Ok(text) => text,
Err(err) => String::from_utf8_lossy(err.as_bytes()).into_owned(),
};
Ok(Some(self.set_long(option)))
} else if arg.len() > 1 && arg[0] == b'-' {
// Position 1 skips the leading dash; the recursive call emits the first short option.
self.state = State::Shorts(arg, 1);
self.next()
} else {
Ok(Some(Arg::Value(OsString::from_vec(arg))))
}
}
#[cfg(not(any(unix, target_os = "wasi")))]
{
// Platforms where looking inside an OsString is harder
#[cfg(windows)]
{
// Fast path for Windows
let mut bytes = arg.encode_wide();
const DASH: u16 = b'-' as u16;
match (bytes.next(), bytes.next()) {
(Some(DASH), Some(_)) => {
// This is an option, we'll have to do more work.
// (We already checked for "--" earlier.)
}
_ => {
// Just a value, return early.
return Ok(Some(Arg::Value(arg)));
}
}
}
let mut arg = match arg.into_string() {
Ok(arg) => arg,
Err(arg) => {
// The argument is not valid unicode.
// If it's an option we'll have to do something nasty,
// otherwise we can return it as-is.
#[cfg(windows)]
{
// On Windows we can only get here if this is an option, otherwise
// we return earlier.
// Unlike on Unix, we can't efficiently process invalid unicode.
// Semantically it's UTF-16, but internally it's WTF-8 (a superset of UTF-8).
// So we only process the raw version here, when we know we really have to.
let mut arg: Vec<u16> = arg.encode_wide().collect();
const DASH: u16 = b'-' as u16;
const EQ: u16 = b'=' as u16;
if arg.starts_with(&[DASH, DASH]) {
if let Some(ind) = arg.iter().position(|&u| u == EQ) {
self.state =
State::PendingValue(OsString::from_wide(&arg[ind + 1..]));
arg.truncate(ind);
}
let long = self.set_long(String::from_utf16_lossy(&arg));
return Ok(Some(long));
} else {
assert!(arg.len() > 1);
assert_eq!(arg[0], DASH);
self.state = State::ShortsU16(arg, 1);
return self.next();
}
};
#[cfg(not(windows))]
{
// This code may be reachable on Hermit and SGX, but probably
// not on wasm32-unknown-unknown, which is unfortunate as that's
// the only one we can easily test.
// This allocates unconditionally, sadly.
let text = arg.to_string_lossy();
if text.starts_with('-') {
// Use the lossily patched version and hope for the best.
// This may be incorrect behavior. Our only other option
// is an error but I don't want to write complicated code
// I can't actually test.
// Please open an issue if this behavior affects you!
text.into_owned()
} else {
// It didn't look like an option, so return it as a value.
return Ok(Some(Arg::Value(arg)));
}
}
}
};
// The argument is valid unicode. This is the ideal version of the
// code, the previous mess was purely to deal with invalid unicode.
if arg.starts_with("--") {
if let Some(ind) = arg.find('=') {
self.state = State::PendingValue(arg[ind + 1..].into());
arg.truncate(ind);
}
Ok(Some(self.set_long(arg)))
} else if arg.starts_with('-') && arg != "-" {
self.state = State::Shorts(arg.into(), 1);
self.next()
} else {
Ok(Some(Arg::Value(arg.into())))
}
}
}
/// Get a value for an option.
///
/// This function should normally be called right after seeing an option
/// that expects a value, with positional arguments being collected
/// using [`next()`][Parser::next].
///
/// A value is collected even if it looks like an option
/// (i.e., starts with `-`).
///
/// # Errors
///
/// An [`Error::MissingValue`] is returned if the end of the command
/// line is reached.
pub fn value(&mut self) -> Result<OsString, Error> {
if let Some(value) = self.optional_value() {
return Ok(value);
}
if let Some(value) = self.source.next() {
return Ok(value);
}
Err(Error::MissingValue {
option: self.format_last_option(),
})
}
/// Gather multiple values for an option.
///
/// This is used for options that take multiple arguments, such as a
/// `--command` flag that's invoked as `app --command echo 'Hello world'`.
///
/// It will gather arguments until another option is found, or `--` is found, or
/// the end of the command line is reached. This differs from `.value()`, which
/// takes a value even if it looks like an option.
///
/// An equals sign (`=`) will limit this to a single value. That means `-a=b c` and
/// `--opt=b c` will only yield `"b"` while `-a b c`, `-ab c` and `--opt b c` will
/// yield `"b"`, `"c"`.
///
/// # Errors
/// If not at least one value is found then [`Error::MissingValue`] is returned.
///
/// # Example
/// ```
/// # fn main() -> Result<(), lexopt::Error> {
/// # use lexopt::prelude::*;
/// # use std::ffi::OsString;
/// # use std::path::PathBuf;
/// # let mut parser = lexopt::Parser::from_args(&["a", "b", "-x", "one", "two", "three", "four"]);
/// let arguments: Vec<OsString> = parser.values()?.collect();
/// # assert_eq!(arguments, &["a", "b"]);
/// # let _ = parser.next();
/// let at_most_three_files: Vec<PathBuf> = parser.values()?.take(3).map(Into::into).collect();
/// # assert_eq!(parser.raw_args()?.as_slice(), &["four"]);
/// for value in parser.values()? {
/// // ...
/// }
/// # Ok(()) }
/// ```
pub fn values(&mut self) -> Result<ValuesIter<'_>, Error> {
// This code is designed so that just calling .values() doesn't consume
// any arguments as long as you don't use the iterator. It used to work
// differently.
// "--" is treated like an option and not consumed. This seems to me the
// least unreasonable behavior, and it's the easiest to implement.
if self.has_pending() || self.next_is_normal() {
Ok(ValuesIter {
took_first: false,
parser: Some(self),
})
} else {
Err(Error::MissingValue {
option: self.format_last_option(),
})
}
}
/// Consume the upcoming argument, but only when it is "normal"
/// (neither an option nor `--`).
///
/// Used by [`Parser::values`].
///
/// Must not be called while partway through processing an argument.
fn next_if_normal(&mut self) -> Option<OsString> {
    if !self.next_is_normal() {
        return None;
    }
    self.source.next()
}
/// The check behind `next_if_normal()`: is there an upcoming argument, and
/// is it a plain value rather than an option or `--`?
///
/// Panics if the parser is partway through an argument.
fn next_is_normal(&self) -> bool {
    assert!(!self.has_pending());
    let upcoming = match self.source.as_slice().first() {
        Some(upcoming) => upcoming,
        // Without a next argument there is nothing to take.
        None => return false,
    };
    match self.state {
        // After a `--` we shouldn't really end up here, but if we do then
        // the next argument is certainly not an option.
        State::FinishedOpts => return true,
        _ => (),
    }
    #[cfg(any(unix, target_os = "wasi"))]
    let starts_with_dash = upcoming.as_bytes().first() == Some(&b'-');
    #[cfg(windows)]
    let starts_with_dash = upcoming.encode_wide().next() == Some(b'-' as u16);
    #[cfg(not(any(unix, target_os = "wasi", windows)))]
    let starts_with_dash = upcoming.to_string_lossy().as_bytes().first() == Some(&b'-');
    // A lone "-" is the single dash-led argument that still counts as a value.
    upcoming == "-" || !starts_with_dash
}
/// Take raw arguments from the original command line.
///
/// This returns an iterator of [`OsString`]s. Any arguments that are not
/// consumed are kept, so you can continue parsing after you're done with
/// the iterator.
///
/// To inspect an argument without consuming it, use [`RawArgs::peek`] or
/// [`RawArgs::as_slice`].
///
/// # Errors
///
/// Returns an [`Error::UnexpectedValue`] if the last option had a left-over
/// argument, as in `--option=value`, `-ovalue`, or if it was midway through
/// an option chain, as in `-abc`. The iterator only yields whole arguments.
/// To avoid this, use [`try_raw_args()`][Parser::try_raw_args].
///
/// After this error the method is guaranteed to succeed, as it consumes the
/// rest of the argument.
///
/// # Example
/// As soon as a free-standing argument is found, consume the other arguments
/// as-is, and build them into a command.
/// ```
/// # fn main() -> Result<(), lexopt::Error> {
/// # use lexopt::prelude::*;
/// # use std::ffi::OsString;
/// # use std::path::PathBuf;
/// # let mut parser = lexopt::Parser::from_args(&["-x", "echo", "-n", "'Hello, world'"]);
/// # while let Some(arg) = parser.next()? {
/// # match arg {
/// Value(prog) => {
/// let args: Vec<_> = parser.raw_args()?.collect();
/// let command = std::process::Command::new(prog).args(args);
/// }
/// # _ => (), }} Ok(()) }
/// ```
pub fn raw_args(&mut self) -> Result<RawArgs<'_>, Error> {
    // Taking the left-over value here is what guarantees that a retry succeeds.
    if let Some(value) = self.optional_value() {
        return Err(Error::UnexpectedValue {
            option: self
                .format_last_option()
                .expect("a left-over value always follows an emitted option"),
            value,
        });
    }
    Ok(RawArgs(&mut self.source))
}
/// Take raw arguments from the original command line, *if* the current argument
/// has finished processing.
///
/// Unlike [`raw_args()`][Parser::raw_args] this does not consume any value
/// in case of a left-over argument. This makes it safe to call at any time.
///
/// It returns `None` exactly when [`optional_value()`][Parser::optional_value]
/// would return `Some`.
///
/// Note: If no arguments are left then it returns an empty iterator (not `None`).
///
/// # Example
/// Process arguments of the form `-123` as numbers. For a complete runnable version of
/// this example, see
/// [`examples/nonstandard.rs`](https://github.com/blyxxyz/lexopt/blob/e3754e6f24506afb42394602fc257b1ad9258d84/examples/nonstandard.rs).
/// ```
/// # fn main() -> Result<(), lexopt::Error> {
/// # use lexopt::prelude::*;
/// # use std::ffi::OsString;
/// # use std::path::PathBuf;
/// # let mut parser = lexopt::Parser::from_args(&["-13"]);
/// fn parse_dashnum(parser: &mut lexopt::Parser) -> Option<u64> {
/// let mut raw = parser.try_raw_args()?;
/// let arg = raw.peek()?.to_str()?;
/// let num = arg.strip_prefix('-')?.parse::<u64>().ok()?;
/// raw.next(); // Consume the argument we just parsed
/// Some(num)
/// }
///
/// loop {
/// if let Some(num) = parse_dashnum(&mut parser) {
/// println!("Got number {}", num);
/// } else if let Some(arg) = parser.next()? {
/// match arg {
/// // ...
/// # _ => (),
/// }
/// } else {
/// break;
/// }
/// }
/// # Ok(()) }
/// ```
pub fn try_raw_args(&mut self) -> Option<RawArgs<'_>> {
    // Raw access only makes sense on whole arguments, so refuse while part
    // of the current argument is still unprocessed.
    if self.has_pending() {
        return None;
    }
    Some(RawArgs(&mut self.source))
}
/// Report whether part of the current argument is still unprocessed, which
/// is the same as asking whether [`Parser::optional_value()`] would return `Some`.
fn has_pending(&self) -> bool {
    match &self.state {
        State::PendingValue(_) => true,
        // A short option chain is pending until its position reaches the end.
        State::Shorts(arg, pos) => *pos < arg.len(),
        #[cfg(windows)]
        State::ShortsU16(arg, pos) => *pos < arg.len(),
        State::None | State::FinishedOpts => false,
    }
}
/// Render the most recently emitted option the way it should appear in an
/// error message, or `None` if no option has been emitted yet.
#[inline(never)]
fn format_last_option(&self) -> Option<String> {
    let rendered = match &self.last_option {
        LastOption::None => return None,
        LastOption::Short(ch) => format!("-{}", ch),
        // Long options are stored with their dashes, so a copy is enough.
        LastOption::Long(option) => option.clone(),
    };
    Some(rendered)
}
/// The name of the command, as in the zeroth argument of the process.
///
/// This is meant to be used in messages. A name that is not valid unicode
/// is sanitized with replacement characters as by
/// [`String::from_utf8_lossy`].
///
/// To get the current executable, use [`std::env::current_exe`].
///
/// # Example
/// ```
/// let mut parser = lexopt::Parser::from_env();
/// let bin_name = parser.bin_name().unwrap_or("myapp");
/// println!("{}: Some message", bin_name);
/// ```
pub fn bin_name(&self) -> Option<&str> {
    self.bin_name.as_ref().map(String::as_str)
}
/// Get a value only if it is attached to its option, as in `-ovalue` or
/// `--option=value` or `-o=value`, but not `-o value` or `--option value`.
pub fn optional_value(&mut self) -> Option<OsString> {
    self.raw_optional_value()
        .map(|(value, _had_eq_sign)| value)
}
/// [`Parser::optional_value`], but indicate whether the value was joined
/// with an = sign. This matters for [`Parser::values`].
///
/// The state is taken out of the parser: afterwards it is `State::None`,
/// except that `State::FinishedOpts` is put back.
fn raw_optional_value(&mut self) -> Option<(OsString, bool)> {
match replace(&mut self.state, State::None) {
State::PendingValue(value) => Some((value, true)),
State::Shorts(mut arg, mut pos) => {
// The whole chain was already consumed, so nothing is attached.
if pos >= arg.len() {
return None;
}
let mut had_eq_sign = false;
if arg[pos] == b'=' {
// -o=value.
// clap actually strips out all leading '='s, but that seems silly.
// We allow `-xo=value`. Python's argparse doesn't strip the = in that case.
pos += 1;
had_eq_sign = true;
}
arg.drain(..pos); // Reuse allocation
#[cfg(any(unix, target_os = "wasi"))]
{
Some((OsString::from_vec(arg), had_eq_sign))
}
#[cfg(not(any(unix, target_os = "wasi")))]
{
let arg = String::from_utf8(arg)
.expect("short option args on exotic platforms must be unicode");
Some((arg.into(), had_eq_sign))
}
}
// Same as the `Shorts` arm, but for UTF-16 code units.
#[cfg(windows)]
State::ShortsU16(arg, mut pos) => {
if pos >= arg.len() {
return None;
}
let mut had_eq_sign = false;
if arg[pos] == b'=' as u16 {
pos += 1;
had_eq_sign = true;
}
Some((OsString::from_wide(&arg[pos..]), had_eq_sign))
}
State::FinishedOpts => {
// Not really supposed to be here, but it's benign and not our fault
self.state = State::FinishedOpts;
None
}
State::None => None,
}
}
/// Build a parser in its initial state from an optional binary name and
/// the remaining arguments.
///
/// A binary name that is not valid unicode is converted lossily.
fn new(bin_name: Option<OsString>, source: InnerIter) -> Parser {
    let bin_name = bin_name.map(|raw| {
        raw.into_string()
            .unwrap_or_else(|invalid| invalid.to_string_lossy().into_owned())
    });
    Parser {
        source,
        state: State::None,
        last_option: LastOption::None,
        bin_name,
    }
}
/// Create a parser from the environment using [`std::env::args_os`].
///
/// This is the usual way to create a `Parser`. The first argument is used
/// as the binary name.
pub fn from_env() -> Parser {
    let mut remaining = make_iter(std::env::args_os());
    let bin_name = remaining.next();
    Parser::new(bin_name, remaining)
}
// This name unfortunately collides with `FromIterator::from_iter`.
// It was picked anyway because:
// - `from_args()` was taken, and changing its behavior without changing
//   its signature would be evil.
// - structopt also had a method by that name, so there's a precedent.
//   (clap_derive doesn't.)
// - I couldn't think of a better one.
// When this name was chosen `FromIterator` could not actually be implemented.
// It can be implemented now, but I'm not sure there's a reason to.
/// Create a parser from an iterator. This is useful for testing among other things.
///
/// The first item from the iterator **must** be the binary name, as from [`std::env::args_os`].
///
/// The iterator is consumed immediately.
///
/// # Example
/// ```
/// let mut parser = lexopt::Parser::from_iter(&["myapp", "-n", "10", "./foo.bar"]);
/// ```
pub fn from_iter<I>(args: I) -> Parser
where
    I: IntoIterator,
    I::Item: Into<OsString>,
{
    let mut remaining = make_iter(args.into_iter().map(|item| item.into()));
    let bin_name = remaining.next();
    Parser::new(bin_name, remaining)
}
/// Create a parser from an iterator that does **not** include the binary name.
///
/// The iterator is consumed immediately.
///
/// [`bin_name()`](`Parser::bin_name`) will return `None`. Consider using
/// [`Parser::from_iter`] instead.
pub fn from_args<I>(args: I) -> Parser
where
    I: IntoIterator,
    I::Item: Into<OsString>,
{
    let source = make_iter(args.into_iter().map(|item| item.into()));
    Parser::new(None, source)
}
/// Remember `option` (including its leading `--`) as the last option and
/// return it as an [`Arg::Long`] that borrows the stored text without the dashes.
fn set_long(&mut self, option: String) -> Arg<'_> {
    self.last_option = LastOption::Long(option);
    if let LastOption::Long(ref stored) = self.last_option {
        Arg::Long(&stored[2..])
    } else {
        unreachable!()
    }
}
}
impl Arg<'_> {
/// Convert an unexpected argument into an error.
pub fn unexpected(self) -> Error {
match self {
Arg::Short(short) => Error::UnexpectedOption(format!("-{}", short)),
Arg::Long(long) => Error::UnexpectedOption(format!("--{}", long)),
Arg::Value(value) => Error::UnexpectedArgument(value),
}
}
}
/// An iterator for multiple option-arguments, returned by [`Parser::values`].
///
/// It's guaranteed to yield at least one value.
#[derive(Debug)]
pub struct ValuesIter<'a> {
/// Whether the first value has been yielded already.
took_first: bool,
/// The parser to take values from. Set to `None` once a value joined with
/// an `=` sign has been yielded, which ends the iteration.
parser: Option<&'a mut Parser>,
}
impl Iterator for ValuesIter<'_> {
    type Item = OsString;

    /// Yield the next value for the option, stopping at the first argument
    /// that looks like an option.
    fn next(&mut self) -> Option<Self::Item> {
        let parser = self.parser.as_mut()?;
        if self.took_first {
            // Every value after the first must be a normal argument.
            return parser.next_if_normal();
        }
        let first = match parser.raw_optional_value() {
            Some((attached, had_eq_sign)) => {
                if had_eq_sign {
                    // `-o=value` and `--opt=value` take exactly one value.
                    self.parser = None;
                }
                attached
            }
            None => parser
                .next_if_normal()
                .expect("ValuesIter must yield at least one value"),
        };
        self.took_first = true;
        Some(first)
    }
}
/// An iterator for the remaining raw arguments, returned by [`Parser::raw_args`].
///
/// It wraps a mutable borrow of the parser's argument source, so arguments
/// it does not consume stay available to the parser.
#[derive(Debug)]
pub struct RawArgs<'a>(&'a mut InnerIter);
impl Iterator for RawArgs<'_> {
type Item = OsString;
fn next(&mut self) -> Option<Self::Item> {
self.0.next()
}
}
impl RawArgs<'_> {
/// Return a reference to the next() value without consuming it.
///
/// An argument you peek but do not consume will still be seen by `Parser`
/// if you resume parsing.
///
/// See [`Iterator::peekable`], [`std::iter::Peekable::peek`].
pub fn peek(&self) -> Option<&OsStr> {
Some(self.0.as_slice().first()?.as_os_str())
}
/// Consume and return the next argument if a condition is true.
///
/// See [`std::iter::Peekable::next_if`].
pub fn next_if(&mut self, func: impl FnOnce(&OsStr) -> bool) -> Option<OsString> {
match self.peek() {
Some(arg) if func(arg) => self.next(),
_ => None,
}
}
/// Return the remaining arguments as a slice.
pub fn as_slice(&self) -> &[OsString] {
self.0.as_slice()
}
}
// These would make sense:
// - fn RawArgs::iter(&self)
// - impl IntoIterator for &RawArgs
// - impl AsRef<[OsString]> for RawArgs
// But they're niche and constrain future design.
// Let's leave them out for now.
// (Open question: should iter() return std::slice::Iter<OsString> and get
// an optimized .nth() and so on for free, or should it return a novel type
// that yields &OsStr?)
/// An error during argument parsing.
///
/// This implements `From<String>` and `From<&str>`, for easy ad-hoc error
/// messages.
///
/// `Debug` is implemented by hand and prints the same text as `Display`.
//
// This is not #[non_exhaustive] because of the MSRV. I'm hoping no more
// variants will turn out to be needed: this seems reasonable, if the scope
// of the library doesn't change. Worst case scenario it can be stuffed inside
// Error::Custom.
pub enum Error {
/// An option argument was expected but was not found.
MissingValue {
/// The most recently emitted option.
option: Option<String>,
},
/// An unexpected option was found.
///
/// The string includes the leading dashes.
UnexpectedOption(String),
/// A positional argument was found when none was expected.
UnexpectedArgument(OsString),
/// An option had a value when none was expected.
UnexpectedValue {
/// The option.
option: String,
/// The value.
value: OsString,
},
/// Parsing a value failed. Returned by methods on [`ValueExt`].
ParsingFailed {
/// The string that failed to parse.
value: String,
/// The error returned while parsing.
error: Box<dyn std::error::Error + Send + Sync + 'static>,
},
/// A value was found that was not valid unicode.
///
/// This can be returned by the methods on [`ValueExt`].
NonUnicodeValue(OsString),
/// For custom error messages in application code.
Custom(Box<dyn std::error::Error + Send + Sync + 'static>),
}
impl Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use crate::Error::*;
match self {
MissingValue { option: None } => write!(f, "missing argument"),
MissingValue {
option: Some(option),
} => {
write!(f, "missing argument for option '{}'", option)
}
UnexpectedOption(option) => write!(f, "invalid option '{}'", option),
UnexpectedArgument(value) => write!(f, "unexpected argument {:?}", value),
UnexpectedValue { option, value } => {
write!(
f,
"unexpected argument for option '{}': {:?}",
option, value
)
}
NonUnicodeValue(value) => write!(f, "argument is invalid unicode: {:?}", value),
ParsingFailed { value, error } => {
write!(f, "cannot parse argument {:?}: {}", value, error)
}
Custom(err) => write!(f, "{}", err),
}
}
}
// This is printed when returning an error from main(), so defer to Display
impl std::fmt::Debug for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
Display::fmt(self, f)
}
}
impl std::error::Error for Error {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Error::ParsingFailed { error, .. } | Error::Custom(error) => Some(error.as_ref()),
_ => None,
}
}
}
impl From<String> for Error {
fn from(msg: String) -> Self {
Error::Custom(msg.into())
}
}
impl<'a> From<&'a str> for Error {
fn from(msg: &'a str) -> Self {
Error::Custom(msg.into())
}
}
/// For [`OsString::into_string`], so it may be used with the try (`?`) operator.
///
/// [`ValueExt::string`] is the new preferred method because it's compatible with
/// catch-all error types like `anyhow::Error`.
impl From<OsString> for Error {
fn from(arg: OsString) -> Self {
Error::NonUnicodeValue(arg)
}
}
// Sealed-trait pattern: `Sealed` is a supertrait of `ValueExt` and lives in
// a private module, and the only implementation is for `OsString`.
mod private {
pub trait Sealed {}
impl Sealed for std::ffi::OsString {}
}
/// An optional extension trait with methods for parsing [`OsString`]s.
///
/// They may fail in two cases:
/// - The value cannot be decoded because it's invalid unicode
/// ([`Error::NonUnicodeValue`])
/// - The value can be decoded, but parsing fails ([`Error::ParsingFailed`])
///
/// If parsing fails the error will be wrapped in lexopt's own [`Error`] type.
pub trait ValueExt: private::Sealed {
/// Decode the value and parse it using [`FromStr`].
///
/// This will fail if the value is not valid unicode or if the subsequent
/// parsing fails.
fn parse<T: FromStr>(&self) -> Result<T, Error>
where
T::Err: Into<Box<dyn std::error::Error + Send + Sync + 'static>>;
/// Decode the value and parse it using a custom function.
///
/// # Errors
///
/// [`Error::NonUnicodeValue`] if the value is not valid unicode (`func` is
/// not called in that case), [`Error::ParsingFailed`] if `func` returns
/// an error.
fn parse_with<F, T, E>(&self, func: F) -> Result<T, Error>
where
F: FnOnce(&str) -> Result<T, E>,
E: Into<Box<dyn std::error::Error + Send + Sync + 'static>>;
// There is no parse_os_with() because I can't think of any useful
// fallible operations on an OsString. Typically you'd either decode it,
// use it as is, or do an infallible conversion to a PathBuf or such.
//
// If you have a use for parse_os_with() please open an issue with an
// example.
/// Convert the `OsString` into a [`String`] if it's valid Unicode.
///
/// This is like [`OsString::into_string`] but returns an
/// [`Error::NonUnicodeValue`] on error instead of the original `OsString`.
/// This makes it easier to propagate the failure with libraries like
/// `anyhow`.
fn string(self) -> Result<String, Error>;
}
impl ValueExt for OsString {
fn parse<T: FromStr>(&self) -> Result<T, Error>
where
T::Err: Into<Box<dyn std::error::Error + Send + Sync + 'static>>,
{
self.parse_with(FromStr::from_str)
}
fn parse_with<F, T, E>(&self, func: F) -> Result<T, Error>
where
F: FnOnce(&str) -> Result<T, E>,
E: Into<Box<dyn std::error::Error + Send + Sync + 'static>>,
{
match self.to_str() {
Some(text) => match func(text) {
Ok(value) => Ok(value),
Err(err) => Err(Error::ParsingFailed {
value: text.to_owned(),
error: err.into(),
}),
},
None => Err(Error::NonUnicodeValue(self.into())),
}
}
fn string(self) -> Result<String, Error> {
match self.into_string() {
Ok(string) => Ok(string),
Err(raw) => Err(Error::NonUnicodeValue(raw)),
}
}
}
/// A small prelude for processing arguments.
///
/// It allows you to write `Short`/`Long`/`Value` without an [`Arg`] prefix
/// and adds convenience methods to [`OsString`].
///
/// If this is used it's best to import it inside a function, not in module
/// scope:
/// ```
/// # struct Args;
/// fn parse_args() -> Result<Args, lexopt::Error> {
/// use lexopt::prelude::*;
/// // ...
/// # Ok(Args)
/// }
/// ```
pub mod prelude {
// The variants of `Arg`, so they can be matched on without a prefix.
pub use super::Arg::*;
// The extension trait that adds the parsing methods to `OsString`.
pub use super::ValueExt;
}
/// Decode the first codepoint of a bytestring.
///
/// Returns `Ok(None)` for an empty input. If the input does not start with
/// a valid codepoint, the first (and therefore in some way invalid) byte is
/// returned as the error.
///
/// The rest of the bytestring does not have to be valid unicode.
fn first_codepoint(bytes: &[u8]) -> Result<Option<char>, u8> {
    // A codepoint takes at most four bytes, so later bytes are irrelevant.
    let window = if bytes.len() > 4 { &bytes[..4] } else { bytes };
    let valid = match std::str::from_utf8(window) {
        Ok(valid) => valid,
        Err(err) => {
            let good = err.valid_up_to();
            if good == 0 {
                // Not even the first codepoint decodes. The window can't be
                // empty here because an empty input is valid UTF-8.
                return Err(window[0]);
            }
            // Only a later codepoint is broken, keep the valid prefix.
            std::str::from_utf8(&window[..good]).unwrap()
        }
    };
    Ok(valid.chars().next())
}
/// Like `first_codepoint`, but for UTF-16: decode the first codepoint of
/// `units`, or return the first code unit as the error if it can't be decoded.
#[cfg(windows)]
fn first_utf16_codepoint(units: &[u16]) -> Result<Option<char>, u16> {
    let decoded = std::char::decode_utf16(units.iter().cloned()).next();
    match decoded {
        None => Ok(None),
        Some(Ok(ch)) => Ok(Some(ch)),
        Some(Err(_)) => Err(units[0]),
    }
}
#[cfg(test)]
mod tests {
use super::prelude::*;
use super::*;
/// Build a parser from a whitespace-separated command line.
///
/// Every word goes through `bad_string`, so `@` can be used to request
/// invalid unicode in the resulting arguments.
fn parse(args: &'static str) -> Parser {
    let words = args.split_whitespace().map(bad_string);
    Parser::from_args(words)
}
/// Specialized backport of matches!()
///
/// Asserts that an expression matches one of the given `|`-separated
/// patterns, and panics otherwise.
macro_rules! assert_matches {
// One expression, followed by one or more alternative patterns.
($expression: expr, $( $pattern: pat )|+) => {
match $expression {
$( $pattern )|+ => (),
// The message shows the source text of the expression and the patterns,
// not the runtime value of the expression.
_ => panic!(
"{:?} does not match {:?}",
stringify!($expression),
stringify!($( $pattern )|+)
),
}
};
}
#[test]
fn test_basic() -> Result<(), Error> {
    let mut parser = parse("-n 10 foo - -- baz -qux");

    // A short option whose value is the next argument.
    assert_eq!(parser.next()?, Some(Short('n')));
    let number: i32 = parser.value()?.parse()?;
    assert_eq!(number, 10);

    // Positional arguments; a lone "-" counts as a value too.
    assert_eq!(parser.next()?, Some(Value(OsString::from("foo"))));
    assert_eq!(parser.next()?, Some(Value(OsString::from("-"))));

    // "--" is not reported itself, and what follows is taken literally.
    assert_eq!(parser.next()?, Some(Value(OsString::from("baz"))));
    assert_eq!(parser.next()?, Some(Value(OsString::from("-qux"))));

    // Once exhausted, the parser keeps reporting the end.
    for _ in 0..3 {
        assert_eq!(parser.next()?, None);
    }
    Ok(())
}
#[test]
fn test_combined() -> Result<(), Error> {
    let mut parser = parse("-abc -fvalue -xfvalue");

    // Several short options packed into one argument.
    for &flag in &['a', 'b', 'c'] {
        assert_eq!(parser.next()?, Some(Short(flag)));
    }

    // A value glued directly onto its option.
    assert_eq!(parser.next()?, Some(Short('f')));
    assert_eq!(parser.value()?, OsString::from("value"));

    // A flag, then an option with a glued value, all in one argument.
    assert_eq!(parser.next()?, Some(Short('x')));
    assert_eq!(parser.next()?, Some(Short('f')));
    assert_eq!(parser.value()?, OsString::from("value"));

    assert_eq!(parser.next()?, None);
    Ok(())
}
#[test]
fn test_long() -> Result<(), Error> {
    let mut parser = parse("--foo --bar=qux --foobar=qux=baz");

    assert_eq!(parser.next()?, Some(Long("foo")));

    assert_eq!(parser.next()?, Some(Long("bar")));
    assert_eq!(parser.value()?, OsString::from("qux"));

    // Only the first '=' separates the option from its value. Not picking
    // the value up before asking for the next argument is an error.
    assert_eq!(parser.next()?, Some(Long("foobar")));
    let err = parser.next().unwrap_err();
    if let Error::UnexpectedValue { option, value } = err {
        assert_eq!(option, "--foobar");
        assert_eq!(value, "qux=baz");
    } else {
        panic!();
    }

    // Parsing carries on normally after the error.
    assert_eq!(parser.next()?, None);
    Ok(())
}
#[test]
fn test_dash_args() -> Result<(), Error> {
    // A bare "--" ends option parsing: "-y" becomes a plain value.
    let mut parser = parse("-x -- -y");
    assert_eq!(parser.next()?, Some(Short('x')));
    assert_eq!(parser.next()?, Some(Value(OsString::from("-y"))));
    assert_eq!(parser.next()?, None);

    // When "--" is consumed as an option's value it has no special meaning,
    // so "-y" is still parsed as an option.
    let mut parser = parse("-x -- -y");
    assert_eq!(parser.next()?, Some(Short('x')));
    assert_eq!(parser.value()?, OsString::from("--"));
    assert_eq!(parser.next()?, Some(Short('y')));
    assert_eq!(parser.next()?, None);

    // A lone "-" is a value, not an option, and does not end option parsing.
    let mut parser = parse("-x - -y");
    assert_eq!(parser.next()?, Some(Short('x')));
    assert_eq!(parser.next()?, Some(Value(OsString::from("-"))));
    assert_eq!(parser.next()?, Some(Short('y')));
    assert_eq!(parser.next()?, None);

    // Inside a group of short options '-' is a (silly, hard to use) short
    // option of its own, which is how other parsers treat it as well.
    let mut parser = parse("-x-y");
    for &flag in &['x', '-', 'y'] {
        assert_eq!(parser.next()?, Some(Short(flag)));
    }
    assert_eq!(parser.next()?, None);
    Ok(())
}
#[test]
fn test_missing_value() -> Result<(), Error> {
    // The error names the short option that lacks its value.
    let mut short = parse("-o");
    assert_eq!(short.next()?, Some(Short('o')));
    if let Err(Error::MissingValue {
        option: Some(option),
    }) = short.value()
    {
        assert_eq!(option, "-o");
    } else {
        panic!();
    }

    // Same for a long option.
    let mut long = parse("--out");
    assert_eq!(long.next()?, Some(Long("out")));
    if let Err(Error::MissingValue {
        option: Some(option),
    }) = long.value()
    {
        assert_eq!(option, "--out");
    } else {
        panic!();
    }

    // Without a preceding option there is no name to report.
    let mut r = parse("");
    assert_matches!(r.value(), Err(Error::MissingValue { option: None }));
    Ok(())
}
#[test]
fn test_weird_args() -> Result<(), Error> {
    let mut parser = Parser::from_args(&[
        "", "--=", "--=3", "-", "-x", "--", "-", "-x", "--", "", "-", "-x",
    ]);

    // An empty argument is an ordinary value.
    assert_eq!(parser.next()?, Some(Value("".into())));

    // "--=" and "--=3" are long options with an empty name. That is weird
    // and questionable, but it seems to be the standard interpretation:
    // GNU getopt_long and argparse complain that it could be an
    // abbreviation of every single long option, and clap complains that
    // "--" is not expected, which matches its treatment of unknown long
    // options.
    assert_eq!(parser.next()?, Some(Long("")));
    assert_eq!(parser.value()?, "");
    assert_eq!(parser.next()?, Some(Long("")));
    assert_eq!(parser.value()?, "3");

    assert_eq!(parser.next()?, Some(Value("-".into())));

    // The first "--" is taken as the value of -x ...
    assert_eq!(parser.next()?, Some(Short('x')));
    assert_eq!(parser.value()?, "--");
    assert_eq!(parser.next()?, Some(Value("-".into())));
    assert_eq!(parser.next()?, Some(Short('x')));

    // ... while the second one ends option parsing, so the rest are values.
    assert_eq!(parser.next()?, Some(Value("".into())));
    assert_eq!(parser.next()?, Some(Value("-".into())));
    assert_eq!(parser.next()?, Some(Value("-x".into())));
    assert_eq!(parser.next()?, None);

    // The empty long option also accepts a non-unicode value.
    #[cfg(any(unix, target_os = "wasi", windows))]
    {
        let mut invalid = parse("--=@");
        assert_eq!(invalid.next()?, Some(Long("")));
        assert_eq!(invalid.value()?, bad_string("@"));
        assert_eq!(invalid.next()?, None);
    }

    // No arguments at all.
    let mut empty = parse("");
    assert_eq!(empty.next()?, None);
    Ok(())
}
#[test]
fn test_unicode() -> Result<(), Error> {
    let mut parser = parse("-aµ --µ=10 µ --foo=µ");

    // Non-ASCII characters work as short options ...
    assert_eq!(parser.next()?, Some(Short('a')));
    assert_eq!(parser.next()?, Some(Short('µ')));

    // ... as long option names ...
    assert_eq!(parser.next()?, Some(Long("µ")));
    assert_eq!(parser.value()?, OsString::from("10"));

    // ... as positional values ...
    assert_eq!(parser.next()?, Some(Value(OsString::from("µ"))));

    // ... and as option values.
    assert_eq!(parser.next()?, Some(Long("foo")));
    assert_eq!(parser.value()?, OsString::from("µ"));
    Ok(())
}
#[cfg(any(unix, target_os = "wasi", windows))]
#[test]
fn test_mixed_invalid() -> Result<(), Error> {
    // Invalid unicode in the value of a long option is passed through.
    let mut long = parse("--foo=@@@");
    assert_eq!(long.next()?, Some(Long("foo")));
    assert_eq!(long.value()?, bad_string("@@@"));

    // Likewise after a multi-byte short option.
    let mut bomb = parse("-💣@@@");
    assert_eq!(bomb.next()?, Some(Short('💣')));
    assert_eq!(bomb.value()?, bad_string("@@@"));

    // Read as options instead, each invalid unit becomes the replacement
    // character.
    let mut shorts = parse("-f@@@");
    assert_eq!(shorts.next()?, Some(Short('f')));
    for _ in 0..3 {
        assert_eq!(shorts.next()?, Some(Short('�')));
    }
    assert_eq!(shorts.next()?, None);

    // Only the first '=' splits; the invalid tail stays in the value.
    let mut nested = parse("--foo=bar=@@@");
    assert_eq!(nested.next()?, Some(Long("foo")));
    assert_eq!(nested.value()?, bad_string("bar=@@@"));
    Ok(())
}
#[cfg(any(unix, target_os = "wasi", windows))]
#[test]
fn test_separate_invalid() -> Result<(), Error> {
    // A non-unicode value given as its own argument is returned untouched.
    let mut parser = parse("--foo @@@");
    assert_eq!(parser.next()?, Some(Long("foo")));
    let value = parser.value()?;
    assert_eq!(value, bad_string("@@@"));
    Ok(())
}
#[cfg(any(unix, target_os = "wasi", windows))]
#[test]
fn test_invalid_long_option() -> Result<(), Error> {
    // Invalid unicode in a long option's name shows up as the replacement
    // character; an attached value is still usable.
    let mut with_value = parse("--@=10");
    assert_eq!(with_value.next()?, Some(Long("�")));
    assert_eq!(with_value.value()?, OsString::from("10"));
    assert_eq!(with_value.next()?, None);

    // The same without a value.
    let mut bare = parse("--@");
    assert_eq!(bare.next()?, Some(Long("�")));
    assert_eq!(bare.next()?, None);
    Ok(())
}
#[test]
fn short_opt_equals_sign() -> Result<(), Error> {
    // "-a=VALUE" attaches VALUE (possibly empty) to -a.
    for &(input, attached) in &[("-a=b", "b"), ("-a=", "")] {
        // The value can be picked up with value() ...
        let mut parser = parse(input);
        assert_eq!(parser.next()?, Some(Short('a')));
        assert_eq!(parser.value()?, attached);
        assert_eq!(parser.next()?, None);

        // ... and ignoring it is reported as an error, after which parsing
        // carries on.
        let mut parser = parse(input);
        assert_eq!(parser.next()?, Some(Short('a')));
        let err = parser.next().unwrap_err();
        if let Error::UnexpectedValue { option, value } = err {
            assert_eq!(option, "-a");
            assert_eq!(value, attached);
        } else {
            panic!();
        }
        assert_eq!(parser.next()?, None);
    }

    // A leading '=' is a short option in its own right ...
    let mut parser = parse("-=");
    assert_eq!(parser.next()?, Some(Short('=')));
    assert_eq!(parser.next()?, None);

    // ... and can take an attached value like any other.
    let mut parser = parse("-=a");
    assert_eq!(parser.next()?, Some(Short('=')));
    assert_eq!(parser.value()?, OsString::from("a"));
    Ok(())
}
#[cfg(any(unix, target_os = "wasi", windows))]
#[test]
fn short_opt_equals_sign_invalid() -> Result<(), Error> {
    // A non-unicode value attached with '=' can be retrieved as-is.
    let mut parser = parse("-a=@");
    assert_eq!(parser.next()?, Some(Short('a')));
    let value = parser.value()?;
    assert_eq!(value, bad_string("@"));
    assert_eq!(parser.next()?, None);

    // If it is not retrieved, the error carries the raw value.
    let mut parser = parse("-a=@");
    assert_eq!(parser.next()?, Some(Short('a')));
    let err = parser.next().unwrap_err();
    if let Error::UnexpectedValue { option, value } = err {
        assert_eq!(option, "-a");
        assert_eq!(value, bad_string("@"));
    } else {
        panic!();
    }
    assert_eq!(parser.next()?, None);

    // '=' as the option itself, followed by a non-unicode value.
    let mut parser = parse("-=@");
    assert_eq!(parser.next()?, Some(Short('=')));
    let value = parser.value()?;
    assert_eq!(value, bad_string("@"));
    Ok(())
}
#[test]
fn multi_values() -> Result<(), Error> {
    // Every argument up to the next option (or "--") belongs to the option.
    let greedy = ["-a b c d", "-ab c d", "-a b c d --", "--a b c d"];
    for &input in &greedy {
        let mut parser = parse(input);
        parser.next()?.unwrap();
        let mut values = parser.values()?;
        let collected: Vec<_> = values.by_ref().collect();
        assert_eq!(collected, &["b", "c", "d"]);
        assert!(values.next().is_none());
        assert!(parser.next()?.is_none());
    }

    // A value attached with '=' is the only value; what follows is left
    // for the regular parser.
    let attached = ["-a=b c", "--a=b c"];
    for &input in &attached {
        let mut parser = parse(input);
        parser.next()?.unwrap();
        let mut values = parser.values()?;
        let collected: Vec<_> = values.by_ref().collect();
        assert_eq!(collected, &["b"]);
        assert!(values.next().is_none());
        assert_eq!(parser.next()?, Some(Value(OsString::from("c"))));
        assert!(parser.next()?.is_none());
    }

    // With no value available values() fails, and the parser stays usable.
    let missing = ["-a", "--a", "-a -b", "-a -- b", "-a --"];
    for &input in &missing {
        let mut parser = parse(input);
        parser.next()?.unwrap();
        assert!(parser.values().is_err());
        assert!(parser.next().is_ok());
        assert!(parser.next().unwrap().is_none());
    }

    // An explicitly empty value still counts as a value.
    let empty = ["-a=", "--a="];
    for &input in &empty {
        let mut parser = parse(input);
        parser.next()?.unwrap();
        let mut values = parser.values()?;
        let collected: Vec<_> = values.by_ref().collect();
        assert_eq!(collected, &[""]);
        assert!(values.next().is_none());
        assert!(parser.next()?.is_none());
    }

    // Test that .values() does not eagerly consume the first value
    let lazy = ["-a=b", "--a=b", "-a b"];
    for &input in &lazy {
        let mut parser = parse(input);
        parser.next()?.unwrap();
        assert!(parser.values().is_ok());
        assert_eq!(parser.value()?, OsString::from("b"));
    }

    // Same check for a would-be value inside a group of short options: if
    // it is not consumed, it is parsed as an option afterwards.
    let mut parser = parse("-ab");
    parser.next()?.unwrap();
    assert!(parser.values().is_ok());
    assert_eq!(parser.next()?, Some(Short('b')));

    Ok(())
}
#[test]
fn raw_args() -> Result<(), Error> {
    // Raw access works before, during and after draining the arguments.
    let mut parser = parse("-a b c d");
    assert!(parser.try_raw_args().is_some());
    let everything: Vec<_> = parser.raw_args()?.collect();
    assert_eq!(everything, &["-a", "b", "c", "d"]);
    assert!(parser.try_raw_args().is_some());
    assert!(parser.next()?.is_none());
    assert!(parser.try_raw_args().is_some());
    assert!(parser.raw_args()?.as_slice().is_empty());

    // In the middle of a group of short options raw access is refused.
    // try_raw_args() leaves the parser as it was, whereas a failed
    // raw_args() makes raw access available again.
    let mut parser = parse("-ab c d");
    parser.next()?;
    assert!(parser.try_raw_args().is_none());
    assert!(parser.raw_args().is_err());
    let rest: Vec<_> = parser.try_raw_args().unwrap().collect();
    assert_eq!(rest, &["c", "d"]);
    assert!(parser.next()?.is_none());
    assert!(parser.try_raw_args().unwrap().as_slice().is_empty());

    // Arguments not taken from the raw iterator go to the regular parser.
    let mut parser = parse("-a b c d");
    let head: Vec<_> = parser.raw_args()?.take(3).collect();
    assert_eq!(head, &["-a", "b", "c"]);
    assert_eq!(parser.next()?, Some(Value(OsString::from("d"))));
    assert!(parser.next()?.is_none());

    // Peeking and a rejected next_if() do not consume anything.
    let mut parser = parse("a");
    let mut raw = parser.raw_args()?;
    assert_eq!(raw.peek(), Some("a".as_ref()));
    assert_eq!(raw.next_if(|_| false), None);
    assert_eq!(parser.next()?, Some(Value(OsString::from("a"))));
    assert!(parser.next()?.is_none());

    Ok(())
}
/// Checks what `bin_name()` reports for the different constructors.
#[test]
fn bin_name() {
// from_iter() is expected to treat the first item as the binary name.
assert_eq!(
Parser::from_iter(&["foo", "bar", "baz"]).bin_name(),
Some("foo")
);
// from_args() is expected to report no binary name at all.
assert_eq!(Parser::from_args(&["foo", "bar", "baz"]).bin_name(), None);
// An empty iterator has no name; an empty first item is an empty name.
assert_eq!(Parser::from_iter(&[] as &[&str]).bin_name(), None);
assert_eq!(Parser::from_iter(&[""]).bin_name(), Some(""));
// The test process itself is expected to have a name.
assert!(Parser::from_env().bin_name().is_some());
// Invalid unicode in the name is expected to come back with the
// replacement character in its place.
#[cfg(any(unix, target_os = "wasi", windows))]
assert_eq!(
Parser::from_iter(vec![bad_string("foo@bar")]).bin_name(),
Some("foo�bar")
);
}
#[test]
fn test_value_ext() -> Result<(), Error> {
    let raw = OsString::from("-10");

    // Successful conversions, through FromStr and through a custom function.
    let number: i32 = raw.parse()?;
    assert_eq!(number, -10);
    let custom = raw.parse_with(|text| if text == "-10" { Ok(0) } else { Err("bad") })?;
    assert_eq!(custom, 0);

    // Failed conversions report the text that could not be parsed.
    if let Err(Error::ParsingFailed { value, .. }) = raw.parse::<u32>() {
        assert_eq!(value, "-10");
    } else {
        panic!();
    }
    let rejected = raw.parse_with(|text| if text == "11" { Ok(0_i32) } else { Err("bad") });
    if let Err(Error::ParsingFailed { value, .. }) = rejected {
        assert_eq!(value, "-10");
    } else {
        panic!();
    }

    // Conversion to a String consumes the value.
    let owned = raw.string()?;
    assert_eq!(owned, "-10");
    Ok(())
}
/// Every `ValueExt` method is expected to report `NonUnicodeValue` when the
/// value contains invalid unicode.
#[cfg(any(unix, target_os = "wasi", windows))]
#[test]
fn test_value_ext_invalid() -> Result<(), Error> {
// "foo" followed by one invalid unit.
let s = bad_string("foo@");
assert_matches!(s.parse::<i32>(), Err(Error::NonUnicodeValue(_)));
assert_matches!(
s.parse_with(<f32 as FromStr>::from_str),
Err(Error::NonUnicodeValue(_))
);
// string() takes the value by value, so it has to come last.
assert_matches!(s.string(), Err(Error::NonUnicodeValue(_)));
Ok(())
}
/// Checks `first_codepoint` on valid input, on invalid leading bytes (the
/// returned byte included) and around its 4-byte lookahead window.
#[test]
fn test_first_codepoint() {
    // Valid leading codepoints; whatever follows them is irrelevant.
    assert_eq!(first_codepoint(b"foo"), Ok(Some('f')));
    assert_eq!(first_codepoint(b""), Ok(None));
    assert_eq!(first_codepoint(b"f\xFF\xFF"), Ok(Some('f')));
    assert_eq!(first_codepoint(b"\xC2\xB5bar"), Ok(Some('µ')));
    assert_eq!(first_codepoint(b"foo\xC2\xB5"), Ok(Some('f')));

    // An invalid start must hand back exactly the first byte.
    assert_eq!(first_codepoint(b"\xFF"), Err(0xFF));
    assert_eq!(first_codepoint(b"\xFFzzz"), Err(0xFF));
    // A sequence that is cut short is invalid as well.
    assert_eq!(first_codepoint(b"\xC2"), Err(0xC2));

    // Only the first four bytes are examined: a 4-byte codepoint fits
    // exactly, and one that gets cut off by the window must not disturb a
    // valid codepoint in front of it.
    assert_eq!(first_codepoint("💣".as_bytes()), Ok(Some('💣')));
    assert_eq!(first_codepoint("💣x".as_bytes()), Ok(Some('💣')));
    assert_eq!(first_codepoint("abc💣".as_bytes()), Ok(Some('a')));
}
/// Build an `OsString` from `text` in which every `@` is replaced by
/// something that is not valid unicode.
///
/// On platforms where no way of doing that is known, text containing `@` is
/// rejected with a panic and everything else is converted as-is.
fn bad_string(text: &str) -> OsString {
    #[cfg(any(unix, target_os = "wasi"))]
    {
        // 0xFF never occurs in well-formed UTF-8.
        let bytes: Vec<u8> = text
            .bytes()
            .map(|byte| if byte == b'@' { b'\xFF' } else { byte })
            .collect();
        OsString::from_vec(bytes)
    }
    #[cfg(windows)]
    {
        // 0xD800 is a surrogate with no partner following it.
        let mut units: Vec<u16> = Vec::new();
        for ch in text.chars() {
            if ch == '@' {
                units.push(0xD800);
            } else {
                let mut buf = [0u16; 2];
                units.extend_from_slice(ch.encode_utf16(&mut buf));
            }
        }
        OsString::from_wide(&units)
    }
    #[cfg(not(any(unix, target_os = "wasi", windows)))]
    {
        if text.contains('@') {
            unimplemented!("Don't know how to create invalid OsStrings on this platform");
        }
        text.into()
    }
}
/// Poor man's fuzzer: feeds every sequence of up to three "interesting"
/// arguments to `exhaust`. Nothing should panic or hang, and the checks in
/// `exhaust` should hold.
///
/// Compared to real fuzzing this has the advantage of running on every
/// platform, together with the other tests. cargo-fuzz doesn't work on
/// Windows and requires a special incantation.
///
/// On the downside it only covers arguments I could think of, in very short
/// sequences, and it is bad at reporting failure, though the println!()
/// helps.
///
/// This test takes a while to run.
#[test]
fn basic_fuzz() {
    #[cfg(any(windows, unix, target_os = "wasi"))]
    const VOCABULARY: &[&str] = &[
        "", "-", "--", "---", "a", "-a", "-aa", "@", "-@", "-a@", "-@a", "--a", "--@", "--a=a",
        "--a=", "--a=@", "--@=a", "--=", "--=@", "--=a", "-@@", "-a=a", "-a=", "-=", "-a-",
    ];
    #[cfg(not(any(windows, unix, target_os = "wasi")))]
    const VOCABULARY: &[&str] = &[
        "", "-", "--", "---", "a", "-a", "-aa", "--a", "--a=a", "--a=", "--=", "--=a", "-a=a",
        "-a=", "-=", "-a-",
    ];

    // A parser with no arguments at all.
    exhaust(Parser::new(None, Vec::new().into_iter()), 0);

    let words: Vec<OsString> = VOCABULARY.iter().map(|&word| bad_string(word)).collect();

    // Each round appends every word to every sequence of the previous round,
    // giving all sequences of length 1, then 2, then 3.
    let mut sequences: Vec<Vec<&OsString>> = vec![Vec::new()];
    for _ in 0..3 {
        let mut longer = Vec::with_capacity(sequences.len() * words.len());
        for prefix in &sequences {
            for word in &words {
                let mut sequence = prefix.clone();
                sequence.push(word);
                longer.push(sequence);
            }
        }
        sequences = longer;

        for sequence in &sequences {
            // Printed so that a failure can be traced back to its input.
            println!("{:?}", sequence);
            exhaust(Parser::from_args(sequence), 0);
        }
    }
}
/// Run many sequences of methods on a Parser.
///
/// Calls the parser's methods in turn, checks what each one does to the
/// parser, and recurses on the resulting parser where noted below. `depth`
/// is the current recursion depth; going beyond 100 is treated as the
/// parser failing to make progress.
fn exhaust(mut parser: Parser, depth: u16) {
if depth > 100 {
panic!("Stuck in loop");
}
// has_pending() == optional_value().is_some()
if parser.has_pending() {
{
// With something pending, raw access is expected to be refused.
// try_raw_args() may fail repeatedly, whereas a single failed
// raw_args() is expected to make the following calls succeed.
let mut parser = parser.clone();
assert!(parser.try_raw_args().is_none());
assert!(parser.try_raw_args().is_none());
assert!(parser.raw_args().is_err());
// Recovery possible
assert!(parser.raw_args().is_ok());
assert!(parser.try_raw_args().is_some());
}
{
// Take the pending value on a copy and continue from there.
let mut parser = parser.clone();
assert!(parser.optional_value().is_some());
exhaust(parser, depth + 1);
}
} else {
// Nothing pending: remember the state and the number of remaining
// arguments to compare against after the calls below.
let prev_state = parser.state.clone();
let prev_remaining = parser.source.as_slice().len();
assert!(parser.optional_value().is_none());
assert!(parser.raw_args().is_ok());
assert!(parser.try_raw_args().is_some());
// Verify state transitions
match prev_state {
State::None | State::PendingValue(_) => {
assert_matches!(parser.state, State::None);
}
// With nothing pending, the position in a group of short options
// must already have been at the end of the argument.
State::Shorts(arg, pos) => {
assert_eq!(pos, arg.len());
assert_matches!(parser.state, State::None);
}
#[cfg(windows)]
State::ShortsU16(arg, pos) => {
assert_eq!(pos, arg.len());
assert_matches!(parser.state, State::None);
}
// FinishedOpts is expected to be kept.
State::FinishedOpts => assert_matches!(parser.state, State::FinishedOpts),
}
// No arguments were consumed
assert_eq!(parser.source.as_slice().len(), prev_remaining);
}
{
// next() on a copy: Ok(None) must mean that everything was consumed;
// any other outcome (an argument or an error) is explored further.
let mut parser = parser.clone();
match parser.next() {
Ok(None) => {
assert_matches!(parser.state, State::None | State::FinishedOpts);
assert_eq!(parser.source.as_slice().len(), 0);
}
_ => exhaust(parser, depth + 1),
}
}
{
// value() on a copy: it may only fail once the arguments have run out,
// and it must leave no partial state behind either way.
let mut parser = parser.clone();
match parser.value() {
Err(_) => {
assert_matches!(parser.state, State::None | State::FinishedOpts);
assert_eq!(parser.source.as_slice().len(), 0);
}
Ok(_) => {
assert_matches!(parser.state, State::None | State::FinishedOpts);
exhaust(parser, depth + 1);
}
}
}
{
// values() on the parser itself (no copy is needed for the last check):
// on success it must yield at least one value.
match parser.values() {
Err(_) => (),
Ok(iter) => {
assert!(iter.count() > 0);
exhaust(parser, depth + 1);
}
}
}
}
}