blob: 2535d3a3fcf6b214aebb82e7d7c7958af475f49f [file] [log] [blame]
// Copyright © 2019 The Rust Fuzz Project Developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! The `Arbitrary` trait crate.
//!
//! This trait provides an [`Arbitrary`] trait to
//! produce well-typed, structured values, from raw, byte buffers. It is
//! generally intended to be used with fuzzers like AFL or libFuzzer. See the
//! [`Arbitrary`] trait's documentation for details on
//! automatically deriving, implementing, and/or using the trait.
#![deny(bad_style)]
#![deny(missing_docs)]
#![deny(future_incompatible)]
#![deny(nonstandard_style)]
#![deny(rust_2018_compatibility)]
#![deny(rust_2018_idioms)]
#![deny(unused)]
mod error;
mod foreign;
pub mod size_hint;
pub mod unstructured;
#[cfg(test)]
mod tests;
pub use error::*;
#[cfg(feature = "derive_arbitrary")]
pub use derive_arbitrary::*;
#[doc(inline)]
pub use unstructured::Unstructured;
/// Error indicating that the maximum recursion depth has been reached while calculating [`Arbitrary::size_hint`]()
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct MaxRecursionReached {}
impl core::fmt::Display for MaxRecursionReached {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
f.write_str("Maximum recursion depth has been reached")
}
}
impl std::error::Error for MaxRecursionReached {}
/// Generate arbitrary structured values from raw, unstructured data.
///
/// The `Arbitrary` trait allows you to generate valid structured values, like
/// `HashMap`s, or ASTs, or `MyTomlConfig`, or any other data structure from
/// raw, unstructured bytes provided by a fuzzer.
///
/// # Deriving `Arbitrary`
///
/// Automatically deriving the `Arbitrary` trait is the recommended way to
/// implement `Arbitrary` for your types.
///
/// Using the custom derive requires that you enable the `"derive"` cargo
/// feature in your `Cargo.toml`:
///
/// ```toml
/// [dependencies]
/// arbitrary = { version = "1", features = ["derive"] }
/// ```
///
/// Then, you add the `#[derive(Arbitrary)]` annotation to your `struct` or
/// `enum` type definition:
///
/// ```
/// # #[cfg(feature = "derive")] mod foo {
/// use arbitrary::Arbitrary;
/// use std::collections::HashSet;
///
/// #[derive(Arbitrary)]
/// pub struct AddressBook {
/// friends: HashSet<Friend>,
/// }
///
/// #[derive(Arbitrary, Hash, Eq, PartialEq)]
/// pub enum Friend {
/// Buddy { name: String },
/// Pal { age: usize },
/// }
/// # }
/// ```
///
/// Every member of the `struct` or `enum` must also implement `Arbitrary`.
///
/// It is also possible to change the default bounds added by the derive:
///
/// ```
/// # #[cfg(feature = "derive")] mod foo {
/// use arbitrary::Arbitrary;
///
/// trait Trait {
/// type Assoc: for<'a> Arbitrary<'a>;
/// }
///
/// #[derive(Arbitrary)]
/// // The bounds are used verbatim, so any existing trait bounds will need to be repeated.
/// #[arbitrary(bound = "T: Trait")]
/// struct Point<T: Trait> {
/// x: T::Assoc,
/// }
/// # }
/// ```
///
/// # Implementing `Arbitrary` By Hand
///
/// Implementing `Arbitrary` mostly involves nested calls to other `Arbitrary`
/// arbitrary implementations for each of your `struct` or `enum`'s members. But
/// sometimes you need some amount of raw data, or you need to generate a
/// variably-sized collection type, or something of that sort. The
/// [`Unstructured`] type helps you with these tasks.
///
/// ```
/// # #[cfg(feature = "derive")] mod foo {
/// # pub struct MyCollection<T> { _t: std::marker::PhantomData<T> }
/// # impl<T> MyCollection<T> {
/// # pub fn new() -> Self { MyCollection { _t: std::marker::PhantomData } }
/// # pub fn insert(&mut self, element: T) {}
/// # }
/// use arbitrary::{Arbitrary, Result, Unstructured};
///
/// impl<'a, T> Arbitrary<'a> for MyCollection<T>
/// where
/// T: Arbitrary<'a>,
/// {
/// fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
/// // Get an iterator of arbitrary `T`s.
/// let iter = u.arbitrary_iter::<T>()?;
///
/// // And then create a collection!
/// let mut my_collection = MyCollection::new();
/// for elem_result in iter {
/// let elem = elem_result?;
/// my_collection.insert(elem);
/// }
///
/// Ok(my_collection)
/// }
/// }
/// # }
/// ```
///
/// # A Note On Output Distributions
///
/// There is no requirement for a particular distribution of the values. For
/// example, it is not required that every value appears with the same
/// probability. That being said, the main use for `Arbitrary` is for fuzzing,
/// so in many cases a uniform distribution will make the most sense in order to
/// provide the best coverage of the domain. In other cases this is not
/// desirable or even possible, for example when sampling from a uniform
/// distribution is computationally expensive or in the case of collections that
/// may grow indefinitely.
pub trait Arbitrary<'a>: Sized {
/// Generate an arbitrary value of `Self` from the given unstructured data.
///
/// Calling `Arbitrary::arbitrary` requires that you have some raw data,
/// perhaps given to you by a fuzzer like AFL or libFuzzer. You wrap this
/// raw data in an `Unstructured`, and then you can call `<MyType as
/// Arbitrary>::arbitrary` to construct an arbitrary instance of `MyType`
/// from that unstructured data.
///
/// Implementations may return an error if there is not enough data to
/// construct a full instance of `Self`, or they may fill out the rest of
/// `Self` with dummy values. Using dummy values when the underlying data is
/// exhausted can help avoid accidentally "defeating" some of the fuzzer's
/// mutations to the underlying byte stream that might otherwise lead to
/// interesting runtime behavior or new code coverage if only we had just a
/// few more bytes. However, it also requires that implementations for
/// recursive types (e.g. `struct Foo(Option<Box<Foo>>)`) avoid infinite
/// recursion when the underlying data is exhausted.
///
/// ```
/// # #[cfg(feature = "derive")] fn foo() {
/// use arbitrary::{Arbitrary, Unstructured};
///
/// #[derive(Arbitrary)]
/// pub struct MyType {
/// // ...
/// }
///
/// // Get the raw data from the fuzzer or wherever else.
/// # let get_raw_data_from_fuzzer = || &[];
/// let raw_data: &[u8] = get_raw_data_from_fuzzer();
///
/// // Wrap that raw data in an `Unstructured`.
/// let mut unstructured = Unstructured::new(raw_data);
///
/// // Generate an arbitrary instance of `MyType` and do stuff with it.
/// if let Ok(value) = MyType::arbitrary(&mut unstructured) {
/// # let do_stuff = |_| {};
/// do_stuff(value);
/// }
/// # }
/// ```
///
/// See also the documentation for [`Unstructured`].
fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self>;
/// Generate an arbitrary value of `Self` from the entirety of the given
/// unstructured data.
///
/// This is similar to Arbitrary::arbitrary, however it assumes that it is
/// the last consumer of the given data, and is thus able to consume it all
/// if it needs. See also the documentation for
/// [`Unstructured`].
fn arbitrary_take_rest(mut u: Unstructured<'a>) -> Result<Self> {
Self::arbitrary(&mut u)
}
/// Get a size hint for how many bytes out of an `Unstructured` this type
/// needs to construct itself.
///
/// This is useful for determining how many elements we should insert when
/// creating an arbitrary collection.
///
/// The return value is similar to [`Iterator::size_hint`]: it returns a
/// tuple where the first element is a lower bound on the number of bytes
/// required, and the second element is an optional upper bound.
///
/// The default implementation return `(0, None)` which is correct for any
/// type, but not ultimately that useful. Using `#[derive(Arbitrary)]` will
/// create a better implementation. If you are writing an `Arbitrary`
/// implementation by hand, and your type can be part of a dynamically sized
/// collection (such as `Vec`), you are strongly encouraged to override this
/// default with a better implementation, and also override
/// [`try_size_hint`].
///
/// ## How to implement this
///
/// If the size hint calculation is a trivial constant and does not recurse
/// into any other `size_hint` call, you should implement it in `size_hint`:
///
/// ```
/// use arbitrary::{size_hint, Arbitrary, Result, Unstructured};
///
/// struct SomeStruct(u8);
///
/// impl<'a> Arbitrary<'a> for SomeStruct {
/// fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
/// let buf = &mut [0];
/// u.fill_buffer(buf)?;
/// Ok(SomeStruct(buf[0]))
/// }
///
/// #[inline]
/// fn size_hint(depth: usize) -> (usize, Option<usize>) {
/// let _ = depth;
/// (1, Some(1))
/// }
/// }
/// ```
///
/// Otherwise, it should instead be implemented in [`try_size_hint`],
/// and the `size_hint` implementation should forward to it:
///
/// ```
/// use arbitrary::{size_hint, Arbitrary, MaxRecursionReached, Result, Unstructured};
///
/// struct SomeStruct<A, B> {
/// a: A,
/// b: B,
/// }
///
/// impl<'a, A: Arbitrary<'a>, B: Arbitrary<'a>> Arbitrary<'a> for SomeStruct<A, B> {
/// fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
/// // ...
/// # todo!()
/// }
///
/// fn size_hint(depth: usize) -> (usize, Option<usize>) {
/// // Return the value of try_size_hint
/// //
/// // If the recursion fails, return the default, always valid `(0, None)`
/// Self::try_size_hint(depth).unwrap_or_default()
/// }
///
/// fn try_size_hint(depth: usize) -> Result<(usize, Option<usize>), MaxRecursionReached> {
/// // Protect against potential infinite recursion with
/// // `try_recursion_guard`.
/// size_hint::try_recursion_guard(depth, |depth| {
/// // If we aren't too deep, then `recursion_guard` calls
/// // this closure, which implements the natural size hint.
/// // Don't forget to use the new `depth` in all nested
/// // `try_size_hint` calls! We recommend shadowing the
/// // parameter, like what is done here, so that you can't
/// // accidentally use the wrong depth.
/// Ok(size_hint::and(
/// <A as Arbitrary>::try_size_hint(depth)?,
/// <B as Arbitrary>::try_size_hint(depth)?,
/// ))
/// })
/// }
/// }
/// ```
///
/// ## Invariant
///
/// It must be possible to construct every possible output using only inputs
/// of lengths bounded by these parameters. This applies to both
/// [`Arbitrary::arbitrary`] and [`Arbitrary::arbitrary_take_rest`].
///
/// This is trivially true for `(0, None)`. To restrict this further, it
/// must be proven that all inputs that are now excluded produced redundant
/// outputs which are still possible to produce using the reduced input
/// space.
///
/// [iterator-size-hint]: https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.size_hint
/// [`try_size_hint`]: Arbitrary::try_size_hint
#[inline]
fn size_hint(depth: usize) -> (usize, Option<usize>) {
let _ = depth;
(0, None)
}
/// Get a size hint for how many bytes out of an `Unstructured` this type
/// needs to construct itself.
///
/// Unlike [`size_hint`], this function keeps the information that the
/// recursion limit was reached. This is required to "short circuit" the
/// calculation and avoid exponential blowup with recursive structures.
///
/// If you are implementing [`size_hint`] for a struct that could be
/// recursive, you should implement `try_size_hint` and call the
/// `try_size_hint` when recursing
///
///
/// The return value is similar to [`core::iter::Iterator::size_hint`]: it
/// returns a tuple where the first element is a lower bound on the number
/// of bytes required, and the second element is an optional upper bound.
///
/// The default implementation returns the value of [`size_hint`] which is
/// correct for any type, but might lead to exponential blowup when dealing
/// with recursive types.
///
/// ## Invariant
///
/// It must be possible to construct every possible output using only inputs
/// of lengths bounded by these parameters. This applies to both
/// [`Arbitrary::arbitrary`] and [`Arbitrary::arbitrary_take_rest`].
///
/// This is trivially true for `(0, None)`. To restrict this further, it
/// must be proven that all inputs that are now excluded produced redundant
/// outputs which are still possible to produce using the reduced input
/// space.
///
/// ## When to implement `try_size_hint`
///
/// If you 100% know that the type you are implementing `Arbitrary` for is
/// not a recursive type, or your implementation is not transitively calling
/// any other `size_hint` methods, you may implement [`size_hint`], and the
/// default `try_size_hint` implementation will use it.
///
/// Note that if you are implementing `Arbitrary` for a generic type, you
/// cannot guarantee the lack of type recursion!
///
/// Otherwise, when there is possible type recursion, you should implement
/// `try_size_hint` instead.
///
/// ## The `depth` parameter
///
/// When implementing `try_size_hint`, you need to use
/// [`arbitrary::size_hint::try_recursion_guard(depth)`][crate::size_hint::try_recursion_guard]
/// to prevent potential infinite recursion when calculating size hints for
/// potentially recursive types:
///
/// ```
/// use arbitrary::{size_hint, Arbitrary, MaxRecursionReached, Unstructured};
///
/// // This can potentially be a recursive type if `L` or `R` contain
/// // something like `Box<Option<MyEither<L, R>>>`!
/// enum MyEither<L, R> {
/// Left(L),
/// Right(R),
/// }
///
/// impl<'a, L, R> Arbitrary<'a> for MyEither<L, R>
/// where
/// L: Arbitrary<'a>,
/// R: Arbitrary<'a>,
/// {
/// fn arbitrary(u: &mut Unstructured) -> arbitrary::Result<Self> {
/// // ...
/// # unimplemented!()
/// }
///
/// fn size_hint(depth: usize) -> (usize, Option<usize>) {
/// // Return the value of `try_size_hint`
/// //
/// // If the recursion fails, return the default `(0, None)` range,
/// // which is always valid.
/// Self::try_size_hint(depth).unwrap_or_default()
/// }
///
/// fn try_size_hint(depth: usize) -> Result<(usize, Option<usize>), MaxRecursionReached> {
/// // Protect against potential infinite recursion with
/// // `try_recursion_guard`.
/// size_hint::try_recursion_guard(depth, |depth| {
/// // If we aren't too deep, then `recursion_guard` calls
/// // this closure, which implements the natural size hint.
/// // Don't forget to use the new `depth` in all nested
/// // `try_size_hint` calls! We recommend shadowing the
/// // parameter, like what is done here, so that you can't
/// // accidentally use the wrong depth.
/// Ok(size_hint::or(
/// <L as Arbitrary>::try_size_hint(depth)?,
/// <R as Arbitrary>::try_size_hint(depth)?,
/// ))
/// })
/// }
/// }
/// ```
#[inline]
fn try_size_hint(depth: usize) -> Result<(usize, Option<usize>), MaxRecursionReached> {
Ok(Self::size_hint(depth))
}
}
/// Multiple conflicting arbitrary attributes are used on the same field:
/// ```compile_fail
/// #[derive(::arbitrary::Arbitrary)]
/// struct Point {
/// #[arbitrary(value = 2)]
/// #[arbitrary(value = 2)]
/// x: i32,
/// }
/// ```
///
/// An unknown attribute:
/// ```compile_fail
/// #[derive(::arbitrary::Arbitrary)]
/// struct Point {
/// #[arbitrary(unknown_attr)]
/// x: i32,
/// }
/// ```
///
/// An unknown attribute with a value:
/// ```compile_fail
/// #[derive(::arbitrary::Arbitrary)]
/// struct Point {
/// #[arbitrary(unknown_attr = 13)]
/// x: i32,
/// }
/// ```
///
/// `value` without RHS:
/// ```compile_fail
/// #[derive(::arbitrary::Arbitrary)]
/// struct Point {
/// #[arbitrary(value)]
/// x: i32,
/// }
/// ```
///
/// `with` without RHS:
/// ```compile_fail
/// #[derive(::arbitrary::Arbitrary)]
/// struct Point {
/// #[arbitrary(with)]
/// x: i32,
/// }
/// ```
///
/// Multiple conflicting bounds at the container-level:
/// ```compile_fail
/// #[derive(::arbitrary::Arbitrary)]
/// #[arbitrary(bound = "T: Default")]
/// #[arbitrary(bound = "T: Default")]
/// struct Point<T: Default> {
/// #[arbitrary(default)]
/// x: T,
/// }
/// ```
///
/// Multiple conflicting bounds in a single bound attribute:
/// ```compile_fail
/// #[derive(::arbitrary::Arbitrary)]
/// #[arbitrary(bound = "T: Default, T: Default")]
/// struct Point<T: Default> {
/// #[arbitrary(default)]
/// x: T,
/// }
/// ```
///
/// Multiple conflicting bounds in multiple bound attributes:
/// ```compile_fail
/// #[derive(::arbitrary::Arbitrary)]
/// #[arbitrary(bound = "T: Default", bound = "T: Default")]
/// struct Point<T: Default> {
/// #[arbitrary(default)]
/// x: T,
/// }
/// ```
///
/// Too many bounds supplied:
/// ```compile_fail
/// #[derive(::arbitrary::Arbitrary)]
/// #[arbitrary(bound = "T: Default")]
/// struct Point {
/// x: i32,
/// }
/// ```
///
/// Too many bounds supplied across multiple attributes:
/// ```compile_fail
/// #[derive(::arbitrary::Arbitrary)]
/// #[arbitrary(bound = "T: Default")]
/// #[arbitrary(bound = "U: Default")]
/// struct Point<T: Default> {
/// #[arbitrary(default)]
/// x: T,
/// }
/// ```
///
/// Attempt to use the derive attribute on an enum variant:
/// ```compile_fail
/// #[derive(::arbitrary::Arbitrary)]
/// enum Enum<T: Default> {
/// #[arbitrary(default)]
/// Variant(T),
/// }
/// ```
#[cfg(all(doctest, feature = "derive"))]
pub struct CompileFailTests;