//! An efficient way to check whether a given package has been yanked
use std::{
collections::{BTreeSet, HashMap},
time::Duration,
};
use crate::{
error::{Error, ErrorKind},
package::{self, Package},
};
pub use tame_index::external::reqwest::ClientBuilder;
use tame_index::utils::flock::{FileLock, LockOptions};
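/// The underlying crates.io index backend: a git checkout, a read-only local cache
/// of the sparse index, or a sparse index with remote (HTTP) access.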
enum Index {
Git(tame_index::index::RemoteGitIndex),
SparseCached(tame_index::index::SparseIndex),
SparseRemote(tame_index::index::AsyncRemoteSparseIndex),
}
impl Index {
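/// Looks up a crate in the index.
///
/// Returns `Ok(None)` if the index has no (locally cached) entry for this crate.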
#[inline]
fn krate(
&self,
name: &package::Name,
lock: &FileLock,
) -> Result<Option<tame_index::IndexKrate>, Error> {
let name = name.as_str().try_into().map_err(Error::from_tame)?;
let res = match self {
Self::Git(gi) => gi.krate(name, true, lock),
Self::SparseCached(si) => si.cached_krate(name, lock),
Self::SparseRemote(rsi) => rsi.cached_krate(name, lock),
}
.map_err(Error::from_tame)?;
Ok(res)
}
}
/// Provides an efficient way to check if the given package has been yanked.
///
/// Operations on the crates.io index are rather slow.
/// Instead of performing an index lookup for every version of every crate,
/// this implementation looks up each crate only once and caches the result in memory.
///
/// Please note that this struct will hold a global Cargo package lock while it exists.
/// Cargo operations that download crates (e.g. `cargo update` or even `cargo build`)
/// will not be possible while this lock is held.
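///
/// ## Example
///
/// A minimal usage sketch, assuming this type is re-exported from this crate's
/// registry module and that `lockfile` is a parsed `cargo_lock::Lockfile`
/// (both names are illustrative, not prescriptive):
///
/// ```ignore
/// use std::time::Duration;
///
/// // Fetch the latest index data, waiting up to 60 seconds for the package lock.
/// let mut index = CachedIndex::fetch(None, Duration::from_secs(60))?;
/// // Check all packages from the lockfile in a single batch.
/// for result in index.find_yanked(lockfile.packages.iter()) {
///     let package = result?;
///     println!("yanked: {} {}", package.name, package.version);
/// }
/// ```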
pub struct CachedIndex {
index: Index,
/// The inner hash map is logically `HashMap<Version, IsYanked>`,
/// but we don't parse semver because the crates.io registry contains invalid semver:
/// <https://github.com/rustsec/rustsec/issues/759>
cache: HashMap<package::Name, Result<Option<HashMap<String, bool>>, Error>>,
/// The lock we hold on the Cargo cache directory
lock: FileLock,
}
impl CachedIndex {
/// Open the local crates.io index
///
/// If this opens a git index, it will perform a fetch to get the latest index
/// information.
///
/// If this is a sparse index, its entries will be downloaded later, on demand.
///
/// ## Locking
///
/// This function will wait for up to `lock_timeout` for the filesystem lock on the repository.
/// It will fail with [`rustsec::Error::LockTimeout`](Error) if the lock is still held
/// after that time.
///
/// If `lock_timeout` is set to `std::time::Duration::from_secs(0)`, it will not wait at all,
/// and instead return an error immediately if it fails to acquire the lock.
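///
/// ## Example
///
/// A sketch of constructing the index with the default HTTP client and a zero
/// timeout, so it fails immediately if another process holds the Cargo package lock
/// (illustrative only; error handling elided):
///
/// ```ignore
/// use std::time::Duration;
///
/// // `None` means a default `reqwest` client is used for sparse index requests.
/// let mut index = CachedIndex::fetch(None, Duration::from_secs(0))?;
/// ```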
pub fn fetch(client: Option<ClientBuilder>, lock_timeout: Duration) -> Result<Self, Error> {
Self::fetch_inner(client, lock_timeout).map_err(Error::from_tame)
}
fn fetch_inner(
client: Option<ClientBuilder>,
lock_timeout: Duration,
) -> Result<Self, tame_index::Error> {
let index = tame_index::index::ComboIndexCache::new(tame_index::IndexLocation::new(
tame_index::IndexUrl::crates_io(None, None, None)?,
))?;
let lock = acquire_cargo_package_lock(lock_timeout)?;
let index = match index {
tame_index::index::ComboIndexCache::Git(gi) => {
let mut rgi = tame_index::index::RemoteGitIndex::new(gi, &lock)?;
rgi.fetch(&lock)?;
Index::Git(rgi)
}
tame_index::index::ComboIndexCache::Sparse(si) => {
let client_builder = client.unwrap_or_default();
// note: this would need to change if rustsec ever adds the capability
// to query other indices that _might_ not support HTTP/2, but
// hopefully that would never need to happen
let client = client_builder.build().map_err(tame_index::Error::from)?;
Index::SparseRemote(tame_index::index::AsyncRemoteSparseIndex::new(si, client))
}
_ => panic!("Unsupported crates.io index type"),
};
Ok(CachedIndex {
index,
cache: Default::default(),
lock,
})
}
/// Open the local crates.io index
///
/// If this opens a git index, it allows reading of index entries from the repository.
///
/// If this is a sparse index, it only allows reading of index entries that are already cached locally.
///
/// ## Locking
///
/// This function will wait for up to `lock_timeout` for the filesystem lock on the repository.
/// It will fail with [`rustsec::Error::LockTimeout`](Error) if the lock is still held
/// after that time.
///
/// If `lock_timeout` is set to `std::time::Duration::from_secs(0)`, it will not wait at all,
/// and instead return an error immediately if it fails to acquire the lock.
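///
/// ## Example
///
/// A sketch of opening the index without any network access; with a sparse index,
/// only entries that are already cached locally will be visible (illustrative only):
///
/// ```ignore
/// use std::time::Duration;
///
/// let mut index = CachedIndex::open(Duration::from_secs(30))?;
/// ```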
pub fn open(lock_timeout: Duration) -> Result<Self, Error> {
Self::open_inner(lock_timeout).map_err(Error::from_tame)
}
fn open_inner(lock_timeout: Duration) -> Result<Self, tame_index::Error> {
let index = tame_index::index::ComboIndexCache::new(tame_index::IndexLocation::new(
tame_index::IndexUrl::crates_io(None, None, None)?,
))?;
let lock = acquire_cargo_package_lock(lock_timeout)?;
let index = match index {
tame_index::index::ComboIndexCache::Git(gi) => {
let rgi = tame_index::index::RemoteGitIndex::new(gi, &lock)?;
Index::Git(rgi)
}
tame_index::index::ComboIndexCache::Sparse(si) => Index::SparseCached(si),
_ => panic!("Unsupported crates.io index type"),
};
Ok(CachedIndex {
index,
cache: Default::default(),
lock,
})
}
/// Populates the cache entries for all of the specified crates.
fn populate_cache(&mut self, mut packages: BTreeSet<&package::Name>) -> Result<(), Error> {
// only look up info on packages that aren't yet cached
packages.retain(|pkg| !self.cache.contains_key(pkg));
match &self.index {
Index::Git(_) | Index::SparseCached(_) => {
for pkg in packages {
self.insert(pkg.to_owned(), self.index.krate(pkg, &self.lock));
}
}
Index::SparseRemote(rsi) => {
// Ensure we have a runtime
let rt = tame_index::external::tokio::runtime::Runtime::new().map_err(|err| {
format_err!(
ErrorKind::Registry,
"unable to start a tokio runtime: {}",
err
)
})?;
let _rt = rt.enter();
/// This is the timeout per individual crate. If a crate fails to be
/// requested for a retriable reason then it will be retried until
/// this time limit is reached
const REQUEST_TIMEOUT: Option<Duration> = Some(Duration::from_secs(10));
let results = rsi
.krates_blocking(
packages
.into_iter()
.map(|p| p.as_str().to_owned())
.collect(),
true,
REQUEST_TIMEOUT,
&self.lock,
)
.map_err(|err| {
format_err!(
ErrorKind::Registry,
"unable to acquire tokio runtime: {}",
err
)
})?;
for (name, res) in results {
self.insert(
name.parse().expect("this was a package name before"),
res.map_err(Error::from_tame),
);
}
}
}
Ok(())
}
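/// Converts a raw index lookup result into a per-version yanked map and stores it in the cache.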
#[inline]
fn insert(
&mut self,
package: package::Name,
krate_res: Result<Option<tame_index::IndexKrate>, Error>,
) {
let krate_res = krate_res.map(|ik| {
ik.map(|ik| {
ik.versions
.into_iter()
.map(|v| (v.version.to_string(), v.is_yanked()))
.collect()
})
});
self.cache.insert(package, krate_res);
}
/// Is the given package yanked?
fn is_yanked(&mut self, package: &Package) -> Result<bool, Error> {
if !self.cache.contains_key(&package.name) {
self.insert(
package.name.to_owned(),
self.index.krate(&package.name, &self.lock),
);
}
match &self.cache[&package.name] {
Ok(Some(ik)) => match ik.get(&package.version.to_string()) {
Some(is_yanked) => Ok(*is_yanked),
None => Err(format_err!(
ErrorKind::NotFound,
"No such version in crates.io index: {} {}",
&package.name,
&package.version
)),
},
Ok(None) => Err(format_err!(
ErrorKind::NotFound,
"No such crate in crates.io index: {}",
&package.name,
)),
Err(err) => Err(format_err!(
ErrorKind::Registry,
"Failed to retrieve {} from crates.io index: {}",
&package.name,
err,
)),
}
}
/// Iterate over the provided packages, returning a vector of the
/// packages which have been yanked.
///
/// Call this function with many packages at once rather than one by one:
/// when backed by a sparse index it downloads the status of a large batch of packages concurrently,
/// which is orders of magnitude faster than requesting them individually.
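///
/// ## Example
///
/// A sketch of batched usage, assuming `index` was built with [`CachedIndex::fetch`]
/// and `packages` is a `Vec<Package>` (e.g. the `packages` field of a parsed `Cargo.lock`):
///
/// ```ignore
/// let yanked: Vec<&Package> = index
///     .find_yanked(packages.iter())
///     .into_iter()
///     .filter_map(Result::ok)
///     .collect();
/// ```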
pub fn find_yanked<'a, I>(&mut self, packages: I) -> Vec<Result<&'a Package, Error>>
where
I: IntoIterator<Item = &'a Package>,
{
let mut yanked = Vec::new();
let dedup_packages: BTreeSet<&Package> = packages.into_iter().collect();
let package_names: BTreeSet<&package::Name> =
dedup_packages.iter().map(|p| &p.name).collect();
if let Err(e) = self.populate_cache(package_names) {
yanked.push(Err(Error::with_source(
ErrorKind::Registry,
"Failed to download crates.io index. \
Data may be missing or stale when checking for yanked packages."
.to_owned(),
e,
)));
}
for package in dedup_packages {
match self.is_yanked(package) {
Ok(false) => {} // not yanked, nothing to report
Ok(true) => yanked.push(Ok(package)),
Err(error) => yanked.push(Err(error)),
}
}
yanked
}
}
// We cannot expose these helpers publicly because that would leak `tame_index` types
// into our public API, tying our SemVer to `tame_index`'s.
fn acquire_cargo_package_lock(lock_timeout: Duration) -> Result<FileLock, tame_index::Error> {
let lock_opts = LockOptions::cargo_package_lock(None)?.exclusive(false);
acquire_lock(lock_opts, lock_timeout)
}
fn acquire_lock(
lock_opts: LockOptions<'_>,
lock_timeout: Duration,
) -> Result<FileLock, tame_index::Error> {
if lock_timeout == Duration::from_secs(0) {
lock_opts.try_lock()
} else {
lock_opts.lock(|_| Some(lock_timeout))
}
}