blob: 43d595e22e624ff5999854e8c51561dc8ec9c971 [file] [log] [blame]
use super::{cache::ValidCacheEntry, FileLock, IndexCache};
use crate::{Error, HttpError, IndexKrate, KrateName};
/// The default URL of the crates.io HTTP index
///
/// Note the value carries the `sparse+` prefix; [`SparseIndex::url`] strips
/// that prefix to yield the plain HTTP url.
pub const CRATES_IO_HTTP_INDEX: &str = "sparse+https://index.crates.io/";
/// Wrapper around managing a sparse HTTP index, re-using Cargo's local disk caches.
///
/// This implementation does no network I/O at all. If you want to make requests
/// to the remote index you may use the [`Self::make_remote_request`] and
/// [`Self::parse_remote_response`] methods, or you can enable the `sparse` feature
/// and use [`RemoteSparseIndex`](crate::index::RemoteSparseIndex) or
/// [`AsyncRemoteSparseIndex`](crate::index::AsyncRemoteSparseIndex)
pub struct SparseIndex {
/// The local index cache, created from the path of the index location
/// passed to [`Self::new`]
cache: IndexCache,
/// The url of the index as obtained from the index location. It may still
/// carry the `sparse+` prefix, use [`Self::url`] to get the plain HTTP url
url: String,
}
impl SparseIndex {
/// Creates a new sparse index for the specified location
///
/// # Errors
///
/// Fails with [`crate::InvalidUrlError::MissingSparse`] if the url of the
/// location is not a sparse url, or with whatever error splitting the
/// location into its cache path and url produces.
#[inline]
pub fn new(il: crate::index::IndexLocation<'_>) -> Result<Self, Error> {
    if il.url.is_sparse() {
        let (cache_root, url) = il.into_parts()?;
        let cache = IndexCache::at_path(cache_root);
        Ok(Self { cache, url })
    } else {
        // Only sparse urls are meaningful for this index type
        let invalid = crate::InvalidUrl {
            url: il.url.as_str().to_owned(),
            source: crate::InvalidUrlError::MissingSparse,
        };
        Err(invalid.into())
    }
}
/// Get the configuration of the index.
///
/// See the [cargo docs](https://doc.rust-lang.org/cargo/reference/registry-index.html#index-configuration)
pub fn index_config(&self) -> Result<super::IndexConfig, Error> {
let path = self.cache.path.join("config.json");
let bytes = std::fs::read(&path).map_err(|err| Error::IoPath(err, path))?;
Ok(serde_json::from_slice(&bytes)?)
}
/// Builds the URL from which the index entry for the specified crate can
/// be fetched
///
/// The result is the HTTP url of the index (see [`Self::url`]) immediately
/// followed by the `/` separated relative path of the crate.
///
/// The body of a successful response for the returned URL can be parsed
/// via [`IndexKrate::from_slice`]
///
/// See [`Self::make_remote_request`] for a way to make a complete request
#[inline]
pub fn crate_url(&self, name: KrateName<'_>) -> String {
    let entry_path = name.relative_path(Some('/'));
    let base = self.url();
    format!("{base}{entry_path}")
}
/// The HTTP url of the index
///
/// This is the stored url with a leading `sparse+` removed, or the stored
/// url unchanged if it has no such prefix.
#[inline]
pub fn url(&self) -> &str {
    match self.url.strip_prefix("sparse+") {
        Some(http_url) => http_url,
        None => self.url.as_str(),
    }
}
/// Gets the accessor to the local index cache
#[inline]
pub fn cache(&self) -> &IndexCache {
&self.cache
}
/// Attempts to read the locally cached crate information
///
/// This is a thin forward to the local index cache and never touches the
/// network. `Ok(None)` is whatever the cache reports when it has nothing
/// to return for `name`.
#[inline]
pub fn cached_krate(
    &self,
    name: KrateName<'_>,
    lock: &FileLock,
) -> Result<Option<IndexKrate>, Error> {
    let index_cache = &self.cache;
    // NOTE(review): the `None` presumably means "no particular revision
    // required" -- confirm against `IndexCache::cached_krate`
    index_cache.cached_krate(name, None, lock)
}
/// Creates an HTTP request that can be sent via your HTTP client of choice
/// to retrieve the current metadata for the specified crate
///
/// If specified, the etag is used instead of the possible etag stored in
/// a local cache entry, resulting in no disk I/O being performed by this
/// method
///
/// The request advertises both `gzip` and `identity` content encodings, so
/// the response body may be compressed.
///
/// See [`Self::parse_remote_response`] for processing the response from the
/// remote index
///
/// It is highly recommended to assume HTTP/2 when making requests to remote
/// indices, at least crates.io
///
/// # Errors
///
/// Fails if `etag` is provided but is not a valid HTTP header value. A
/// missing or unreadable local cache entry is not an error, the request is
/// simply sent without a cache validation header.
///
/// # Panics
///
/// Panics if the request builder is in an error state, which presumably
/// can only happen if the crate url is not a valid URI.
pub fn make_remote_request(
    &self,
    name: KrateName<'_>,
    etag: Option<&str>,
    lock: &FileLock,
) -> Result<http::Request<&'static [u8]>, Error> {
    use http::header;

    let url = self.crate_url(name);
    let mut req = http::Request::get(url);

    {
        let headers = req.headers_mut().unwrap();

        // AFAICT this does not affect responses at the moment, but could in
        // the future if there are changes to the protocol
        headers.insert(
            "cargo-protocol",
            header::HeaderValue::from_static("version=1"),
        );
        // All index entries are just files with lines of JSON
        headers.insert(
            header::ACCEPT,
            header::HeaderValue::from_static("text/plain"),
        );
        // We need to accept both identity and gzip, as otherwise cloudfront will
        // always respond to requests with strong etag's, which will differ from
        // cache entries generated by cargo
        headers.insert(
            header::ACCEPT_ENCODING,
            header::HeaderValue::from_static("gzip,identity"),
        );

        // If we have a local cache entry, include its version with the
        // appropriate header, this allows the server to respond with a
        // cached, or even better, empty response if its version matches
        // the local one making the request/response loop basically free

        // If we're unable to get the cache version we can just ignore setting the
        // header, guaranteeing we'll get the full index contents if the crate exists
        //
        // The closure returns `Option<()>` purely so that `?` can be used to
        // bail out early, the returned value itself carries no meaning
        let set_cache_version = |headers: &mut header::HeaderMap| -> Option<()> {
            let contents = self.cache.read_cache_file(name, lock).ok()??;
            let valid = ValidCacheEntry::read(&contents).ok()?;

            // The revision is stored as `<header name>: <value>`
            let (key, value) = valid.revision.split_once(':')?;
            let value = header::HeaderValue::from_str(value.trim()).ok()?;
            let name = if key == header::ETAG {
                header::IF_NONE_MATCH
            } else if key == header::LAST_MODIFIED {
                header::IF_MODIFIED_SINCE
            } else {
                // We could error here, but that's kind of pointless
                // since the response will be sent in full if we haven't
                // specified one of the above headers. Though it does
                // potentially indicate something weird is going on
                return None;
            };

            headers.insert(name, value);
            None
        };

        if let Some(etag) = etag {
            let hv =
                header::HeaderValue::from_str(etag.trim()).map_err(crate::HttpError::from)?;
            headers.insert(header::IF_NONE_MATCH, hv);
        } else {
            // Use the etag (or last modified, though crates.io does not use this AFAICT)
            // from the cache entry if it exists
            let _ = set_cache_version(headers);
        }
    }

    // Index requests never carry a body
    const EMPTY: &[u8] = &[];
    Ok(req.body(EMPTY).unwrap())
}
/// Process the response to a request created by [`Self::make_remote_request`]
///
/// The outcome depends on the status code of the response:
///
/// * `200 OK` - the body is parsed as the index entry and returned. If
///   `write_cache_entry` is true the entry is also written to the local
///   cache, a failure to do so is ignored.
/// * `304 Not Modified` - the local cache entry is up to date and is read
///   from disk instead.
/// * `404`, `410` and `451` - the crate does not exist, or has been
///   removed, `Ok(None)` is returned.
/// * `401 Unauthorized` and every other status code are errors.
///
/// Note that responses from sparse HTTP indices, at least crates.io, may
/// send responses with `gzip` compression, it is your responsibility to
/// decompress it before sending to this function
pub fn parse_remote_response(
    &self,
    name: KrateName<'_>,
    response: http::Response<Vec<u8>>,
    write_cache_entry: bool,
    lock: &FileLock,
) -> Result<Option<IndexKrate>, Error> {
    use http::{header, StatusCode};

    let (head, payload) = response.into_parts();

    match head.status {
        // Full contents of the index entry were sent
        StatusCode::OK => {
            let krate = IndexKrate::from_slice(&payload)?;

            if write_cache_entry {
                // Like cargo, the etag wins over last-modified. Note that if
                // the preferred header is present but not valid text we do
                // NOT fall back to the other one, the revision is "Unknown"
                let revision = head
                    .headers
                    .get(header::ETAG)
                    .map(|value| (header::ETAG, value))
                    .or_else(|| {
                        head.headers
                            .get(header::LAST_MODIFIED)
                            .map(|value| (header::LAST_MODIFIED, value))
                    })
                    .and_then(|(hname, hval)| {
                        hval.to_str().ok().map(|text| format!("{hname}: {text}"))
                    })
                    .unwrap_or_else(|| "Unknown".to_owned());

                // Failing to persist the entry is unfortunate, but not
                // fatal as the caller still gets the index metadata
                let _err = self.cache.write_to_cache(&krate, &revision, lock);
            }

            Ok(Some(krate))
        }
        // Nothing changed on the server, the local entry is current
        StatusCode::NOT_MODIFIED => self.cache.cached_krate(name, None, lock),
        // The crate does not exist, or has been removed
        StatusCode::NOT_FOUND
        | StatusCode::GONE
        | StatusCode::UNAVAILABLE_FOR_LEGAL_REASONS => Ok(None),
        // Authorization is required but was not provided by the user
        StatusCode::UNAUTHORIZED => {
            let unauthorized = HttpError::StatusCode {
                code: StatusCode::UNAUTHORIZED,
                msg: "the request was not authorized",
            };
            Err(unauthorized.into())
        }
        unexpected => {
            let invalid = HttpError::StatusCode {
                code: unexpected,
                msg: "the status code is invalid for this protocol",
            };
            Err(invalid.into())
        }
    }
}
}