blob: b21ce675b71ad29a9013406f17dde48ffdd466c0 [file] [log] [blame]
use super::{FileLock, GitIndex};
use crate::{Error, IndexKrate, KrateName};
use std::sync::atomic::AtomicBool;
/// Uses a "bare" git index that fetches files directly from the repo instead of
/// using a local checkout, the same as cargo itself.
///
/// Blobs are looked up in the tree of a single head commit, which is chosen
/// when the index is opened and again after every fetch.
///
/// Uses cargo's cache
pub struct RemoteGitIndex {
/// The wrapped local index, which supplies the cache location and the remote
/// URL, and which is told the current head commit
index: GitIndex,
/// The git repository that blobs are read from and fetched into
repo: gix::Repository,
/// The commit whose tree is consulted when reading blobs
head_commit: gix::ObjectId,
}
/// The remote direction used for every remote operation in this file (URL
/// lookup, refspecs, and connections): fetching
const DIR: gix::remote::Direction = gix::remote::Direction::Fetch;
impl RemoteGitIndex {
/// Creates a new [`Self`] that can access and write local cache entries,
/// and contact the remote index to retrieve the latest index information
///
/// This is [`Self::with_options`] with progress reporting discarded and
/// interruption controlled by [`gix::interrupt::IS_INTERRUPTED`].
///
/// Note that if a repository does not exist at the local disk path of the
/// provided [`GitIndex`], a full clone will be performed.
#[inline]
pub fn new(index: GitIndex, lock: &FileLock) -> Result<Self, Error> {
    let progress = gix::progress::Discard;
    let should_interrupt = &gix::interrupt::IS_INTERRUPTED;

    Self::with_options(index, progress, should_interrupt, lock)
}
/// Breaks [`Self`] into its component parts
///
/// This method is useful if you need thread safe access to the repository
#[inline]
pub fn into_parts(self) -> (GitIndex, gix::Repository) {
(self.index, self.repo)
}
/// Creates a new [`Self`] that allows showing the progress of the potential
/// fetch if the disk location is empty, as well as allowing interruption
/// of the fetch operation.
///
/// An existing repository at the index's cache path is reused if it can be
/// opened and its `origin` remote (if it has one) points at the index URL,
/// otherwise a bare clone fetching only `HEAD` is performed.
///
/// `_lock` is not read by this method.
// NOTE(review): presumably `_lock` exists so callers must hold the cache lock
// while the repository is opened or cloned; confirm against `FileLock`.
///
/// # Errors
///
/// Returns an error if the clone fails, if `FETCH_HEAD` cannot be written
/// after a clone, if the `origin` remote of a freshly cloned repository
/// cannot be found, or if no remote head commit can be located.
pub fn with_options<P>(
    mut index: GitIndex,
    progress: P,
    should_interrupt: &AtomicBool,
    _lock: &FileLock,
) -> Result<Self, Error>
where
    P: gix::NestedProgress,
    P::SubProgress: 'static,
{
    let open_or_clone_repo = || -> Result<_, GitError> {
        let mut mapping = gix::sec::trust::Mapping::default();
        let open_with_complete_config =
            gix::open::Options::default().permissions(gix::open::Permissions {
                config: gix::open::permissions::Config {
                    // Be sure to get all configuration, some of which is only known by the git binary.
                    // That way we are sure to see all the systems credential helpers
                    git_binary: true,
                    ..Default::default()
                },
                ..Default::default()
            });

        // Use the same options regardless of the trust level of the repository
        mapping.reduced = open_with_complete_config.clone();
        mapping.full = open_with_complete_config.clone();

        // Attempt to open the repository, if it fails for any reason,
        // attempt to perform a fresh clone instead
        let repo = gix::ThreadSafeRepository::discover_opts(
            &index.cache.path,
            gix::discover::upwards::Options::default().apply_environment(),
            mapping,
        )
        .ok()
        .map(|repo| repo.to_thread_local())
        .filter(|repo| {
            // The `cargo` standard registry clone has no configured origin (when created with `git2`).
            repo.find_remote("origin").map_or(true, |remote| {
                remote
                    .url(DIR)
                    .map_or(false, |remote_url| remote_url.to_bstring() == index.url)
            })
        })
        .or_else(|| gix::open_opts(&index.cache.path, open_with_complete_config).ok());

        let res = if let Some(repo) = repo {
            (repo, None)
        } else {
            // We need to create the directory chain ourselves, gix will fail
            // if any parent directory is missing
            if !index.cache.path.exists() {
                std::fs::create_dir_all(&index.cache.path).map_err(|source| {
                    GitError::ClonePrep(Box::new(gix::clone::Error::Init(
                        gix::init::Error::Init(gix::create::Error::CreateDirectory {
                            source,
                            path: index.cache.path.clone().into(),
                        }),
                    )))
                })?;
            }

            let (repo, out) = gix::prepare_clone_bare(index.url.as_str(), &index.cache.path)
                .map_err(Box::new)?
                .with_remote_name("origin")
                .map_err(Box::new)?
                .configure_remote(|remote| {
                    Ok(remote.with_refspecs(["+HEAD:refs/remotes/origin/HEAD"], DIR)?)
                })
                .fetch_only(progress, should_interrupt)
                .map_err(|err| GitError::from(Box::new(err)))?;

            (repo, Some(out))
        };

        Ok(res)
    };

    let (mut repo, fetch_outcome) = open_or_clone_repo()?;

    // A fetch outcome is only present if we just cloned, in which case
    // FETCH_HEAD is written so it reflects what was fetched
    if let Some(fetch_outcome) = fetch_outcome {
        // The clone was configured with the remote name "origin", so this
        // lookup is expected to succeed, but report a failure as an error
        // rather than panicking in library code
        let origin = repo
            .find_remote("origin")
            .map_err(|err| GitError::RemoteLookup(Box::new(err)))?;

        crate::utils::git::write_fetch_head(&repo, &fetch_outcome, &origin)?;
    }

    repo.object_cache_size_if_unset(4 * 1024 * 1024);

    let head_commit = Self::set_head(&mut index, &repo)?;

    Ok(Self {
        repo,
        index,
        head_commit,
    })
}
/// Gets the local index
#[inline]
pub fn local(&self) -> &GitIndex {
&self.index
}
/// Get the configuration of the index.
///
/// See the [cargo docs](https://doc.rust-lang.org/cargo/reference/registry-index.html#index-configuration)
pub fn index_config(&self) -> Result<super::IndexConfig, Error> {
let blob = self.read_blob("config.json")?.ok_or_else(|| {
Error::Io(std::io::Error::new(
std::io::ErrorKind::NotFound,
"unable to find config.json",
))
})?;
Ok(serde_json::from_slice(&blob.data)?)
}
/// Sets the head commit in the wrapped index so that cache entries can be
/// properly filtered, and returns that commit.
///
/// The head is the most recent commit (by commit time) that any of a fixed
/// list of candidate references resolves to. Candidates that can't be
/// resolved to a commit are ignored.
///
/// # Errors
///
/// Fails with [`GitError::UnableToFindRemoteHead`] if none of the candidate
/// references resolve to a commit.
#[inline]
fn set_head(index: &mut GitIndex, repo: &gix::Repository) -> Result<gix::ObjectId, Error> {
    // Listed in order of preference, earlier entries win ties
    const CANDIDATE_REFS: &[&str] = &[
        "FETCH_HEAD", /* the location with the most-recent updates, as written by git2 */
        "origin/HEAD", /* typical refspecs update this symbolic ref to point to the actual remote ref with the fetched commit */
        "origin/master", /* for good measure, resolve this branch by hand in case origin/HEAD is broken */
        "HEAD",
    ];

    // Pick the newest commit, and since we are on second resolution, prefer
    // the ordering of candidates if times are equal (a lower position compares
    // as greater, so it wins).
    //
    // This allows FETCH_HEAD to be authoritative, unless one of the other
    // references is more up to date, which can occur in (at least) 2 scenarios:
    //
    // 1. The repo is a fresh clone by cargo either via git or libgit2,
    //    neither of which write FETCH_HEAD during clone
    // 2. A fetch was performed by an external crate/program to cargo or
    //    ourselves that didn't update FETCH_HEAD
    let newest = CANDIDATE_REFS
        .iter()
        .enumerate()
        .filter_map(|(position, refname)| {
            let ref_id = repo
                .find_reference(*refname)
                .ok()?
                .into_fully_peeled_id()
                .ok()?;

            let commit = ref_id.object().ok()?.try_into_commit().ok()?;
            let commit_time = commit.time().ok()?.seconds;

            Some((position, commit.id, commit_time))
        })
        // Positions are unique, so no two candidates ever compare as equal
        .max_by(|a, b| a.2.cmp(&b.2).then_with(|| b.0.cmp(&a.0)));

    let (_, head, _) = newest.ok_or(GitError::UnableToFindRemoteHead)?;

    let gix::ObjectId::Sha1(sha1) = head;
    index.set_head_commit(Some(sha1));

    Ok(gix::ObjectId::Sha1(sha1))
}
/// Attempts to read the specified crate's index metadata
///
/// The cache entry for the crate is consulted first. If it is missing,
/// invalid, or can't be read for any reason, the metadata is instead read
/// from the git blob it is stored in, and `Ok(None)` is returned if there
/// is no such blob.
///
/// If `write_cache_entry` is true and the metadata came from git, a cache
/// entry versioned by the blob id is written on a best-effort basis.
///
/// This method does no network I/O
pub fn krate(
    &self,
    name: KrateName<'_>,
    write_cache_entry: bool,
    lock: &FileLock,
) -> Result<Option<IndexKrate>, Error> {
    // Errors reading the cache are deliberately treated the same as a miss
    if let Ok(Some(hit)) = self.cached_krate(name, lock) {
        return Ok(Some(hit));
    }

    let blob = match self.read_blob(&name.relative_path(None))? {
        Some(blob) => blob,
        None => return Ok(None),
    };

    let krate = IndexKrate::from_slice(&blob.data)?;

    if !write_cache_entry {
        return Ok(Some(krate));
    }

    let gix::ObjectId::Sha1(sha1) = blob.id;
    let mut hex_id = [0u8; 40];
    let blob_id = crate::utils::encode_hex(&sha1, &mut hex_id);

    // It's unfortunate if we fail to write to the cache, but we were still
    // able to retrieve the contents from git, so the failure is ignored
    let _ = self.index.write_to_cache(&krate, Some(blob_id), lock);

    Ok(Some(krate))
}
/// Looks up `path` in the tree of the current head commit and returns the
/// object stored there, detached from the repository.
///
/// Returns `Ok(None)` if the tree has no entry at `path`, or if the entry
/// is not a blob.
fn read_blob(&self, path: &str) -> Result<Option<gix::ObjectDetached>, GitError> {
    let head = self
        .repo
        .find_object(self.head_commit)
        .map_err(Box::new)?;
    let tree = head.try_into_commit()?.tree()?;

    let mut lookup_buf = Vec::new();
    let entry = match tree.lookup_entry_by_path(path, &mut lookup_buf) {
        Ok(Some(entry)) => entry,
        Ok(None) => return Ok(None),
        Err(err) => return Err(GitError::BlobLookup(Box::new(err))),
    };

    let object = match entry.object() {
        Ok(object) => object,
        Err(err) => return Err(GitError::BlobLookup(Box::new(err))),
    };

    // Sanity check this is a blob, it _shouldn't_ be possible to get anything
    // else (like a subtree), but better safe than sorry
    let is_blob = object.kind == gix::object::Kind::Blob;
    Ok(is_blob.then(|| object.detach()))
}
/// Attempts to read the locally cached crate information
///
/// Note this method has improvements over using [`GitIndex::cached_krate`].
///
/// In older versions of cargo, only the head commit hash is used as the version
/// for cached crates, which means a fetch invalidates _all_ cached crates,
/// even if they have not been modified in any commits since the previous
/// fetch.
///
/// This method does the same thing as cargo, which is to allow _either_
/// the head commit oid _or_ the blob oid as a version, which is more
/// granular and means the cached crate can remain valid as long as it is
/// not updated in a subsequent fetch. [`GitIndex::cached_krate`] cannot take
/// advantage of that though as it does not have access to git and thus
/// cannot know the blob id.
///
/// Returns `Ok(None)` if there is no cache file, if the crate has no blob
/// in the head commit, or if the cached revision matches neither the head
/// commit nor the blob id.
#[inline]
pub fn cached_krate(
    &self,
    name: KrateName<'_>,
    lock: &FileLock,
) -> Result<Option<IndexKrate>, Error> {
    let cached = match self.index.cache.read_cache_file(name, lock)? {
        Some(contents) => contents,
        None => return Ok(None),
    };

    let valid = crate::index::cache::ValidCacheEntry::read(&cached)?;

    let matches_head = self.index.head_commit() == Some(valid.revision);

    if !matches_head {
        // The entry wasn't written for the current head, but it is still
        // usable if it was written for the crate's current blob
        let blob = match self.read_blob(&name.relative_path(None))? {
            Some(blob) => blob,
            None => return Ok(None),
        };

        let gix::ObjectId::Sha1(sha1) = blob.id;
        let mut hex_id = [0u8; 40];
        let blob_id = crate::utils::encode_hex(&sha1, &mut hex_id);

        if valid.revision != blob_id {
            return Ok(None);
        }
    }

    valid.to_krate(None)
}
/// Performs a fetch from the remote index repository.
///
/// This is [`Self::fetch_with_options`] with progress reporting discarded
/// and interruption controlled by [`gix::interrupt::IS_INTERRUPTED`].
///
/// This method performs network I/O.
#[inline]
pub fn fetch(&mut self, lock: &FileLock) -> Result<(), Error> {
    let progress = gix::progress::Discard;
    let should_interrupt = &gix::interrupt::IS_INTERRUPTED;

    self.fetch_with_options(progress, should_interrupt, lock)
}
/// Same as [`Self::fetch`] but allows specifying a progress implementation
/// and allows interruption of the network operations
pub fn fetch_with_options<P>(
&mut self,
mut progress: P,
should_interrupt: &AtomicBool,
_lock: &FileLock,
) -> Result<(), Error>
where
P: gix::NestedProgress,
P::SubProgress: 'static,
{
// We're updating the reflog which requires a committer be set, which might
// not be the case, particular in a CI environment, but also would default
// the the git config for the current directory/global, which on a normal
// user machine would show the user was the one who updated the database which
// is kind of misleading, so we just override the config for this operation
let mut config = self.repo.config_snapshot_mut();
config
.set_raw_value("committer", None, "name", "tame-index")
.map_err(GitError::from)?;
// Note we _have_ to set the email as well, but luckily gix does not actually
// validate if it's a proper email or not :)
config
.set_raw_value("committer", None, "email", "")
.map_err(GitError::from)?;
let repo = config
.commit_auto_rollback()
.map_err(|err| GitError::from(Box::new(err)))?;
let mut remote = repo.find_remote("origin").ok().unwrap_or_else(|| {
repo.remote_at(self.index.url.as_str())
.expect("owned URL is always valid")
});
remote
.replace_refspecs(Some("+HEAD:refs/remotes/origin/HEAD"), DIR)
.expect("valid statically known refspec");
// Perform the actual fetch
let outcome = remote
.connect(DIR)
.map_err(|err| GitError::from(Box::new(err)))?
.prepare_fetch(&mut progress, Default::default())
.map_err(|err| GitError::from(Box::new(err)))?
.receive(&mut progress, should_interrupt)
.map_err(|err| GitError::from(Box::new(err)))?;
crate::utils::git::write_fetch_head(&repo, &outcome, &remote)?;
self.head_commit = Self::set_head(&mut self.index, &repo)?;
Ok(())
}
}
/// Errors that can occur during a git operation
///
/// Every variant except the last two transparently wraps an error from `gix`,
/// so its message and source are those of the wrapped error.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum GitError {
/// Preparing a clone failed, including failing to create the directory to clone into
#[error(transparent)]
ClonePrep(#[from] Box<gix::clone::Error>),
/// The fetch performed as part of a clone failed
#[error(transparent)]
CloneFetch(#[from] Box<gix::clone::fetch::Error>),
/// Connecting to the remote failed
#[error(transparent)]
Connect(#[from] Box<gix::remote::connect::Error>),
/// Preparing a fetch from the remote failed
#[error(transparent)]
FetchPrep(#[from] Box<gix::remote::fetch::prepare::Error>),
/// Receiving a fetch from the remote failed
#[error(transparent)]
Fetch(#[from] Box<gix::remote::fetch::Error>),
/// Wraps a `gix` error from opening a repository
#[error(transparent)]
Open(#[from] Box<gix::open::Error>),
/// Reading data from a commit object, such as its tree, failed
#[error(transparent)]
Commit(#[from] gix::object::commit::Error),
/// An object was not of the expected kind, eg. not a commit
#[error(transparent)]
InvalidObject(#[from] gix::object::try_into::Error),
/// Wraps a `gix` error from looking up a reference
#[error(transparent)]
ReferenceLookup(#[from] Box<gix::reference::find::existing::Error>),
/// An object, such as the head commit or a tree entry, could not be found or read
#[error(transparent)]
BlobLookup(#[from] Box<gix::object::find::existing::Error>),
/// Wraps a `gix` error from looking up a remote
#[error(transparent)]
RemoteLookup(#[from] Box<gix::remote::find::existing::Error>),
/// Wraps a `gix` error from acquiring a lock
#[error(transparent)]
Lock(#[from] gix::lock::acquire::Error),
/// The name given to a remote was not valid
#[error(transparent)]
RemoteName(#[from] Box<gix::remote::name::Error>),
/// Applying configuration overrides to the repository failed
#[error(transparent)]
Config(#[from] Box<gix::config::Error>),
/// Setting a raw configuration value failed
#[error(transparent)]
ConfigValue(#[from] gix::config::file::set_raw_value::Error),
/// None of the candidate references resolved to a commit
#[error("unable to locate remote HEAD")]
UnableToFindRemoteHead,
/// HEAD could not be updated to the remote HEAD
#[error("unable to update HEAD to remote HEAD")]
UnableToUpdateHead,
}
impl GitError {
/// Returns true if the error is a (potentially) spurious network error that
/// indicates a retry of the operation could succeed
#[inline]
pub fn is_spurious(&self) -> bool {
use gix::protocol::transport::IsSpuriousError;
match self {
Self::Fetch(fe) => return fe.is_spurious(),
Self::CloneFetch(cf) => {
if let gix::clone::fetch::Error::Fetch(fe) = &**cf {
return fe.is_spurious();
}
}
_ => {}
}
false
}
/// Returns true if a fetch could not be completed successfully due to the
/// repo being locked, and could succeed if retried
#[inline]
pub fn is_locked(&self) -> bool {
let ure = match self {
Self::Fetch(fe) => {
if let gix::remote::fetch::Error::UpdateRefs(ure) = &**fe {
ure
} else {
return false;
}
}
Self::CloneFetch(cf) => {
if let gix::clone::fetch::Error::Fetch(gix::remote::fetch::Error::UpdateRefs(ure)) =
&**cf
{
ure
} else {
return false;
}
}
Self::Lock(le) => {
return !matches!(le, gix::lock::acquire::Error::PermanentlyLocked { .. })
}
_ => return false,
};
if let gix::remote::fetch::refs::update::Error::EditReferences(ere) = ure {
match ere {
gix::reference::edit::Error::FileTransactionPrepare(ftpe) => {
use gix::refs::file::transaction::prepare::Error as PrepError;
if let PrepError::LockAcquire { source, .. }
| PrepError::PackedTransactionAcquire(source) = ftpe
{
// currently this is either io or permanentlylocked, but just in case
// more variants are added, we just assume it's possible to retry
// in anything but the permanentlylocked variant
!matches!(source, gix::lock::acquire::Error::PermanentlyLocked { .. })
} else {
false
}
}
gix::reference::edit::Error::FileTransactionCommit(ftce) => {
matches!(
ftce,
gix::refs::file::transaction::commit::Error::LockCommit { .. }
)
}
_ => false,
}
} else {
false
}
}
}