blob: 7714f5686173127a936f2affac9411647efb4efc [file] [log] [blame]
//! Contains helpers for deduplicating dependencies and features for a single
//! crate during parsing as each individual version of a crate tends to be mostly
//! the same
//!
//! Copied from <https://github.com/frewsxcv/rust-crates-index/blob/master/src/dedupe.rs>
use crate::krate::IndexDependency;
use std::{
hash::{BuildHasherDefault, Hash, Hasher},
sync::Arc,
};
type XxSet<K> = std::collections::HashSet<K, BuildHasherDefault<twox_hash::XxHash64>>;
use super::FeatureMap;
/// Many crate versions have the same features and dependencies
#[derive(Default)]
pub(crate) struct DedupeContext {
features: XxSet<HashedFeatureMap>,
deps: XxSet<Arc<[IndexDependency]>>,
}
impl DedupeContext {
pub(crate) fn features(&mut self, features: &mut Arc<FeatureMap>) {
let features_to_dedupe = HashedFeatureMap::new(Arc::clone(features));
if let Some(has_feats) = self.features.get(&features_to_dedupe) {
*features = Arc::clone(&has_feats.map);
} else {
// keeps peak memory low (must clear, remove is leaving tombstones)
if self.features.len() > 16 * 1024 {
self.features.clear();
}
self.features.insert(features_to_dedupe);
}
}
pub(crate) fn deps(&mut self, deps: &mut Arc<[IndexDependency]>) {
if let Some(has_deps) = self.deps.get(&*deps) {
*deps = Arc::clone(has_deps);
} else {
// keeps peak memory low (must clear, remove is leaving tombstones)
if self.deps.len() > 16 * 1024 {
self.deps.clear();
}
self.deps.insert(Arc::clone(deps));
}
}
}
/// Newtype that caches hash of the hashmap (the default hashmap has a random order of the keys, so it's not cheap to hash)
#[derive(PartialEq, Eq)]
pub struct HashedFeatureMap {
pub map: Arc<FeatureMap>,
hash: u64,
}
impl Hash for HashedFeatureMap {
#[inline]
fn hash<H>(&self, hasher: &mut H)
where
H: Hasher,
{
hasher.write_u64(self.hash);
}
}
impl HashedFeatureMap {
#[inline]
pub(crate) fn new(map: Arc<FeatureMap>) -> Self {
let mut hash = 0;
for (k, v) in map.iter() {
let mut hasher = twox_hash::XxHash64::default();
k.hash(&mut hasher);
v.hash(&mut hasher);
hash ^= hasher.finish(); // XOR makes it order-independent
}
Self { map, hash }
}
}