bootstrap/utils/
cache.rs

1//! This module helps you efficiently store and retrieve values using interning.
2//!
3//! Interning is a neat trick that keeps only one copy of identical values, saving memory
4//! and making comparisons super fast. Here, we provide the `Interned<T>` struct and the `Internable` trait
5//! to make interning easy for different data types.
6//!
7//! The `Interner` struct handles caching for common types like `String`, `PathBuf`, and `Vec<String>`,
8//! while the `Cache` struct acts as a write-once storage for linking computation steps with their results.
9//!
10//! # Thread Safety
11//!
12//! We use `Mutex` to make sure interning and retrieval are thread-safe. But keep in mind—once a value is
13//! interned, it sticks around for the entire lifetime of the program.
14
15use std::any::{Any, TypeId};
16use std::borrow::Borrow;
17use std::cell::RefCell;
18use std::cmp::Ordering;
19use std::collections::HashMap;
20use std::hash::{Hash, Hasher};
21use std::marker::PhantomData;
22use std::ops::Deref;
23use std::sync::{LazyLock, Mutex};
24use std::{fmt, mem};
25
26use crate::core::builder::Step;
27
28/// Represents an interned value of type `T`, allowing for efficient comparisons and retrieval.
29///
30/// This struct stores a unique index referencing the interned value within an internal cache.
31pub struct Interned<T>(usize, PhantomData<*const T>);
32
33impl<T: Internable + Default> Default for Interned<T> {
34    fn default() -> Self {
35        T::default().intern()
36    }
37}
38
39impl<T> Copy for Interned<T> {}
40impl<T> Clone for Interned<T> {
41    fn clone(&self) -> Interned<T> {
42        *self
43    }
44}
45
46impl<T> PartialEq for Interned<T> {
47    fn eq(&self, other: &Self) -> bool {
48        self.0 == other.0
49    }
50}
51impl<T> Eq for Interned<T> {}
52
53impl PartialEq<&str> for Interned<String> {
54    fn eq(&self, other: &&str) -> bool {
55        **self == **other
56    }
57}
58
59unsafe impl<T> Send for Interned<T> {}
60unsafe impl<T> Sync for Interned<T> {}
61
62impl fmt::Display for Interned<String> {
63    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
64        let s: &str = self;
65        f.write_str(s)
66    }
67}
68
69impl<T, U: ?Sized + fmt::Debug> fmt::Debug for Interned<T>
70where
71    Self: Deref<Target = U>,
72{
73    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
74        let s: &U = self;
75        f.write_fmt(format_args!("{s:?}"))
76    }
77}
78
79impl<T: Internable + Hash> Hash for Interned<T> {
80    fn hash<H: Hasher>(&self, state: &mut H) {
81        let l = T::intern_cache().lock().unwrap();
82        l.get(*self).hash(state)
83    }
84}
85
86impl<T: Internable + Deref> Deref for Interned<T> {
87    type Target = T::Target;
88    fn deref(&self) -> &Self::Target {
89        let l = T::intern_cache().lock().unwrap();
90        unsafe { mem::transmute::<&Self::Target, &Self::Target>(l.get(*self)) }
91    }
92}
93
94impl<T: Internable + AsRef<U>, U: ?Sized> AsRef<U> for Interned<T> {
95    fn as_ref(&self) -> &U {
96        let l = T::intern_cache().lock().unwrap();
97        unsafe { mem::transmute::<&U, &U>(l.get(*self).as_ref()) }
98    }
99}
100
101impl<T: Internable + PartialOrd> PartialOrd for Interned<T> {
102    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
103        let l = T::intern_cache().lock().unwrap();
104        l.get(*self).partial_cmp(l.get(*other))
105    }
106}
107
108impl<T: Internable + Ord> Ord for Interned<T> {
109    fn cmp(&self, other: &Self) -> Ordering {
110        let l = T::intern_cache().lock().unwrap();
111        l.get(*self).cmp(l.get(*other))
112    }
113}
114
115/// A structure for managing the interning of values of type `T`.
116///
117/// `TyIntern<T>` maintains a mapping between values and their interned representations,
118/// ensuring that duplicate values are not stored multiple times.
119struct TyIntern<T: Clone + Eq> {
120    items: Vec<T>,
121    set: HashMap<T, Interned<T>>,
122}
123
124impl<T: Hash + Clone + Eq> Default for TyIntern<T> {
125    fn default() -> Self {
126        TyIntern { items: Vec::new(), set: Default::default() }
127    }
128}
129
130impl<T: Hash + Clone + Eq> TyIntern<T> {
131    /// Interns a borrowed value, ensuring it is stored uniquely.
132    ///
133    /// If the value has been previously interned, the same `Interned<T>` instance is returned.
134    fn intern_borrow<B>(&mut self, item: &B) -> Interned<T>
135    where
136        B: Eq + Hash + ToOwned<Owned = T> + ?Sized,
137        T: Borrow<B>,
138    {
139        if let Some(i) = self.set.get(item) {
140            return *i;
141        }
142        let item = item.to_owned();
143        let interned = Interned(self.items.len(), PhantomData::<*const T>);
144        self.set.insert(item.clone(), interned);
145        self.items.push(item);
146        interned
147    }
148
149    /// Interns an owned value, storing it uniquely.
150    ///
151    /// If the value has been previously interned, the existing `Interned<T>` is returned.
152    fn intern(&mut self, item: T) -> Interned<T> {
153        if let Some(i) = self.set.get(&item) {
154            return *i;
155        }
156        let interned = Interned(self.items.len(), PhantomData::<*const T>);
157        self.set.insert(item.clone(), interned);
158        self.items.push(item);
159        interned
160    }
161
162    /// Retrieves a reference to the interned value associated with the given `Interned<T>` instance.
163    fn get(&self, i: Interned<T>) -> &T {
164        &self.items[i.0]
165    }
166}
167
168/// A global interner for managing interned values of common types.
169///
170/// This structure maintains caches for `String`, `PathBuf`, and `Vec<String>`, ensuring efficient storage
171/// and retrieval of frequently used values.
172#[derive(Default)]
173pub struct Interner {
174    strs: Mutex<TyIntern<String>>,
175}
176
177/// Defines the behavior required for a type to be internable.
178///
179/// Types implementing this trait must provide access to a static cache and define an `intern` method
180/// that ensures values are stored uniquely.
181trait Internable: Clone + Eq + Hash + 'static {
182    fn intern_cache() -> &'static Mutex<TyIntern<Self>>;
183
184    fn intern(self) -> Interned<Self> {
185        Self::intern_cache().lock().unwrap().intern(self)
186    }
187}
188
189impl Internable for String {
190    fn intern_cache() -> &'static Mutex<TyIntern<Self>> {
191        &INTERNER.strs
192    }
193}
194
195impl Interner {
196    /// Interns a string reference, ensuring it is stored uniquely.
197    ///
198    /// If the string has been previously interned, the same `Interned<String>` instance is returned.
199    pub fn intern_str(&self, s: &str) -> Interned<String> {
200        self.strs.lock().unwrap().intern_borrow(s)
201    }
202}
203
204/// A global instance of `Interner` that caches common interned values.
205pub static INTERNER: LazyLock<Interner> = LazyLock::new(Interner::default);
206
207/// This is essentially a `HashMap` which allows storing any type in its input and
208/// any type in its output. It is a write-once cache; values are never evicted,
209/// which means that references to the value can safely be returned from the
210/// `get()` method.
211#[derive(Debug)]
212pub struct Cache(
213    RefCell<
214        HashMap<
215            TypeId,
216            Box<dyn Any>, // actually a HashMap<Step, Interned<Step::Output>>
217        >,
218    >,
219);
220
221impl Cache {
222    /// Creates a new empty cache.
223    pub fn new() -> Cache {
224        Cache(RefCell::new(HashMap::new()))
225    }
226
227    /// Stores the result of a computation step in the cache.
228    pub fn put<S: Step>(&self, step: S, value: S::Output) {
229        let mut cache = self.0.borrow_mut();
230        let type_id = TypeId::of::<S>();
231        let stepcache = cache
232            .entry(type_id)
233            .or_insert_with(|| Box::<HashMap<S, S::Output>>::default())
234            .downcast_mut::<HashMap<S, S::Output>>()
235            .expect("invalid type mapped");
236        assert!(!stepcache.contains_key(&step), "processing {step:?} a second time");
237        stepcache.insert(step, value);
238    }
239
240    /// Retrieves a cached result for the given step, if available.
241    pub fn get<S: Step>(&self, step: &S) -> Option<S::Output> {
242        let mut cache = self.0.borrow_mut();
243        let type_id = TypeId::of::<S>();
244        let stepcache = cache
245            .entry(type_id)
246            .or_insert_with(|| Box::<HashMap<S, S::Output>>::default())
247            .downcast_mut::<HashMap<S, S::Output>>()
248            .expect("invalid type mapped");
249        stepcache.get(step).cloned()
250    }
251}
252
253#[cfg(test)]
254impl Cache {
255    pub fn all<S: Ord + Clone + Step>(&mut self) -> Vec<(S, S::Output)> {
256        let cache = self.0.get_mut();
257        let type_id = TypeId::of::<S>();
258        let mut v = cache
259            .remove(&type_id)
260            .map(|b| b.downcast::<HashMap<S, S::Output>>().expect("correct type"))
261            .map(|m| m.into_iter().collect::<Vec<_>>())
262            .unwrap_or_default();
263        v.sort_by_key(|(s, _)| s.clone());
264        v
265    }
266
267    pub fn contains<S: Step>(&self) -> bool {
268        self.0.borrow().contains_key(&TypeId::of::<S>())
269    }
270}
271
272#[cfg(test)]
273mod tests;