miri/
range_map.rs

1//! Implements a map from integer indices to data.
2//! Rather than storing data for every index, internally, this maps entire ranges to the data.
3//! To this end, the APIs all work on ranges, not on individual integers. Ranges are split as
4//! necessary (e.g., when [0,5) is first associated with X, and then [1,2) is mutated).
5//! Users must not depend on whether a range is coalesced or not, even though this is observable
6//! via the iteration APIs.
7
8use std::ops;
9
10use rustc_abi::Size;
11
12#[derive(Clone, Debug)]
13struct Elem<T> {
14    /// The range covered by this element; never empty.
15    range: ops::Range<u64>,
16    /// The data stored for this element.
17    data: T,
18}
19#[derive(Clone, Debug)]
20pub struct RangeMap<T> {
21    v: Vec<Elem<T>>,
22}
23
24impl<T> RangeMap<T> {
25    /// Creates a new `RangeMap` for the given size, and with the given initial value used for
26    /// the entire range.
27    #[inline(always)]
28    pub fn new(size: Size, init: T) -> RangeMap<T> {
29        let size = size.bytes();
30        let v = if size > 0 { vec![Elem { range: 0..size, data: init }] } else { Vec::new() };
31        RangeMap { v }
32    }
33
34    pub fn size(&self) -> Size {
35        let size = self.v.last().map(|x| x.range.end).unwrap_or(0);
36        Size::from_bytes(size)
37    }
38
39    /// Finds the index containing the given offset.
40    fn find_offset(&self, offset: u64) -> usize {
41        self.v
42            .binary_search_by(|elem| -> std::cmp::Ordering {
43                if offset < elem.range.start {
44                    // We are too far right (offset is further left).
45                    // (`Greater` means that `elem` is greater than the desired target.)
46                    std::cmp::Ordering::Greater
47                } else if offset >= elem.range.end {
48                    // We are too far left (offset is further right).
49                    std::cmp::Ordering::Less
50                } else {
51                    // This is it!
52                    std::cmp::Ordering::Equal
53                }
54            })
55            .unwrap()
56    }
57
58    /// Provides read-only iteration over everything in the given range. This does
59    /// *not* split items if they overlap with the edges. Do not use this to mutate
60    /// through interior mutability.
61    ///
62    /// The iterator also provides the range of the given element.
63    /// How exactly the ranges are split can differ even for otherwise identical
64    /// maps, so user-visible behavior should never depend on the exact range.
65    pub fn iter(&self, offset: Size, len: Size) -> impl Iterator<Item = (ops::Range<u64>, &T)> {
66        let offset = offset.bytes();
67        let len = len.bytes();
68        // Compute a slice starting with the elements we care about.
69        let slice: &[Elem<T>] = if len == 0 {
70            // We just need any empty iterator. We don't even want to
71            // yield the element that surrounds this position.
72            &[]
73        } else {
74            let first_idx = self.find_offset(offset);
75            &self.v[first_idx..]
76        };
77        // The first offset that is not included any more.
78        let end = offset + len;
79        assert!(end <= self.size().bytes(), "iterating beyond the bounds of this RangeMap");
80        slice
81            .iter()
82            .take_while(move |elem| elem.range.start < end)
83            .map(|elem| (elem.range.clone(), &elem.data))
84    }
85
86    /// Provides mutable iteration over all elements.
87    /// The iterator also provides the range of the given element.
88    /// How exactly the ranges are split can differ even for otherwise identical
89    /// maps, so user-visible behavior should never depend on the exact range.
90    pub fn iter_mut_all(&mut self) -> impl Iterator<Item = (ops::Range<u64>, &mut T)> {
91        self.v.iter_mut().map(|elem| (elem.range.clone(), &mut elem.data))
92    }
93
94    /// Provides iteration over all elements.
95    /// The iterator also provides the range of the given element.
96    /// How exactly the ranges are split can differ even for otherwise identical
97    /// maps, so user-visible behavior should never depend on the exact range.
98    pub fn iter_all(&self) -> impl Iterator<Item = (ops::Range<u64>, &T)> {
99        self.v.iter().map(|elem| (elem.range.clone(), &elem.data))
100    }
101
102    // Splits the element situated at the given `index`, such that the 2nd one starts at offset
103    // `split_offset`. Do nothing if the element already starts there.
104    // Returns whether a split was necessary.
105    fn split_index(&mut self, index: usize, split_offset: u64) -> bool
106    where
107        T: Clone,
108    {
109        let elem = &mut self.v[index];
110        if split_offset == elem.range.start || split_offset == elem.range.end {
111            // Nothing to do.
112            return false;
113        }
114        debug_assert!(
115            elem.range.contains(&split_offset),
116            "the `split_offset` is not in the element to be split"
117        );
118
119        // Now we really have to split. Reduce length of first element.
120        let second_range = split_offset..elem.range.end;
121        elem.range.end = split_offset;
122        // Copy the data, and insert second element.
123        let second = Elem { range: second_range, data: elem.data.clone() };
124        self.v.insert(index + 1, second);
125        true
126    }
127
128    /// Provides mutable iteration over everything in the given range. As a side-effect,
129    /// this will split entries in the map that are only partially hit by the given range,
130    /// to make sure that when they are mutated, the effect is constrained to the given range.
131    /// Moreover, this will opportunistically merge neighbouring equal blocks.
132    ///
133    /// The iterator also provides the range of the given element.
134    /// How exactly the ranges are split (both prior to and resulting from the execution of this
135    /// function) can differ even for otherwise identical maps,
136    /// so user-visible behavior should never depend on the exact range.
137    pub fn iter_mut(
138        &mut self,
139        offset: Size,
140        len: Size,
141    ) -> impl Iterator<Item = (ops::Range<u64>, &mut T)>
142    where
143        T: Clone + PartialEq,
144    {
145        let offset = offset.bytes();
146        let len = len.bytes();
147        // Compute a slice containing exactly the elements we care about
148        let slice: &mut [Elem<T>] = if len == 0 {
149            // We just need any empty iterator. We don't even want to
150            // yield the element that surrounds this position, nor do
151            // any splitting.
152            &mut []
153        } else {
154            // Make sure we got a clear beginning
155            let mut first_idx = self.find_offset(offset);
156            if self.split_index(first_idx, offset) {
157                // The newly created 2nd element is ours
158                first_idx += 1;
159            }
160            // No more mutation.
161            let first_idx = first_idx;
162            // Find our end. Linear scan, but that's ok because the iteration
163            // is doing the same linear scan anyway -- no increase in complexity.
164            // We combine this scan with a scan for duplicates that we can merge, to reduce
165            // the number of elements.
166            // We stop searching after the first "block" of size 1, to avoid spending excessive
167            // amounts of time on the merging.
168            let mut equal_since_idx = first_idx;
169            // Once we see too many non-mergeable blocks, we stop.
170            // The initial value is chosen via... magic. Benchmarking and magic.
171            let mut successful_merge_count = 3usize;
172            // When the loop is done, this is the first excluded element.
173            let mut end_idx = first_idx;
174            loop {
175                // Compute if `end` is the last element we need to look at.
176                let done = self.v[end_idx].range.end >= offset + len;
177                // We definitely need to include `end`, so move the index.
178                end_idx += 1;
179                debug_assert!(
180                    done || end_idx < self.v.len(),
181                    "iter_mut: end-offset {} is out-of-bounds",
182                    offset + len
183                );
184                // see if we want to merge everything in `equal_since..end` (exclusive at the end!)
185                if successful_merge_count > 0 {
186                    if done || self.v[end_idx].data != self.v[equal_since_idx].data {
187                        // Everything in `equal_since..end` was equal. Make them just one element covering
188                        // the entire range.
189                        let removed_elems = end_idx - equal_since_idx - 1; // number of elements that we would remove
190                        if removed_elems > 0 {
191                            // Adjust the range of the first element to cover all of them.
192                            let equal_until = self.v[end_idx - 1].range.end; // end of range of last of the equal elements
193                            self.v[equal_since_idx].range.end = equal_until;
194                            // Delete the rest of them.
195                            self.v.splice(equal_since_idx + 1..end_idx, std::iter::empty());
196                            // Adjust `end_idx` because we made the list shorter.
197                            end_idx -= removed_elems;
198                            // Adjust the count for the cutoff.
199                            successful_merge_count += removed_elems;
200                        } else {
201                            // Adjust the count for the cutoff.
202                            successful_merge_count -= 1;
203                        }
204                        // Go on scanning for the next block starting here.
205                        equal_since_idx = end_idx;
206                    }
207                }
208                // Leave loop if this is the last element.
209                if done {
210                    break;
211                }
212            }
213            // Move to last included instead of first excluded index.
214            let end_idx = end_idx - 1;
215            // We need to split the end as well. Even if this performs a
216            // split, we don't have to adjust our index as we only care about
217            // the first part of the split.
218            self.split_index(end_idx, offset + len);
219            // Now we yield the slice. `end` is inclusive.
220            &mut self.v[first_idx..=end_idx]
221        };
222        slice.iter_mut().map(|elem| (elem.range.clone(), &mut elem.data))
223    }
224
225    /// Remove all adjacent duplicates
226    pub fn merge_adjacent_thorough(&mut self)
227    where
228        T: PartialEq,
229    {
230        let clean = Vec::with_capacity(self.v.len());
231        for elem in std::mem::replace(&mut self.v, clean) {
232            if let Some(prev) = self.v.last_mut() {
233                if prev.data == elem.data {
234                    assert_eq!(prev.range.end, elem.range.start);
235                    prev.range.end = elem.range.end;
236                    continue;
237                }
238            }
239            self.v.push(elem);
240        }
241    }
242}
243
244#[cfg(test)]
245mod tests {
246    use super::*;
247
248    /// Query the map at every offset in the range and collect the results.
249    fn to_vec<T: Copy>(map: &RangeMap<T>, offset: u64, len: u64) -> Vec<T> {
250        (offset..offset + len)
251            .map(|i| {
252                map.iter(Size::from_bytes(i), Size::from_bytes(1)).next().map(|(_, &t)| t).unwrap()
253            })
254            .collect()
255    }
256
257    #[test]
258    fn basic_insert() {
259        let mut map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
260        // Insert.
261        for (_, x) in map.iter_mut(Size::from_bytes(10), Size::from_bytes(1)) {
262            *x = 42;
263        }
264        // Check.
265        assert_eq!(to_vec(&map, 10, 1), vec![42]);
266        assert_eq!(map.v.len(), 3);
267
268        // Insert with size 0.
269        for (_, x) in map.iter_mut(Size::from_bytes(10), Size::from_bytes(0)) {
270            *x = 19;
271        }
272        for (_, x) in map.iter_mut(Size::from_bytes(11), Size::from_bytes(0)) {
273            *x = 19;
274        }
275        assert_eq!(to_vec(&map, 10, 2), vec![42, -1]);
276        assert_eq!(map.v.len(), 3);
277    }
278
279    #[test]
280    fn gaps() {
281        let mut map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
282        for (_, x) in map.iter_mut(Size::from_bytes(11), Size::from_bytes(1)) {
283            *x = 42;
284        }
285        for (_, x) in map.iter_mut(Size::from_bytes(15), Size::from_bytes(1)) {
286            *x = 43;
287        }
288        assert_eq!(map.v.len(), 5);
289        assert_eq!(to_vec(&map, 10, 10), vec![-1, 42, -1, -1, -1, 43, -1, -1, -1, -1]);
290
291        for (_, x) in map.iter_mut(Size::from_bytes(10), Size::from_bytes(10)) {
292            if *x < 42 {
293                *x = 23;
294            }
295        }
296        assert_eq!(map.v.len(), 6);
297        assert_eq!(to_vec(&map, 10, 10), vec![23, 42, 23, 23, 23, 43, 23, 23, 23, 23]);
298        assert_eq!(to_vec(&map, 13, 5), vec![23, 23, 43, 23, 23]);
299
300        for (_, x) in map.iter_mut(Size::from_bytes(15), Size::from_bytes(5)) {
301            *x = 19;
302        }
303        assert_eq!(map.v.len(), 6);
304        assert_eq!(to_vec(&map, 10, 10), vec![23, 42, 23, 23, 23, 19, 19, 19, 19, 19]);
305        // Should be seeing two blocks with 19.
306        assert_eq!(
307            map.iter(Size::from_bytes(15), Size::from_bytes(2))
308                .map(|(_, &t)| t)
309                .collect::<Vec<_>>(),
310            vec![19, 19]
311        );
312
313        // A NOP `iter_mut` should trigger merging.
314        for _ in map.iter_mut(Size::from_bytes(15), Size::from_bytes(5)) {}
315        assert_eq!(map.v.len(), 5);
316        assert_eq!(to_vec(&map, 10, 10), vec![23, 42, 23, 23, 23, 19, 19, 19, 19, 19]);
317    }
318
319    #[test]
320    #[should_panic]
321    fn out_of_range_iter_mut() {
322        let mut map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
323        let _ = map.iter_mut(Size::from_bytes(11), Size::from_bytes(11));
324    }
325
326    #[test]
327    #[should_panic]
328    fn out_of_range_iter() {
329        let map = RangeMap::<i32>::new(Size::from_bytes(20), -1);
330        let _ = map.iter(Size::from_bytes(11), Size::from_bytes(11));
331    }
332
333    #[test]
334    fn empty_map_iter() {
335        let map = RangeMap::<i32>::new(Size::from_bytes(0), -1);
336        let _ = map.iter(Size::from_bytes(0), Size::from_bytes(0));
337    }
338
339    #[test]
340    fn empty_map_iter_mut() {
341        let mut map = RangeMap::<i32>::new(Size::from_bytes(0), -1);
342        let _ = map.iter_mut(Size::from_bytes(0), Size::from_bytes(0));
343    }
344}