rustc_middle/mir/interpret/allocation/
provenance_map.rs

1//! Store the provenance for each byte in the range, with a more efficient
2//! representation for the common case where PTR_SIZE consecutive bytes have the same provenance.
3
4use std::cmp;
5use std::ops::Range;
6
7use rustc_abi::{HasDataLayout, Size};
8use rustc_data_structures::sorted_map::SortedMap;
9use rustc_macros::HashStable;
10use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
11use tracing::trace;
12
13use super::{AllocRange, CtfeProvenance, Provenance, alloc_range};
14use crate::mir::interpret::{AllocError, AllocResult};
15
/// Stores the provenance information of pointers stored in memory.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
#[derive(HashStable)]
pub struct ProvenanceMap<Prov = CtfeProvenance> {
    /// `Provenance` in this map applies from the given offset for an entire pointer-size worth of
    /// bytes. Two entries in this map are always at least a pointer size apart.
    ptrs: SortedMap<Size, Prov>,
    /// This stores byte-sized provenance fragments.
    /// The `u8` indicates the position of this byte inside its original pointer.
    /// If the bytes are re-assembled in their original order, the pointer can be used again.
    /// Wildcard provenance is allowed to have index 0 everywhere.
    ///
    /// `None` means "no bytewise provenance"; this is the common case and keeps the struct
    /// small. Interned allocations never have bytewise provenance (see `encode`/`ptrs`).
    bytes: Option<Box<SortedMap<Size, (Prov, u8)>>>,
}
29
30// These impls are generic over `Prov` since `CtfeProvenance` is only decodable/encodable
31// for some particular `D`/`S`.
32impl<D: Decoder, Prov: Provenance + Decodable<D>> Decodable<D> for ProvenanceMap<Prov> {
33    fn decode(d: &mut D) -> Self {
34        // `bytes` is not in the serialized format
35        Self { ptrs: Decodable::decode(d), bytes: None }
36    }
37}
38impl<S: Encoder, Prov: Provenance + Encodable<S>> Encodable<S> for ProvenanceMap<Prov> {
39    fn encode(&self, s: &mut S) {
40        let Self { ptrs, bytes } = self;
41        assert!(bytes.is_none()); // interning refuses allocations with pointer fragments
42        ptrs.encode(s)
43    }
44}
45
46impl<Prov> ProvenanceMap<Prov> {
47    pub fn new() -> Self {
48        ProvenanceMap { ptrs: SortedMap::new(), bytes: None }
49    }
50
51    /// The caller must guarantee that the given provenance list is already sorted
52    /// by address and contain no duplicates.
53    pub fn from_presorted_ptrs(r: Vec<(Size, Prov)>) -> Self {
54        ProvenanceMap { ptrs: SortedMap::from_presorted_elements(r), bytes: None }
55    }
56}
57
impl ProvenanceMap {
    /// Give access to the ptr-sized provenances (which can also be thought of as relocations, and
    /// indeed that is how codegen treats them).
    ///
    /// Only use on interned allocations, as other allocations may have per-byte provenance!
    #[inline]
    pub fn ptrs(&self) -> &SortedMap<Size, CtfeProvenance> {
        // Interning refuses allocations with pointer fragments (see `encode`), so if `bytes`
        // is still `Some` this allocation was not interned and the returned map would be
        // missing the bytewise part.
        assert!(self.bytes.is_none(), "`ptrs()` called on non-interned allocation");
        &self.ptrs
    }
}
69
impl<Prov: Provenance> ProvenanceMap<Prov> {
    /// Computes the key range in `ptrs` whose (pointer-sized) entries could overlap the given
    /// byte range: any pointer starting up to `pointer_size - 1` bytes before `range.start`
    /// still reaches into the range.
    fn adjusted_range_ptrs(range: AllocRange, cx: &impl HasDataLayout) -> Range<Size> {
        // We have to go back `pointer_size - 1` bytes, as that one would still overlap with
        // the beginning of this range.
        let adjusted_start = Size::from_bytes(
            range.start.bytes().saturating_sub(cx.data_layout().pointer_size().bytes() - 1),
        );
        adjusted_start..range.end()
    }

    /// Returns all ptr-sized provenance in the given range.
    /// If the range has length 0, returns provenance that crosses the edge between `start-1` and
    /// `start`.
    pub(super) fn range_ptrs_get(
        &self,
        range: AllocRange,
        cx: &impl HasDataLayout,
    ) -> &[(Size, Prov)] {
        self.ptrs.range(Self::adjusted_range_ptrs(range, cx))
    }

    /// `pm.range_ptrs_is_empty(r, cx)` == `pm.range_ptrs_get(r, cx).is_empty()`, but is faster.
    fn range_ptrs_is_empty(&self, range: AllocRange, cx: &impl HasDataLayout) -> bool {
        self.ptrs.range_is_empty(Self::adjusted_range_ptrs(range, cx))
    }

    /// Returns all byte-wise provenance in the given range.
    /// Unlike `range_ptrs_get`, no start adjustment is needed: each entry covers one byte.
    fn range_bytes_get(&self, range: AllocRange) -> &[(Size, (Prov, u8))] {
        if let Some(bytes) = self.bytes.as_ref() {
            bytes.range(range.start..range.end())
        } else {
            &[]
        }
    }

    /// Same as `range_bytes_get(range).is_empty()`, but faster.
    fn range_bytes_is_empty(&self, range: AllocRange) -> bool {
        self.bytes.as_ref().is_none_or(|bytes| bytes.range_is_empty(range.start..range.end()))
    }

    /// Get the provenance of a single byte.
    /// Returns the provenance together with the fragment index of this byte inside its
    /// original pointer (matching the `u8` stored in `bytes`).
    pub fn get_byte(&self, offset: Size, cx: &impl HasDataLayout) -> Option<(Prov, u8)> {
        let prov = self.range_ptrs_get(alloc_range(offset, Size::from_bytes(1)), cx);
        // At most one ptr-sized entry can cover a single byte, since entries in `ptrs` are
        // at least a pointer size apart.
        debug_assert!(prov.len() <= 1);
        if let Some(entry) = prov.first() {
            // If it overlaps with this byte, it is on this byte.
            // A byte cannot have both ptr-sized and bytewise provenance at once.
            debug_assert!(self.bytes.as_ref().is_none_or(|b| !b.contains_key(&offset)));
            Some((entry.1, (offset - entry.0).bytes() as u8))
        } else {
            // Look up per-byte provenance.
            self.bytes.as_ref().and_then(|b| b.get(&offset).copied())
        }
    }

    /// Gets the provenances of all bytes (including from pointers) in a range.
    pub fn get_range(
        &self,
        cx: &impl HasDataLayout,
        range: AllocRange,
    ) -> impl Iterator<Item = Prov> {
        let ptr_provs = self.range_ptrs_get(range, cx).iter().map(|(_, p)| *p);
        let byte_provs = self.range_bytes_get(range).iter().map(|(_, (p, _))| *p);
        ptr_provs.chain(byte_provs)
    }

    /// Attempt to merge per-byte provenance back into ptr chunks, if the right fragments
    /// sit next to each other. Return `false` if that is not possible due to partial pointers.
    /// On success (`true`), `self.bytes` is `None` afterwards.
    pub fn merge_bytes(&mut self, cx: &impl HasDataLayout) -> bool {
        let Some(bytes) = self.bytes.as_deref_mut() else {
            // No bytewise provenance at all; nothing to merge.
            return true;
        };
        if !Prov::OFFSET_IS_ADDR {
            // FIXME(#146291): We need to ensure that we don't mix different pointers with
            // the same provenance.
            return false;
        }
        let ptr_size = cx.data_layout().pointer_size();
        // Each iteration either consumes `ptr_size` fragments or bails out, so this terminates.
        while let Some((offset, (prov, _))) = bytes.iter().next().copied() {
            // Check if this fragment starts a pointer.
            let range = offset..offset + ptr_size;
            let frags = bytes.range(range.clone());
            if frags.len() != ptr_size.bytes_usize() {
                return false;
            }
            // All fragments must share the provenance and appear in original order (0, 1, ...).
            for (idx, (_offset, (frag_prov, frag_idx))) in frags.iter().copied().enumerate() {
                if frag_prov != prov || frag_idx != idx as u8 {
                    return false;
                }
            }
            // Looks like a pointer! Move it over to the ptr provenance map.
            bytes.remove_range(range);
            self.ptrs.insert(offset, prov);
        }
        // We managed to convert everything into whole pointers.
        self.bytes = None;
        true
    }

    /// Check if there is ptr-sized provenance at the given index.
    /// Does not mean anything for bytewise provenance! But can be useful as an optimization.
    pub fn get_ptr(&self, offset: Size) -> Option<Prov> {
        self.ptrs.get(&offset).copied()
    }

    /// Returns whether this allocation has no provenance overlapping with the given range.
    ///
    /// Note: this function exists to allow `range_get_provenance` to be private, in order to somewhat
    /// limit access to provenance outside of the `Allocation` abstraction.
    pub fn range_empty(&self, range: AllocRange, cx: &impl HasDataLayout) -> bool {
        self.range_ptrs_is_empty(range, cx) && self.range_bytes_is_empty(range)
    }

    /// Yields all the provenances stored in this map.
    pub fn provenances(&self) -> impl Iterator<Item = Prov> {
        let bytes = self.bytes.iter().flat_map(|b| b.values().map(|(p, _i)| p));
        self.ptrs.values().chain(bytes).copied()
    }

    /// Inserts ptr-sized provenance at the given offset.
    /// The pointer-sized range starting at `offset` must be free of provenance.
    pub fn insert_ptr(&mut self, offset: Size, prov: Prov, cx: &impl HasDataLayout) {
        debug_assert!(self.range_empty(alloc_range(offset, cx.data_layout().pointer_size()), cx));
        self.ptrs.insert(offset, prov);
    }

    /// Removes all provenance inside the given range.
    /// If ptr-sized provenance overlaps with the edges of the range, the out-of-range parts
    /// of those pointers are preserved as bytewise provenance fragments.
    pub fn clear(&mut self, range: AllocRange, cx: &impl HasDataLayout) {
        let start = range.start;
        let end = range.end();
        // Clear the bytewise part -- this is easy.
        if let Some(bytes) = self.bytes.as_mut() {
            bytes.remove_range(start..end);
        }

        let pointer_size = cx.data_layout().pointer_size();

        // For the ptr-sized part, find the first (inclusive) and last (exclusive) byte of
        // provenance that overlaps with the given range.
        let (first, last) = {
            // Find all provenance overlapping the given range.
            if self.range_ptrs_is_empty(range, cx) {
                // No provenance in this range, we are done. This is the common case.
                return;
            }

            // This redoes some of the work of `range_get_ptrs_is_empty`, but this path is much
            // colder than the early return above, so it's worth it.
            let provenance = self.range_ptrs_get(range, cx);
            (provenance.first().unwrap().0, provenance.last().unwrap().0 + pointer_size)
        };

        // We need to handle clearing the provenance from parts of a pointer.
        if first < start {
            // A pointer dangles in from before the range: keep its prefix bytes
            // (those in `first..start`) as bytewise fragments.
            let prov = self.ptrs[&first];
            let bytes = self.bytes.get_or_insert_with(Box::default);
            for offset in first..start {
                bytes.insert(offset, (prov, (offset - first).bytes() as u8));
            }
        }
        if last > end {
            // A pointer dangles out past the range: keep its suffix bytes
            // (those in `end..last`) as bytewise fragments.
            let begin_of_last = last - pointer_size;
            let prov = self.ptrs[&begin_of_last];
            let bytes = self.bytes.get_or_insert_with(Box::default);
            for offset in end..last {
                bytes.insert(offset, (prov, (offset - begin_of_last).bytes() as u8));
            }
        }

        // Forget all the provenance.
        // Since provenance do not overlap, we know that removing until `last` (exclusive) is fine,
        // i.e., this will not remove any other provenance just after the ones we care about.
        self.ptrs.remove_range(first..last);
    }

    /// Overwrites all provenance in the given range with wildcard provenance.
    /// Pointers partially overwritten will have their provenances preserved
    /// bytewise on their remaining bytes.
    ///
    /// Provided for usage in Miri and panics otherwise (when `Prov::WILDCARD` is `None`).
    pub fn write_wildcards(&mut self, cx: &impl HasDataLayout, range: AllocRange) {
        let wildcard = Prov::WILDCARD.unwrap();

        let bytes = self.bytes.get_or_insert_with(Box::default);

        // Remove pointer provenances that overlap with the range, then readd the edge ones bytewise.
        let ptr_range = Self::adjusted_range_ptrs(range, cx);
        let ptrs = self.ptrs.range(ptr_range.clone());
        // First overlapping pointer: preserve its bytes before `range.start`.
        if let Some((offset, prov)) = ptrs.first().copied() {
            for byte_ofs in offset..range.start {
                bytes.insert(byte_ofs, (prov, (byte_ofs - offset).bytes() as u8));
            }
        }
        // Last overlapping pointer: preserve its bytes past `range.end()`.
        // (This may be the same pointer as above; the loop is then empty or disjoint.)
        if let Some((offset, prov)) = ptrs.last().copied() {
            for byte_ofs in range.end()..offset + cx.data_layout().pointer_size() {
                bytes.insert(byte_ofs, (prov, (byte_ofs - offset).bytes() as u8));
            }
        }
        self.ptrs.remove_range(ptr_range);

        // Overwrite bytewise provenance.
        for offset in range.start..range.end() {
            // The fragment index does not matter for wildcard provenance.
            bytes.insert(offset, (wildcard, 0));
        }
    }
}
278
/// A partial, owned list of provenance to transfer into another allocation.
///
/// Offsets are already adjusted to the destination allocation.
/// Produced by `ProvenanceMap::prepare_copy` and consumed by `ProvenanceMap::apply_copy`.
pub struct ProvenanceCopy<Prov> {
    /// Ptr-sized provenance entries, sorted by destination offset. `None` if the copied
    /// range contained no whole pointers.
    dest_ptrs: Option<Box<[(Size, Prov)]>>,
    /// Bytewise provenance fragments, sorted by destination offset. `None` if nothing
    /// bytewise (including pointer edges) needed copying.
    dest_bytes: Option<Box<[(Size, (Prov, u8))]>>,
}
286
impl<Prov: Provenance> ProvenanceMap<Prov> {
    /// Gathers the provenance of the `src` range of this map, repeated `count` times, with
    /// offsets shifted so they are relative to `dest`. The result can be applied to another
    /// map (or this one) via `apply_copy`.
    ///
    /// Errors with `ReadPartialPointer` if partial-pointer provenance would have to be copied
    /// but `Prov::OFFSET_IS_ADDR` is false.
    pub fn prepare_copy(
        &self,
        src: AllocRange,
        dest: Size,
        count: u64,
        cx: &impl HasDataLayout,
    ) -> AllocResult<ProvenanceCopy<Prov>> {
        // Maps a source offset to the destination offset of repetition `idx`.
        let shift_offset = move |idx, offset| {
            // compute offset for current repetition
            let dest_offset = dest + src.size * idx; // `Size` operations
            // shift offsets from source allocation to destination allocation
            (offset - src.start) + dest_offset // `Size` operations
        };
        let ptr_size = cx.data_layout().pointer_size();

        // # Pointer-sized provenances
        // Get the provenances that are entirely within this range.
        // (Different from `range_get_ptrs` which asks if they overlap the range.)
        // Only makes sense if we are copying at least one pointer worth of bytes.
        let mut dest_ptrs_box = None;
        if src.size >= ptr_size {
            // A pointer fits entirely iff it starts no later than `end - ptr_size`.
            let adjusted_end = Size::from_bytes(src.end().bytes() - (ptr_size.bytes() - 1));
            let ptrs = self.ptrs.range(src.start..adjusted_end);
            // If `count` is large, this is rather wasteful -- we are allocating a big array here, which
            // is mostly filled with redundant information since it's just N copies of the same `Prov`s
            // at slightly adjusted offsets. The reason we do this is so that in `mark_provenance_range`
            // we can use `insert_presorted`. That wouldn't work with an `Iterator` that just produces
            // the right sequence of provenance for all N copies.
            // Basically, this large array would have to be created anyway in the target allocation.
            let mut dest_ptrs = Vec::with_capacity(ptrs.len() * (count as usize));
            for i in 0..count {
                dest_ptrs
                    .extend(ptrs.iter().map(|&(offset, reloc)| (shift_offset(i, offset), reloc)));
            }
            debug_assert_eq!(dest_ptrs.len(), dest_ptrs.capacity());
            dest_ptrs_box = Some(dest_ptrs.into_boxed_slice());
        };

        // # Byte-sized provenances
        // This includes the existing bytewise provenance in the range, and ptr provenance
        // that overlaps with the begin/end of the range.
        let mut dest_bytes_box = None;
        // Length-0 ranges: these probe for a pointer crossing the respective edge of `src`.
        let begin_overlap = self.range_ptrs_get(alloc_range(src.start, Size::ZERO), cx).first();
        let end_overlap = self.range_ptrs_get(alloc_range(src.end(), Size::ZERO), cx).first();
        // We only need to go here if there is some overlap or some bytewise provenance.
        if begin_overlap.is_some() || end_overlap.is_some() || self.bytes.is_some() {
            // Built in ascending offset order so `insert_presorted` works in `apply_copy`.
            let mut bytes: Vec<(Size, (Prov, u8))> = Vec::new();
            // First, if there is a part of a pointer at the start, add that.
            if let Some(entry) = begin_overlap {
                trace!("start overlapping entry: {entry:?}");
                // For really small copies, make sure we don't run off the end of the `src` range.
                let entry_end = cmp::min(entry.0 + ptr_size, src.end());
                for offset in src.start..entry_end {
                    bytes.push((offset, (entry.1, (offset - entry.0).bytes() as u8)));
                }
            } else {
                trace!("no start overlapping entry");
            }

            // Then the main part, bytewise provenance from `self.bytes`.
            bytes.extend(self.range_bytes_get(src));

            // And finally possibly parts of a pointer at the end.
            if let Some(entry) = end_overlap {
                trace!("end overlapping entry: {entry:?}");
                // For really small copies, make sure we don't start before `src` does.
                let entry_start = cmp::max(entry.0, src.start);
                for offset in entry_start..src.end() {
                    if bytes.last().is_none_or(|bytes_entry| bytes_entry.0 < offset) {
                        // The last entry, if it exists, has a lower offset than us, so we
                        // can add it at the end and remain sorted.
                        bytes.push((offset, (entry.1, (offset - entry.0).bytes() as u8)));
                    } else {
                        // There already is an entry for this offset in there! This can happen when the
                        // start and end range checks actually end up hitting the same pointer, so we
                        // already added this in the "pointer at the start" part above.
                        assert!(entry.0 <= src.start);
                    }
                }
            } else {
                trace!("no end overlapping entry");
            }
            trace!("byte provenances: {bytes:?}");

            if !bytes.is_empty() && !Prov::OFFSET_IS_ADDR {
                // FIXME(#146291): We need to ensure that we don't mix different pointers with
                // the same provenance.
                return Err(AllocError::ReadPartialPointer(src.start));
            }

            // And again a buffer for the new list on the target side.
            let mut dest_bytes = Vec::with_capacity(bytes.len() * (count as usize));
            for i in 0..count {
                dest_bytes
                    .extend(bytes.iter().map(|&(offset, reloc)| (shift_offset(i, offset), reloc)));
            }
            debug_assert_eq!(dest_bytes.len(), dest_bytes.capacity());
            dest_bytes_box = Some(dest_bytes.into_boxed_slice());
        }

        Ok(ProvenanceCopy { dest_ptrs: dest_ptrs_box, dest_bytes: dest_bytes_box })
    }

    /// Applies a provenance copy.
    /// The affected range, as defined in the parameters to `prepare_copy` is expected
    /// to be clear of provenance.
    pub fn apply_copy(&mut self, copy: ProvenanceCopy<Prov>) {
        if let Some(dest_ptrs) = copy.dest_ptrs {
            self.ptrs.insert_presorted(dest_ptrs.into());
        }
        if let Some(dest_bytes) = copy.dest_bytes
            && !dest_bytes.is_empty()
        {
            self.bytes.get_or_insert_with(Box::default).insert_presorted(dest_bytes.into());
        }
    }
}